1#ifndef ONIGURUMA_REGINT_H 2#define ONIGURUMA_REGINT_H 3/********************************************************************** 4 regint.h - Onigmo (Oniguruma-mod) (regular expression library) 5**********************************************************************/ 6/*- 7 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 8 * Copyright (c) 2011-2012 K.Takata <kentkt AT csc DOT jp> 9 * All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33/* for debug */ 34/* #define ONIG_DEBUG_PARSE_TREE */ 35/* #define ONIG_DEBUG_COMPILE */ 36/* #define ONIG_DEBUG_SEARCH */ 37/* #define ONIG_DEBUG_MATCH */ 38/* #define ONIG_DONT_OPTIMIZE */ 39 40/* for byte-code statistical data. */ 41/* #define ONIG_DEBUG_STATISTICS */ 42 43#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \ 44 defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \ 45 defined(ONIG_DEBUG_STATISTICS) 46#ifndef ONIG_DEBUG 47#define ONIG_DEBUG 48#endif 49#endif 50 51#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ 52 defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD86) || \ 53 defined(__mc68020__) 54#define PLATFORM_UNALIGNED_WORD_ACCESS 55#endif 56 57/* config */ 58/* spec. config */ 59#define USE_NAMED_GROUP 60#define USE_SUBEXP_CALL 61#define USE_PERL_SUBEXP_CALL 62#define USE_CAPITAL_P_NAMED_GROUP 63#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */ 64#define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */ 65#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ 66#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR 67/* #define USE_RECOMPILE_API */ 68/* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */ 69#define USE_NO_INVALID_QUANTIFIER 70 71/* internal config */ 72#define USE_PARSE_TREE_NODE_RECYCLE 73#define USE_OP_PUSH_OR_JUMP_EXACT 74#define USE_QTFR_PEEK_NEXT 75#define USE_ST_LIBRARY 76#define USE_SHARED_CCLASS_TABLE 77#define USE_SUNDAY_QUICK_SEARCH 78 79#define INIT_MATCH_STACK_SIZE 160 80#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ 81 82/* check config */ 83#if defined(USE_PERL_SUBEXP_CALL) || defined(USE_CAPITAL_P_NAMED_GROUP) 84#if !defined(USE_NAMED_GROUP) || !defined(USE_SUBEXP_CALL) 85#error USE_NAMED_GROUP and USE_SUBEXP_CALL must be defined. 86#endif 87#endif 88 89#if defined(__GNUC__) 90# define ARG_UNUSED __attribute__ ((unused)) 91#else 92# define ARG_UNUSED 93#endif 94 95/* */ 96/* escape other system UChar definition */ 97#ifndef RUBY_DEFINES_H 98#include "ruby/ruby.h" 99#undef xmalloc 100#undef xrealloc 101#undef xcalloc 102#undef xfree 103#endif 104#ifdef ONIG_ESCAPE_UCHAR_COLLISION 105#undef ONIG_ESCAPE_UCHAR_COLLISION 106#endif 107#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */ 108#undef USE_MATCH_RANGE_IS_COMPLETE_RANGE 109#undef USE_CAPTURE_HISTORY 110#define USE_VARIABLE_META_CHARS 111#define USE_POSIX_API_REGION_OPTION /* needed for POSIX API support */ 112#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 113/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */ 114 115/* #define USE_MULTI_THREAD_SYSTEM */ 116#define THREAD_SYSTEM_INIT /* depend on thread system */ 117#define THREAD_SYSTEM_END /* depend on thread system */ 118#define THREAD_ATOMIC_START /* depend on thread system */ 119#define THREAD_ATOMIC_END /* depend on thread system */ 120#define THREAD_PASS /* depend on thread system */ 121#ifndef xmalloc 122#define xmalloc malloc 123#define xrealloc realloc 124#define xcalloc calloc 125#define xfree free 126#endif 127 128#ifdef RUBY 129 130#define CHECK_INTERRUPT_IN_MATCH_AT rb_thread_check_ints() 131#define onig_st_init_table st_init_table 132#define onig_st_init_table_with_size st_init_table_with_size 133#define onig_st_init_numtable st_init_numtable 134#define onig_st_init_numtable_with_size st_init_numtable_with_size 135#define onig_st_init_strtable st_init_strtable 136#define onig_st_init_strtable_with_size st_init_strtable_with_size 137#define onig_st_delete st_delete 138#define onig_st_delete_safe st_delete_safe 139#define onig_st_insert st_insert 140#define onig_st_lookup st_lookup 141#define onig_st_foreach st_foreach 142#define onig_st_add_direct st_add_direct 143#define onig_st_free_table st_free_table 144#define onig_st_cleanup_safe st_cleanup_safe 145#define onig_st_copy st_copy 146#define onig_st_nothing_key_clone st_nothing_key_clone 147#define onig_st_nothing_key_free st_nothing_key_free 148#define onig_st_is_member st_is_member 149 150#define USE_UPPER_CASE_TABLE 151#else 152 153#define st_init_table onig_st_init_table 154#define st_init_table_with_size onig_st_init_table_with_size 155#define st_init_numtable onig_st_init_numtable 156#define st_init_numtable_with_size onig_st_init_numtable_with_size 157#define st_init_strtable onig_st_init_strtable 158#define st_init_strtable_with_size onig_st_init_strtable_with_size 159#define st_delete onig_st_delete 160#define st_delete_safe onig_st_delete_safe 161#define st_insert onig_st_insert 162#define st_lookup onig_st_lookup 163#define st_foreach onig_st_foreach 164#define st_add_direct onig_st_add_direct 165#define st_free_table onig_st_free_table 166#define st_cleanup_safe onig_st_cleanup_safe 167#define st_copy onig_st_copy 168#define st_nothing_key_clone onig_st_nothing_key_clone 169#define st_nothing_key_free onig_st_nothing_key_free 170/* */ 171#define onig_st_is_member st_is_member 172 173#define CHECK_INTERRUPT_IN_MATCH_AT 174 175#endif 176 177#define STATE_CHECK_STRING_THRESHOLD_LEN 7 178#define STATE_CHECK_BUFF_MAX_SIZE 0x4000 179 180#define THREAD_PASS_LIMIT_COUNT 8 181#define xmemset memset 182#define xmemcpy memcpy 183#define xmemmove memmove 184 185#if defined(_WIN32) && !defined(__GNUC__) 186#define xalloca _alloca 187#define xvsnprintf _vsnprintf 188#else 189#define xalloca alloca 190#define xvsnprintf vsnprintf 191#endif 192 193 194#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) 195#define ONIG_STATE_INC(reg) (reg)->state++ 196#define ONIG_STATE_DEC(reg) (reg)->state-- 197 198#define ONIG_STATE_INC_THREAD(reg) do {\ 199 THREAD_ATOMIC_START;\ 200 (reg)->state++;\ 201 THREAD_ATOMIC_END;\ 202} while(0) 203#define ONIG_STATE_DEC_THREAD(reg) do {\ 204 THREAD_ATOMIC_START;\ 205 (reg)->state--;\ 206 THREAD_ATOMIC_END;\ 207} while(0) 208#else 209#define ONIG_STATE_INC(reg) /* Nothing */ 210#define ONIG_STATE_DEC(reg) /* Nothing */ 211#define ONIG_STATE_INC_THREAD(reg) /* Nothing */ 212#define ONIG_STATE_DEC_THREAD(reg) /* Nothing */ 213#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ 214 215#ifdef HAVE_STDLIB_H 216#include <stdlib.h> 217#endif 218 219#if defined(HAVE_ALLOCA_H) && (defined(_AIX) || !defined(__GNUC__)) 220#include <alloca.h> 221#endif 222 223#ifdef HAVE_STRING_H 224# include <string.h> 225#else 226# include <strings.h> 227#endif 228 229#include <ctype.h> 230#ifdef HAVE_SYS_TYPES_H 231#include <sys/types.h> 232#endif 233 234#ifdef HAVE_STDINT_H 235# include <stdint.h> 236#endif 237 238#ifdef STDC_HEADERS 239# include <stddef.h> 240#endif 241 242#ifdef __BORLANDC__ 243#include <malloc.h> 244#endif 245 246#ifdef ONIG_DEBUG 247# include <stdio.h> 248#endif 249 250#ifdef _WIN32 251#if defined(_MSC_VER) && (_MSC_VER < 1300) 252#ifndef _INTPTR_T_DEFINED 253#define _INTPTR_T_DEFINED 254typedef int intptr_t; 255#endif 256#ifndef _UINTPTR_T_DEFINED 257#define _UINTPTR_T_DEFINED 258typedef unsigned int uintptr_t; 259#endif 260#endif 261#endif /* _WIN32 */ 262 263#include "regenc.h" 264 265#if defined __GNUC__ && __GNUC__ >= 4 266#pragma GCC visibility push(default) 267#endif 268 269#ifdef MIN 270#undef MIN 271#endif 272#ifdef MAX 273#undef MAX 274#endif 275#define MIN(a,b) (((a)>(b))?(b):(a)) 276#define MAX(a,b) (((a)<(b))?(b):(a)) 277 278#define IS_NULL(p) (((void*)(p)) == (void*)0) 279#define IS_NOT_NULL(p) (((void*)(p)) != (void*)0) 280#define CHECK_NULL_RETURN(p) if (IS_NULL(p)) return NULL 281#define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY 282#define NULL_UCHARP ((UChar* )0) 283 284#define ONIG_LAST_CODE_POINT (~((OnigCodePoint )0)) 285 286#ifdef PLATFORM_UNALIGNED_WORD_ACCESS 287 288#define PLATFORM_GET_INC(val,p,type) do{\ 289 val = *(type* )p;\ 290 (p) += sizeof(type);\ 291} while(0) 292 293#else 294 295#define PLATFORM_GET_INC(val,p,type) do{\ 296 xmemcpy(&val, (p), sizeof(type));\ 297 (p) += sizeof(type);\ 298} while(0) 299 300/* sizeof(OnigCodePoint) */ 301#define WORD_ALIGNMENT_SIZE SIZEOF_LONG 302 303#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\ 304 (pad_size) = WORD_ALIGNMENT_SIZE \ 305 - ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\ 306 if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\ 307} while (0) 308 309#define ALIGNMENT_RIGHT(addr) do {\ 310 (addr) += (WORD_ALIGNMENT_SIZE - 1);\ 311 (addr) -= ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\ 312} while (0) 313 314#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ 315 316/* stack pop level */ 317#define STACK_POP_LEVEL_FREE 0 318#define STACK_POP_LEVEL_MEM_START 1 319#define STACK_POP_LEVEL_ALL 2 320 321/* optimize flags */ 322#define ONIG_OPTIMIZE_NONE 0 323#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */ 324#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */ 325#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (applied to a multibyte string) */ 326#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */ 327#define ONIG_OPTIMIZE_MAP 5 /* char map */ 328#define ONIG_OPTIMIZE_EXACT_BM_IC 6 /* BM (ignore case) */ 329#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC 7 /* BM (applied to a multibyte string) (ignore case) */ 330 331/* bit status */ 332typedef unsigned int BitStatusType; 333 334#define BIT_STATUS_BITS_NUM (sizeof(BitStatusType) * 8) 335#define BIT_STATUS_CLEAR(stats) (stats) = 0 336#define BIT_STATUS_ON_ALL(stats) (stats) = ~((BitStatusType )0) 337#define BIT_STATUS_AT(stats,n) \ 338 ((n) < (int )BIT_STATUS_BITS_NUM ? ((stats) & (1 << n)) : ((stats) & 1)) 339 340#define BIT_STATUS_ON_AT(stats,n) do {\ 341 if ((n) < (int )BIT_STATUS_BITS_NUM) \ 342 (stats) |= (1 << (n));\ 343 else\ 344 (stats) |= 1;\ 345} while (0) 346 347#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\ 348 if ((n) < (int )BIT_STATUS_BITS_NUM)\ 349 (stats) |= (1 << (n));\ 350} while (0) 351 352 353#define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1) 354 355#define DIGITVAL(code) ((code) - '0') 356#define ODIGITVAL(code) DIGITVAL(code) 357#define XDIGITVAL(enc,code) \ 358 (ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \ 359 : (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10)) 360 361#define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE) 362#define IS_MULTILINE(option) ((option) & ONIG_OPTION_MULTILINE) 363#define IS_IGNORECASE(option) ((option) & ONIG_OPTION_IGNORECASE) 364#define IS_EXTEND(option) ((option) & ONIG_OPTION_EXTEND) 365#define IS_FIND_LONGEST(option) ((option) & ONIG_OPTION_FIND_LONGEST) 366#define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY) 367#define IS_FIND_CONDITION(option) ((option) & \ 368 (ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY)) 369#define IS_NOTBOL(option) ((option) & ONIG_OPTION_NOTBOL) 370#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL) 371#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION) 372#define IS_ASCII_RANGE(option) ((option) & ONIG_OPTION_ASCII_RANGE) 373#define IS_POSIX_BRACKET_ALL_RANGE(option) ((option) & ONIG_OPTION_POSIX_BRACKET_ALL_RANGE) 374#define IS_WORD_BOUND_ALL_RANGE(option) ((option) & ONIG_OPTION_WORD_BOUND_ALL_RANGE) 375#define IS_NEWLINE_CRLF(option) ((option) & ONIG_OPTION_NEWLINE_CRLF) 376 377/* OP_SET_OPTION is required for these options. 378#define IS_DYNAMIC_OPTION(option) \ 379 (((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0) 380*/ 381/* ignore-case and multibyte status are included in compiled code. */ 382#define IS_DYNAMIC_OPTION(option) 0 383 384#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \ 385 ((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) 386 387#define REPEAT_INFINITE -1 388#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE) 389 390/* bitset */ 391#define BITS_PER_BYTE 8 392#define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE) 393#define BITS_IN_ROOM ((int )sizeof(Bits) * BITS_PER_BYTE) 394#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM) 395 396#ifdef PLATFORM_UNALIGNED_WORD_ACCESS 397typedef unsigned int Bits; 398#else 399typedef unsigned char Bits; 400#endif 401typedef Bits BitSet[BITSET_SIZE]; 402typedef Bits* BitSetRef; 403 404#define SIZE_BITSET (int )sizeof(BitSet) 405 406#define BITSET_CLEAR(bs) do {\ 407 int i;\ 408 for (i = 0; i < BITSET_SIZE; i++) { (bs)[i] = 0; } \ 409} while (0) 410 411#define BS_ROOM(bs,pos) (bs)[(int )(pos) / BITS_IN_ROOM] 412#define BS_BIT(pos) (1 << ((int )(pos) % BITS_IN_ROOM)) 413 414#define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos)) 415#define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos) 416#define BITSET_CLEAR_BIT(bs, pos) BS_ROOM(bs,pos) &= ~(BS_BIT(pos)) 417#define BITSET_INVERT_BIT(bs, pos) BS_ROOM(bs,pos) ^= BS_BIT(pos) 418 419/* bytes buffer */ 420typedef struct _BBuf { 421 UChar* p; 422 unsigned int used; 423 unsigned int alloc; 424} BBuf; 425 426#define BBUF_INIT(buf,size) onig_bbuf_init((BBuf* )(buf), (size)) 427 428#define BBUF_SIZE_INC(buf,inc) do{\ 429 (buf)->alloc += (inc);\ 430 (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\ 431 if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ 432} while (0) 433 434#define BBUF_EXPAND(buf,low) do{\ 435 do { (buf)->alloc *= 2; } while ((buf)->alloc < (unsigned int )low);\ 436 (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\ 437 if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ 438} while (0) 439 440#define BBUF_ENSURE_SIZE(buf,size) do{\ 441 unsigned int new_alloc = (buf)->alloc;\ 442 while (new_alloc < (unsigned int )(size)) { new_alloc *= 2; }\ 443 if ((buf)->alloc != new_alloc) {\ 444 (buf)->p = (UChar* )xrealloc((buf)->p, new_alloc);\ 445 if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ 446 (buf)->alloc = new_alloc;\ 447 }\ 448} while (0) 449 450#define BBUF_WRITE(buf,pos,bytes,n) do{\ 451 int used = (pos) + (int )(n);\ 452 if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\ 453 xmemcpy((buf)->p + (pos), (bytes), (n));\ 454 if ((buf)->used < (unsigned int )used) (buf)->used = used;\ 455} while (0) 456 457#define BBUF_WRITE1(buf,pos,byte) do{\ 458 int used = (pos) + 1;\ 459 if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\ 460 (buf)->p[(pos)] = (UChar )(byte);\ 461 if ((buf)->used < (unsigned int )used) (buf)->used = used;\ 462} while (0) 463 464#define BBUF_ADD(buf,bytes,n) BBUF_WRITE((buf),(buf)->used,(bytes),(n)) 465#define BBUF_ADD1(buf,byte) BBUF_WRITE1((buf),(buf)->used,(byte)) 466#define BBUF_GET_ADD_ADDRESS(buf) ((buf)->p + (buf)->used) 467#define BBUF_GET_OFFSET_POS(buf) ((buf)->used) 468 469/* from < to */ 470#define BBUF_MOVE_RIGHT(buf,from,to,n) do {\ 471 if ((unsigned int )((to)+(n)) > (buf)->alloc) BBUF_EXPAND((buf),(to) + (n));\ 472 xmemmove((buf)->p + (to), (buf)->p + (from), (n));\ 473 if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\ 474} while (0) 475 476/* from > to */ 477#define BBUF_MOVE_LEFT(buf,from,to,n) do {\ 478 xmemmove((buf)->p + (to), (buf)->p + (from), (n));\ 479} while (0) 480 481/* from > to */ 482#define BBUF_MOVE_LEFT_REDUCE(buf,from,to) do {\ 483 xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\ 484 (buf)->used -= (from - to);\ 485} while (0) 486 487#define BBUF_INSERT(buf,pos,bytes,n) do {\ 488 if (pos >= (buf)->used) {\ 489 BBUF_WRITE(buf,pos,bytes,n);\ 490 }\ 491 else {\ 492 BBUF_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\ 493 xmemcpy((buf)->p + (pos), (bytes), (n));\ 494 }\ 495} while (0) 496 497#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)] 498 499 500#define ANCHOR_BEGIN_BUF (1<<0) 501#define ANCHOR_BEGIN_LINE (1<<1) 502#define ANCHOR_BEGIN_POSITION (1<<2) 503#define ANCHOR_END_BUF (1<<3) 504#define ANCHOR_SEMI_END_BUF (1<<4) 505#define ANCHOR_END_LINE (1<<5) 506 507#define ANCHOR_WORD_BOUND (1<<6) 508#define ANCHOR_NOT_WORD_BOUND (1<<7) 509#define ANCHOR_WORD_BEGIN (1<<8) 510#define ANCHOR_WORD_END (1<<9) 511#define ANCHOR_PREC_READ (1<<10) 512#define ANCHOR_PREC_READ_NOT (1<<11) 513#define ANCHOR_LOOK_BEHIND (1<<12) 514#define ANCHOR_LOOK_BEHIND_NOT (1<<13) 515 516#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */ 517#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */ 518 519#define ANCHOR_KEEP (1<<16) 520 521/* operation code */ 522enum OpCode { 523 OP_FINISH = 0, /* matching process terminator (no more alternative) */ 524 OP_END = 1, /* pattern code terminator (success end) */ 525 526 OP_EXACT1 = 2, /* single byte, N = 1 */ 527 OP_EXACT2, /* single byte, N = 2 */ 528 OP_EXACT3, /* single byte, N = 3 */ 529 OP_EXACT4, /* single byte, N = 4 */ 530 OP_EXACT5, /* single byte, N = 5 */ 531 OP_EXACTN, /* single byte */ 532 OP_EXACTMB2N1, /* mb-length = 2 N = 1 */ 533 OP_EXACTMB2N2, /* mb-length = 2 N = 2 */ 534 OP_EXACTMB2N3, /* mb-length = 2 N = 3 */ 535 OP_EXACTMB2N, /* mb-length = 2 */ 536 OP_EXACTMB3N, /* mb-length = 3 */ 537 OP_EXACTMBN, /* other length */ 538 539 OP_EXACT1_IC, /* single byte, N = 1, ignore case */ 540 OP_EXACTN_IC, /* single byte, ignore case */ 541 542 OP_CCLASS, 543 OP_CCLASS_MB, 544 OP_CCLASS_MIX, 545 OP_CCLASS_NOT, 546 OP_CCLASS_MB_NOT, 547 OP_CCLASS_MIX_NOT, 548 OP_CCLASS_NODE, /* pointer to CClassNode node */ 549 550 OP_ANYCHAR, /* "." */ 551 OP_ANYCHAR_ML, /* "." multi-line */ 552 OP_ANYCHAR_STAR, /* ".*" */ 553 OP_ANYCHAR_ML_STAR, /* ".*" multi-line */ 554 OP_ANYCHAR_STAR_PEEK_NEXT, 555 OP_ANYCHAR_ML_STAR_PEEK_NEXT, 556 557 OP_WORD, 558 OP_NOT_WORD, 559 OP_WORD_BOUND, 560 OP_NOT_WORD_BOUND, 561 OP_WORD_BEGIN, 562 OP_WORD_END, 563 564 OP_ASCII_WORD, 565 OP_NOT_ASCII_WORD, 566 OP_ASCII_WORD_BOUND, 567 OP_NOT_ASCII_WORD_BOUND, 568 OP_ASCII_WORD_BEGIN, 569 OP_ASCII_WORD_END, 570 571 OP_BEGIN_BUF, 572 OP_END_BUF, 573 OP_BEGIN_LINE, 574 OP_END_LINE, 575 OP_SEMI_END_BUF, 576 OP_BEGIN_POSITION, 577 OP_BEGIN_POS_OR_LINE, /* used for implicit anchor optimization */ 578 579 OP_BACKREF1, 580 OP_BACKREF2, 581 OP_BACKREFN, 582 OP_BACKREFN_IC, 583 OP_BACKREF_MULTI, 584 OP_BACKREF_MULTI_IC, 585 OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */ 586 587 OP_MEMORY_START, 588 OP_MEMORY_START_PUSH, /* push back-tracker to stack */ 589 OP_MEMORY_END_PUSH, /* push back-tracker to stack */ 590 OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */ 591 OP_MEMORY_END, 592 OP_MEMORY_END_REC, /* push marker to stack */ 593 594 OP_KEEP, 595 596 OP_FAIL, /* pop stack and move */ 597 OP_JUMP, 598 OP_PUSH, 599 OP_POP, 600 OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */ 601 OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */ 602 OP_REPEAT, /* {n,m} */ 603 OP_REPEAT_NG, /* {n,m}? (non greedy) */ 604 OP_REPEAT_INC, 605 OP_REPEAT_INC_NG, /* non greedy */ 606 OP_REPEAT_INC_SG, /* search and get in stack */ 607 OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */ 608 OP_NULL_CHECK_START, /* null loop checker start */ 609 OP_NULL_CHECK_END, /* null loop checker end */ 610 OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */ 611 OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ 612 613 OP_PUSH_POS, /* (?=...) start */ 614 OP_POP_POS, /* (?=...) end */ 615 OP_PUSH_POS_NOT, /* (?!...) start */ 616 OP_FAIL_POS, /* (?!...) end */ 617 OP_PUSH_STOP_BT, /* (?>...) start */ 618 OP_POP_STOP_BT, /* (?>...) end */ 619 OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */ 620 OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */ 621 OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */ 622 623 OP_CALL, /* \g<name> */ 624 OP_RETURN, 625 626 OP_CONDITION, 627 628 OP_STATE_CHECK_PUSH, /* combination explosion check and push */ 629 OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */ 630 OP_STATE_CHECK, /* check only */ 631 OP_STATE_CHECK_ANYCHAR_STAR, 632 OP_STATE_CHECK_ANYCHAR_ML_STAR, 633 634 /* no need: IS_DYNAMIC_OPTION() == 0 */ 635 OP_SET_OPTION_PUSH, /* set option and push recover option */ 636 OP_SET_OPTION /* set option */ 637}; 638 639typedef int RelAddrType; 640typedef int AbsAddrType; 641typedef int LengthType; 642typedef int RepeatNumType; 643typedef short int MemNumType; 644typedef short int StateCheckNumType; 645typedef void* PointerType; 646 647#define SIZE_OPCODE 1 648#define SIZE_RELADDR (int )sizeof(RelAddrType) 649#define SIZE_ABSADDR (int )sizeof(AbsAddrType) 650#define SIZE_LENGTH (int )sizeof(LengthType) 651#define SIZE_MEMNUM (int )sizeof(MemNumType) 652#define SIZE_STATE_CHECK_NUM (int )sizeof(StateCheckNumType) 653#define SIZE_REPEATNUM (int )sizeof(RepeatNumType) 654#define SIZE_OPTION (int )sizeof(OnigOptionType) 655#define SIZE_CODE_POINT (int )sizeof(OnigCodePoint) 656#define SIZE_POINTER (int )sizeof(PointerType) 657 658 659#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType) 660#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType) 661#define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType) 662#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType) 663#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType) 664#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType) 665#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType) 666#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType) 667 668/* code point's address must be aligned address. */ 669#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p)) 670#define GET_BYTE_INC(byte,p) do{\ 671 byte = *(p);\ 672 (p)++;\ 673} while(0) 674 675 676/* op-code + arg size */ 677#define SIZE_OP_ANYCHAR_STAR SIZE_OPCODE 678#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1) 679#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR) 680#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR) 681#define SIZE_OP_POP SIZE_OPCODE 682#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1) 683#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1) 684#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM) 685#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM) 686#define SIZE_OP_PUSH_POS SIZE_OPCODE 687#define SIZE_OP_PUSH_POS_NOT (SIZE_OPCODE + SIZE_RELADDR) 688#define SIZE_OP_POP_POS SIZE_OPCODE 689#define SIZE_OP_FAIL_POS SIZE_OPCODE 690#define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION) 691#define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION) 692#define SIZE_OP_FAIL SIZE_OPCODE 693#define SIZE_OP_MEMORY_START (SIZE_OPCODE + SIZE_MEMNUM) 694#define SIZE_OP_MEMORY_START_PUSH (SIZE_OPCODE + SIZE_MEMNUM) 695#define SIZE_OP_MEMORY_END_PUSH (SIZE_OPCODE + SIZE_MEMNUM) 696#define SIZE_OP_MEMORY_END_PUSH_REC (SIZE_OPCODE + SIZE_MEMNUM) 697#define SIZE_OP_MEMORY_END (SIZE_OPCODE + SIZE_MEMNUM) 698#define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM) 699#define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE 700#define SIZE_OP_POP_STOP_BT SIZE_OPCODE 701#define SIZE_OP_NULL_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM) 702#define SIZE_OP_NULL_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM) 703#define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH) 704#define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH) 705#define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE 706#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR) 707#define SIZE_OP_RETURN SIZE_OPCODE 708#define SIZE_OP_CONDITION (SIZE_OPCODE + SIZE_MEMNUM + SIZE_RELADDR) 709 710#ifdef USE_COMBINATION_EXPLOSION_CHECK 711#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) 712#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) 713#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) 714#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) 715#endif 716 717#define MC_ESC(syn) (syn)->meta_char_table.esc 718#define MC_ANYCHAR(syn) (syn)->meta_char_table.anychar 719#define MC_ANYTIME(syn) (syn)->meta_char_table.anytime 720#define MC_ZERO_OR_ONE_TIME(syn) (syn)->meta_char_table.zero_or_one_time 721#define MC_ONE_OR_MORE_TIME(syn) (syn)->meta_char_table.one_or_more_time 722#define MC_ANYCHAR_ANYTIME(syn) (syn)->meta_char_table.anychar_anytime 723 724#define IS_MC_ESC_CODE(code, syn) \ 725 ((code) == MC_ESC(syn) && \ 726 !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE)) 727 728 729#define SYN_POSIX_COMMON_OP \ 730 ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \ 731 ONIG_SYN_OP_DECIMAL_BACKREF | \ 732 ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_ASTERISK_ZERO_INF | \ 733 ONIG_SYN_OP_LINE_ANCHOR | \ 734 ONIG_SYN_OP_ESC_CONTROL_CHARS ) 735 736#define SYN_GNU_REGEX_OP \ 737 ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | \ 738 ONIG_SYN_OP_POSIX_BRACKET | ONIG_SYN_OP_DECIMAL_BACKREF | \ 739 ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_LPAREN_SUBEXP | \ 740 ONIG_SYN_OP_VBAR_ALT | \ 741 ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_PLUS_ONE_INF | \ 742 ONIG_SYN_OP_QMARK_ZERO_ONE | \ 743 ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR | ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR | \ 744 ONIG_SYN_OP_ESC_W_WORD | \ 745 ONIG_SYN_OP_ESC_B_WORD_BOUND | ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END | \ 746 ONIG_SYN_OP_ESC_S_WHITE_SPACE | ONIG_SYN_OP_ESC_D_DIGIT | \ 747 ONIG_SYN_OP_LINE_ANCHOR ) 748 749#define SYN_GNU_REGEX_BV \ 750 ( ONIG_SYN_CONTEXT_INDEP_ANCHORS | ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS | \ 751 ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \ 752 ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC ) 753 754 755#define NCCLASS_FLAGS(cc) ((cc)->flags) 756#define NCCLASS_FLAG_SET(cc,flag) (NCCLASS_FLAGS(cc) |= (flag)) 757#define NCCLASS_FLAG_CLEAR(cc,flag) (NCCLASS_FLAGS(cc) &= ~(flag)) 758#define IS_NCCLASS_FLAG_ON(cc,flag) ((NCCLASS_FLAGS(cc) & (flag)) != 0) 759 760/* cclass node */ 761#define FLAG_NCCLASS_NOT (1<<0) 762#define FLAG_NCCLASS_SHARE (1<<1) 763 764#define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT) 765#define NCCLASS_SET_SHARE(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE) 766#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT) 767#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT) 768#define IS_NCCLASS_SHARE(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE) 769 770typedef struct { 771 int type; 772 /* struct _Node* next; */ 773 /* unsigned int flags; */ 774} NodeBase; 775 776typedef struct { 777 NodeBase base; 778 unsigned int flags; 779 BitSet bs; 780 BBuf* mbuf; /* multi-byte info or NULL */ 781} CClassNode; 782 783typedef intptr_t OnigStackIndex; 784 785typedef struct _OnigStackType { 786 unsigned int type; 787 union { 788 struct { 789 UChar *pcode; /* byte code position */ 790 UChar *pstr; /* string position */ 791 UChar *pstr_prev; /* previous char position of pstr */ 792#ifdef USE_COMBINATION_EXPLOSION_CHECK 793 unsigned int state_check; 794#endif 795 UChar *pkeep; /* keep pattern position */ 796 } state; 797 struct { 798 int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ 799 UChar *pcode; /* byte code position (head of repeated target) */ 800 int num; /* repeat id */ 801 } repeat; 802 struct { 803 OnigStackIndex si; /* index of stack */ 804 } repeat_inc; 805 struct { 806 int num; /* memory num */ 807 UChar *pstr; /* start/end position */ 808 /* Following information is set, if this stack type is MEM-START */ 809 OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */ 810 OnigStackIndex end; /* prev. info (for backtrack "(...)*" ) */ 811 } mem; 812 struct { 813 int num; /* null check id */ 814 UChar *pstr; /* start position */ 815 } null_check; 816#ifdef USE_SUBEXP_CALL 817 struct { 818 UChar *ret_addr; /* byte code position */ 819 int num; /* null check id */ 820 UChar *pstr; /* string position */ 821 } call_frame; 822#endif 823 } u; 824} OnigStackType; 825 826typedef struct { 827 void* stack_p; 828 size_t stack_n; 829 OnigOptionType options; 830 OnigRegion* region; 831 const UChar* start; /* search start position */ 832 const UChar* gpos; /* global position (for \G: BEGIN_POSITION) */ 833#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE 834 OnigPosition best_len; /* for ONIG_OPTION_FIND_LONGEST */ 835 UChar* best_s; 836#endif 837#ifdef USE_COMBINATION_EXPLOSION_CHECK 838 void* state_check_buff; 839 int state_check_buff_size; 840#endif 841} OnigMatchArg; 842 843 844#define IS_CODE_SB_WORD(enc,code) \ 845 (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code)) 846 847#ifdef ONIG_DEBUG 848 849typedef struct { 850 short int opcode; 851 const char* name; 852 short int arg_type; 853} OnigOpInfoType; 854 855extern OnigOpInfoType OnigOpInfo[]; 856 857/* extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar* bpend, UChar** nextp, OnigEncoding enc)); */ 858 859#ifdef ONIG_DEBUG_STATISTICS 860extern void onig_statistics_init P_((void)); 861extern void onig_print_statistics P_((FILE* f)); 862#endif 863#endif 864 865extern UChar* onig_error_code_to_format P_((OnigPosition code)); 866extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...)); 867extern int onig_bbuf_init P_((BBuf* buf, OnigDistance size)); 868extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo, const char *sourcefile, int sourceline)); 869extern void onig_chain_reduce P_((regex_t* reg)); 870extern void onig_chain_link_add P_((regex_t* to, regex_t* add)); 871extern void onig_transfer P_((regex_t* to, regex_t* from)); 872extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); 873extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, CClassNode* cc)); 874 875/* strend hash */ 876typedef void hash_table_type; 877#ifdef RUBY 878#include "ruby/st.h" 879typedef st_data_t hash_data_type; 880#else 881#include "st.h" 882typedef uintptr_t hash_data_type; 883#endif 884 885extern hash_table_type* onig_st_init_strend_table_with_size P_((st_index_t size)); 886extern int onig_st_lookup_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value)); 887extern int onig_st_insert_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value)); 888 889/* encoding property management */ 890#define PROPERTY_LIST_ADD_PROP(Name, CR) \ 891 r = onigenc_property_list_add_property((UChar* )Name, CR,\ 892 &PropertyNameTable, &PropertyList, &PropertyListNum,\ 893 &PropertyListSize);\ 894 if (r != 0) goto end 895 896#define PROPERTY_LIST_INIT_CHECK \ 897 if (PropertyInited == 0) {\ 898 int r = onigenc_property_list_init(init_property_list);\ 899 if (r != 0) return r;\ 900 } 901 902extern int onigenc_property_list_add_property P_((UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize)); 903 904typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void); 905 906extern int onigenc_property_list_init P_((ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)); 907 908extern size_t onig_memsize P_((const regex_t *reg)); 909extern size_t onig_region_memsize P_((const struct re_registers *regs)); 910 911#if defined __GNUC__ && __GNUC__ >= 4 912#pragma GCC visibility pop 913#endif 914 915#endif /* ONIGURUMA_REGINT_H */ 916