1/********************************************************************** 2 gb18030.c - Oniguruma (regular expression library) 3**********************************************************************/ 4/*- 5 * Copyright (c) 2005-2007 KUBO Takehiro <kubo AT jiubao DOT org> 6 * K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31#include "regenc.h" 32 33#if 1 34#define DEBUG_GB18030(arg) 35#else 36#define DEBUG_GB18030(arg) printf arg 37#endif 38 39enum { 40 C1, /* one-byte char */ 41 C2, /* one-byte or second of two-byte char */ 42 C4, /* one-byte or second or fourth of four-byte char */ 43 CM /* first of two- or four-byte char or second of two-byte char */ 44}; 45 46static const char GB18030_MAP[] = { 47 C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, 48 C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, 49 C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, 50 C4, C4, C4, C4, C4, C4, C4, C4, C4, C4, C1, C1, C1, C1, C1, C1, 51 C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, 52 C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, 53 C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, 54 C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C1, 55 C2, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, 56 CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, 57 CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, 58 CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, 59 CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, 60 CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, 61 CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, 62 CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, C1 63}; 64 65typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1, S2, S3 } state_t; 66#define A ACCEPT 67#define F FAILURE 68static const signed char trans[][0x100] = { 69 { /* S0 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 70 /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 71 /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 72 /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 73 /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 74 /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 75 /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 76 /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 77 /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 78 /* 8 */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 79 /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 80 /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 81 /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 82 /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 83 /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 84 /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 85 /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F 86 }, 87 { /* S1 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 88 /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 89 /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 90 /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 91 /* 3 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, F, F, F, F, F, F, 92 /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 93 /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 94 /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 95 /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F, 96 /* 8 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 97 /* 9 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 98 /* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 99 /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 100 /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 101 /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 102 /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 103 /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F 104 }, 105 { /* S2 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 106 /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 107 /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 108 /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 109 /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 110 /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 111 /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 112 /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 113 /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 114 /* 8 */ F, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 115 /* 9 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 116 /* a */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 117 /* b */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 118 /* c */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 119 /* d */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 120 /* e */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 121 /* f */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, F 122 }, 123 { /* S3 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 124 /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 125 /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 126 /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 127 /* 3 */ A, A, A, A, A, A, A, A, A, A, F, F, F, F, F, F, 128 /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 129 /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 130 /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 131 /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 132 /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 133 /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 134 /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 135 /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 136 /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 137 /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 138 /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 139 /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F 140 } 141}; 142#undef A 143#undef F 144 145static int 146gb18030_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) 147{ 148 int firstbyte = *p++; 149 state_t s = trans[0][firstbyte]; 150#define RETURN(n) \ 151 return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) : \ 152 ONIGENC_CONSTRUCT_MBCLEN_INVALID() 153 if (s < 0) RETURN(1); 154 if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(2-1); 155 s = trans[s][*p++]; 156 if (s < 0) RETURN(2); 157 if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-2); 158 s = trans[s][*p++]; 159 if (s < 0) RETURN(3); 160 if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-3); 161 s = trans[s][*p++]; 162 RETURN(4); 163#undef RETURN 164} 165 166static OnigCodePoint 167gb18030_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc) 168{ 169 int c, i, len; 170 OnigCodePoint n; 171 172 len = enclen(enc, p, end); 173 n = (OnigCodePoint )(*p++); 174 if (len == 1) return n; 175 176 for (i = 1; i < len; i++) { 177 if (p >= end) break; 178 c = *p++; 179 n <<= 8; n += c; 180 } 181 return n; 182} 183 184static int 185gb18030_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc) 186{ 187 return onigenc_mb4_code_to_mbc(enc, code, buf); 188} 189 190static int 191gb18030_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end, 192 UChar* lower, OnigEncoding enc) 193{ 194 return onigenc_mbn_mbc_case_fold(enc, flag, 195 pp, end, lower); 196} 197 198#if 0 199static int 200gb18030_is_mbc_ambiguous(OnigCaseFoldType flag, 201 const UChar** pp, const UChar* end, OnigEncoding enc) 202{ 203 return onigenc_mbn_is_mbc_ambiguous(enc, flag, pp, end); 204} 205#endif 206 207static int 208gb18030_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc) 209{ 210 return onigenc_mb4_is_code_ctype(enc, code, ctype); 211} 212 213enum state { 214 S_START, 215 S_one_C2, 216 S_one_C4, 217 S_one_CM, 218 219 S_odd_CM_one_CX, 220 S_even_CM_one_CX, 221 222 /* CMC4 : pair of "CM C4" */ 223 S_one_CMC4, 224 S_odd_CMC4, 225 S_one_C4_odd_CMC4, 226 S_even_CMC4, 227 S_one_C4_even_CMC4, 228 229 S_odd_CM_odd_CMC4, 230 S_even_CM_odd_CMC4, 231 232 S_odd_CM_even_CMC4, 233 S_even_CM_even_CMC4, 234 235 /* C4CM : pair of "C4 CM" */ 236 S_odd_C4CM, 237 S_one_CM_odd_C4CM, 238 S_even_C4CM, 239 S_one_CM_even_C4CM, 240 241 S_even_CM_odd_C4CM, 242 S_odd_CM_odd_C4CM, 243 S_even_CM_even_C4CM, 244 S_odd_CM_even_C4CM 245}; 246 247static UChar* 248gb18030_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc) 249{ 250 const UChar *p; 251 enum state state = S_START; 252 253 DEBUG_GB18030(("----------------\n")); 254 for (p = s; p >= start; p--) { 255 DEBUG_GB18030(("state %d --(%02x)-->\n", state, *p)); 256 switch (state) { 257 case S_START: 258 switch (GB18030_MAP[*p]) { 259 case C1: 260 return (UChar *)s; 261 case C2: 262 state = S_one_C2; /* C2 */ 263 break; 264 case C4: 265 state = S_one_C4; /* C4 */ 266 break; 267 case CM: 268 state = S_one_CM; /* CM */ 269 break; 270 } 271 break; 272 case S_one_C2: /* C2 */ 273 switch (GB18030_MAP[*p]) { 274 case C1: 275 case C2: 276 case C4: 277 return (UChar *)s; 278 case CM: 279 state = S_odd_CM_one_CX; /* CM C2 */ 280 break; 281 } 282 break; 283 case S_one_C4: /* C4 */ 284 switch (GB18030_MAP[*p]) { 285 case C1: 286 case C2: 287 case C4: 288 return (UChar *)s; 289 case CM: 290 state = S_one_CMC4; 291 break; 292 } 293 break; 294 case S_one_CM: /* CM */ 295 switch (GB18030_MAP[*p]) { 296 case C1: 297 case C2: 298 return (UChar *)s; 299 case C4: 300 state = S_odd_C4CM; 301 break; 302 case CM: 303 state = S_odd_CM_one_CX; /* CM CM */ 304 break; 305 } 306 break; 307 308 case S_odd_CM_one_CX: /* CM C2 */ /* CM CM */ /* CM CM CM C4 */ 309 switch (GB18030_MAP[*p]) { 310 case C1: 311 case C2: 312 case C4: 313 return (UChar *)(s - 1); 314 case CM: 315 state = S_even_CM_one_CX; 316 break; 317 } 318 break; 319 case S_even_CM_one_CX: /* CM CM C2 */ /* CM CM CM */ /* CM CM C4 */ 320 switch (GB18030_MAP[*p]) { 321 case C1: 322 case C2: 323 case C4: 324 return (UChar *)s; 325 case CM: 326 state = S_odd_CM_one_CX; 327 break; 328 } 329 break; 330 331 case S_one_CMC4: /* CM C4 */ 332 switch (GB18030_MAP[*p]) { 333 case C1: 334 case C2: 335 return (UChar *)(s - 1); 336 case C4: 337 state = S_one_C4_odd_CMC4; /* C4 CM C4 */ 338 break; 339 case CM: 340 state = S_even_CM_one_CX; /* CM CM C4 */ 341 break; 342 } 343 break; 344 case S_odd_CMC4: /* CM C4 CM C4 CM C4 */ 345 switch (GB18030_MAP[*p]) { 346 case C1: 347 case C2: 348 return (UChar *)(s - 1); 349 case C4: 350 state = S_one_C4_odd_CMC4; 351 break; 352 case CM: 353 state = S_odd_CM_odd_CMC4; 354 break; 355 } 356 break; 357 case S_one_C4_odd_CMC4: /* C4 CM C4 */ 358 switch (GB18030_MAP[*p]) { 359 case C1: 360 case C2: 361 case C4: 362 return (UChar *)(s - 1); 363 case CM: 364 state = S_even_CMC4; /* CM C4 CM C4 */ 365 break; 366 } 367 break; 368 case S_even_CMC4: /* CM C4 CM C4 */ 369 switch (GB18030_MAP[*p]) { 370 case C1: 371 case C2: 372 return (UChar *)(s - 3); 373 case C4: 374 state = S_one_C4_even_CMC4; 375 break; 376 case CM: 377 state = S_odd_CM_even_CMC4; 378 break; 379 } 380 break; 381 case S_one_C4_even_CMC4: /* C4 CM C4 CM C4 */ 382 switch (GB18030_MAP[*p]) { 383 case C1: 384 case C2: 385 case C4: 386 return (UChar *)(s - 3); 387 case CM: 388 state = S_odd_CMC4; 389 break; 390 } 391 break; 392 393 case S_odd_CM_odd_CMC4: /* CM CM C4 CM C4 CM C4 */ 394 switch (GB18030_MAP[*p]) { 395 case C1: 396 case C2: 397 case C4: 398 return (UChar *)(s - 3); 399 case CM: 400 state = S_even_CM_odd_CMC4; 401 break; 402 } 403 break; 404 case S_even_CM_odd_CMC4: /* CM CM CM C4 CM C4 CM C4 */ 405 switch (GB18030_MAP[*p]) { 406 case C1: 407 case C2: 408 case C4: 409 return (UChar *)(s - 1); 410 case CM: 411 state = S_odd_CM_odd_CMC4; 412 break; 413 } 414 break; 415 416 case S_odd_CM_even_CMC4: /* CM CM C4 CM C4 */ 417 switch (GB18030_MAP[*p]) { 418 case C1: 419 case C2: 420 case C4: 421 return (UChar *)(s - 1); 422 case CM: 423 state = S_even_CM_even_CMC4; 424 break; 425 } 426 break; 427 case S_even_CM_even_CMC4: /* CM CM CM C4 CM C4 */ 428 switch (GB18030_MAP[*p]) { 429 case C1: 430 case C2: 431 case C4: 432 return (UChar *)(s - 3); 433 case CM: 434 state = S_odd_CM_even_CMC4; 435 break; 436 } 437 break; 438 439 case S_odd_C4CM: /* C4 CM */ /* C4 CM C4 CM C4 CM*/ 440 switch (GB18030_MAP[*p]) { 441 case C1: 442 case C2: 443 case C4: 444 return (UChar *)s; 445 case CM: 446 state = S_one_CM_odd_C4CM; /* CM C4 CM */ 447 break; 448 } 449 break; 450 case S_one_CM_odd_C4CM: /* CM C4 CM */ /* CM C4 CM C4 CM C4 CM */ 451 switch (GB18030_MAP[*p]) { 452 case C1: 453 case C2: 454 return (UChar *)(s - 2); /* |CM C4 CM */ 455 case C4: 456 state = S_even_C4CM; 457 break; 458 case CM: 459 state = S_even_CM_odd_C4CM; 460 break; 461 } 462 break; 463 case S_even_C4CM: /* C4 CM C4 CM */ 464 switch (GB18030_MAP[*p]) { 465 case C1: 466 case C2: 467 case C4: 468 return (UChar *)(s - 2); /* C4|CM C4 CM */ 469 case CM: 470 state = S_one_CM_even_C4CM; 471 break; 472 } 473 break; 474 case S_one_CM_even_C4CM: /* CM C4 CM C4 CM */ 475 switch (GB18030_MAP[*p]) { 476 case C1: 477 case C2: 478 return (UChar *)(s - 0); /*|CM C4 CM C4|CM */ 479 case C4: 480 state = S_odd_C4CM; 481 break; 482 case CM: 483 state = S_even_CM_even_C4CM; 484 break; 485 } 486 break; 487 488 case S_even_CM_odd_C4CM: /* CM CM C4 CM */ 489 switch (GB18030_MAP[*p]) { 490 case C1: 491 case C2: 492 case C4: 493 return (UChar *)(s - 0); /* |CM CM|C4|CM */ 494 case CM: 495 state = S_odd_CM_odd_C4CM; 496 break; 497 } 498 break; 499 case S_odd_CM_odd_C4CM: /* CM CM CM C4 CM */ 500 switch (GB18030_MAP[*p]) { 501 case C1: 502 case C2: 503 case C4: 504 return (UChar *)(s - 2); /* |CM CM|CM C4 CM */ 505 case CM: 506 state = S_even_CM_odd_C4CM; 507 break; 508 } 509 break; 510 511 case S_even_CM_even_C4CM: /* CM CM C4 CM C4 CM */ 512 switch (GB18030_MAP[*p]) { 513 case C1: 514 case C2: 515 case C4: 516 return (UChar *)(s - 2); /* |CM CM|C4|CM C4 CM */ 517 case CM: 518 state = S_odd_CM_even_C4CM; 519 break; 520 } 521 break; 522 case S_odd_CM_even_C4CM: /* CM CM CM C4 CM C4 CM */ 523 switch (GB18030_MAP[*p]) { 524 case C1: 525 case C2: 526 case C4: 527 return (UChar *)(s - 0); /* |CM CM|CM C4 CM C4|CM */ 528 case CM: 529 state = S_even_CM_even_C4CM; 530 break; 531 } 532 break; 533 } 534 } 535 536 DEBUG_GB18030(("state %d\n", state)); 537 switch (state) { 538 case S_START: return (UChar *)(s - 0); 539 case S_one_C2: return (UChar *)(s - 0); 540 case S_one_C4: return (UChar *)(s - 0); 541 case S_one_CM: return (UChar *)(s - 0); 542 543 case S_odd_CM_one_CX: return (UChar *)(s - 1); 544 case S_even_CM_one_CX: return (UChar *)(s - 0); 545 546 case S_one_CMC4: return (UChar *)(s - 1); 547 case S_odd_CMC4: return (UChar *)(s - 1); 548 case S_one_C4_odd_CMC4: return (UChar *)(s - 1); 549 case S_even_CMC4: return (UChar *)(s - 3); 550 case S_one_C4_even_CMC4: return (UChar *)(s - 3); 551 552 case S_odd_CM_odd_CMC4: return (UChar *)(s - 3); 553 case S_even_CM_odd_CMC4: return (UChar *)(s - 1); 554 555 case S_odd_CM_even_CMC4: return (UChar *)(s - 1); 556 case S_even_CM_even_CMC4: return (UChar *)(s - 3); 557 558 case S_odd_C4CM: return (UChar *)(s - 0); 559 case S_one_CM_odd_C4CM: return (UChar *)(s - 2); 560 case S_even_C4CM: return (UChar *)(s - 2); 561 case S_one_CM_even_C4CM: return (UChar *)(s - 0); 562 563 case S_even_CM_odd_C4CM: return (UChar *)(s - 0); 564 case S_odd_CM_odd_C4CM: return (UChar *)(s - 2); 565 case S_even_CM_even_C4CM: return (UChar *)(s - 2); 566 case S_odd_CM_even_C4CM: return (UChar *)(s - 0); 567 } 568 569 return (UChar* )s; /* never come here. (escape warning) */ 570} 571 572static int 573gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED, OnigEncoding enc ARG_UNUSED) 574{ 575 return GB18030_MAP[*s] == C1 ? TRUE : FALSE; 576} 577 578/* 579 * Name: GB18030 580 * MIBenum: 114 581 * Link: http://www.iana.org/assignments/charset-reg/GB18030 582 */ 583OnigEncodingDefine(gb18030, GB18030) = { 584 gb18030_mbc_enc_len, 585 "GB18030", /* name */ 586 4, /* max enc length */ 587 1, /* min enc length */ 588 onigenc_is_mbc_newline_0x0a, 589 gb18030_mbc_to_code, 590 onigenc_mb4_code_to_mbclen, 591 gb18030_code_to_mbc, 592 gb18030_mbc_case_fold, 593 onigenc_ascii_apply_all_case_fold, 594 onigenc_ascii_get_case_fold_codes_by_str, 595 onigenc_minimum_property_name_to_ctype, 596 gb18030_is_code_ctype, 597 onigenc_not_support_get_ctype_code_range, 598 gb18030_left_adjust_char_head, 599 gb18030_is_allowed_reverse_match, 600 0, 601 ONIGENC_FLAG_NONE, 602}; 603 604