1/********************************************************************** 2 sjis.c - Onigmo (Oniguruma-mod) (regular expression library) 3**********************************************************************/ 4/*- 5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 6 * Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31#include "regint.h" 32 33static const int EncLen_SJIS[] = { 34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 42 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 43 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 45 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 46 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 47 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 48 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 49 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1 50}; 51 52static const char SJIS_CAN_BE_TRAIL_TABLE[256] = { 53 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 54 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 63 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 64 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 65 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 66 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 67 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 68 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 69}; 70 71static const OnigPairCaseFoldCodes CaseFoldMap[] = { 72 /* Fullwidth Alphabet */ 73 { 0x8260, 0x8281 }, 74 { 0x8261, 0x8282 }, 75 { 0x8262, 0x8283 }, 76 { 0x8263, 0x8284 }, 77 { 0x8264, 0x8285 }, 78 { 0x8265, 0x8286 }, 79 { 0x8266, 0x8287 }, 80 { 0x8267, 0x8288 }, 81 { 0x8268, 0x8289 }, 82 { 0x8269, 0x828a }, 83 { 0x826a, 0x828b }, 84 { 0x826b, 0x828c }, 85 { 0x826c, 0x828d }, 86 { 0x826d, 0x828e }, 87 { 0x826e, 0x828f }, 88 { 0x826f, 0x8290 }, 89 { 0x8270, 0x8291 }, 90 { 0x8271, 0x8292 }, 91 { 0x8272, 0x8293 }, 92 { 0x8273, 0x8294 }, 93 { 0x8274, 0x8295 }, 94 { 0x8275, 0x8296 }, 95 { 0x8276, 0x8297 }, 96 { 0x8277, 0x8298 }, 97 { 0x8278, 0x8299 }, 98 { 0x8279, 0x829a }, 99 100 /* Greek */ 101 { 0x839f, 0x83bf }, 102 { 0x83a0, 0x83c0 }, 103 { 0x83a1, 0x83c1 }, 104 { 0x83a2, 0x83c2 }, 105 { 0x83a3, 0x83c3 }, 106 { 0x83a4, 0x83c4 }, 107 { 0x83a5, 0x83c5 }, 108 { 0x83a6, 0x83c6 }, 109 { 0x83a7, 0x83c7 }, 110 { 0x83a8, 0x83c8 }, 111 { 0x83a9, 0x83c9 }, 112 { 0x83aa, 0x83ca }, 113 { 0x83ab, 0x83cb }, 114 { 0x83ac, 0x83cc }, 115 { 0x83ad, 0x83cd }, 116 { 0x83ae, 0x83ce }, 117 { 0x83af, 0x83cf }, 118 { 0x83b0, 0x83d0 }, 119 { 0x83b1, 0x83d1 }, 120 { 0x83b2, 0x83d2 }, 121 { 0x83b3, 0x83d3 }, 122 { 0x83b4, 0x83d4 }, 123 { 0x83b5, 0x83d5 }, 124 { 0x83b6, 0x83d6 }, 125 126 /* Cyrillic */ 127 { 0x8440, 0x8470 }, 128 { 0x8441, 0x8471 }, 129 { 0x8442, 0x8472 }, 130 { 0x8443, 0x8473 }, 131 { 0x8444, 0x8474 }, 132 { 0x8445, 0x8475 }, 133 { 0x8446, 0x8476 }, 134 { 0x8447, 0x8477 }, 135 { 0x8448, 0x8478 }, 136 { 0x8449, 0x8479 }, 137 { 0x844a, 0x847a }, 138 { 0x844b, 0x847b }, 139 { 0x844c, 0x847c }, 140 { 0x844d, 0x847d }, 141 { 0x844e, 0x847e }, 142 { 0x844f, 0x8480 }, 143 { 0x8450, 0x8481 }, 144 { 0x8451, 0x8482 }, 145 { 0x8452, 0x8483 }, 146 { 0x8453, 0x8484 }, 147 { 0x8454, 0x8485 }, 148 { 0x8455, 0x8486 }, 149 { 0x8456, 0x8487 }, 150 { 0x8457, 0x8488 }, 151 { 0x8458, 0x8489 }, 152 { 0x8459, 0x848a }, 153 { 0x845a, 0x848b }, 154 { 0x845b, 0x848c }, 155 { 0x845c, 0x848d }, 156 { 0x845d, 0x848e }, 157 { 0x845e, 0x848f }, 158 { 0x845f, 0x8490 }, 159 { 0x8460, 0x8491 }, 160}; 161 162#define SJIS_ISMB_FIRST(byte) (EncLen_SJIS[byte] > 1) 163#define SJIS_ISMB_TRAIL(byte) SJIS_CAN_BE_TRAIL_TABLE[(byte)] 164 165typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1 } state_t; 166#define A ACCEPT 167#define F FAILURE 168static const signed char trans[][0x100] = { 169 { /* S0 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 170 /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 171 /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 172 /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 173 /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 174 /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 175 /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 176 /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 177 /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 178 /* 8 */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 179 /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 180 /* a */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 181 /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 182 /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 183 /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 184 /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 185 /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F, F, F 186 }, 187 { /* S1 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 188 /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 189 /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 190 /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 191 /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, 192 /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 193 /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 194 /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 195 /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F, 196 /* 8 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 197 /* 9 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 198 /* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 199 /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 200 /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 201 /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 202 /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, 203 /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, F, F, F 204 } 205}; 206#undef A 207#undef F 208 209static int 210mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) 211{ 212 int firstbyte = *p++; 213 state_t s; 214 s = trans[0][firstbyte]; 215 if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1) : 216 ONIGENC_CONSTRUCT_MBCLEN_INVALID(); 217 if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_SJIS[firstbyte]-1); 218 s = trans[s][*p++]; 219 return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2) : 220 ONIGENC_CONSTRUCT_MBCLEN_INVALID(); 221} 222 223static int 224code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) 225{ 226 if (code < 256) { 227 if (EncLen_SJIS[(int )code] == 1) 228 return 1; 229 else 230 return ONIGERR_INVALID_CODE_POINT_VALUE; 231 } 232 else if (code <= 0xffff) { 233 int low = code & 0xff; 234 if (! SJIS_ISMB_TRAIL(low)) 235 return ONIGERR_INVALID_CODE_POINT_VALUE; 236 return 2; 237 } 238 else 239 return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; 240} 241 242static OnigCodePoint 243mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc) 244{ 245 int c, i, len; 246 OnigCodePoint n; 247 248 len = mbc_enc_len(p, end, enc); 249 c = *p++; 250 n = c; 251 if (len == 1) return n; 252 253 for (i = 1; i < len; i++) { 254 if (p >= end) break; 255 c = *p++; 256 n <<= 8; n += c; 257 } 258 return n; 259} 260 261static int 262code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc) 263{ 264 UChar *p = buf; 265 266 if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff)); 267 *p++ = (UChar )(code & 0xff); 268 269#if 0 270 if (mbc_enc_len(buf, p, enc) != (p - buf)) 271 return REGERR_INVALID_CODE_POINT_VALUE; 272#endif 273 return (int )(p - buf); 274} 275 276static int 277apply_all_case_fold(OnigCaseFoldType flag, 278 OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc) 279{ 280 return onigenc_apply_all_case_fold_with_map( 281 sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0, 282 flag, f, arg); 283} 284 285static OnigCodePoint 286get_lower_case(OnigCodePoint code) 287{ 288 if (ONIGENC_IS_IN_RANGE(code, 0x8260, 0x8279)) { 289 /* Fullwidth Alphabet */ 290 return (OnigCodePoint )(code + 0x0021); 291 } 292 else if (ONIGENC_IS_IN_RANGE(code, 0x839f, 0x83b6)) { 293 /* Greek */ 294 return (OnigCodePoint )(code + 0x0020); 295 } 296 else if (ONIGENC_IS_IN_RANGE(code, 0x8440, 0x8460)) { 297 /* Cyrillic */ 298 int d = (code >= 0x844f) ? 1 : 0; 299 return (OnigCodePoint )(code + (0x0030 + d)); 300 } 301 return code; 302} 303 304static OnigCodePoint 305get_upper_case(OnigCodePoint code) 306{ 307 if (ONIGENC_IS_IN_RANGE(code, 0x8281, 0x829a)) { 308 /* Fullwidth Alphabet */ 309 return (OnigCodePoint )(code - 0x0021); 310 } 311 else if (ONIGENC_IS_IN_RANGE(code, 0x83bf, 0x83d6)) { 312 /* Greek */ 313 return (OnigCodePoint )(code - 0x0020); 314 } 315 else if (ONIGENC_IS_IN_RANGE(code, 0x8470, 0x847e) || 316 ONIGENC_IS_IN_RANGE(code, 0x8480, 0x8491)) { 317 /* Cyrillic */ 318 int d = (code >= 0x8480) ? 1 : 0; 319 return (OnigCodePoint )(code - (0x0030 - d)); 320 } 321 return code; 322} 323 324static int 325get_case_fold_codes_by_str(OnigCaseFoldType flag, 326 const OnigUChar* p, const OnigUChar* end, 327 OnigCaseFoldCodeItem items[], OnigEncoding enc) 328{ 329 int len; 330 OnigCodePoint code, code_lo, code_up; 331 332 code = mbc_to_code(p, end, enc); 333 if (ONIGENC_IS_ASCII_CODE(code)) 334 return onigenc_ascii_get_case_fold_codes_by_str(flag, p, end, items, enc); 335 336 len = mbc_enc_len(p, end, enc); 337 code_lo = get_lower_case(code); 338 code_up = get_upper_case(code); 339 340 if (code != code_lo) { 341 items[0].byte_len = len; 342 items[0].code_len = 1; 343 items[0].code[0] = code_lo; 344 return 1; 345 } 346 else if (code != code_up) { 347 items[0].byte_len = len; 348 items[0].code_len = 1; 349 items[0].code[0] = code_up; 350 return 1; 351 } 352 353 return 0; 354} 355 356static int 357mbc_case_fold(OnigCaseFoldType flag, 358 const UChar** pp, const UChar* end, UChar* lower, 359 OnigEncoding enc) 360{ 361 const UChar* p = *pp; 362 363 if (ONIGENC_IS_MBC_ASCII(p)) { 364 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); 365 (*pp)++; 366 return 1; 367 } 368 else { 369 OnigCodePoint code; 370 int len; 371 372 code = get_lower_case(mbc_to_code(p, end, enc)); 373 len = code_to_mbc(code, lower, enc); 374 (*pp) += len; 375 return len; /* return byte length of converted char to lower */ 376 } 377} 378 379#if 0 380static int 381is_mbc_ambiguous(OnigCaseFoldType flag, 382 const UChar** pp, const UChar* end) 383{ 384 return onigenc_mbn_is_mbc_ambiguous(enc, flag, pp, end); 385 386} 387#endif 388 389#if 0 390static int 391is_code_ctype(OnigCodePoint code, unsigned int ctype) 392{ 393 if (code < 128) 394 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); 395 else { 396 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { 397 return (code_to_mbclen(code) > 1 ? TRUE : FALSE); 398 } 399 } 400 401 return FALSE; 402} 403#endif 404 405static UChar* 406left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc) 407{ 408 const UChar *p; 409 int len; 410 411 if (s <= start) return (UChar* )s; 412 p = s; 413 414 if (SJIS_ISMB_TRAIL(*p)) { 415 while (p > start) { 416 if (! SJIS_ISMB_FIRST(*--p)) { 417 p++; 418 break; 419 } 420 } 421 } 422 len = mbc_enc_len(p, end, enc); 423 if (p + len > s) return (UChar* )p; 424 p += len; 425 return (UChar* )(p + ((s - p) & ~1)); 426} 427 428static int 429is_allowed_reverse_match(const UChar* s, const UChar* end, OnigEncoding enc ARG_UNUSED) 430{ 431 const UChar c = *s; 432 return (SJIS_ISMB_TRAIL(c) ? FALSE : TRUE); 433} 434 435 436static int PropertyInited = 0; 437static const OnigCodePoint** PropertyList; 438static int PropertyListNum; 439static int PropertyListSize; 440static hash_table_type* PropertyNameTable; 441 442static const OnigCodePoint CR_Hiragana[] = { 443 1, 444 0x829f, 0x82f1 445}; /* CR_Hiragana */ 446 447static const OnigCodePoint CR_Katakana[] = { 448 4, 449 0x00a6, 0x00af, 450 0x00b1, 0x00dd, 451 0x8340, 0x837e, 452 0x8380, 0x8396, 453}; /* CR_Katakana */ 454 455#ifdef ENC_CP932 456static const OnigCodePoint CR_Han[] = { 457 6, 458 0x8157, 0x8157, 459 0x889f, 0x9872, /* Kanji level 1 */ 460 0x989f, 0x9ffc, /* Kanji level 2 */ 461 0xe040, 0xeaa4, /* Kanji level 2 */ 462 0xed40, 0xeeec, /* NEC-selected IBM extended characters (without symbols) */ 463 0xfa5c, 0xfc4b, /* IBM extended characters (without symbols) */ 464}; /* CR_Han */ 465#else 466static const OnigCodePoint CR_Han[] = { 467 4, 468 0x8157, 0x8157, 469 0x889f, 0x9872, /* Kanji level 1 */ 470 0x989f, 0x9ffc, /* Kanji level 2 */ 471 0xe040, 0xeaa4, /* Kanji level 2 */ 472}; /* CR_Han */ 473#endif 474 475static const OnigCodePoint CR_Latin[] = { 476 4, 477 0x0041, 0x005a, 478 0x0061, 0x007a, 479 0x8260, 0x8279, 480 0x8281, 0x829a, 481}; /* CR_Latin */ 482 483static const OnigCodePoint CR_Greek[] = { 484 2, 485 0x839f, 0x83b6, 486 0x83bf, 0x83d6, 487}; /* CR_Greek */ 488 489static const OnigCodePoint CR_Cyrillic[] = { 490 3, 491 0x8440, 0x8460, 492 0x8470, 0x847f, 493 0x8480, 0x8491, 494}; /* CR_Cyrillic */ 495 496static int 497init_property_list(void) 498{ 499 int r; 500 501 PROPERTY_LIST_ADD_PROP("hiragana", CR_Hiragana); 502 PROPERTY_LIST_ADD_PROP("katakana", CR_Katakana); 503 PROPERTY_LIST_ADD_PROP("han", CR_Han); 504 PROPERTY_LIST_ADD_PROP("latin", CR_Latin); 505 PROPERTY_LIST_ADD_PROP("greek", CR_Greek); 506 PROPERTY_LIST_ADD_PROP("cyrillic", CR_Cyrillic); 507 PropertyInited = 1; 508 509 end: 510 return r; 511} 512 513static int 514property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end) 515{ 516 hash_data_type ctype; 517 UChar *s, *e; 518 519 PROPERTY_LIST_INIT_CHECK; 520 521 s = e = ALLOCA_N(UChar, end-p+1); 522 for (; p < end; p++) { 523 *e++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); 524 } 525 526 if (onig_st_lookup_strend(PropertyNameTable, s, e, &ctype) == 0) { 527 return onigenc_minimum_property_name_to_ctype(enc, s, e); 528 } 529 530 return (int )ctype; 531} 532 533static int 534is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc) 535{ 536 if (ctype <= ONIGENC_MAX_STD_CTYPE) { 537 if (code < 128) 538 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); 539 else { 540 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) { 541 return TRUE; 542 } 543 } 544 } 545 else { 546 PROPERTY_LIST_INIT_CHECK; 547 548 ctype -= (ONIGENC_MAX_STD_CTYPE + 1); 549 if (ctype >= (unsigned int )PropertyListNum) 550 return ONIGERR_TYPE_BUG; 551 552 return onig_is_in_code_range((UChar* )PropertyList[ctype], code); 553 } 554 555 return FALSE; 556} 557 558static int 559get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, 560 const OnigCodePoint* ranges[], OnigEncoding enc ARG_UNUSED) 561{ 562 if (ctype <= ONIGENC_MAX_STD_CTYPE) { 563 return ONIG_NO_SUPPORT_CONFIG; 564 } 565 else { 566 *sb_out = 0x80; 567 568 PROPERTY_LIST_INIT_CHECK; 569 570 ctype -= (ONIGENC_MAX_STD_CTYPE + 1); 571 if (ctype >= (OnigCtype )PropertyListNum) 572 return ONIGERR_TYPE_BUG; 573 574 *ranges = PropertyList[ctype]; 575 return 0; 576 } 577} 578 579#ifndef ENC_CP932 580OnigEncodingDefine(shift_jis, Shift_JIS) = { 581 mbc_enc_len, 582 "Shift_JIS", /* name */ 583 2, /* max byte length */ 584 1, /* min byte length */ 585 onigenc_is_mbc_newline_0x0a, 586 mbc_to_code, 587 code_to_mbclen, 588 code_to_mbc, 589 mbc_case_fold, 590 apply_all_case_fold, 591 get_case_fold_codes_by_str, 592 property_name_to_ctype, 593 is_code_ctype, 594 get_ctype_code_range, 595 left_adjust_char_head, 596 is_allowed_reverse_match, 597 0, 598 ONIGENC_FLAG_NONE, 599}; 600/* 601 * Name: Shift_JIS 602 * MIBenum: 17 603 * Link: http://www.iana.org/assignments/character-sets 604 * Link: http://ja.wikipedia.org/wiki/Shift_JIS 605 */ 606 607/* 608 * Name: MacJapanese 609 * Link: http://unicode.org/Public/MAPPINGS/VENDORS/APPLE/JAPANESE.TXT 610 * Link: http://ja.wikipedia.org/wiki/MacJapanese 611 */ 612ENC_REPLICATE("MacJapanese", "Shift_JIS") 613ENC_ALIAS("MacJapan", "MacJapanese") 614#endif 615