1/**********************************************************************
2  gb18030.c -  Oniguruma (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2005-2007  KUBO Takehiro <kubo AT jiubao DOT org>
6 *                          K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regenc.h"
32
/*
 * Trace switch for this file.  Call sites wrap the whole printf argument
 * list in an extra pair of parentheses, e.g.
 *   DEBUG_GB18030(("state %d\n", state));
 * so that the call expands to nothing (arguments included) while tracing is
 * off.  Tracing is off by default; change the condition below to 1 to route
 * the calls to printf.
 */
#if 0
#define DEBUG_GB18030(arg) printf arg
#else
#define DEBUG_GB18030(arg)
#endif
38
/*
 * Byte classes used by the backward scanner and the reverse-match check
 * in this file.  A class says which positions of a GB18030 character a
 * byte value may occupy.
 */
enum {
  C1, /* can only be a one-byte char */
  C2, /* one-byte char, or second byte of a two-byte char */
  C4, /* one-byte char, or second or fourth byte of a four-byte char */
  CM  /* first byte of a two- or four-byte char, or second of a two-byte char */
};

/* Helpers that repeat one class over a run of consecutive byte values. */
#define MAP_RUN5(c)  c, c, c, c, c
#define MAP_RUN6(c)  MAP_RUN5(c), c
#define MAP_RUN10(c) MAP_RUN5(c), MAP_RUN5(c)
#define MAP_RUN15(c) MAP_RUN10(c), MAP_RUN5(c)
#define MAP_RUN16(c) MAP_RUN15(c), c

/* Class of every byte value 0x00..0xff, written as contiguous ranges. */
static const char GB18030_MAP[] = {
  /* 0x00-0x2f */ MAP_RUN16(C1), MAP_RUN16(C1), MAP_RUN16(C1),
  /* 0x30-0x39 */ MAP_RUN10(C4),
  /* 0x3a-0x3f */ MAP_RUN6(C1),
  /* 0x40-0x6f */ MAP_RUN16(C2), MAP_RUN16(C2), MAP_RUN16(C2),
  /* 0x70-0x7e */ MAP_RUN15(C2),
  /* 0x7f      */ C1,
  /* 0x80      */ C2,
  /* 0x81-0x8f */ MAP_RUN15(CM),
  /* 0x90-0xbf */ MAP_RUN16(CM), MAP_RUN16(CM), MAP_RUN16(CM),
  /* 0xc0-0xef */ MAP_RUN16(CM), MAP_RUN16(CM), MAP_RUN16(CM),
  /* 0xf0-0xfe */ MAP_RUN15(CM),
  /* 0xff      */ C1
};

#undef MAP_RUN5
#undef MAP_RUN6
#undef MAP_RUN10
#undef MAP_RUN15
#undef MAP_RUN16
64
/*
 * Forward automaton consulted by gb18030_mbc_enc_len.  trans[state][byte]
 * yields the next state (S1..S3), ACCEPT when the bytes consumed so far form
 * a complete character, or FAILURE when they cannot.  Row S0 is applied to
 * the first byte of a character; S1, S2 and S3 are applied to the second,
 * third and fourth byte respectively.
 */
typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1, S2, S3 } state_t;

#define A ACCEPT
#define F FAILURE
/* Helpers that repeat one table value over a run of consecutive bytes. */
#define TRANS_RUN5(v)  v, v, v, v, v
#define TRANS_RUN6(v)  TRANS_RUN5(v), v
#define TRANS_RUN10(v) TRANS_RUN5(v), TRANS_RUN5(v)
#define TRANS_RUN15(v) TRANS_RUN10(v), TRANS_RUN5(v)
#define TRANS_RUN16(v) TRANS_RUN15(v), v
#define TRANS_RUN48(v) TRANS_RUN16(v), TRANS_RUN16(v), TRANS_RUN16(v)

static const signed char trans[][0x100] = {
  { /* S0: first byte */
    /* 00-7f */ TRANS_RUN48(A), TRANS_RUN48(A), TRANS_RUN16(A), TRANS_RUN16(A),
    /* 80    */ F,
    /* 81-fe */ TRANS_RUN15(S1), TRANS_RUN48(S1), TRANS_RUN48(S1), TRANS_RUN15(S1),
    /* ff    */ F
  },
  { /* S1: second byte */
    /* 00-2f */ TRANS_RUN48(F),
    /* 30-39 */ TRANS_RUN10(S2),
    /* 3a-3f */ TRANS_RUN6(F),
    /* 40-7e */ TRANS_RUN48(A), TRANS_RUN15(A),
    /* 7f    */ F,
    /* 80-fe */ TRANS_RUN48(A), TRANS_RUN48(A), TRANS_RUN16(A), TRANS_RUN15(A),
    /* ff    */ F
  },
  { /* S2: third byte */
    /* 00-7f */ TRANS_RUN48(F), TRANS_RUN48(F), TRANS_RUN16(F), TRANS_RUN16(F),
    /* 80    */ F,
    /* 81-fe */ TRANS_RUN15(S3), TRANS_RUN48(S3), TRANS_RUN48(S3), TRANS_RUN15(S3),
    /* ff    */ F
  },
  { /* S3: fourth byte */
    /* 00-2f */ TRANS_RUN48(F),
    /* 30-39 */ TRANS_RUN10(A),
    /* 3a-3f */ TRANS_RUN6(F),
    /* 40-ff */ TRANS_RUN48(F), TRANS_RUN48(F), TRANS_RUN48(F), TRANS_RUN48(F)
  }
};

#undef TRANS_RUN5
#undef TRANS_RUN6
#undef TRANS_RUN10
#undef TRANS_RUN15
#undef TRANS_RUN16
#undef TRANS_RUN48
#undef A
#undef F
144
145static int
146gb18030_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
147{
148  int firstbyte = *p++;
149  state_t s = trans[0][firstbyte];
150#define RETURN(n) \
151    return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) : \
152                         ONIGENC_CONSTRUCT_MBCLEN_INVALID()
153  if (s < 0) RETURN(1);
154  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(2-1);
155  s = trans[s][*p++];
156  if (s < 0) RETURN(2);
157  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-2);
158  s = trans[s][*p++];
159  if (s < 0) RETURN(3);
160  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-3);
161  s = trans[s][*p++];
162  RETURN(4);
163#undef RETURN
164}
165
166static OnigCodePoint
167gb18030_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
168{
169  int c, i, len;
170  OnigCodePoint n;
171
172  len = enclen(enc, p, end);
173  n = (OnigCodePoint )(*p++);
174  if (len == 1) return n;
175
176  for (i = 1; i < len; i++) {
177    if (p >= end) break;
178    c = *p++;
179    n <<= 8;  n += c;
180  }
181  return n;
182}
183
184static int
185gb18030_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc)
186{
187  return onigenc_mb4_code_to_mbc(enc, code, buf);
188}
189
190static int
191gb18030_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
192                      UChar* lower, OnigEncoding enc)
193{
194  return onigenc_mbn_mbc_case_fold(enc, flag,
195                                   pp, end, lower);
196}
197
#if 0
/*
 * Compiled out.  When enabled this forwards to
 * onigenc_mbn_is_mbc_ambiguous() and passes its result back unchanged.
 */
static int
gb18030_is_mbc_ambiguous(OnigCaseFoldType flag,
                         const UChar** pp, const UChar* end, OnigEncoding enc)
{
  const int result = onigenc_mbn_is_mbc_ambiguous(enc, flag, pp, end);

  return result;
}
#endif
206
207static int
208gb18030_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc)
209{
210  return onigenc_mb4_is_code_ctype(enc, code, ctype);
211}
212
213enum state {
214  S_START,
215  S_one_C2,
216  S_one_C4,
217  S_one_CM,
218
219  S_odd_CM_one_CX,
220  S_even_CM_one_CX,
221
222  /* CMC4 : pair of "CM C4" */
223  S_one_CMC4,
224  S_odd_CMC4,
225  S_one_C4_odd_CMC4,
226  S_even_CMC4,
227  S_one_C4_even_CMC4,
228
229  S_odd_CM_odd_CMC4,
230  S_even_CM_odd_CMC4,
231
232  S_odd_CM_even_CMC4,
233  S_even_CM_even_CMC4,
234
235  /* C4CM : pair of "C4 CM" */
236  S_odd_C4CM,
237  S_one_CM_odd_C4CM,
238  S_even_C4CM,
239  S_one_CM_even_C4CM,
240
241  S_even_CM_odd_C4CM,
242  S_odd_CM_odd_C4CM,
243  S_even_CM_even_C4CM,
244  S_odd_CM_even_C4CM
245};
246
247static UChar*
248gb18030_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc)
249{
250  const UChar *p;
251  enum state state = S_START;
252
253  DEBUG_GB18030(("----------------\n"));
254  for (p = s; p >= start; p--) {
255    DEBUG_GB18030(("state %d --(%02x)-->\n", state, *p));
256    switch (state) {
257    case S_START:
258      switch (GB18030_MAP[*p]) {
259      case C1:
260	return (UChar *)s;
261      case C2:
262	state = S_one_C2; /* C2 */
263	break;
264      case C4:
265	state = S_one_C4; /* C4 */
266	break;
267      case CM:
268	state = S_one_CM; /* CM */
269	break;
270      }
271      break;
272    case S_one_C2: /* C2 */
273      switch (GB18030_MAP[*p]) {
274      case C1:
275      case C2:
276      case C4:
277	return (UChar *)s;
278      case CM:
279	state = S_odd_CM_one_CX; /* CM C2 */
280	break;
281      }
282      break;
283    case S_one_C4: /* C4 */
284      switch (GB18030_MAP[*p]) {
285      case C1:
286      case C2:
287      case C4:
288	return (UChar *)s;
289      case CM:
290	state = S_one_CMC4;
291	break;
292      }
293      break;
294    case S_one_CM: /* CM */
295      switch (GB18030_MAP[*p]) {
296      case C1:
297      case C2:
298	return (UChar *)s;
299      case C4:
300	state = S_odd_C4CM;
301	break;
302      case CM:
303	state = S_odd_CM_one_CX; /* CM CM */
304	break;
305      }
306      break;
307
308    case S_odd_CM_one_CX: /* CM C2 */ /* CM CM */ /* CM CM CM C4 */
309      switch (GB18030_MAP[*p]) {
310      case C1:
311      case C2:
312      case C4:
313	return (UChar *)(s - 1);
314      case CM:
315	state = S_even_CM_one_CX;
316	break;
317      }
318      break;
319    case S_even_CM_one_CX: /* CM CM C2 */ /* CM CM CM */ /* CM CM C4 */
320      switch (GB18030_MAP[*p]) {
321      case C1:
322      case C2:
323      case C4:
324	return (UChar *)s;
325      case CM:
326	state = S_odd_CM_one_CX;
327	break;
328      }
329      break;
330
331    case S_one_CMC4: /* CM C4 */
332      switch (GB18030_MAP[*p]) {
333      case C1:
334      case C2:
335	return (UChar *)(s - 1);
336      case C4:
337	state = S_one_C4_odd_CMC4; /* C4 CM C4 */
338	break;
339      case CM:
340	state = S_even_CM_one_CX; /* CM CM C4 */
341	break;
342      }
343      break;
344    case S_odd_CMC4: /* CM C4 CM C4 CM C4 */
345      switch (GB18030_MAP[*p]) {
346      case C1:
347      case C2:
348	return (UChar *)(s - 1);
349      case C4:
350	state = S_one_C4_odd_CMC4;
351	break;
352      case CM:
353	state = S_odd_CM_odd_CMC4;
354	break;
355      }
356      break;
357    case S_one_C4_odd_CMC4: /* C4 CM C4 */
358      switch (GB18030_MAP[*p]) {
359      case C1:
360      case C2:
361      case C4:
362	return (UChar *)(s - 1);
363      case CM:
364	state = S_even_CMC4; /* CM C4 CM C4 */
365	break;
366      }
367      break;
368    case S_even_CMC4: /* CM C4 CM C4 */
369      switch (GB18030_MAP[*p]) {
370      case C1:
371      case C2:
372	return (UChar *)(s - 3);
373      case C4:
374	state = S_one_C4_even_CMC4;
375	break;
376      case CM:
377	state = S_odd_CM_even_CMC4;
378	break;
379      }
380      break;
381    case S_one_C4_even_CMC4: /* C4 CM C4 CM C4 */
382      switch (GB18030_MAP[*p]) {
383      case C1:
384      case C2:
385      case C4:
386	return (UChar *)(s - 3);
387      case CM:
388	state = S_odd_CMC4;
389	break;
390      }
391      break;
392
393    case S_odd_CM_odd_CMC4: /* CM CM C4 CM C4 CM C4 */
394      switch (GB18030_MAP[*p]) {
395      case C1:
396      case C2:
397      case C4:
398	return (UChar *)(s - 3);
399      case CM:
400	state = S_even_CM_odd_CMC4;
401	break;
402      }
403      break;
404    case S_even_CM_odd_CMC4: /* CM CM CM C4 CM C4 CM C4 */
405      switch (GB18030_MAP[*p]) {
406      case C1:
407      case C2:
408      case C4:
409	return (UChar *)(s - 1);
410      case CM:
411	state = S_odd_CM_odd_CMC4;
412	break;
413      }
414      break;
415
416    case S_odd_CM_even_CMC4: /* CM CM C4 CM C4 */
417      switch (GB18030_MAP[*p]) {
418      case C1:
419      case C2:
420      case C4:
421	return (UChar *)(s - 1);
422      case CM:
423	state = S_even_CM_even_CMC4;
424	break;
425      }
426      break;
427    case S_even_CM_even_CMC4: /* CM CM CM C4 CM C4 */
428      switch (GB18030_MAP[*p]) {
429      case C1:
430      case C2:
431      case C4:
432	return (UChar *)(s - 3);
433      case CM:
434	state = S_odd_CM_even_CMC4;
435	break;
436      }
437      break;
438
439    case S_odd_C4CM: /* C4 CM */  /* C4 CM C4 CM C4 CM*/
440      switch (GB18030_MAP[*p]) {
441      case C1:
442      case C2:
443      case C4:
444	return (UChar *)s;
445      case CM:
446	state = S_one_CM_odd_C4CM; /* CM C4 CM */
447	break;
448      }
449      break;
450    case S_one_CM_odd_C4CM: /* CM C4 CM */ /* CM C4 CM C4 CM C4 CM */
451      switch (GB18030_MAP[*p]) {
452      case C1:
453      case C2:
454	return (UChar *)(s - 2); /* |CM C4 CM */
455      case C4:
456	state = S_even_C4CM;
457	break;
458      case CM:
459	state = S_even_CM_odd_C4CM;
460	break;
461      }
462      break;
463    case S_even_C4CM: /* C4 CM C4 CM */
464      switch (GB18030_MAP[*p]) {
465      case C1:
466      case C2:
467      case C4:
468	return (UChar *)(s - 2);  /* C4|CM C4 CM */
469      case CM:
470	state = S_one_CM_even_C4CM;
471	break;
472      }
473      break;
474    case S_one_CM_even_C4CM: /* CM C4 CM C4 CM */
475      switch (GB18030_MAP[*p]) {
476      case C1:
477      case C2:
478	return (UChar *)(s - 0);  /*|CM C4 CM C4|CM */
479      case C4:
480	state = S_odd_C4CM;
481	break;
482      case CM:
483	state = S_even_CM_even_C4CM;
484	break;
485      }
486      break;
487
488    case S_even_CM_odd_C4CM: /* CM CM C4 CM */
489      switch (GB18030_MAP[*p]) {
490      case C1:
491      case C2:
492      case C4:
493	return (UChar *)(s - 0); /* |CM CM|C4|CM */
494      case CM:
495	state = S_odd_CM_odd_C4CM;
496	break;
497      }
498      break;
499    case S_odd_CM_odd_C4CM: /* CM CM CM C4 CM */
500      switch (GB18030_MAP[*p]) {
501      case C1:
502      case C2:
503      case C4:
504	return (UChar *)(s - 2); /* |CM CM|CM C4 CM */
505      case CM:
506	state = S_even_CM_odd_C4CM;
507	break;
508      }
509      break;
510
511    case S_even_CM_even_C4CM: /* CM CM C4 CM C4 CM */
512      switch (GB18030_MAP[*p]) {
513      case C1:
514      case C2:
515      case C4:
516	return (UChar *)(s - 2); /* |CM CM|C4|CM C4 CM */
517      case CM:
518	state = S_odd_CM_even_C4CM;
519	break;
520      }
521      break;
522    case S_odd_CM_even_C4CM: /* CM CM CM C4 CM C4 CM */
523      switch (GB18030_MAP[*p]) {
524      case C1:
525      case C2:
526      case C4:
527	return (UChar *)(s - 0);  /* |CM CM|CM C4 CM C4|CM */
528      case CM:
529	state = S_even_CM_even_C4CM;
530	break;
531      }
532      break;
533    }
534  }
535
536  DEBUG_GB18030(("state %d\n", state));
537  switch (state) {
538  case S_START:             return (UChar *)(s - 0);
539  case S_one_C2:            return (UChar *)(s - 0);
540  case S_one_C4:            return (UChar *)(s - 0);
541  case S_one_CM:            return (UChar *)(s - 0);
542
543  case S_odd_CM_one_CX:     return (UChar *)(s - 1);
544  case S_even_CM_one_CX:    return (UChar *)(s - 0);
545
546  case S_one_CMC4:          return (UChar *)(s - 1);
547  case S_odd_CMC4:          return (UChar *)(s - 1);
548  case S_one_C4_odd_CMC4:   return (UChar *)(s - 1);
549  case S_even_CMC4:         return (UChar *)(s - 3);
550  case S_one_C4_even_CMC4:  return (UChar *)(s - 3);
551
552  case S_odd_CM_odd_CMC4:   return (UChar *)(s - 3);
553  case S_even_CM_odd_CMC4:  return (UChar *)(s - 1);
554
555  case S_odd_CM_even_CMC4:  return (UChar *)(s - 1);
556  case S_even_CM_even_CMC4: return (UChar *)(s - 3);
557
558  case S_odd_C4CM:          return (UChar *)(s - 0);
559  case S_one_CM_odd_C4CM:   return (UChar *)(s - 2);
560  case S_even_C4CM:         return (UChar *)(s - 2);
561  case S_one_CM_even_C4CM:  return (UChar *)(s - 0);
562
563  case S_even_CM_odd_C4CM:  return (UChar *)(s - 0);
564  case S_odd_CM_odd_C4CM:   return (UChar *)(s - 2);
565  case S_even_CM_even_C4CM: return (UChar *)(s - 2);
566  case S_odd_CM_even_C4CM:  return (UChar *)(s - 0);
567  }
568
569  return (UChar* )s;  /* never come here. (escape warning) */
570}
571
572static int
573gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
574{
575  return GB18030_MAP[*s] == C1 ? TRUE : FALSE;
576}
577
578/*
579 * Name: GB18030
580 * MIBenum: 114
581 * Link: http://www.iana.org/assignments/charset-reg/GB18030
582 */
583OnigEncodingDefine(gb18030, GB18030) = {
584  gb18030_mbc_enc_len,
585  "GB18030",   /* name */
586  4,          /* max enc length */
587  1,          /* min enc length */
588  onigenc_is_mbc_newline_0x0a,
589  gb18030_mbc_to_code,
590  onigenc_mb4_code_to_mbclen,
591  gb18030_code_to_mbc,
592  gb18030_mbc_case_fold,
593  onigenc_ascii_apply_all_case_fold,
594  onigenc_ascii_get_case_fold_codes_by_str,
595  onigenc_minimum_property_name_to_ctype,
596  gb18030_is_code_ctype,
597  onigenc_not_support_get_ctype_code_range,
598  gb18030_left_adjust_char_head,
599  gb18030_is_allowed_reverse_match,
600  0,
601  ONIGENC_FLAG_NONE,
602};
603
604