Deleted Added
sdiff udiff text old ( 252583 ) new ( 260003 )
full compact
1/* $FreeBSD: head/lib/libiconv_modules/MSKanji/citrus_mskanji.c 252583 2013-07-03 18:27:45Z peter $ */
2/* $NetBSD: citrus_mskanji.c,v 1.13 2008/06/14 16:01:08 tnozaki Exp $ */
3
4/*-
5 * Copyright (c)2002 Citrus Project,
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30/*
31 * ja_JP.SJIS locale table for BSD4.4/rune
32 * version 1.0
33 * (C) Sin'ichiro MIYATANI / Phase One, Inc
34 * May 12, 1995
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by Phase One, Inc.
47 * 4. The name of Phase One, Inc. may be used to endorse or promote products
48 * derived from this software without specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63
64#include <sys/cdefs.h>
65#include <sys/types.h>
66
67#include <assert.h>
68#include <errno.h>
69#include <limits.h>
70#include <stdbool.h>
71#include <stddef.h>
72#include <stdio.h>
73#include <stdlib.h>
74#include <string.h>
75#include <wchar.h>
76
77#include "citrus_namespace.h"
78#include "citrus_types.h"
79#include "citrus_bcs.h"
80#include "citrus_module.h"
81#include "citrus_stdenc.h"
82#include "citrus_mskanji.h"
83
84
85/* ----------------------------------------------------------------------
86 * private stuff used by templates
87 */
88
/*
 * Conversion state: the bytes of a partially read character.
 * chlen counts how many entries of ch[] are currently filled; 0 means
 * no input is pending.
 */
struct _MSKanjiState {
	int	chlen;
	char	ch[2];
};
typedef struct _MSKanjiState _MSKanjiState;
93
/* Flag bit stored in _MSKanjiEncodingInfo.mode. */
#define MODE_JIS2004	1

/* Per-encoding settings; mode is a bit set of MODE_* flags. */
typedef struct {
	int	mode;
} _MSKanjiEncodingInfo;
98
/* Names and types this module hands to the shared template code. */
#define _FUNCNAME(m)			_citrus_MSKanji_##m
#define _ENCODING_INFO			_MSKanjiEncodingInfo
#define _ENCODING_STATE			_MSKanjiState

/* Encoding properties: at most two bytes per character, no shift states. */
#define _ENCODING_MB_CUR_MAX(_ei_)		2
#define _ENCODING_IS_STATE_DEPENDENT		0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0

/* Accessors for the members of a "cei" container object. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_
108
109
/*
 * Return true when c can be the first byte of a two-byte character:
 * 0x81..0x9f or 0xe0..0xfc.
 */
static bool
_mskanji1(int c)
{

	if (c < 0x81 || c > 0xfc)
		return (false);
	/* Within 0x81..0xfc only the 0xa0..0xdf gap is excluded. */
	return (c <= 0x9f || c >= 0xe0);
}
116
/*
 * Return true when c can be the second byte of a two-byte character:
 * 0x40..0xfc with 0x7f excluded.
 */
static bool
_mskanji2(int c)
{

	return (c >= 0x40 && c <= 0xfc && c != 0x7f);
}
123
124static __inline void
125/*ARGSUSED*/
126_citrus_MSKanji_init_state(_MSKanjiEncodingInfo * __restrict ei __unused,
127 _MSKanjiState * __restrict s)
128{
129
130 s->chlen = 0;
131}
132
133static __inline void
134/*ARGSUSED*/
135_citrus_MSKanji_pack_state(_MSKanjiEncodingInfo * __restrict ei __unused,
136 void * __restrict pspriv, const _MSKanjiState * __restrict s)
137{
138
139 memcpy(pspriv, (const void *)s, sizeof(*s));
140}
141
142static __inline void
143/*ARGSUSED*/
144_citrus_MSKanji_unpack_state(_MSKanjiEncodingInfo * __restrict ei __unused,
145 _MSKanjiState * __restrict s, const void * __restrict pspriv)
146{
147
148 memcpy((void *)s, pspriv, sizeof(*s));
149}
150
151static int
152/*ARGSUSED*/
153_citrus_MSKanji_mbrtowc_priv(_MSKanjiEncodingInfo * __restrict ei,
154 wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
155 _MSKanjiState * __restrict psenc, size_t * __restrict nresult)
156{
157 const char *s0;
158 wchar_t wchar;
159 int chlenbak, len;
160
161 s0 = *s;
162
163 if (s0 == NULL) {
164 _citrus_MSKanji_init_state(ei, psenc);
165 *nresult = 0; /* state independent */
166 return (0);
167 }
168
169 chlenbak = psenc->chlen;
170
171 /* make sure we have the first byte in the buffer */
172 switch (psenc->chlen) {
173 case 0:
174 if (n < 1)
175 goto restart;
176 psenc->ch[0] = *s0++;
177 psenc->chlen = 1;
178 n--;
179 break;
180 case 1:
181 break;
182 default:
183 /* illegal state */
184 goto encoding_error;
185 }
186
187 len = _mskanji1(psenc->ch[0] & 0xff) ? 2 : 1;
188 while (psenc->chlen < len) {
189 if (n < 1)
190 goto restart;
191 psenc->ch[psenc->chlen] = *s0++;
192 psenc->chlen++;
193 n--;
194 }
195
196 *s = s0;
197
198 switch (len) {
199 case 1:
200 wchar = psenc->ch[0] & 0xff;
201 break;
202 case 2:
203 if (!_mskanji2(psenc->ch[1] & 0xff))
204 goto encoding_error;
205 wchar = ((psenc->ch[0] & 0xff) << 8) | (psenc->ch[1] & 0xff);
206 break;
207 default:
208 /* illegal state */
209 goto encoding_error;
210 }
211
212 psenc->chlen = 0;
213
214 if (pwc)
215 *pwc = wchar;
216 *nresult = wchar ? len - chlenbak : 0;
217 return (0);
218
219encoding_error:
220 psenc->chlen = 0;
221 *nresult = (size_t)-1;
222 return (EILSEQ);
223
224restart:
225 *nresult = (size_t)-2;
226 *s = s0;
227 return (0);
228}
229
230
231static int
232_citrus_MSKanji_wcrtomb_priv(_MSKanjiEncodingInfo * __restrict ei __unused,
233 char * __restrict s, size_t n, wchar_t wc,
234 _MSKanjiState * __restrict psenc __unused, size_t * __restrict nresult)
235{
236 int ret;
237
238 /* check invalid sequence */
239 if (wc & ~0xffff) {
240 ret = EILSEQ;
241 goto err;
242 }
243
244 if (wc & 0xff00) {
245 if (n < 2) {
246 ret = E2BIG;
247 goto err;
248 }
249
250 s[0] = (wc >> 8) & 0xff;
251 s[1] = wc & 0xff;
252 if (!_mskanji1(s[0] & 0xff) || !_mskanji2(s[1] & 0xff)) {
253 ret = EILSEQ;
254 goto err;
255 }
256
257 *nresult = 2;
258 return (0);
259 } else {
260 if (n < 1) {
261 ret = E2BIG;
262 goto err;
263 }
264
265 s[0] = wc & 0xff;
266 if (_mskanji1(s[0] & 0xff)) {
267 ret = EILSEQ;
268 goto err;
269 }
270
271 *nresult = 1;
272 return (0);
273 }
274
275err:
276 *nresult = (size_t)-1;
277 return (ret);
278}
279
280
281static __inline int
282/*ARGSUSED*/
283_citrus_MSKanji_stdenc_wctocs(_MSKanjiEncodingInfo * __restrict ei,
284 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
285{
286 _index_t col, row;
287 int offset;
288
289 if ((_wc_t)wc < 0x80) {
290 /* ISO-646 */
291 *csid = 0;
292 *idx = (_index_t)wc;
293 } else if ((_wc_t)wc < 0x100) {
294 /* KANA */
295 *csid = 1;
296 *idx = (_index_t)wc & 0x7F;
297 } else {
298 /* Kanji (containing Gaiji zone) */
299 /*
300 * 94^2 zone (contains a part of Gaiji (0xED40 - 0xEEFC)):
301 * 0x8140 - 0x817E -> 0x2121 - 0x215F
302 * 0x8180 - 0x819E -> 0x2160 - 0x217E
303 * 0x819F - 0x81FC -> 0x2221 - 0x227E
304 *
305 * 0x8240 - 0x827E -> 0x2321 - 0x235F
306 * ...
307 * 0x9F9F - 0x9FFc -> 0x5E21 - 0x5E7E
308 *
309 * 0xE040 - 0xE07E -> 0x5F21 - 0x5F5F
310 * ...
311 * 0xEF9F - 0xEFFC -> 0x7E21 - 0x7E7E
312 *
313 * extended Gaiji zone:
314 * 0xF040 - 0xFCFC
315 *
316 * JIS X0213-plane2:
317 * 0xF040 - 0xF09E -> 0x2121 - 0x217E
318 * 0xF140 - 0xF19E -> 0x2321 - 0x237E
319 * ...
320 * 0xF240 - 0xF29E -> 0x2521 - 0x257E
321 *
322 * 0xF09F - 0xF0FC -> 0x2821 - 0x287E
323 * 0xF29F - 0xF2FC -> 0x2C21 - 0x2C7E
324 * ...
325 * 0xF44F - 0xF49E -> 0x2F21 - 0x2F7E
326 *
327 * 0xF49F - 0xF4FC -> 0x6E21 - 0x6E7E
328 * ...
329 * 0xFC9F - 0xFCFC -> 0x7E21 - 0x7E7E
330 */
331 row = ((_wc_t)wc >> 8) & 0xFF;
332 col = (_wc_t)wc & 0xFF;
333 if (!_mskanji1(row) || !_mskanji2(col))
334 return (EILSEQ);
335 if ((ei->mode & MODE_JIS2004) == 0 || row < 0xF0) {
336 *csid = 2;
337 offset = 0x81;
338 } else {
339 *csid = 3;
340 if ((_wc_t)wc <= 0xF49E) {
341 offset = (_wc_t)wc >= 0xF29F ||
342 ((_wc_t)wc >= 0xF09F &&
343 (_wc_t)wc <= 0xF0FC) ? 0xED : 0xF0;
344 } else
345 offset = 0xCE;
346 }
347 row -= offset;
348 if (row >= 0x5F)
349 row -= 0x40;
350 row = row * 2 + 0x21;
351 col -= 0x1F;
352 if (col >= 0x61)
353 col -= 1;
354 if (col > 0x7E) {
355 row += 1;
356 col -= 0x5E;
357 }
358 *idx = ((_index_t)row << 8) | col;
359 }
360
361 return (0);
362}
363
364static __inline int
365/*ARGSUSED*/
366_citrus_MSKanji_stdenc_cstowc(_MSKanjiEncodingInfo * __restrict ei,
367 wchar_t * __restrict wc, _csid_t csid, _index_t idx)
368{
369 uint32_t col, row;
370 int offset;
371
372 switch (csid) {
373 case 0:
374 /* ISO-646 */
375 if (idx >= 0x80)
376 return (EILSEQ);
377 *wc = (wchar_t)idx;
378 break;
379 case 1:
380 /* kana */
381 if (idx >= 0x80)
382 return (EILSEQ);
383 *wc = (wchar_t)idx + 0x80;
384 break;
385 case 3:
386 if ((ei->mode & MODE_JIS2004) == 0)
387 return (EILSEQ);
388 /*FALLTHROUGH*/
389 case 2:
390 /* kanji */
391 row = (idx >> 8);
392 if (row < 0x21)
393 return (EILSEQ);
394 if (csid == 3) {
395 if (row <= 0x2F)
396 offset = (row == 0x22 || row >= 0x26) ?
397 0xED : 0xF0;
398 else if (row >= 0x4D && row <= 0x7E)
399 offset = 0xCE;
400 else
401 return (EILSEQ);
402 } else {
403 if (row > 0x97)
404 return (EILSEQ);
405 offset = (row < 0x5F) ? 0x81 : 0xC1;
406 }
407 col = idx & 0xFF;
408 if (col < 0x21 || col > 0x7E)
409 return (EILSEQ);
410 row -= 0x21; col -= 0x21;
411 if ((row & 1) == 0) {
412 col += 0x40;
413 if (col >= 0x7F)
414 col += 1;
415 } else
416 col += 0x9F;
417 row = row / 2 + offset;
418 *wc = ((wchar_t)row << 8) | col;
419 break;
420 default:
421 return (EILSEQ);
422 }
423
424 return (0);
425}
426
427static __inline int
428/*ARGSUSED*/
429_citrus_MSKanji_stdenc_get_state_desc_generic(_MSKanjiEncodingInfo * __restrict ei __unused,
430 _MSKanjiState * __restrict psenc, int * __restrict rstate)
431{
432
433 *rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL :
434 _STDENC_SDGEN_INCOMPLETE_CHAR;
435 return (0);
436}
437
438static int
439/*ARGSUSED*/
440_citrus_MSKanji_encoding_module_init(_MSKanjiEncodingInfo * __restrict ei,
441 const void * __restrict var, size_t lenvar)
442{
443 const char *p;
444
445 p = var;
446 memset((void *)ei, 0, sizeof(*ei));
447 while (lenvar > 0) {
448 switch (_bcs_toupper(*p)) {
449 case 'J':
450 MATCH(JIS2004, ei->mode |= MODE_JIS2004);
451 break;
452 }
453 ++p;
454 --lenvar;
455 }
456
457 return (0);
458}
459
460static void
461_citrus_MSKanji_encoding_module_uninit(_MSKanjiEncodingInfo *ei __unused)
462{
463
464}
465
466/* ----------------------------------------------------------------------
467 * public interface for stdenc
468 */
469
/*
 * Both macros are defined outside this file.  Presumably they declare
 * the stdenc entry points for MSKanji and define its operations table
 * -- confirm against citrus_stdenc.h.
 */
_CITRUS_STDENC_DECLS(MSKanji);
_CITRUS_STDENC_DEF_OPS(MSKanji);
472
473#include "citrus_stdenc_template.h"