1120492Sfjoe/*-
2120492Sfjoe * Copyright (c) 2003 Ryuichiro Imura
3120492Sfjoe * All rights reserved.
4120492Sfjoe *
5120492Sfjoe * Redistribution and use in source and binary forms, with or without
6120492Sfjoe * modification, are permitted provided that the following conditions
7120492Sfjoe * are met:
8120492Sfjoe * 1. Redistributions of source code must retain the above copyright
9120492Sfjoe *    notice, this list of conditions and the following disclaimer.
10120492Sfjoe * 2. Redistributions in binary form must reproduce the above copyright
11120492Sfjoe *    notice, this list of conditions and the following disclaimer in the
12120492Sfjoe *    documentation and/or other materials provided with the distribution.
13120492Sfjoe *
14120492Sfjoe * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15120492Sfjoe * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16120492Sfjoe * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17120492Sfjoe * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18120492Sfjoe * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19120492Sfjoe * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20120492Sfjoe * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21120492Sfjoe * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22120492Sfjoe * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23120492Sfjoe * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24120492Sfjoe * SUCH DAMAGE.
25120492Sfjoe *
26120492Sfjoe * $FreeBSD$
27120492Sfjoe */
28120492Sfjoe
29120492Sfjoe/*
 * kiconv(3) requires shared linking, so the quirk support is only built
 * for PIC objects; this reduces the module size when statically linked.
32120492Sfjoe */
33120492Sfjoe
34120492Sfjoe#ifdef PIC
35120492Sfjoe
36120492Sfjoe/*
37120492Sfjoe * Why do we need quirks?
 * Since each vendor has its own Unicode mapping rules,
39120492Sfjoe * we need some quirks until iconv(3) supports them.
40120492Sfjoe * We can define Microsoft mappings here.
41120492Sfjoe *
 * For example, the eucJP and Unicode mapping rule is based on
 * the JIS standard. Since Microsoft uses cp932 for Unicode mapping,
 * which is not truly based on the JIS standard, reading a file
 * system created by the Microsoft Windows family using the eucJP/Unicode
 * mapping rule will cause a problem. That's why we define eucJP-ms here.
 * The eucJP-ms has been defined by The Open Group Japan Vendor Council.
48120492Sfjoe *
 * Well, Apple Mac OS also has its own Unicode mappings,
 * but we won't require these quirks here, because HFS doesn't have
 * Unicode and HFS+ has decomposed Unicode which cannot be
 * handled by this xlat16 converter.
53120492Sfjoe */
54120492Sfjoe
55120492Sfjoe#include <sys/types.h>
56120492Sfjoe#include <sys/iconv.h>
57120492Sfjoe
58120492Sfjoe#include <stdio.h>
59120492Sfjoe#include <string.h>
60120492Sfjoe
61120492Sfjoe#include "quirks.h"
62120492Sfjoe
63120492Sfjoe/*
64120492Sfjoe * All lists of quirk character set
65120492Sfjoe */
66120492Sfjoestatic struct {
67120492Sfjoe	int vendor; /* reserved for non MS mapping */
68120492Sfjoe	const char *base_codeset, *quirk_codeset;
69120492Sfjoe} quirk_list[] = {
70120492Sfjoe	{ KICONV_VENDOR_MICSFT,	"eucJP", "eucJP-ms" },
71120492Sfjoe	{ KICONV_VENDOR_MICSFT,	"EUC-JP", "eucJP-ms" },
72120492Sfjoe	{ KICONV_VENDOR_MICSFT,	"SJIS", "SJIS-ms" },
73120492Sfjoe	{ KICONV_VENDOR_MICSFT,	"Shift_JIS", "SJIS-ms" },
74120492Sfjoe	{ KICONV_VENDOR_MICSFT,	"Big5", "Big5-ms" }
75120492Sfjoe};
76120492Sfjoe
/*
 * The character list to replace for Japanese MS-Windows.
 *
 * Each entry pairs two code points; the trailing comment names the two
 * characters in initializer order.  Which member of
 * struct quirk_replace_list (standard_code / vendor_code) each column
 * initializes is fixed by the declaration in quirks.h -- confirm there
 * before reordering or adding entries.
 */
static struct quirk_replace_list quirk_jis_cp932[] = {
	{ 0x00a2, 0xffe0 }, /* Cent Sign, Fullwidth Cent Sign */
	{ 0x00a3, 0xffe1 }, /* Pound Sign, Fullwidth Pound Sign */
	{ 0x00ac, 0xffe2 }, /* Not Sign, Fullwidth Not Sign */
	{ 0x2016, 0x2225 }, /* Double Vertical Line, Parallel To */
	{ 0x203e, 0x007e }, /* Overline, Tilde */
	{ 0x2212, 0xff0d }, /* Minus Sign, Fullwidth Hyphen-Minus */
	{ 0x301c, 0xff5e }  /* Wave Dash, Fullwidth Tilde */
};
89120492Sfjoe
90120492Sfjoe/*
91120492Sfjoe * All entries of quirks
92120492Sfjoe */
93120492Sfjoe#define	NumOf(n)	(sizeof((n)) / sizeof((n)[0]))
94120492Sfjoestatic struct {
95120492Sfjoe	const char *quirk_codeset, *iconv_codeset, *pair_codeset;
96120492Sfjoe	struct quirk_replace_list (*replace_list)[];
97120492Sfjoe	size_t num_of_replaces;
98120492Sfjoe} quirk_table[] = {
99120492Sfjoe	{
100120492Sfjoe		"eucJP-ms", "eucJP", ENCODING_UNICODE,
101120492Sfjoe		(struct quirk_replace_list (*)[])&quirk_jis_cp932,
102120492Sfjoe		NumOf(quirk_jis_cp932)
103120492Sfjoe	},
104120492Sfjoe	{
105120492Sfjoe		"SJIS-ms", "CP932", ENCODING_UNICODE,
106120492Sfjoe		/* XXX - quirk_replace_list should be NULL */
107120492Sfjoe		(struct quirk_replace_list (*)[])&quirk_jis_cp932,
108120492Sfjoe		NumOf(quirk_jis_cp932)
109120492Sfjoe	},
110120492Sfjoe	{
111120492Sfjoe		"Big5-ms", "CP950", ENCODING_UNICODE,
112120492Sfjoe		NULL, 0
113120492Sfjoe	}
114120492Sfjoe};
115120492Sfjoe
116120492Sfjoe
117120492Sfjoeconst char *
118120492Sfjoekiconv_quirkcs(const char* base, int vendor)
119120492Sfjoe{
120120492Sfjoe	size_t i;
121120492Sfjoe
122120492Sfjoe	/*
123120492Sfjoe	 * We should compare codeset names ignoring case here,
124120492Sfjoe	 * so that quirk could be used for all of the user input
125120492Sfjoe	 * patterns.
126120492Sfjoe	 */
127120492Sfjoe	for (i = 0; i < NumOf(quirk_list); i++)
128120492Sfjoe		if (quirk_list[i].vendor == vendor &&
129120492Sfjoe		    strcasecmp(quirk_list[i].base_codeset, base) == 0)
130120492Sfjoe			return (quirk_list[i].quirk_codeset);
131120492Sfjoe
132120492Sfjoe	return (base);
133120492Sfjoe}
134120492Sfjoe
135120492Sfjoe/*
136120492Sfjoe * Internal Functions
137120492Sfjoe */
138120492Sfjoeconst char *
139120492Sfjoesearch_quirk(const char *given_codeset,
140120492Sfjoe	     const char *pair_codeset,
141120492Sfjoe	     struct quirk_replace_list **replace_list,
142120492Sfjoe	     size_t *num_of_replaces)
143120492Sfjoe{
144120492Sfjoe	size_t i;
145120492Sfjoe
146120492Sfjoe	*replace_list = NULL;
147120492Sfjoe	*num_of_replaces = 0;
148120492Sfjoe	for (i = 0; i < NumOf(quirk_table); i++)
149120492Sfjoe		if (strcmp(quirk_table[i].quirk_codeset, given_codeset) == 0) {
150120492Sfjoe			if (strcmp(quirk_table[i].pair_codeset, pair_codeset) == 0) {
151120492Sfjoe				*replace_list = *quirk_table[i].replace_list;
152120492Sfjoe				*num_of_replaces = quirk_table[i].num_of_replaces;
153120492Sfjoe			}
154120492Sfjoe			return (quirk_table[i].iconv_codeset);
155120492Sfjoe		}
156120492Sfjoe
157120492Sfjoe	return (given_codeset);
158120492Sfjoe}
159120492Sfjoe
160120492Sfjoeuint16_t
161120492Sfjoequirk_vendor2unix(uint16_t c, struct quirk_replace_list *replace_list, size_t num)
162120492Sfjoe{
163120492Sfjoe	size_t i;
164120492Sfjoe
165120492Sfjoe	for (i = 0; i < num; i++)
166120492Sfjoe		if (replace_list[i].vendor_code == c)
167120492Sfjoe			return (replace_list[i].standard_code);
168120492Sfjoe
169120492Sfjoe	return (c);
170120492Sfjoe}
171120492Sfjoe
172120492Sfjoeuint16_t
173120492Sfjoequirk_unix2vendor(uint16_t c, struct quirk_replace_list *replace_list, size_t num)
174120492Sfjoe{
175120492Sfjoe	size_t i;
176120492Sfjoe
177120492Sfjoe	for (i = 0; i < num; i++)
178120492Sfjoe		if (replace_list[i].standard_code == c)
179120492Sfjoe			return (replace_list[i].vendor_code);
180120492Sfjoe
181120492Sfjoe	return (c);
182120492Sfjoe}
183120492Sfjoe
184120492Sfjoe#else /* statically linked */
185120492Sfjoe
186194637Sdelphij#include <sys/types.h>
187194637Sdelphij#include <sys/iconv.h>
188194637Sdelphij
189120492Sfjoeconst char *
190194637Sdelphijkiconv_quirkcs(const char* base __unused, int vendor __unused)
191120492Sfjoe{
192194637Sdelphij
193120492Sfjoe	return (base);
194120492Sfjoe}
195120492Sfjoe
196120492Sfjoe#endif /* PIC */
197