quirks.c revision 120492
155682Smarkm/*-
2178825Sdfr * Copyright (c) 2003 Ryuichiro Imura
355682Smarkm * All rights reserved.
455682Smarkm *
555682Smarkm * Redistribution and use in source and binary forms, with or without
655682Smarkm * modification, are permitted provided that the following conditions
755682Smarkm * are met:
855682Smarkm * 1. Redistributions of source code must retain the above copyright
955682Smarkm *    notice, this list of conditions and the following disclaimer.
1055682Smarkm * 2. Redistributions in binary form must reproduce the above copyright
1155682Smarkm *    notice, this list of conditions and the following disclaimer in the
1255682Smarkm *    documentation and/or other materials provided with the distribution.
1355682Smarkm *
1455682Smarkm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1555682Smarkm * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1655682Smarkm * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1755682Smarkm * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1855682Smarkm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1955682Smarkm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2055682Smarkm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2155682Smarkm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2255682Smarkm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2355682Smarkm * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2455682Smarkm * SUCH DAMAGE.
2555682Smarkm *
2655682Smarkm * $FreeBSD: head/lib/libkiconv/quirks.c 120492 2003-09-26 20:26:25Z fjoe $
2755682Smarkm */
2855682Smarkm
2955682Smarkm/*
 * kiconv(3) requires shared (dynamic) linking.  To reduce the module size,
 * only a stub is built when statically linked.
3255682Smarkm */
3355682Smarkm
3455682Smarkm#ifdef PIC
35178825Sdfr
36178825Sdfr/*
3755682Smarkm * Why do we need quirks?
 * Since each vendor has its own Unicode mapping rules,
 * we need some quirks until iconv(3) supports them.
 * We can define Microsoft mappings here.
41178825Sdfr *
 * For example, the eucJP and Unicode mapping rule is based on
 * the JIS standard. Since Microsoft uses cp932 for Unicode mapping,
 * which is not truly based on the JIS standard, reading a file
 * system created by the Microsoft Windows family using the eucJP/Unicode
 * mapping rule will cause a problem. That's why we define eucJP-ms here.
 * The eucJP-ms has been defined by The Open Group Japan Vendor Council.
4855682Smarkm *
 * Well, Apple Mac OS also has its own Unicode mappings,
 * but we won't require these quirks here, because HFS doesn't have
 * Unicode and HFS+ has decomposed Unicode which cannot be
 * handled by this xlat16 converter.
53102644Snectar */
5457416Smarkm
5557416Smarkm#include <sys/types.h>
5655682Smarkm#include <sys/iconv.h>
5755682Smarkm
5857416Smarkm#include <stdio.h>
5955682Smarkm#include <string.h>
60102644Snectar
6155682Smarkm#include "quirks.h"
6255682Smarkm
63102644Snectar/*
6455682Smarkm * All lists of quirk character set
6555682Smarkm */
6655682Smarkmstatic struct {
6755682Smarkm	int vendor; /* reserved for non MS mapping */
6855682Smarkm	const char *base_codeset, *quirk_codeset;
6955682Smarkm} quirk_list[] = {
7055682Smarkm	{ KICONV_VENDOR_MICSFT,	"eucJP", "eucJP-ms" },
7155682Smarkm	{ KICONV_VENDOR_MICSFT,	"EUC-JP", "eucJP-ms" },
7290926Snectar	{ KICONV_VENDOR_MICSFT,	"SJIS", "SJIS-ms" },
7355682Smarkm	{ KICONV_VENDOR_MICSFT,	"Shift_JIS", "SJIS-ms" },
74102644Snectar	{ KICONV_VENDOR_MICSFT,	"Big5", "Big5-ms" }
75102644Snectar};
7655682Smarkm
7755682Smarkm/*
7855682Smarkm * The character list to replace for Japanese MS-Windows.
7990926Snectar */
8090926Snectarstatic struct quirk_replace_list quirk_jis_cp932[] = {
8155682Smarkm	{ 0x00a2, 0xffe0 }, /* Cent Sign, Fullwidth Cent Sign */
8255682Smarkm	{ 0x00a3, 0xffe1 }, /* Pound Sign, Fullwidth Pound Sign */
8355682Smarkm	{ 0x00ac, 0xffe2 }, /* Not Sign, Fullwidth Not Sign */
8455682Smarkm	{ 0x2016, 0x2225 }, /* Double Vertical Line, Parallel To */
8555682Smarkm	{ 0x203e, 0x007e }, /* Overline, Tilde */
8678527Sassar	{ 0x2212, 0xff0d }, /* Minus Sign, Fullwidth Hyphenminus */
8778527Sassar	{ 0x301c, 0xff5e }  /* Wave Dash, Fullwidth Tilde */
8878527Sassar};
8978527Sassar
9055682Smarkm/*
9155682Smarkm * All entries of quirks
9255682Smarkm */
9355682Smarkm#define	NumOf(n)	(sizeof((n)) / sizeof((n)[0]))
9455682Smarkmstatic struct {
9555682Smarkm	const char *quirk_codeset, *iconv_codeset, *pair_codeset;
9655682Smarkm	struct quirk_replace_list (*replace_list)[];
9755682Smarkm	size_t num_of_replaces;
9855682Smarkm} quirk_table[] = {
9955682Smarkm	{
10090926Snectar		"eucJP-ms", "eucJP", ENCODING_UNICODE,
101120945Snectar		(struct quirk_replace_list (*)[])&quirk_jis_cp932,
10278527Sassar		NumOf(quirk_jis_cp932)
10378527Sassar	},
10490926Snectar	{
10555682Smarkm		"SJIS-ms", "CP932", ENCODING_UNICODE,
10655682Smarkm		/* XXX - quirk_replace_list should be NULL */
10755682Smarkm		(struct quirk_replace_list (*)[])&quirk_jis_cp932,
10855682Smarkm		NumOf(quirk_jis_cp932)
10955682Smarkm	},
11055682Smarkm	{
11155682Smarkm		"Big5-ms", "CP950", ENCODING_UNICODE,
11255682Smarkm		NULL, 0
11390926Snectar	}
11455682Smarkm};
11590926Snectar
11690926Snectar
11790926Snectarconst char *
11890926Snectarkiconv_quirkcs(const char* base, int vendor)
11955682Smarkm{
12055682Smarkm	size_t i;
12155682Smarkm
12255682Smarkm	/*
123120945Snectar	 * We should compare codeset names ignoring case here,
124120945Snectar	 * so that quirk could be used for all of the user input
12555682Smarkm	 * patterns.
126120945Snectar	 */
127120945Snectar	for (i = 0; i < NumOf(quirk_list); i++)
128120945Snectar		if (quirk_list[i].vendor == vendor &&
129120945Snectar		    strcasecmp(quirk_list[i].base_codeset, base) == 0)
130120945Snectar			return (quirk_list[i].quirk_codeset);
131120945Snectar
132120945Snectar	return (base);
133120945Snectar}
134120945Snectar
135120945Snectar/*
13655682Smarkm * Internal Functions
137120945Snectar */
13855682Smarkmconst char *
13955682Smarkmsearch_quirk(const char *given_codeset,
14055682Smarkm	     const char *pair_codeset,
14155682Smarkm	     struct quirk_replace_list **replace_list,
14255682Smarkm	     size_t *num_of_replaces)
14355682Smarkm{
144120945Snectar	size_t i;
145120945Snectar
146120945Snectar	*replace_list = NULL;
14755682Smarkm	*num_of_replaces = 0;
14855682Smarkm	for (i = 0; i < NumOf(quirk_table); i++)
149120945Snectar		if (strcmp(quirk_table[i].quirk_codeset, given_codeset) == 0) {
150120945Snectar			if (strcmp(quirk_table[i].pair_codeset, pair_codeset) == 0) {
151120945Snectar				*replace_list = *quirk_table[i].replace_list;
152120945Snectar				*num_of_replaces = quirk_table[i].num_of_replaces;
153120945Snectar			}
15455682Smarkm			return (quirk_table[i].iconv_codeset);
15590926Snectar		}
15655682Smarkm
157120945Snectar	return (given_codeset);
15890926Snectar}
15955682Smarkm
16055682Smarkmuint16_t
16155682Smarkmquirk_vendor2unix(uint16_t c, struct quirk_replace_list *replace_list, size_t num)
16255682Smarkm{
16355682Smarkm	size_t i;
16455682Smarkm
16555682Smarkm	for (i = 0; i < num; i++)
16655682Smarkm		if (replace_list[i].vendor_code == c)
167120945Snectar			return (replace_list[i].standard_code);
168120945Snectar
169120945Snectar	return (c);
17055682Smarkm}
17155682Smarkm
17255682Smarkmuint16_t
17355682Smarkmquirk_unix2vendor(uint16_t c, struct quirk_replace_list *replace_list, size_t num)
17455682Smarkm{
17555682Smarkm	size_t i;
17655682Smarkm
17755682Smarkm	for (i = 0; i < num; i++)
17855682Smarkm		if (replace_list[i].standard_code == c)
17955682Smarkm			return (replace_list[i].vendor_code);
18055682Smarkm
18155682Smarkm	return (c);
18255682Smarkm}
18355682Smarkm
18455682Smarkm#else /* statically linked */
18555682Smarkm
/*
 * Statically linked stub: no quirk tables are compiled in, so the base
 * codeset name is always returned unchanged, whatever the vendor.
 */
const char *
kiconv_quirkcs(const char *base, int vendor)
{
	/* Unused in this variant; kept so the signature stays the same. */
	(void)vendor;

	return (base);
}
19155682Smarkm
19255682Smarkm#endif /* PIC */
19355682Smarkm