/* quirks.c revision 120492 */
/*-
 * Copyright (c) 2003 Ryuichiro Imura
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/lib/libkiconv/quirks.c 120492 2003-09-26 20:26:25Z fjoe $
 */

/*
 * kiconv(3) requires shared linking.  When the library is linked
 * statically, the quirk tables below are compiled out to reduce the
 * module size and only a pass-through kiconv_quirkcs() remains.
 */

#ifdef PIC

/*
 * Why do we need quirks?
 * Since each vendor has its own Unicode mapping rules,
 * we need some quirks until iconv(3) supports them.
 * We can define Microsoft mappings here.
 *
 * For example, the eucJP and Unicode mapping rule is based on
 * the JIS standard.  Since Microsoft uses cp932 for Unicode mapping,
 * which is not truly based on the JIS standard, reading a file
 * system created by the Microsoft Windows family using the eucJP/Unicode
 * mapping rule will cause a problem.  That's why we define eucJP-ms here.
 * The eucJP-ms has been defined by The Open Group Japan Vendor Council.
 *
 * Well, Apple Mac OS also has its own Unicode mappings,
 * but we won't require these quirks here, because HFS doesn't have
 * Unicode and HFS+ has decomposed Unicode which can not be
 * handled by this xlat16 converter.
 */

#include <sys/types.h>
#include <sys/iconv.h>

#include <stdio.h>
#include <string.h>
#include <strings.h>	/* strcasecmp() */

#include "quirks.h"

/* Number of elements of a true array (not of a pointer). */
#define	NumOf(n)	(sizeof((n)) / sizeof((n)[0]))

/*
 * List of all quirk character sets: for a given vendor, maps the
 * codeset name supplied by the caller to the name of its quirk variant.
 */
static const struct {
	int vendor;	/* reserved for non MS mapping */
	const char *base_codeset, *quirk_codeset;
} quirk_list[] = {
	{ KICONV_VENDOR_MICSFT,	"eucJP",	"eucJP-ms" },
	{ KICONV_VENDOR_MICSFT,	"EUC-JP",	"eucJP-ms" },
	{ KICONV_VENDOR_MICSFT,	"SJIS",		"SJIS-ms" },
	{ KICONV_VENDOR_MICSFT,	"Shift_JIS",	"SJIS-ms" },
	{ KICONV_VENDOR_MICSFT,	"Big5",		"Big5-ms" }
};

/*
 * The character list to replace for Japanese MS-Windows.
 *
 * Deliberately not const: search_quirk() hands this array out through a
 * non-const pointer, and that signature must stay as it is.
 * NOTE(review): initializers are positional; the member order of
 * struct quirk_replace_list comes from quirks.h -- confirm it matches
 * the pairs described in the per-line comments.
 */
static struct quirk_replace_list quirk_jis_cp932[] = {
	{ 0x00a2, 0xffe0 }, /* Cent Sign, Fullwidth Cent Sign */
	{ 0x00a3, 0xffe1 }, /* Pound Sign, Fullwidth Pound Sign */
	{ 0x00ac, 0xffe2 }, /* Not Sign, Fullwidth Not Sign */
	{ 0x2016, 0x2225 }, /* Double Vertical Line, Parallel To */
	{ 0x203e, 0x007e }, /* Overline, Tilde */
	{ 0x2212, 0xff0d }, /* Minus Sign, Fullwidth Hyphenminus */
	{ 0x301c, 0xff5e }  /* Wave Dash, Fullwidth Tilde */
};

/*
 * All entries of quirks: for each quirk codeset, the codeset name to
 * hand to iconv, the partner codeset for which the replace list
 * applies, and the replace list itself (NULL/0 when there is none).
 *
 * replace_list is a plain element pointer so that a NULL entry can be
 * copied out without applying unary '*' to a null pointer.
 */
static const struct {
	const char *quirk_codeset, *iconv_codeset, *pair_codeset;
	struct quirk_replace_list *replace_list;
	size_t num_of_replaces;
} quirk_table[] = {
	{
		"eucJP-ms", "eucJP", ENCODING_UNICODE,
		quirk_jis_cp932, NumOf(quirk_jis_cp932)
	},
	{
		"SJIS-ms", "CP932", ENCODING_UNICODE,
		/* XXX - quirk_replace_list should be NULL */
		quirk_jis_cp932, NumOf(quirk_jis_cp932)
	},
	{
		"Big5-ms", "CP950", ENCODING_UNICODE,
		NULL, 0
	}
};

/*
 * Return the quirk codeset name registered for (base, vendor), or base
 * itself when no quirk is registered.  A NULL base is returned as is.
 */
const char *
kiconv_quirkcs(const char *base, int vendor)
{
	size_t i;

	/* Nothing to look up; also keeps NULL away from strcasecmp(). */
	if (base == NULL)
		return (base);

	/*
	 * We should compare codeset names ignoring case here,
	 * so that quirk could be used for all of the user input
	 * patterns.
	 */
	for (i = 0; i < NumOf(quirk_list); i++) {
		if (quirk_list[i].vendor == vendor &&
		    strcasecmp(quirk_list[i].base_codeset, base) == 0)
			return (quirk_list[i].quirk_codeset);
	}

	return (base);
}

/*
 * Internal Functions
 */

/*
 * Look up given_codeset (exact, case-sensitive match) in quirk_table.
 *
 * On a match the entry's iconv codeset name is returned; additionally,
 * if pair_codeset equals the entry's pair codeset, *replace_list and
 * *num_of_replaces receive the entry's replace list and its length.
 * In every other case *replace_list is NULL and *num_of_replaces is 0.
 * When given_codeset is not a quirk codeset it is returned unchanged.
 */
const char *
search_quirk(const char *given_codeset,
	     const char *pair_codeset,
	     struct quirk_replace_list **replace_list,
	     size_t *num_of_replaces)
{
	size_t i;

	*replace_list = NULL;
	*num_of_replaces = 0;
	for (i = 0; i < NumOf(quirk_table); i++) {
		if (strcmp(quirk_table[i].quirk_codeset, given_codeset) != 0)
			continue;
		if (strcmp(quirk_table[i].pair_codeset, pair_codeset) == 0) {
			*replace_list = quirk_table[i].replace_list;
			*num_of_replaces = quirk_table[i].num_of_replaces;
		}
		return (quirk_table[i].iconv_codeset);
	}

	return (given_codeset);
}

/*
 * Translate c from its vendor code to the standard code using the
 * first num entries of replace_list; c is returned unchanged when no
 * entry has it as vendor_code (including when num is 0).
 */
uint16_t
quirk_vendor2unix(uint16_t c, struct quirk_replace_list *replace_list, size_t num)
{
	size_t i;

	for (i = 0; i < num; i++) {
		if (replace_list[i].vendor_code == c)
			return (replace_list[i].standard_code);
	}

	return (c);
}

/*
 * Translate c from its standard code to the vendor code using the
 * first num entries of replace_list; c is returned unchanged when no
 * entry has it as standard_code (including when num is 0).
 */
uint16_t
quirk_unix2vendor(uint16_t c, struct quirk_replace_list *replace_list, size_t num)
{
	size_t i;

	for (i = 0; i < num; i++) {
		if (replace_list[i].standard_code == c)
			return (replace_list[i].vendor_code);
	}

	return (c);
}

#else /* statically linked */

/*
 * Static builds carry no quirk tables: every codeset name is passed
 * through unchanged, whatever the vendor.
 */
const char *
kiconv_quirkcs(const char *base, int vendor)
{
	(void)vendor;	/* unused without the quirk tables */
	return (base);
}

#endif /* PIC */