1120492Sfjoe/*- 2120492Sfjoe * Copyright (c) 2003 Ryuichiro Imura 3120492Sfjoe * All rights reserved. 4120492Sfjoe * 5120492Sfjoe * Redistribution and use in source and binary forms, with or without 6120492Sfjoe * modification, are permitted provided that the following conditions 7120492Sfjoe * are met: 8120492Sfjoe * 1. Redistributions of source code must retain the above copyright 9120492Sfjoe * notice, this list of conditions and the following disclaimer. 10120492Sfjoe * 2. Redistributions in binary form must reproduce the above copyright 11120492Sfjoe * notice, this list of conditions and the following disclaimer in the 12120492Sfjoe * documentation and/or other materials provided with the distribution. 13120492Sfjoe * 14120492Sfjoe * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15120492Sfjoe * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16120492Sfjoe * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17120492Sfjoe * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18120492Sfjoe * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19120492Sfjoe * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20120492Sfjoe * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21120492Sfjoe * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22120492Sfjoe * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23120492Sfjoe * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24120492Sfjoe * SUCH DAMAGE. 25120492Sfjoe * 26120492Sfjoe * $FreeBSD$ 27120492Sfjoe */ 28120492Sfjoe 29120492Sfjoe/* 30120492Sfjoe * kiconv(3) requires shared linked, and reduce module size 31120492Sfjoe * when statically linked. 32120492Sfjoe */ 33120492Sfjoe 34120492Sfjoe#ifdef PIC 35120492Sfjoe 36120492Sfjoe/* 37120492Sfjoe * Why do we need quirks? 38120492Sfjoe * Since each vendors has their own Unicode mapping rules, 39120492Sfjoe * we need some quirks until iconv(3) supports them. 40120492Sfjoe * We can define Microsoft mappings here. 41120492Sfjoe * 42120492Sfjoe * For example, the eucJP and Unocode mapping rule is based on 43120492Sfjoe * the JIS standard. Since Microsoft uses cp932 for Unicode mapping 44120492Sfjoe * witch is not truly based on the JIS standard, reading a file 45120492Sfjoe * system created by Microsoft Windows family using eucJP/Unicode 46120492Sfjoe * mapping rule will cause a problem. That's why we define eucJP-ms here. 47120492Sfjoe * The eucJP-ms has been defined by The Open Group Japan Vendor Coucil. 48120492Sfjoe * 49120492Sfjoe * Well, Apple Mac OS also has their own Unicode mappings, 50120492Sfjoe * but we won't require these quirks here, because HFS doesn't have 51120492Sfjoe * Unicode and HFS+ has decomposed Unicode which can not be 52120492Sfjoe * handled by this xlat16 converter. 53120492Sfjoe */ 54120492Sfjoe 55120492Sfjoe#include <sys/types.h> 56120492Sfjoe#include <sys/iconv.h> 57120492Sfjoe 58120492Sfjoe#include <stdio.h> 59120492Sfjoe#include <string.h> 60120492Sfjoe 61120492Sfjoe#include "quirks.h" 62120492Sfjoe 63120492Sfjoe/* 64120492Sfjoe * All lists of quirk character set 65120492Sfjoe */ 66120492Sfjoestatic struct { 67120492Sfjoe int vendor; /* reserved for non MS mapping */ 68120492Sfjoe const char *base_codeset, *quirk_codeset; 69120492Sfjoe} quirk_list[] = { 70120492Sfjoe { KICONV_VENDOR_MICSFT, "eucJP", "eucJP-ms" }, 71120492Sfjoe { KICONV_VENDOR_MICSFT, "EUC-JP", "eucJP-ms" }, 72120492Sfjoe { KICONV_VENDOR_MICSFT, "SJIS", "SJIS-ms" }, 73120492Sfjoe { KICONV_VENDOR_MICSFT, "Shift_JIS", "SJIS-ms" }, 74120492Sfjoe { KICONV_VENDOR_MICSFT, "Big5", "Big5-ms" } 75120492Sfjoe}; 76120492Sfjoe 77120492Sfjoe/* 78120492Sfjoe * The character list to replace for Japanese MS-Windows. 79120492Sfjoe */ 80120492Sfjoestatic struct quirk_replace_list quirk_jis_cp932[] = { 81120492Sfjoe { 0x00a2, 0xffe0 }, /* Cent Sign, Fullwidth Cent Sign */ 82120492Sfjoe { 0x00a3, 0xffe1 }, /* Pound Sign, Fullwidth Pound Sign */ 83120492Sfjoe { 0x00ac, 0xffe2 }, /* Not Sign, Fullwidth Not Sign */ 84120492Sfjoe { 0x2016, 0x2225 }, /* Double Vertical Line, Parallel To */ 85120492Sfjoe { 0x203e, 0x007e }, /* Overline, Tilde */ 86120492Sfjoe { 0x2212, 0xff0d }, /* Minus Sign, Fullwidth Hyphenminus */ 87120492Sfjoe { 0x301c, 0xff5e } /* Wave Dash, Fullwidth Tilde */ 88120492Sfjoe}; 89120492Sfjoe 90120492Sfjoe/* 91120492Sfjoe * All entries of quirks 92120492Sfjoe */ 93120492Sfjoe#define NumOf(n) (sizeof((n)) / sizeof((n)[0])) 94120492Sfjoestatic struct { 95120492Sfjoe const char *quirk_codeset, *iconv_codeset, *pair_codeset; 96120492Sfjoe struct quirk_replace_list (*replace_list)[]; 97120492Sfjoe size_t num_of_replaces; 98120492Sfjoe} quirk_table[] = { 99120492Sfjoe { 100120492Sfjoe "eucJP-ms", "eucJP", ENCODING_UNICODE, 101120492Sfjoe (struct quirk_replace_list (*)[])&quirk_jis_cp932, 102120492Sfjoe NumOf(quirk_jis_cp932) 103120492Sfjoe }, 104120492Sfjoe { 105120492Sfjoe "SJIS-ms", "CP932", ENCODING_UNICODE, 106120492Sfjoe /* XXX - quirk_replace_list should be NULL */ 107120492Sfjoe (struct quirk_replace_list (*)[])&quirk_jis_cp932, 108120492Sfjoe NumOf(quirk_jis_cp932) 109120492Sfjoe }, 110120492Sfjoe { 111120492Sfjoe "Big5-ms", "CP950", ENCODING_UNICODE, 112120492Sfjoe NULL, 0 113120492Sfjoe } 114120492Sfjoe}; 115120492Sfjoe 116120492Sfjoe 117120492Sfjoeconst char * 118120492Sfjoekiconv_quirkcs(const char* base, int vendor) 119120492Sfjoe{ 120120492Sfjoe size_t i; 121120492Sfjoe 122120492Sfjoe /* 123120492Sfjoe * We should compare codeset names ignoring case here, 124120492Sfjoe * so that quirk could be used for all of the user input 125120492Sfjoe * patterns. 126120492Sfjoe */ 127120492Sfjoe for (i = 0; i < NumOf(quirk_list); i++) 128120492Sfjoe if (quirk_list[i].vendor == vendor && 129120492Sfjoe strcasecmp(quirk_list[i].base_codeset, base) == 0) 130120492Sfjoe return (quirk_list[i].quirk_codeset); 131120492Sfjoe 132120492Sfjoe return (base); 133120492Sfjoe} 134120492Sfjoe 135120492Sfjoe/* 136120492Sfjoe * Internal Functions 137120492Sfjoe */ 138120492Sfjoeconst char * 139120492Sfjoesearch_quirk(const char *given_codeset, 140120492Sfjoe const char *pair_codeset, 141120492Sfjoe struct quirk_replace_list **replace_list, 142120492Sfjoe size_t *num_of_replaces) 143120492Sfjoe{ 144120492Sfjoe size_t i; 145120492Sfjoe 146120492Sfjoe *replace_list = NULL; 147120492Sfjoe *num_of_replaces = 0; 148120492Sfjoe for (i = 0; i < NumOf(quirk_table); i++) 149120492Sfjoe if (strcmp(quirk_table[i].quirk_codeset, given_codeset) == 0) { 150120492Sfjoe if (strcmp(quirk_table[i].pair_codeset, pair_codeset) == 0) { 151120492Sfjoe *replace_list = *quirk_table[i].replace_list; 152120492Sfjoe *num_of_replaces = quirk_table[i].num_of_replaces; 153120492Sfjoe } 154120492Sfjoe return (quirk_table[i].iconv_codeset); 155120492Sfjoe } 156120492Sfjoe 157120492Sfjoe return (given_codeset); 158120492Sfjoe} 159120492Sfjoe 160120492Sfjoeuint16_t 161120492Sfjoequirk_vendor2unix(uint16_t c, struct quirk_replace_list *replace_list, size_t num) 162120492Sfjoe{ 163120492Sfjoe size_t i; 164120492Sfjoe 165120492Sfjoe for (i = 0; i < num; i++) 166120492Sfjoe if (replace_list[i].vendor_code == c) 167120492Sfjoe return (replace_list[i].standard_code); 168120492Sfjoe 169120492Sfjoe return (c); 170120492Sfjoe} 171120492Sfjoe 172120492Sfjoeuint16_t 173120492Sfjoequirk_unix2vendor(uint16_t c, struct quirk_replace_list *replace_list, size_t num) 174120492Sfjoe{ 175120492Sfjoe size_t i; 176120492Sfjoe 177120492Sfjoe for (i = 0; i < num; i++) 178120492Sfjoe if (replace_list[i].standard_code == c) 179120492Sfjoe return (replace_list[i].vendor_code); 180120492Sfjoe 181120492Sfjoe return (c); 182120492Sfjoe} 183120492Sfjoe 184120492Sfjoe#else /* statically linked */ 185120492Sfjoe 186194637Sdelphij#include <sys/types.h> 187194637Sdelphij#include <sys/iconv.h> 188194637Sdelphij 189120492Sfjoeconst char * 190194637Sdelphijkiconv_quirkcs(const char* base __unused, int vendor __unused) 191120492Sfjoe{ 192194637Sdelphij 193120492Sfjoe return (base); 194120492Sfjoe} 195120492Sfjoe 196120492Sfjoe#endif /* PIC */ 197