quirks.c revision 120492
/*-
 * Copyright (c) 2003 Ryuichiro Imura
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: head/lib/libkiconv/quirks.c 120492 2003-09-26 20:26:25Z fjoe $
 */

/*
 * kiconv(3) requires shared linking.  The quirk tables and lookup
 * functions below are only built when PIC is defined; otherwise a stub
 * kiconv_quirkcs() is provided to reduce module size when statically
 * linked.
 */

#ifdef PIC

/*
 * Why do we need quirks?
 * Since each vendor has its own Unicode mapping rules,
 * we need some quirks until iconv(3) supports them.
 * We can define Microsoft mappings here.
 *
 * For example, the eucJP and Unicode mapping rule is based on
 * the JIS standard. Since Microsoft uses cp932 for Unicode mapping,
 * which is not truly based on the JIS standard, reading a file
 * system created by the Microsoft Windows family using the eucJP/Unicode
 * mapping rule will cause a problem. That's why we define eucJP-ms here.
 * The eucJP-ms has been defined by The Open Group Japan Vendor Council.
 *
 * Well, Apple Mac OS also has its own Unicode mappings,
 * but we won't require these quirks here, because HFS doesn't have
 * Unicode and HFS+ has decomposed Unicode which can not be
 * handled by this xlat16 converter.
 */

#include <sys/types.h>
#include <sys/iconv.h>

#include <stdio.h>
#include <string.h>
#include <strings.h>	/* strcasecmp() */

#include "quirks.h"

/*
 * All lists of quirk character set.
 * Maps a (vendor, base codeset name) pair to the name of its quirk codeset.
 */
static const struct {
	int vendor; /* reserved for non MS mapping */
	const char *base_codeset, *quirk_codeset;
} quirk_list[] = {
	{ KICONV_VENDOR_MICSFT,	"eucJP", "eucJP-ms" },
	{ KICONV_VENDOR_MICSFT,	"EUC-JP", "eucJP-ms" },
	{ KICONV_VENDOR_MICSFT,	"SJIS", "SJIS-ms" },
	{ KICONV_VENDOR_MICSFT,	"Shift_JIS", "SJIS-ms" },
	{ KICONV_VENDOR_MICSFT,	"Big5", "Big5-ms" }
};

/*
 * The character list to replace for Japanese MS-Windows.
 *
 * Not const: search_quirk() hands this array out through a pointer to
 * non-const struct quirk_replace_list.
 */
static struct quirk_replace_list quirk_jis_cp932[] = {
	{ 0x00a2, 0xffe0 }, /* Cent Sign, Fullwidth Cent Sign */
	{ 0x00a3, 0xffe1 }, /* Pound Sign, Fullwidth Pound Sign */
	{ 0x00ac, 0xffe2 }, /* Not Sign, Fullwidth Not Sign */
	{ 0x2016, 0x2225 }, /* Double Vertical Line, Parallel To */
	{ 0x203e, 0x007e }, /* Overline, Tilde */
	{ 0x2212, 0xff0d }, /* Minus Sign, Fullwidth Hyphenminus */
	{ 0x301c, 0xff5e }  /* Wave Dash, Fullwidth Tilde */
};

/*
 * All entries of quirks.
 *
 * replace_list is a plain pointer to the first element (or NULL when the
 * quirk codeset has no replacement characters), so it can be handed to
 * callers without dereferencing a possibly-NULL pointer-to-array.
 */
#define	NumOf(n)	(sizeof((n)) / sizeof((n)[0]))
static const struct {
	const char *quirk_codeset, *iconv_codeset, *pair_codeset;
	struct quirk_replace_list *replace_list;
	size_t num_of_replaces;
} quirk_table[] = {
	{
		"eucJP-ms", "eucJP", ENCODING_UNICODE,
		quirk_jis_cp932,
		NumOf(quirk_jis_cp932)
	},
	{
		"SJIS-ms", "CP932", ENCODING_UNICODE,
		/* XXX - quirk_replace_list should be NULL */
		quirk_jis_cp932,
		NumOf(quirk_jis_cp932)
	},
	{
		"Big5-ms", "CP950", ENCODING_UNICODE,
		NULL, 0
	}
};

/*
 * Return the quirk codeset name registered in quirk_list for the given
 * base codeset and vendor, or `base' itself when no entry matches.
 */
const char *
kiconv_quirkcs(const char *base, int vendor)
{
	size_t i;

	/*
	 * We should compare codeset names ignoring case here,
	 * so that quirk could be used for all of the user input
	 * patterns.
	 */
	for (i = 0; i < NumOf(quirk_list); i++) {
		if (quirk_list[i].vendor == vendor &&
		    strcasecmp(quirk_list[i].base_codeset, base) == 0) {
			return (quirk_list[i].quirk_codeset);
		}
	}

	return (base);
}

/*
 * Internal Functions
 */

/*
 * Look up `given_codeset' among the quirk codeset names (case-sensitive).
 *
 * On a match, the iconv codeset name of that entry is returned; in
 * addition, if `pair_codeset' equals the entry's pair codeset, the
 * entry's replacement list and its length are stored through
 * `replace_list' and `num_of_replaces'.  In every other case the outputs
 * are set to NULL and 0, and when no entry matches `given_codeset' is
 * returned unchanged.
 */
const char *
search_quirk(const char *given_codeset,
	     const char *pair_codeset,
	     struct quirk_replace_list **replace_list,
	     size_t *num_of_replaces)
{
	size_t i;

	*replace_list = NULL;
	*num_of_replaces = 0;
	for (i = 0; i < NumOf(quirk_table); i++) {
		if (strcmp(quirk_table[i].quirk_codeset, given_codeset) != 0) {
			continue;
		}
		if (strcmp(quirk_table[i].pair_codeset, pair_codeset) == 0) {
			/* May legitimately be NULL/0 (e.g. Big5-ms). */
			*replace_list = quirk_table[i].replace_list;
			*num_of_replaces = quirk_table[i].num_of_replaces;
		}
		return (quirk_table[i].iconv_codeset);
	}

	return (given_codeset);
}

/*
 * Translate `c' from its vendor code to the standard code using the first
 * matching entry among the `num' elements of `replace_list'; `c' is
 * returned unchanged when no entry matches (including num == 0).
 */
uint16_t
quirk_vendor2unix(uint16_t c, struct quirk_replace_list *replace_list, size_t num)
{
	size_t i;

	for (i = 0; i < num; i++) {
		if (replace_list[i].vendor_code == c) {
			return (replace_list[i].standard_code);
		}
	}

	return (c);
}

/*
 * Translate `c' from its standard code to the vendor code using the first
 * matching entry among the `num' elements of `replace_list'; `c' is
 * returned unchanged when no entry matches (including num == 0).
 */
uint16_t
quirk_unix2vendor(uint16_t c, struct quirk_replace_list *replace_list, size_t num)
{
	size_t i;

	for (i = 0; i < num; i++) {
		if (replace_list[i].standard_code == c) {
			return (replace_list[i].vendor_code);
		}
	}

	return (c);
}

#else /* statically linked */

/*
 * Stub used when PIC is not defined: no quirk tables are built, so the
 * base codeset name is always returned as-is.
 */
const char *
kiconv_quirkcs(const char *base, int vendor)
{
	(void)vendor;	/* unused without the quirk tables */

	return (base);
}

#endif /* PIC */