/*	$NetBSD: localcharset.c,v 1.1.1.1 2016/01/10 21:36:18 christos Exp $	*/

/* Determine a canonical name for the current locale's character encoding.

   Copyright (C) 2000-2002 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU Library General Public License as published
   by the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
   USA.  */

/* Written by Bruno Haible <haible@clisp.cons.org>.  */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#if HAVE_STDDEF_H
# include <stddef.h>
#endif

#include <stdio.h>
#if HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#if HAVE_STDLIB_H
# include <stdlib.h>
#endif

#if defined _WIN32 || defined __WIN32__
# undef WIN32   /* avoid warning on mingw32 */
# define WIN32
#endif

#if defined __EMX__
/* Assume EMX program runs on OS/2, even if compiled under DOS.  */
# define OS2
#endif

#if !defined WIN32
# if HAVE_LANGINFO_CODESET
#  include <langinfo.h>
# else
#  if HAVE_SETLOCALE
#   include <locale.h>
#  endif
# endif
#elif defined WIN32
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
#endif
#if defined OS2
# define INCL_DOS
# include <os2.h>
#endif

#if defined _WIN32 || defined __WIN32__ || defined __EMX__ || defined __DJGPP__
  /* Win32, OS/2, DOS */
# define ISSLASH(C) ((C) == '/' || (C) == '\\')
#endif

#ifndef DIRECTORY_SEPARATOR
# define DIRECTORY_SEPARATOR '/'
#endif

#ifndef ISSLASH
# define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR)
#endif

#ifdef HAVE_GETC_UNLOCKED
# undef getc
# define getc getc_unlocked
#endif

/* The following static variable is declared 'volatile' to avoid a
   possible multithread problem in the function get_charset_aliases. If we
   are running in a threaded environment, and if two threads initialize
   'charset_aliases' simultaneously, both will produce the same value,
   and everything will be ok if the two assignments to 'charset_aliases'
   are atomic. But I don't know what will happen if the two assignments mix.  */
#if __STDC__ != 1
# define volatile /* empty */
#endif
/* Pointer to the contents of the charset.alias file, if it has already been
   read, else NULL.  Its format is:
   ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0'  */
static const char * volatile charset_aliases;

#if !defined WIN32

/* Return a freshly malloc'ed string holding DIR and BASE joined by a
   directory separator.  The separator is only inserted when DIR is
   non-empty and does not already end in a slash.  Return NULL if memory
   is exhausted.  The caller owns (and must free) the result.  */
static char *
concat_dir_base (const char *dir, const char *base)
{
  size_t dir_len = strlen (dir);
  size_t base_len = strlen (base);
  int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));
  char *file_name = malloc (dir_len + add_slash + base_len + 1);

  if (file_name != NULL)
    {
      memcpy (file_name, dir, dir_len);
      if (add_slash)
        file_name[dir_len] = DIRECTORY_SEPARATOR;
      /* base_len + 1 also copies the terminating NUL.  */
      memcpy (file_name + dir_len + add_slash, base, base_len + 1);
    }
  return file_name;
}

/* Parse the alias table read from FP.

   The input consists of whitespace-separated "ALIAS CANONICAL" pairs;
   '#' starts a comment that extends to the end of the line.  Each token is
   limited to 50 characters by the fscanf conversion below.

   Return a malloc'ed table in the format
     ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0'
   or NULL if the stream contains no pair or memory is exhausted (in which
   case everything read so far is discarded).  The caller owns the result.
   FP is left open.  */
static char *
parse_charset_aliases (FILE *fp)
{
  /* The buffer sizes must stay in sync with the "%50s" conversions.  */
  char alias[50+1];
  char canonical[50+1];
  char *table = NULL;
  size_t table_size = 0;

  for (;;)
    {
      int c;
      size_t alias_len;
      size_t canonical_len;
      size_t new_size;
      char *grown;

      c = getc (fp);
      if (c == EOF)
        break;
      if (c == '\n' || c == ' ' || c == '\t')
        continue;
      if (c == '#')
        {
          /* Skip comment, to end of line.  */
          do
            c = getc (fp);
          while (!(c == EOF || c == '\n'));
          if (c == EOF)
            break;
          continue;
        }
      ungetc (c, fp);
      if (fscanf (fp, "%50s %50s", alias, canonical) < 2)
        break;

      alias_len = strlen (alias);
      canonical_len = strlen (canonical);
      new_size = table_size + alias_len + 1 + canonical_len + 1;

      /* Grow through a temporary: when realloc fails the old block is
         still allocated and must be released here, otherwise it leaks.
         The extra byte is reserved for the final table terminator.
         realloc (NULL, n) acts as malloc (n) for the first pair.  */
      grown = realloc (table, new_size + 1);
      if (grown == NULL)
        {
          /* Out of memory.  */
          free (table);
          return NULL;
        }
      table = grown;

      strcpy (table + table_size, alias);
      strcpy (table + table_size + alias_len + 1, canonical);
      table_size = new_size;
    }

  if (table != NULL)
    /* An empty alias name marks the end of the table.  */
    table[table_size] = '\0';
  return table;
}

#endif /* !WIN32 */

/* Return a pointer to the contents of the charset.alias file.

   The table is built on the first call and cached in 'charset_aliases';
   later calls return the cached pointer.  The result is never NULL: when
   the file is missing, empty, or memory runs out, an empty table ("") is
   returned.  The result must not be freed by the caller.

   On non-Win32 systems the file is looked up in the directory named by the
   environment variable CHARSETALIASDIR if that is set and non-empty (this
   allows using an alias table that is not installed yet), and otherwise in
   LIBDIR.  LIBDIR is not defined in this file; it is expected to come from
   config.h or the compiler command line.  If it is missing and no override
   is given, the table is empty.  On Win32 a built-in table is used.  */
static const char *
get_charset_aliases (void)
{
  const char *cp;

  cp = charset_aliases;
  if (cp == NULL)
    {
#if !defined WIN32
      const char *dir;
      char *file_name = NULL;
      char *table = NULL;

      dir = getenv ("CHARSETALIASDIR");
      if (dir == NULL || dir[0] == '\0')
        {
# ifdef LIBDIR
          dir = LIBDIR;
# else
          dir = NULL;
# endif
        }

      /* Concatenate dir and base into freshly allocated file_name.  */
      if (dir != NULL)
        file_name = concat_dir_base (dir, "charset.alias");

      if (file_name != NULL)
        {
          FILE *fp = fopen (file_name, "r");

          if (fp != NULL)
            {
              /* Parse the file's contents.  */
              table = parse_charset_aliases (fp);
              fclose (fp);
            }
          free (file_name);
        }

      /* Out of memory or file not found, treat it as empty.  */
      cp = (table != NULL ? table : "");

#else

      /* To avoid the troubles of installing a separate file in the same
         directory as the DLL and of retrieving the DLL's directory at
         runtime, simply inline the aliases here.  The string literal's own
         terminating NUL serves as the end-of-table marker.  */
      cp = "CP936" "\0" "GBK" "\0"
           "CP1361" "\0" "JOHAB" "\0";

#endif

      charset_aliases = cp;
    }

  return cp;
}

#if !defined WIN32 && (defined OS2 || !HAVE_LANGINFO_CODESET)

/* Return the locale name taken from the environment: the first of LC_ALL,
   LC_CTYPE and LANG that is set to a non-empty value.  If none is, the
   value of LANG is returned as is, i.e. NULL or an empty string.  */
static const char *
locale_name_from_env (void)
{
  const char *locale;

  locale = getenv ("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
        locale = getenv ("LANG");
    }
  return locale;
}

#endif

/* Determine the current locale's character encoding, and canonicalize it
   into one of the canonical names listed in config.charset.
   The result must not be freed; it is statically allocated.
   If the canonical name cannot be determined, the result is a non-canonical
   name.

   The raw name comes from nl_langinfo (CODESET) where available, from the
   LC_ALL / LC_CTYPE / LANG environment variables on older systems, from
   GetACP on Win32, and from the environment or DosQueryCp on OS/2.  It is
   then mapped through the table returned by get_charset_aliases; an alias
   entry named "*" matches every name.  The result is never NULL but may be
   an empty string.  */

#ifdef STATIC
STATIC
#endif
const char *
locale_charset (void)
{
  const char *codeset;
  const char *aliases;

#if !(defined WIN32 || defined OS2)

# if HAVE_LANGINFO_CODESET

  /* Most systems support nl_langinfo (CODESET) nowadays.  */
  codeset = nl_langinfo (CODESET);

# else

  /* On old systems which lack it, use setlocale or getenv.  */
  const char *locale = NULL;

  /* But most old systems don't have a complete set of locales.  Some
     (like SunOS 4 or DJGPP) have only the C locale.  Therefore we don't
     use setlocale here; it would return "C" when it doesn't support the
     locale name the user has set.  */
#  if HAVE_SETLOCALE && 0
  locale = setlocale (LC_CTYPE, NULL);
#  endif
  if (locale == NULL || locale[0] == '\0')
    locale = locale_name_from_env ();

  /* On some old systems, one used to set locale = "iso8859_1". On others,
     you set it to "language_COUNTRY.charset". In any case, we resolve it
     through the charset.alias file.  */
  codeset = locale;

# endif

#elif defined WIN32

  /* Room for "CP", up to 10 decimal digits and the terminating NUL.  */
  static char buf[2 + 10 + 1];

  /* Win32 has a function returning the locale's codepage as a number.  */
  sprintf (buf, "CP%u", GetACP ());
  codeset = buf;

#elif defined OS2

  const char *locale;
  /* Room for "CP", up to 10 decimal digits and the terminating NUL.  */
  static char buf[2 + 10 + 1];
  ULONG cp[3];
  ULONG cplen;

  /* Allow user to override the codeset, as set in the operating system,
     with standard language environment variables.  */
  locale = locale_name_from_env ();
  if (locale != NULL && locale[0] != '\0')
    {
      /* If the locale name contains an encoding after the dot, return it.
         Note that this path bypasses the alias table.  */
      const char *dot = strchr (locale, '.');

      if (dot != NULL)
        {
          const char *modifier;

          dot++;
          /* Look for the possible @... trailer and remove it, if any.  */
          modifier = strchr (dot, '@');
          if (modifier == NULL)
            return dot;
          /* modifier points at or after dot, so the difference is
             non-negative and can be compared as a size.  */
          if ((size_t) (modifier - dot) < sizeof (buf))
            {
              memcpy (buf, dot, modifier - dot);
              buf [modifier - dot] = '\0';
              return buf;
            }
        }

      /* Resolve through the charset.alias file.  */
      codeset = locale;
    }
  else
    {
      /* OS/2 has a function returning the locale's codepage as a number.  */
      if (DosQueryCp (sizeof (cp), cp, &cplen))
        codeset = "";
      else
        {
          /* cp[0] is a ULONG; convert explicitly so that the argument
             type matches the conversion specifier.  */
          sprintf (buf, "CP%lu", (unsigned long) cp[0]);
          codeset = buf;
        }
    }

#endif

  if (codeset == NULL)
    /* The canonical name cannot be determined.  */
    codeset = "";

  /* Resolve alias.  Each table entry is an ALIAS string followed by its
     CANONICAL string, so the loop advances over two strings per step.  */
  for (aliases = get_charset_aliases ();
       *aliases != '\0';
       aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
    if (strcmp (codeset, aliases) == 0
        || (aliases[0] == '*' && aliases[1] == '\0'))
      {
        codeset = aliases + strlen (aliases) + 1;
        break;
      }

  return codeset;
}