1/* l10nflist.c - make localization file list. */
2
3/* Copyright (C) 1995-1999, 2000, 2001, 2002, 2005-2009 Free Software Foundation, Inc.
4   Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
5
6   This file is part of GNU Bash.
7
8   Bash is free software: you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation, either version 3 of the License, or
11   (at your option) any later version.
12
13   Bash is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with Bash.  If not, see <http://www.gnu.org/licenses/>.
20*/
21
22/* Tell glibc's <string.h> to provide a prototype for stpcpy().
23   This must come before <config.h> because <config.h> may include
24   <features.h>, and once <features.h> has been included, it's too late.  */
25#ifndef _GNU_SOURCE
26# define _GNU_SOURCE	1
27#endif
28
29#ifdef HAVE_CONFIG_H
30# include <config.h>
31#endif
32
33#include <string.h>
34
35#if defined _LIBC || defined HAVE_ARGZ_H
36# include <argz.h>
37#endif
38#include <ctype.h>
39#include <sys/types.h>
40#include <stdlib.h>
41
42#include "loadinfo.h"
43
44/* On some strange systems still no definition of NULL is found.  Sigh!  */
45#ifndef NULL
46# if defined __STDC__ && __STDC__
47#  define NULL ((void *) 0)
48# else
49#  define NULL 0
50# endif
51#endif
52
53/* @@ end of prolog @@ */
54
55#ifdef _LIBC
56/* Rename the non ANSI C functions.  This is required by the standard
57   because some ANSI C functions will require linking with this object
58   file and the name space must not be polluted.  */
59# ifndef stpcpy
60#  define stpcpy(dest, src) __stpcpy(dest, src)
61# endif
62#else
63# ifndef HAVE_STPCPY
64static char *stpcpy PARAMS ((char *dest, const char *src));
65# endif
66#endif
67
68/* Pathname support.
69   ISSLASH(C)           tests whether C is a directory separator character.
70   IS_ABSOLUTE_PATH(P)  tests whether P is an absolute path.  If it is not,
71                        it may be concatenated to a directory pathname.
72 */
73#if defined _WIN32 || defined __WIN32__ || defined __EMX__ || defined __DJGPP__
74  /* Win32, OS/2, DOS */
75# define ISSLASH(C) ((C) == '/' || (C) == '\\')
76# define HAS_DEVICE(P) \
77    ((((P)[0] >= 'A' && (P)[0] <= 'Z') || ((P)[0] >= 'a' && (P)[0] <= 'z')) \
78     && (P)[1] == ':')
79# define IS_ABSOLUTE_PATH(P) (ISSLASH ((P)[0]) || HAS_DEVICE (P))
80#else
81  /* Unix */
82# define ISSLASH(C) ((C) == '/')
83# define IS_ABSOLUTE_PATH(P) ISSLASH ((P)[0])
84#endif
85
/* Define functions which are usually not available.  */
87
88#if !defined _LIBC && !defined HAVE___ARGZ_COUNT
89/* Returns the number of strings in ARGZ.  */
90static size_t argz_count__ PARAMS ((const char *argz, size_t len));
91
/* Count the entries of the argz vector ARGZ, which occupies LEN bytes.
   Every entry is a NUL-terminated string; the entries are stored back
   to back, so each one accounts for its length plus one byte.  */
static size_t
argz_count__ (argz, len)
     const char *argz;
     size_t len;
{
  const char *entry = argz;
  size_t remaining = len;
  size_t n_entries;

  for (n_entries = 0; remaining > 0; ++n_entries)
    {
      /* Size of the current entry including its terminating NUL.  */
      size_t step = strlen (entry) + 1;

      entry += step;
      remaining -= step;
    }

  return n_entries;
}
107# undef __argz_count
108# define __argz_count(argz, len) argz_count__ (argz, len)
109#else
110# ifdef _LIBC
111#  define __argz_count(argz, len) INTUSE(__argz_count) (argz, len)
112# endif
113#endif	/* !_LIBC && !HAVE___ARGZ_COUNT */
114
115#if !defined _LIBC && !defined HAVE___ARGZ_STRINGIFY
116/* Make '\0' separated arg vector ARGZ printable by converting all the '\0's
117   except the last into the character SEP.  */
118static void argz_stringify__ PARAMS ((char *argz, size_t len, int sep));
119
/* Turn the LEN-byte argz vector ARGZ into a single printable string:
   the NUL that ends every entry but the last is overwritten with SEP,
   the final NUL is left in place.  */
static void
argz_stringify__ (argz, len, sep)
     char *argz;
     size_t len;
     int sep;
{
  char *pos = argz;
  size_t remaining = len;

  while (remaining > 0)
    {
      size_t entry_len = strlen (pos);

      /* Step onto the NUL that terminates this entry.  */
      pos += entry_len;
      remaining -= entry_len + 1;

      /* Nothing follows: this NUL terminates the whole string.  */
      if (remaining == 0)
        break;

      *pos++ = sep;
    }
}
135# undef __argz_stringify
136# define __argz_stringify(argz, len, sep) argz_stringify__ (argz, len, sep)
137#else
138# ifdef _LIBC
139#  define __argz_stringify(argz, len, sep) \
140  INTUSE(__argz_stringify) (argz, len, sep)
141# endif
142#endif	/* !_LIBC && !HAVE___ARGZ_STRINGIFY */
143
144#if !defined _LIBC && !defined HAVE___ARGZ_NEXT
145static char *argz_next__ PARAMS ((char *argz, size_t argz_len,
146				  const char *entry));
147
/* Iterate over the ARGZ_LEN-byte argz vector ARGZ.  With ENTRY == NULL
   the first entry is returned (NULL if the vector is empty); otherwise
   the entry following ENTRY is returned, or NULL when ENTRY was the
   last one.  */
static char *
argz_next__ (argz, argz_len, entry)
     char *argz;
     size_t argz_len;
     const char *entry;
{
  const char *limit;

  /* Start of the iteration.  */
  if (entry == NULL)
    return argz_len > 0 ? argz : NULL;

  limit = argz + argz_len;

  /* Skip over the current entry and its terminating NUL.  */
  if (entry < limit)
    entry += strlen (entry) + 1;

  return entry < limit ? (char *) entry : NULL;
}
167# undef __argz_next
168# define __argz_next(argz, len, entry) argz_next__ (argz, len, entry)
169#endif	/* !_LIBC && !HAVE___ARGZ_NEXT */
170
171
172/* Return number of bits set in X.  */
173static int pop PARAMS ((int x));
174
/* Population count of X, computed by summing neighbouring bit groups
   of growing width.  Only the low 16 bits are assumed to be in use.  */
static inline int
pop (x)
     int x;
{
  /* Sums of adjacent single bits, held in 2-bit fields.  */
  int pairs = ((x & ~0x5555) >> 1) + (x & 0x5555);
  /* Sums of adjacent 2-bit fields, held in 4-bit fields.  */
  int nibbles = ((pairs & ~0x3333) >> 2) + (pairs & 0x3333);
  /* Sums of adjacent 4-bit fields, held in 8-bit fields.  */
  int bytes = ((nibbles >> 4) + nibbles) & 0x0f0f;

  /* Add the two byte counts together.  */
  return ((bytes >> 8) + bytes) & 0xff;
}
187
188
189struct loaded_l10nfile *
190_nl_make_l10nflist (l10nfile_list, dirlist, dirlist_len, mask, language,
191		    territory, codeset, normalized_codeset, modifier, special,
192		    sponsor, revision, filename, do_allocate)
193     struct loaded_l10nfile **l10nfile_list;
194     const char *dirlist;
195     size_t dirlist_len;
196     int mask;
197     const char *language;
198     const char *territory;
199     const char *codeset;
200     const char *normalized_codeset;
201     const char *modifier;
202     const char *special;
203     const char *sponsor;
204     const char *revision;
205     const char *filename;
206     int do_allocate;
207{
208  char *abs_filename;
209  struct loaded_l10nfile **lastp;
210  struct loaded_l10nfile *retval;
211  char *cp;
212  size_t dirlist_count;
213  size_t entries;
214  int cnt;
215
216  /* If LANGUAGE contains an absolute directory specification, we ignore
217     DIRLIST.  */
218  if (IS_ABSOLUTE_PATH (language))
219    dirlist_len = 0;
220
221  /* Allocate room for the full file name.  */
222  abs_filename = (char *) malloc (dirlist_len
223				  + strlen (language)
224				  + ((mask & TERRITORY) != 0
225				     ? strlen (territory) + 1 : 0)
226				  + ((mask & XPG_CODESET) != 0
227				     ? strlen (codeset) + 1 : 0)
228				  + ((mask & XPG_NORM_CODESET) != 0
229				     ? strlen (normalized_codeset) + 1 : 0)
230				  + (((mask & XPG_MODIFIER) != 0
231				      || (mask & CEN_AUDIENCE) != 0)
232				     ? strlen (modifier) + 1 : 0)
233				  + ((mask & CEN_SPECIAL) != 0
234				     ? strlen (special) + 1 : 0)
235				  + (((mask & CEN_SPONSOR) != 0
236				      || (mask & CEN_REVISION) != 0)
237				     ? (1 + ((mask & CEN_SPONSOR) != 0
238					     ? strlen (sponsor) : 0)
239					+ ((mask & CEN_REVISION) != 0
240					   ? strlen (revision) + 1 : 0)) : 0)
241				  + 1 + strlen (filename) + 1);
242
243  if (abs_filename == NULL)
244    return NULL;
245
246  /* Construct file name.  */
247  cp = abs_filename;
248  if (dirlist_len > 0)
249    {
250      memcpy (cp, dirlist, dirlist_len);
251      __argz_stringify (cp, dirlist_len, PATH_SEPARATOR);
252      cp += dirlist_len;
253      cp[-1] = '/';
254    }
255
256  cp = stpcpy (cp, language);
257
258  if ((mask & TERRITORY) != 0)
259    {
260      *cp++ = '_';
261      cp = stpcpy (cp, territory);
262    }
263  if ((mask & XPG_CODESET) != 0)
264    {
265      *cp++ = '.';
266      cp = stpcpy (cp, codeset);
267    }
268  if ((mask & XPG_NORM_CODESET) != 0)
269    {
270      *cp++ = '.';
271      cp = stpcpy (cp, normalized_codeset);
272    }
273  if ((mask & (XPG_MODIFIER | CEN_AUDIENCE)) != 0)
274    {
275      /* This component can be part of both syntaces but has different
276	 leading characters.  For CEN we use `+', else `@'.  */
277      *cp++ = (mask & CEN_AUDIENCE) != 0 ? '+' : '@';
278      cp = stpcpy (cp, modifier);
279    }
280  if ((mask & CEN_SPECIAL) != 0)
281    {
282      *cp++ = '+';
283      cp = stpcpy (cp, special);
284    }
285  if ((mask & (CEN_SPONSOR | CEN_REVISION)) != 0)
286    {
287      *cp++ = ',';
288      if ((mask & CEN_SPONSOR) != 0)
289	cp = stpcpy (cp, sponsor);
290      if ((mask & CEN_REVISION) != 0)
291	{
292	  *cp++ = '_';
293	  cp = stpcpy (cp, revision);
294	}
295    }
296
297  *cp++ = '/';
298  stpcpy (cp, filename);
299
300  /* Look in list of already loaded domains whether it is already
301     available.  */
302  lastp = l10nfile_list;
303  for (retval = *l10nfile_list; retval != NULL; retval = retval->next)
304    if (retval->filename != NULL)
305      {
306	int compare = strcmp (retval->filename, abs_filename);
307	if (compare == 0)
308	  /* We found it!  */
309	  break;
310	if (compare < 0)
311	  {
312	    /* It's not in the list.  */
313	    retval = NULL;
314	    break;
315	  }
316
317	lastp = &retval->next;
318      }
319
320  if (retval != NULL || do_allocate == 0)
321    {
322      free (abs_filename);
323      return retval;
324    }
325
326  dirlist_count = (dirlist_len > 0 ? __argz_count (dirlist, dirlist_len) : 1);
327
328  /* Allocate a new loaded_l10nfile.  */
329  retval =
330    (struct loaded_l10nfile *)
331    malloc (sizeof (*retval)
332	    + (((dirlist_count << pop (mask)) + (dirlist_count > 1 ? 1 : 0))
333	       * sizeof (struct loaded_l10nfile *)));
334  if (retval == NULL)
335    return NULL;
336
337  retval->filename = abs_filename;
338
339  /* We set retval->data to NULL here; it is filled in later.
340     Setting retval->decided to 1 here means that retval does not
341     correspond to a real file (dirlist_count > 1) or is not worth
342     looking up (if an unnormalized codeset was specified).  */
343  retval->decided = (dirlist_count > 1
344		     || ((mask & XPG_CODESET) != 0
345			 && (mask & XPG_NORM_CODESET) != 0));
346  retval->data = NULL;
347
348  retval->next = *lastp;
349  *lastp = retval;
350
351  entries = 0;
352  /* Recurse to fill the inheritance list of RETVAL.
353     If the DIRLIST is a real list (i.e. DIRLIST_COUNT > 1), the RETVAL
354     entry does not correspond to a real file; retval->filename contains
355     colons.  In this case we loop across all elements of DIRLIST and
356     across all bit patterns dominated by MASK.
357     If the DIRLIST is a single directory or entirely redundant (i.e.
358     DIRLIST_COUNT == 1), we loop across all bit patterns dominated by
359     MASK, excluding MASK itself.
360     In either case, we loop down from MASK to 0.  This has the effect
361     that the extra bits in the locale name are dropped in this order:
362     first the modifier, then the territory, then the codeset, then the
363     normalized_codeset.  */
364  for (cnt = dirlist_count > 1 ? mask : mask - 1; cnt >= 0; --cnt)
365    if ((cnt & ~mask) == 0
366	&& ((cnt & CEN_SPECIFIC) == 0 || (cnt & XPG_SPECIFIC) == 0)
367	&& ((cnt & XPG_CODESET) == 0 || (cnt & XPG_NORM_CODESET) == 0))
368      {
369	if (dirlist_count > 1)
370	  {
371	    /* Iterate over all elements of the DIRLIST.  */
372	    char *dir = NULL;
373
374	    while ((dir = __argz_next ((char *) dirlist, dirlist_len, dir))
375		   != NULL)
376	      retval->successor[entries++]
377		= _nl_make_l10nflist (l10nfile_list, dir, strlen (dir) + 1,
378				      cnt, language, territory, codeset,
379				      normalized_codeset, modifier, special,
380				      sponsor, revision, filename, 1);
381	  }
382	else
383	  retval->successor[entries++]
384	    = _nl_make_l10nflist (l10nfile_list, dirlist, dirlist_len,
385				  cnt, language, territory, codeset,
386				  normalized_codeset, modifier, special,
387				  sponsor, revision, filename, 1);
388      }
389  retval->successor[entries] = NULL;
390
391  return retval;
392}
393
394/* Normalize codeset name.  There is no standard for the codeset
395   names.  Normalization allows the user to use any of the common
396   names.  The return value is dynamically allocated and has to be
397   freed by the caller.  */
const char *
_nl_normalize_codeset (codeset, name_len)
     const char *codeset;
     size_t name_len;
{
  int n_alnum = 0;
  int saw_letter = 0;
  size_t i;
  char *result;
  char *out;

  /* First pass: count the characters that survive normalization and
     find out whether the name consists of digits only.  */
  for (i = 0; i < name_len; ++i)
    {
      unsigned char ch = (unsigned char) codeset[i];

      if (!isalnum (ch))
        continue;

      ++n_alnum;
      if (isalpha (ch))
        saw_letter = 1;
    }

  /* A purely numeric name gets the three-character prefix "iso".  */
  result = (char *) malloc ((saw_letter ? 0 : 3) + n_alnum + 1);
  if (result == NULL)
    return NULL;

  out = result;
  if (!saw_letter)
    {
      memcpy (out, "iso", 3);
      out += 3;
    }

  /* Second pass: copy letters in lower case and digits unchanged; all
     other characters are dropped.  */
  for (i = 0; i < name_len; ++i)
    {
      unsigned char ch = (unsigned char) codeset[i];

      if (isalpha (ch))
        *out++ = tolower (ch);
      else if (isdigit (ch))
        *out++ = codeset[i];
    }
  *out = '\0';

  return (const char *) result;
}
438
439
440/* @@ begin of epilog @@ */
441
442/* We don't want libintl.a to depend on any other library.  So we
443   avoid the non-standard function stpcpy.  In GNU C Library this
444   function is available, though.  Also allow the symbol HAVE_STPCPY
445   to be defined.  */
446#if !_LIBC && !HAVE_STPCPY
/* Copy the string SRC, including its terminating NUL, to DEST and
   return the address of the NUL written into DEST.  */
static char *
stpcpy (dest, src)
     char *dest;
     const char *src;
{
  for (;; ++dest, ++src)
    {
      *dest = *src;
      if (*dest == '\0')
        return dest;
    }
}
456#endif
457