1/* Implementation of the dcgettext(3) function
2   Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
3
4   This program is free software; you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation; either version 2, or (at your option)
7   any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program; if not, write to the Free Software Foundation,
16   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
17
18#ifdef HAVE_CONFIG_H
19# include <config.h>
20#endif
21
22#include <sys/types.h>
23
24#ifdef __GNUC__
25# define alloca __builtin_alloca
26# define HAVE_ALLOCA 1
27#else
28# if defined HAVE_ALLOCA_H || defined _LIBC
29#  include <alloca.h>
30# else
31#  ifdef _AIX
32 #pragma alloca
33#  else
34#   ifndef alloca
35char *alloca ();
36#   endif
37#  endif
38# endif
39#endif
40
41#include <errno.h>
42#ifndef errno
43extern int errno;
44#endif
45#ifndef __set_errno
46# define __set_errno(val) errno = (val)
47#endif
48
49#if defined STDC_HEADERS || defined _LIBC
50# include <stdlib.h>
51#else
52char *getenv ();
53# ifdef HAVE_MALLOC_H
54#  include <malloc.h>
55# else
56void free ();
57# endif
58#endif
59
60#if defined HAVE_STRING_H || defined _LIBC
61# ifndef _GNU_SOURCE
62#  define _GNU_SOURCE	1
63# endif
64# include <string.h>
65#else
66# include <strings.h>
67#endif
68#if !HAVE_STRCHR && !defined _LIBC
69# ifndef strchr
70#  define strchr index
71# endif
72#endif
73
74#if defined HAVE_UNISTD_H || defined _LIBC
75# include <unistd.h>
76#endif
77
78#include "gettext.h"
79#include "gettextP.h"
80#ifdef _LIBC
81# include <libintl.h>
82#else
83# include "libgettext.h"
84#endif
85#include "hash-string.h"
86
87/* @@ end of prolog @@ */
88
89#ifdef _LIBC
90/* Rename the non ANSI C functions.  This is required by the standard
91   because some ANSI C functions will require linking with this object
92   file and the name space must not be polluted.  */
93# define getcwd __getcwd
94# define stpcpy __stpcpy
95#else
96# if !defined HAVE_GETCWD
97char *getwd ();
98#  define getcwd(buf, max) getwd (buf)
99# else
100char *getcwd ();
101# endif
102# ifndef HAVE_STPCPY
103static char *stpcpy PARAMS ((char *dest, const char *src));
104# endif
105#endif
106
107/* Amount to increase buffer size by in each try.  */
108#define PATH_INCR 32
109
110/* The following is from pathmax.h.  */
111/* Non-POSIX BSD systems might have gcc's limits.h, which doesn't define
112   PATH_MAX but might cause redefinition warnings when sys/param.h is
113   later included (as on MORE/BSD 4.3).  */
114#if defined(_POSIX_VERSION) || (defined(HAVE_LIMITS_H) && !defined(__GNUC__))
115# include <limits.h>
116#endif
117
118#ifndef _POSIX_PATH_MAX
119# define _POSIX_PATH_MAX 255
120#endif
121
122#if !defined(PATH_MAX) && defined(_PC_PATH_MAX)
123# define PATH_MAX (pathconf ("/", _PC_PATH_MAX) < 1 ? 1024 : pathconf ("/", _PC_PATH_MAX))
124#endif
125
126/* Don't include sys/param.h if it already has been.  */
127#if defined(HAVE_SYS_PARAM_H) && !defined(PATH_MAX) && !defined(MAXPATHLEN)
128# include <sys/param.h>
129#endif
130
131#if !defined(PATH_MAX) && defined(MAXPATHLEN)
132# define PATH_MAX MAXPATHLEN
133#endif
134
135#ifndef PATH_MAX
136# define PATH_MAX _POSIX_PATH_MAX
137#endif
138
/* XPG3 defines the result of `setlocale (category, NULL)' as:
   ``Directs `setlocale()' to query `category' and return the current
     setting of `local'.''
   However it does not specify the exact format.  And even worse: POSIX
   does not define this at all.  So we can use this feature only on
   selected systems (e.g. those using the GNU C Library).  */
145#ifdef _LIBC
146# define HAVE_LOCALE_NULL
147#endif
148
/* Name of the default domain used for gettext(3) prior to any call to
   textdomain(3).  The default value for this is "messages".  */
151const char _nl_default_default_domain[] = "messages";
152
153/* Value used as the default domain for gettext(3).  */
154const char *_nl_current_default_domain = _nl_default_default_domain;
155
156/* Contains the default location of the message catalogs.  */
157const char _nl_default_dirname[] = GNULOCALEDIR;
158
159/* List with bindings of specific domains created by bindtextdomain()
160   calls.  */
161struct binding *_nl_domain_bindings;
162
163/* Prototypes for local functions.  */
164static char *find_msg PARAMS ((struct loaded_l10nfile *domain_file,
165			       const char *msgid));
166static const char *category_to_name PARAMS ((int category));
167static const char *guess_category_value PARAMS ((int category,
168						 const char *categoryname));
169
170
/* For those losing systems which don't have `alloca' we have to add
   some additional code emulating it.  In that configuration `alloca'
   is replaced by `malloc', so every temporary buffer has to be
   remembered (ADD_BLOCK) and released again before the function
   returns (FREE_BLOCKS).  */
#ifdef HAVE_ALLOCA
/* Nothing has to be done.  */
# define ADD_BLOCK(list, address) /* nothing */
# define FREE_BLOCKS(list) /* nothing */
#else
/* One remembered heap buffer; the nodes form a singly linked list.  */
struct block_list
{
  void *address;
  struct block_list *next;
};
/* Push ADDR onto the front of LIST.  If no list node can be allocated
   the buffer stays usable by the caller but is not tracked, so it will
   not be released by FREE_BLOCKS.  */
# define ADD_BLOCK(list, addr) \
  do { \
    struct block_list *newp = (struct block_list *) malloc (sizeof (*newp)); \
    /* If we cannot get a free block we cannot add the new element to \
       the list.  */ \
    if (newp != NULL) { \
      newp->address = (addr); \
      newp->next = (list); \
      (list) = newp; \
    } \
  } while (0)
/* Release every buffer recorded in LIST together with its list node and
   leave LIST empty (NULL).  The buffer itself must be freed as well;
   releasing only the node would leak each emulated `alloca' block.  */
# define FREE_BLOCKS(list) \
  do { \
    while ((list) != NULL) { \
      struct block_list *old = (list); \
      (list) = (list)->next; \
      free (old->address); \
      free (old); \
    } \
  } while (0)
# undef alloca
# define alloca(size) (malloc (size))
#endif	/* have alloca */
205
206
207/* Names for the libintl functions are a problem.  They must not clash
208   with existing names and they should follow ANSI C.  But this source
209   code is also used in GNU C Library where the names have a __
210   prefix.  So we have to make a difference here.  */
211#ifdef _LIBC
212# define DCGETTEXT __dcgettext
213#else
214# define DCGETTEXT dcgettext__
215#endif
216
217/* Look up MSGID in the DOMAINNAME message catalog for the current CATEGORY
218   locale.  */
219char *
220DCGETTEXT (domainname, msgid, category)
221     const char *domainname;
222     const char *msgid;
223     int category;
224{
225#ifndef HAVE_ALLOCA
226  struct block_list *block_list = NULL;
227#endif
228  struct loaded_l10nfile *domain;
229  struct binding *binding;
230  const char *categoryname;
231  const char *categoryvalue;
232  char *dirname, *xdomainname;
233  char *single_locale;
234  char *retval;
235  int saved_errno = errno;
236
237  /* If no real MSGID is given return NULL.  */
238  if (msgid == NULL)
239    return NULL;
240
241  /* If DOMAINNAME is NULL, we are interested in the default domain.  If
242     CATEGORY is not LC_MESSAGES this might not make much sense but the
243     defintion left this undefined.  */
244  if (domainname == NULL)
245    domainname = _nl_current_default_domain;
246
247  /* First find matching binding.  */
248  for (binding = _nl_domain_bindings; binding != NULL; binding = binding->next)
249    {
250      int compare = strcmp (domainname, binding->domainname);
251      if (compare == 0)
252	/* We found it!  */
253	break;
254      if (compare < 0)
255	{
256	  /* It is not in the list.  */
257	  binding = NULL;
258	  break;
259	}
260    }
261
262  if (binding == NULL)
263    dirname = (char *) _nl_default_dirname;
264  else if (binding->dirname[0] == '/')
265    dirname = binding->dirname;
266  else
267    {
268      /* We have a relative path.  Make it absolute now.  */
269      size_t dirname_len = strlen (binding->dirname) + 1;
270      size_t path_max;
271      char *ret;
272
273      path_max = (unsigned) PATH_MAX;
274      path_max += 2;		/* The getcwd docs say to do this.  */
275
276      dirname = (char *) alloca (path_max + dirname_len);
277      ADD_BLOCK (block_list, dirname);
278
279      __set_errno (0);
280      while ((ret = getcwd (dirname, path_max)) == NULL && errno == ERANGE)
281	{
282	  path_max += PATH_INCR;
283	  dirname = (char *) alloca (path_max + dirname_len);
284	  ADD_BLOCK (block_list, dirname);
285	  __set_errno (0);
286	}
287
288      if (ret == NULL)
289	{
290	  /* We cannot get the current working directory.  Don't signal an
291	     error but simply return the default string.  */
292	  FREE_BLOCKS (block_list);
293	  __set_errno (saved_errno);
294	  return (char *) msgid;
295	}
296
297      stpcpy (stpcpy (strchr (dirname, '\0'), "/"), binding->dirname);
298    }
299
300  /* Now determine the symbolic name of CATEGORY and its value.  */
301  categoryname = category_to_name (category);
302  categoryvalue = guess_category_value (category, categoryname);
303
304  xdomainname = (char *) alloca (strlen (categoryname)
305				 + strlen (domainname) + 5);
306  ADD_BLOCK (block_list, xdomainname);
307
308  stpcpy (stpcpy (stpcpy (stpcpy (xdomainname, categoryname), "/"),
309		  domainname),
310	  ".mo");
311
312  /* Creating working area.  */
313  single_locale = (char *) alloca (strlen (categoryvalue) + 1);
314  ADD_BLOCK (block_list, single_locale);
315
316
317  /* Search for the given string.  This is a loop because we perhaps
318     got an ordered list of languages to consider for th translation.  */
319  while (1)
320    {
321      /* Make CATEGORYVALUE point to the next element of the list.  */
322      while (categoryvalue[0] != '\0' && categoryvalue[0] == ':')
323	++categoryvalue;
324      if (categoryvalue[0] == '\0')
325	{
326	  /* The whole contents of CATEGORYVALUE has been searched but
327	     no valid entry has been found.  We solve this situation
328	     by implicitly appending a "C" entry, i.e. no translation
329	     will take place.  */
330	  single_locale[0] = 'C';
331	  single_locale[1] = '\0';
332	}
333      else
334	{
335	  char *cp = single_locale;
336	  while (categoryvalue[0] != '\0' && categoryvalue[0] != ':')
337	    *cp++ = *categoryvalue++;
338	  *cp = '\0';
339	}
340
341      /* If the current locale value is C (or POSIX) we don't load a
342	 domain.  Return the MSGID.  */
343      if (strcmp (single_locale, "C") == 0
344	  || strcmp (single_locale, "POSIX") == 0)
345	{
346	  FREE_BLOCKS (block_list);
347	  __set_errno (saved_errno);
348	  return (char *) msgid;
349	}
350
351
352      /* Find structure describing the message catalog matching the
353	 DOMAINNAME and CATEGORY.  */
354      domain = _nl_find_domain (dirname, single_locale, xdomainname);
355
356      if (domain != NULL)
357	{
358	  retval = find_msg (domain, msgid);
359
360	  if (retval == NULL)
361	    {
362	      int cnt;
363
364	      for (cnt = 0; domain->successor[cnt] != NULL; ++cnt)
365		{
366		  retval = find_msg (domain->successor[cnt], msgid);
367
368		  if (retval != NULL)
369		    break;
370		}
371	    }
372
373	  if (retval != NULL)
374	    {
375	      FREE_BLOCKS (block_list);
376	      __set_errno (saved_errno);
377	      return retval;
378	    }
379	}
380    }
381  /* NOTREACHED */
382}
383
384#ifdef _LIBC
385/* Alias for function name in GNU C Library.  */
386weak_alias (__dcgettext, dcgettext);
387#endif
388
389
390static char *
391find_msg (domain_file, msgid)
392     struct loaded_l10nfile *domain_file;
393     const char *msgid;
394{
395  size_t top, act, bottom;
396  struct loaded_domain *domain;
397
398  if (domain_file->decided == 0)
399    _nl_load_domain (domain_file);
400
401  if (domain_file->data == NULL)
402    return NULL;
403
404  domain = (struct loaded_domain *) domain_file->data;
405
406  /* Locate the MSGID and its translation.  */
407  if (domain->hash_size > 2 && domain->hash_tab != NULL)
408    {
409      /* Use the hashing table.  */
410      nls_uint32 len = strlen (msgid);
411      nls_uint32 hash_val = hash_string (msgid);
412      nls_uint32 idx = hash_val % domain->hash_size;
413      nls_uint32 incr = 1 + (hash_val % (domain->hash_size - 2));
414      nls_uint32 nstr = W (domain->must_swap, domain->hash_tab[idx]);
415
416      if (nstr == 0)
417	/* Hash table entry is empty.  */
418	return NULL;
419
420      if (W (domain->must_swap, domain->orig_tab[nstr - 1].length) == len
421	  && strcmp (msgid,
422		     domain->data + W (domain->must_swap,
423				       domain->orig_tab[nstr - 1].offset)) == 0)
424	return (char *) domain->data + W (domain->must_swap,
425					  domain->trans_tab[nstr - 1].offset);
426
427      while (1)
428	{
429	  if (idx >= domain->hash_size - incr)
430	    idx -= domain->hash_size - incr;
431	  else
432	    idx += incr;
433
434	  nstr = W (domain->must_swap, domain->hash_tab[idx]);
435	  if (nstr == 0)
436	    /* Hash table entry is empty.  */
437	    return NULL;
438
439	  if (W (domain->must_swap, domain->orig_tab[nstr - 1].length) == len
440	      && strcmp (msgid,
441			 domain->data + W (domain->must_swap,
442					   domain->orig_tab[nstr - 1].offset))
443	         == 0)
444	    return (char *) domain->data
445	      + W (domain->must_swap, domain->trans_tab[nstr - 1].offset);
446	}
447      /* NOTREACHED */
448    }
449
450  /* Now we try the default method:  binary search in the sorted
451     array of messages.  */
452  bottom = 0;
453  top = domain->nstrings;
454  while (bottom < top)
455    {
456      int cmp_val;
457
458      act = (bottom + top) / 2;
459      cmp_val = strcmp (msgid, domain->data
460			       + W (domain->must_swap,
461				    domain->orig_tab[act].offset));
462      if (cmp_val < 0)
463	top = act;
464      else if (cmp_val > 0)
465	bottom = act + 1;
466      else
467	break;
468    }
469
470  /* If an translation is found return this.  */
471  return bottom >= top ? NULL : (char *) domain->data
472                                + W (domain->must_swap,
473				     domain->trans_tab[act].offset);
474}
475
476
/* Map the locale CATEGORY constant to its symbolic name.  Only the
   categories for which the system headers provide a macro are
   recognized; every other value yields the placeholder "LC_XXX".  */
static const char *
category_to_name (category)
     int category;
{
  switch (category)
    {
#ifdef LC_COLLATE
    case LC_COLLATE:
      return "LC_COLLATE";
#endif
#ifdef LC_CTYPE
    case LC_CTYPE:
      return "LC_CTYPE";
#endif
#ifdef LC_MONETARY
    case LC_MONETARY:
      return "LC_MONETARY";
#endif
#ifdef LC_NUMERIC
    case LC_NUMERIC:
      return "LC_NUMERIC";
#endif
#ifdef LC_TIME
    case LC_TIME:
      return "LC_TIME";
#endif
#ifdef LC_MESSAGES
    case LC_MESSAGES:
      return "LC_MESSAGES";
#endif
#ifdef LC_RESPONSE
    case LC_RESPONSE:
      return "LC_RESPONSE";
#endif
#ifdef LC_ALL
    case LC_ALL:
      /* This might not make sense but is perhaps better than any other
         value.  */
      return "LC_ALL";
#endif
    default:
      break;
    }

  /* If you have a better idea for a default value let me know.  */
  return "LC_XXX";
}
535
/* Guess value of current locale from value of the environment variables.

   CATEGORY is the numeric locale category and CATEGORYNAME its symbolic
   name (used as the name of the environment variable to consult).
   Empty environment values are treated like unset ones.  The result is
   never NULL; "C" is returned when nothing else is available.  The
   returned string is owned by the environment or the C library and
   must not be modified or freed.  */
static const char *
guess_category_value (category, categoryname)
     int category;
     const char *categoryname;
{
  const char *retval;

  /* The highest priority value is the `LANGUAGE' environment
     variable.  This is a GNU extension.  */
  retval = getenv ("LANGUAGE");
  if (retval != NULL && retval[0] != '\0')
    return retval;

  /* `LANGUAGE' is not set.  So we have to proceed with the POSIX
     methods of looking to `LC_ALL', `LC_xxx', and `LANG'.  On some
     systems this can be done by the `setlocale' function itself.  */
#if defined HAVE_SETLOCALE && defined HAVE_LC_MESSAGES && defined HAVE_LOCALE_NULL
  /* `setlocale' returns a null pointer when it cannot honour the
     request, e.g. for a CATEGORY it does not know.  The caller measures
     and scans the result as a string, so fall back to "C" instead of
     passing the null pointer on.  */
  retval = setlocale (category, NULL);
  return retval != NULL ? retval : "C";
#else
  /* Setting of LC_ALL overwrites all other.  */
  retval = getenv ("LC_ALL");
  if (retval != NULL && retval[0] != '\0')
    return retval;

  /* Next comes the name of the desired category.  */
  retval = getenv (categoryname);
  if (retval != NULL && retval[0] != '\0')
    return retval;

  /* Last possibility is the LANG environment variable.  */
  retval = getenv ("LANG");
  if (retval != NULL && retval[0] != '\0')
    return retval;

  /* We use C as the default domain.  POSIX says this is implementation
     defined.  */
  return "C";
#endif
}
576
577/* @@ begin of epilog @@ */
578
/* libintl.a must not depend on any other library, so the non-standard
   function stpcpy is provided here.  The GNU C Library has its own
   implementation, and a system that defines HAVE_STPCPY is expected to
   supply one as well.

   Copy the string SRC, including its terminating NUL, to DEST and
   return a pointer to the NUL written into DEST.  */
#if !_LIBC && !HAVE_STPCPY
static char *
stpcpy (dest, src)
     char *dest;
     const char *src;
{
  for (;;)
    {
      char ch = *src++;

      *dest = ch;
      if (ch == '\0')
        return dest;
      ++dest;
    }
}
#endif
594