/* dcgettext.c -- implementation of the dcgettext(3) function
2   Copyright (C) 1995 Free Software Foundation, Inc.
3
4This program is free software; you can redistribute it and/or modify
5it under the terms of the GNU General Public License as published by
6the Free Software Foundation; either version 2, or (at your option)
7any later version.
8
9This program is distributed in the hope that it will be useful,
10but WITHOUT ANY WARRANTY; without even the implied warranty of
11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; if not, write to the Free Software
16Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
17
18#ifdef HAVE_CONFIG_H
19# include <config.h>
20#endif
21
22#include <sys/types.h>
23
24#ifdef __GNUC__
25# define alloca __builtin_alloca
26#else
27# if defined HAVE_ALLOCA_H || defined _LIBC
28#  include <alloca.h>
29# else
30#  ifdef _AIX
31 #pragma alloca
32#  else
33#   ifndef alloca
34char *alloca ();
35#   endif
36#  endif
37# endif
38#endif
39
40#include <errno.h>
41#ifndef errno
42extern int errno;
43#endif
44
45#if defined STDC_HEADERS || defined _LIBC
46# include <stdlib.h>
47#else
48char *getenv ();
49# ifdef HAVE_MALLOC_H
50#  include <malloc.h>
51# else
52void free ();
53# endif
54#endif
55
56#if defined HAVE_STRING_H || defined _LIBC
57# include <string.h>
58#else
59# include <strings.h>
60#endif
61#if !HAVE_STRCHR && !defined _LIBC
62# ifndef strchr
63#  define strchr index
64# endif
65#endif
66
67#if defined HAVE_UNISTD_H || defined _LIBC
68# include <unistd.h>
69#endif
70
71#include "gettext.h"
72#include "gettextP.h"
73#ifdef _LIBC
74# include <libintl.h>
75#else
76# include "libgettext.h"
77#endif
78#include "hash-string.h"
79
80/* @@ end of prolog @@ */
81
82#ifdef _LIBC
83/* Rename the non ANSI C functions.  This is required by the standard
84   because some ANSI C functions will require linking with this object
85   file and the name space must not be polluted.  */
86# define getcwd __getcwd
87# define stpcpy __stpcpy
88#endif
89
90#if !defined HAVE_GETCWD && !defined _LIBC
91char *getwd ();
92# define getcwd(buf, max) getwd (buf)
93#else
94char *getcwd ();
95#endif
96
97/* Amount to increase buffer size by in each try.  */
98#define PATH_INCR 32
99
100/* The following is from pathmax.h.  */
101/* Non-POSIX BSD systems might have gcc's limits.h, which doesn't define
102   PATH_MAX but might cause redefinition warnings when sys/param.h is
103   later included (as on MORE/BSD 4.3).  */
104#if defined(_POSIX_VERSION) || (defined(HAVE_LIMITS_H) && !defined(__GNUC__))
105# include <limits.h>
106#endif
107
108#ifndef _POSIX_PATH_MAX
109# define _POSIX_PATH_MAX 255
110#endif
111
112#if !defined(PATH_MAX) && defined(_PC_PATH_MAX)
113# define PATH_MAX (pathconf ("/", _PC_PATH_MAX) < 1 ? 1024 : pathconf ("/", _PC_PATH_MAX))
114#endif
115
116/* Don't include sys/param.h if it already has been.  */
117#if defined(HAVE_SYS_PARAM_H) && !defined(PATH_MAX) && !defined(MAXPATHLEN)
118# include <sys/param.h>
119#endif
120
121#if !defined(PATH_MAX) && defined(MAXPATHLEN)
122# define PATH_MAX MAXPATHLEN
123#endif
124
125#ifndef PATH_MAX
126# define PATH_MAX _POSIX_PATH_MAX
127#endif
128
129/* XPG3 defines the result of `setlocale (category, NULL)' as:
130   ``Directs `setlocale()' to query `category' and return the current
131     setting of `local'.''
   However it does not specify the exact format.  And even worse: POSIX
   does not define this at all.  So we can use this feature only on selected
   systems (e.g. those using the GNU C Library).  */
135#ifdef _LIBC
136# define HAVE_LOCALE_NULL
137#endif
138
/* Name of the default domain used for gettext(3) prior to any call to
   textdomain(3).  The default value for this is "messages".  */
141const char _nl_default_default_domain[] = "messages";
142
143/* Value used as the default domain for gettext(3).  */
144const char *_nl_current_default_domain = _nl_default_default_domain;
145
146/* Contains the default location of the message catalogs.  */
147const char _nl_default_dirname[] = GNULOCALEDIR;
148
149/* List with bindings of specific domains created by bindtextdomain()
150   calls.  */
151struct binding *_nl_domain_bindings;
152
153/* Prototypes for local functions.  */
154static char *find_msg __P ((struct loaded_domain *domain, const char *msgid));
155static const char *category_to_name __P((int category));
156static const char *guess_category_value __P((int category,
157					     const char *categoryname));
158
159
160/* Names for the libintl functions are a problem.  They must not clash
161   with existing names and they should follow ANSI C.  But this source
162   code is also used in GNU C Library where the names have a __
163   prefix.  So we have to make a difference here.  */
164#ifdef _LIBC
165# define DCGETTEXT __dcgettext
166#else
167# define DCGETTEXT dcgettext__
168#endif
169
170/* Look up MSGID in the DOMAINNAME message catalog for the current CATEGORY
171   locale.  */
172char *
173DCGETTEXT (domainname, msgid, category)
174     const char *domainname;
175     const char *msgid;
176     int category;
177{
178  struct loaded_domain *domain;
179  struct binding *binding;
180  const char *categoryname;
181  const char *categoryvalue;
182  char *dirname, *xdomainname;
183  char *single_locale;
184  char *retval;
185
186  /* If no real MSGID is given return NULL.  */
187  if (msgid == NULL)
188    return NULL;
189
190  /* If DOMAINNAME is NULL, we are interested in the default domain.  If
191     CATEGORY is not LC_MESSAGES this might not make much sense but the
192     defintion left this undefined.  */
193  if (domainname == NULL)
194    domainname = _nl_current_default_domain;
195
196  /* First find matching binding.  */
197  for (binding = _nl_domain_bindings; binding != NULL; binding = binding->next)
198    {
199      int compare = strcmp (domainname, binding->domainname);
200      if (compare == 0)
201	/* We found it!  */
202	break;
203      if (compare < 0)
204	{
205	  /* It is not in the list.  */
206	  binding = NULL;
207	  break;
208	}
209    }
210
211  if (binding == NULL)
212    dirname = (char *) _nl_default_dirname;
213  else if (binding->dirname[0] == '/')
214    dirname = binding->dirname;
215  else
216    {
217      /* We have a relative path.  Make it absolute now.  */
218      size_t dirname_len = strlen (binding->dirname) + 1;
219      size_t path_max;
220      char *ret;
221
222      path_max = (unsigned) PATH_MAX;
223      path_max += 2;		/* The getcwd docs say to do this.  */
224
225      dirname = (char *) alloca (path_max + dirname_len);
226
227      errno = 0;
228      while ((ret = getcwd (dirname, path_max)) == NULL && errno == ERANGE)
229	{
230	  path_max += PATH_INCR;
231	  dirname = (char *) alloca (path_max + dirname_len);
232	  errno = 0;
233	}
234
235      if (ret == NULL)
236	/* We cannot get the current working directory.  Don't signal an
237	   error but simply return the default string.  */
238	return (char *) msgid;
239
240      /* We don't want libintl.a to depend on any other library.  So
241	 we avoid the non-standard function stpcpy.  In GNU C Library
242	 this function is available, though.  Also allow the symbol
243	 HAVE_STPCPY to be defined.  */
244#if defined _LIBC || defined HAVE_STPCPY
245      stpcpy (stpcpy (strchr (dirname, '\0'), "/"), binding->dirname);
246#else
247      strcat (dirname, "/");
248      strcat (dirname, binding->dirname);
249#endif
250    }
251
252  /* Now determine the symbolic name of CATEGORY and its value.  */
253  categoryname = category_to_name (category);
254  categoryvalue = guess_category_value (category, categoryname);
255
256  xdomainname = (char *) alloca (strlen (categoryname)
257				 + strlen (domainname) + 5);
258  /* We don't want libintl.a to depend on any other library.  So we
259     avoid the non-standard function stpcpy.  In GNU C Library this
260     function is available, though.  Also allow the symbol HAVE_STPCPY
261     to be defined.  */
262#if defined _LIBC || defined HAVE_STPCPY
263  stpcpy (stpcpy (stpcpy (stpcpy (xdomainname, categoryname), "/"),
264		  domainname),
265	  ".mo");
266#else
267  strcpy (xdomainname, categoryname);
268  strcat (xdomainname, "/");
269  strcat (xdomainname, domainname);
270  strcat (xdomainname, ".mo");
271#endif
272
273  /* Creating working area.  */
274  single_locale = (char *) alloca (strlen (categoryvalue) + 1);
275
276
277  /* Search for the given string.  This is a loop because we perhaps
278     got an ordered list of languages to consider for th translation.  */
279  while (1)
280    {
281      /* Make CATEGORYVALUE point to the next element of the list.  */
282      while (categoryvalue[0] != '\0' && categoryvalue[0] == ':')
283	++categoryvalue;
284      if (categoryvalue[0] == '\0')
285	{
286	  /* The whole contents of CATEGORYVALUE has been searched but
287	     no valid entry has been found.  We solve this situation
288	     by implicitely appending a "C" entry, i.e. no translation
289	     will take place.  */
290	  single_locale[0] = 'C';
291	  single_locale[1] = '\0';
292	}
293      else
294	{
295	  char *cp = single_locale;
296	  while (categoryvalue[0] != '\0' && categoryvalue[0] != ':')
297	    *cp++ = *categoryvalue++;
298	  *cp = '\0';
299	}
300
301      /* If the current locale value is C (or POSIX) we don't load a
302	 domain.  Return the MSGID.  */
303      if (strcmp (single_locale, "C") == 0
304	  || strcmp (single_locale, "POSIX") == 0)
305	return (char *) msgid;
306
307
308      /* Find structure describing the message catalog matching the
309	 DOMAINNAME and CATEGORY.  */
310      domain = _nl_find_domain (dirname, single_locale, xdomainname);
311
312      if (domain != NULL)
313	{
314	  retval = find_msg (domain, msgid);
315
316	  if (retval == NULL)
317	    {
318	      int cnt;
319
320	      for (cnt = 0; domain->successor[cnt] != NULL; ++cnt)
321		{
322		  retval = find_msg (domain->successor[cnt], msgid);
323
324		  if (retval != NULL)
325		    break;
326		}
327	    }
328
329	  if (retval != NULL)
330	    return retval;
331	}
332    }
333  /* NOTREACHED */
334}
335
336#ifdef _LIBC
337/* Alias for function name in GNU C Library.  */
338weak_alias (__dcgettext, dcgettext);
339#endif
340
341
342static char *
343find_msg (domain, msgid)
344     struct loaded_domain *domain;
345     const char *msgid;
346{
347  size_t top, act, bottom;
348
349  if (domain->decided == 0)
350    _nl_load_domain (domain);
351
352  if (domain->data == NULL)
353    return NULL;
354
355  /* Locate the MSGID and its translation.  */
356  if (domain->hash_size > 2 && domain->hash_tab != NULL)
357    {
358      /* Use the hashing table.  */
359      nls_uint32 len = strlen (msgid);
360      nls_uint32 hash_val = hash_string (msgid);
361      nls_uint32 idx = hash_val % domain->hash_size;
362      nls_uint32 incr = 1 + (hash_val % (domain->hash_size - 2));
363      nls_uint32 nstr = W (domain->must_swap, domain->hash_tab[idx]);
364
365      if (nstr == 0)
366	/* Hash table entry is empty.  */
367	return NULL;
368
369      if (W (domain->must_swap, domain->orig_tab[nstr - 1].length) == len
370	  && strcmp (msgid,
371		     domain->data + W (domain->must_swap,
372				       domain->orig_tab[nstr - 1].offset)) == 0)
373	return (char *) domain->data + W (domain->must_swap,
374					  domain->trans_tab[nstr - 1].offset);
375
376      while (1)
377	{
378	  if (idx >= W (domain->must_swap, domain->hash_size) - incr)
379	    idx -= W (domain->must_swap, domain->hash_size) - incr;
380	  else
381	    idx += incr;
382
383	  nstr = W (domain->must_swap, domain->hash_tab[idx]);
384	  if (nstr == 0)
385	    /* Hash table entry is empty.  */
386	    return NULL;
387
388	  if (W (domain->must_swap, domain->orig_tab[nstr - 1].length) == len
389	      && strcmp (msgid,
390			 domain->data + W (domain->must_swap,
391					   domain->orig_tab[nstr - 1].offset))
392	         == 0)
393	    return (char *) domain->data
394	      + W (domain->must_swap, domain->trans_tab[nstr - 1].offset);
395	}
396      /* NOTREACHED */
397    }
398
399  /* Now we try the default method:  binary search in the sorted
400     array of messages.  */
401  bottom = 0;
402  top = domain->nstrings;
403  while (bottom < top)
404    {
405      int cmp_val;
406
407      act = (bottom + top) / 2;
408      cmp_val = strcmp (msgid, domain->data
409			       + W (domain->must_swap,
410				    domain->orig_tab[act].offset));
411      if (cmp_val < 0)
412	top = act;
413      else if (cmp_val > 0)
414	bottom = act + 1;
415      else
416	break;
417    }
418
419  /* If an translation is found return this.  */
420  return bottom >= top ? NULL : (char *) domain->data
421                                + W (domain->must_swap,
422				     domain->trans_tab[act].offset);
423}
424
425
/* Return the symbolic name of the locale CATEGORY as a string constant.
   Only categories whose LC_* macro is defined at compile time are
   recognized; any other value yields the placeholder "LC_XXX".  */
static const char *category_to_name (category)
     int category;
{
#ifdef LC_COLLATE
  if (category == LC_COLLATE)
    return "LC_COLLATE";
#endif
#ifdef LC_CTYPE
  if (category == LC_CTYPE)
    return "LC_CTYPE";
#endif
#ifdef LC_MONETARY
  if (category == LC_MONETARY)
    return "LC_MONETARY";
#endif
#ifdef LC_NUMERIC
  if (category == LC_NUMERIC)
    return "LC_NUMERIC";
#endif
#ifdef LC_TIME
  if (category == LC_TIME)
    return "LC_TIME";
#endif
#ifdef LC_MESSAGES
  if (category == LC_MESSAGES)
    return "LC_MESSAGES";
#endif
#ifdef LC_RESPONSE
  if (category == LC_RESPONSE)
    return "LC_RESPONSE";
#endif
#ifdef LC_ALL
  /* This might not make sense but is perhaps better than any other
     value.  */
  if (category == LC_ALL)
    return "LC_ALL";
#endif

  /* Unknown category.  If you have a better idea for a default value
     let me know.  */
  return "LC_XXX";
}
483
/* Return the value of the environment variable NAME, or NULL if it is
   unset or set to the empty string.  */
static const char *nonempty_env (name)
     const char *name;
{
  const char *value = getenv (name);

  return (value != NULL && value[0] != '\0') ? value : NULL;
}

/* Guess value of current locale from value of the environment variables.

   CATEGORY is the locale category constant and CATEGORYNAME the name of
   the environment variable belonging to it.  The result is never NULL
   and never the empty string; the caller measures it with strlen and
   sizes a buffer from that length which must be able to hold "C".  */
static const char *guess_category_value (category, categoryname)
     int category;
     const char *categoryname;
{
  const char *retval;

  /* The highest priority value is the `LANGUAGE' environment
     variable.  This is a GNU extension.  */
  retval = nonempty_env ("LANGUAGE");
  if (retval != NULL)
    return retval;

  /* `LANGUAGE' is not set.  So we have to proceed with the POSIX
     methods of looking to `LC_ALL', `LC_xxx', and `LANG'.  On some
     systems this can be done by the `setlocale' function itself.  */
#if defined HAVE_SETLOCALE && defined HAVE_LC_MESSAGES && defined HAVE_LOCALE_NULL
  retval = setlocale (category, NULL);
  /* setlocale yields NULL when the request cannot be honored, e.g. for
     a CATEGORY it does not know.  Treat that, and an empty name, like
     the "C" locale instead of handing an unusable value to the caller.  */
  if (retval == NULL || retval[0] == '\0')
    retval = "C";
  return retval;
#else
  /* Setting of LC_ALL overwrites all other.  */
  retval = nonempty_env ("LC_ALL");
  if (retval != NULL)
    return retval;

  /* Next comes the name of the desired category.  */
  retval = nonempty_env (categoryname);
  if (retval != NULL)
    return retval;

  /* Last possibility is the LANG environment variable.  */
  retval = nonempty_env ("LANG");
  if (retval != NULL)
    return retval;

  /* We use C as the default domain.  POSIX says this is implementation
     defined.  */
  return "C";
#endif
}
523