1/***********************************************************************
2*                                                                      *
3*               This software is part of the ast package               *
4*          Copyright (c) 1985-2010 AT&T Intellectual Property          *
5*                      and is licensed under the                       *
6*                  Common Public License, Version 1.0                  *
7*                    by AT&T Intellectual Property                     *
8*                                                                      *
9*                A copy of the License is available at                 *
10*            http://www.opensource.org/licenses/cpl1.0.txt             *
11*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12*                                                                      *
13*              Information and Software Systems Research               *
14*                            AT&T Research                             *
15*                           Florham Park NJ                            *
16*                                                                      *
17*                 Glenn Fowler <gsf@research.att.com>                  *
18*                  David Korn <dgk@research.att.com>                   *
19*                   Phong Vo <kpv@research.att.com>                    *
20*                                                                      *
21***********************************************************************/
22#pragma prototyped
23
24/*
25 * setlocale() intercept
26 * maintains a bitmask of non-default categories
27 * and a permanent locale namespace for pointer comparison
28 * and persistent private data for locale related functions
29 */
30
31#include <ast_standards.h>
32
33#include "lclib.h"
34
35#include <ast_wchar.h>
36#include <ctype.h>
37#include <mc.h>
38#include <namval.h>
39
40#if ( _lib_wcwidth || _lib_wctomb ) && _hdr_wctype
41#include <wctype.h>
42#endif
43
44#if _lib_wcwidth
45#undef	wcwidth
46#else
47#define wcwidth			0
48#endif
49
50#if _lib_wctomb
51#undef	wctomb
52#else
53#define wctomb			0
54#endif
55
56#ifdef mblen
57#undef	mblen
58extern int		mblen(const char*, size_t);
59#endif
60
61#undef	mbtowc
62#undef	setlocale
63#undef	strcmp
64#undef	strcoll
65#undef	strxfrm
66#undef	valid
67
68#ifndef AST_LC_CANONICAL
69#define AST_LC_CANONICAL	LC_abbreviated
70#endif
71
72#ifndef AST_LC_test
73#define AST_LC_test		(1L<<27)
74#endif
75
76#if _UWIN
77
78#include <ast_windows.h>
79
80#undef	_lib_setlocale
81#define _lib_setlocale		1
82
83#define setlocale(c,l)		native_setlocale(c,l)
84
85extern char*			uwin_setlocale(int, const char*);
86
87/*
88 * convert locale to native locale name in buf
89 */
90
91static char*
92native_locale(const char* locale, char* buf, size_t siz)
93{
94	Lc_t*				lc;
95	const Lc_attribute_list_t*	ap;
96	int				i;
97	unsigned long			lcid;
98	unsigned long			lang;
99	unsigned long			ctry;
100	char				lbuf[128];
101	char				cbuf[128];
102
103	if (locale && *locale)
104	{
105		if (!(lc = lcmake(locale)))
106			return 0;
107		lang = lc->language->index;
108		ctry = 0;
109		for (ap = lc->attributes; ap; ap = ap->next)
110			if (ctry = ap->attribute->index)
111				break;
112		if (!ctry)
113		{
114			for (i = 0; i < elementsof(lc->territory->languages); i++)
115				if (lc->territory->languages[i] == lc->language)
116				{
117					ctry = lc->territory->indices[i];
118					break;
119				}
120			if (!ctry)
121			{
122				if (!lang)
123					return 0;
124				ctry = SUBLANG_DEFAULT;
125			}
126		}
127		lcid = MAKELCID(MAKELANGID(lang, ctry), SORT_DEFAULT);
128	}
129	else
130		lcid = GetUserDefaultLCID();
131	if (GetLocaleInfo(lcid, LOCALE_SENGLANGUAGE, lbuf, sizeof(lbuf)) <= 0 ||
132	    GetLocaleInfo(lcid, LOCALE_SENGCOUNTRY, cbuf, sizeof(cbuf)) <= 0)
133		return 0;
134	if (lc->charset->ms)
135		sfsprintf(buf, siz, "%s_%s.%s", lbuf, cbuf, lc->charset->ms);
136	else
137		sfsprintf(buf, siz, "%s_%s", lbuf, cbuf);
138	return buf;
139}
140
141/*
142 * locale!=0 here
143 */
144
145static char*
146native_setlocale(int category, const char* locale)
147{
148	char*		usr;
149	char*		sys;
150	char		buf[256];
151
152	if (!(usr = native_locale(locale, buf, sizeof(buf))))
153		return 0;
154
155	/*
156	 * win32 doesn't have LC_MESSAGES
157	 */
158
159	if (category == LC_MESSAGES)
160		return (char*)locale;
161	sys = uwin_setlocale(category, usr);
162	if (ast.locale.set & AST_LC_debug)
163		sfprintf(sfstderr, "locale uwin %17s %-24s %-24s\n", lc_categories[lcindex(category, 0)].name, usr, sys);
164	return sys;
165}
166
167#else
168
169#define native_locale(a,b,c)	((char*)0)
170
171#endif
172
173/*
174 * LC_COLLATE and LC_CTYPE native support
175 */
176
177#if !_lib_mbtowc || MB_LEN_MAX <= 1
178#define mblen		0
179#define mbtowc		0
180#endif
181
182#if !_lib_strcoll
183#define	strcoll		0
184#endif
185
186#if !_lib_strxfrm
187#define	strxfrm		0
188#endif
189
190/*
191 * LC_COLLATE and LC_CTYPE debug support
192 *
 * multibyte debug encoding
194 *
195 *	DL0 [ '0' .. '4' ] c1 ... c4 DR0
196 *	DL1 [ '0' .. '4' ] c1 ... c4 DR1
197 *
198 * with these ligatures
199 *
200 *	ch CH sst SST
201 *
202 * and private collation order
203 *
204 * wide character display width is the low order 3 bits
205 * wctomb() uses DL1...DR1
206 */
207
208#define DEBUG_MB_CUR_MAX	7
209
210#if DEBUG_MB_CUR_MAX < MB_LEN_MAX
211#undef	DEBUG_MB_CUR_MAX
212#define DEBUG_MB_CUR_MAX	MB_LEN_MAX
213#endif
214
215#define DL0	'<'
216#define DL1	0xab		/* 8-bit mini << on xterm	*/
217#define DR0	'>'
218#define DR1	0xbb		/* 8-bit mini >> on xterm	*/
219
220#define DB	((int)sizeof(wchar_t)*8-1)
221#define DC	7		/* wchar_t embedded char bits	*/
222#define DX	(DB/DC)		/* wchar_t max embedded chars	*/
223#define DZ	(DB-DX*DC+1)	/* wchar_t embedded size bits	*/
224#define DD	3		/* # mb delimiter chars <n...>	*/
225
/*
 * private debug collation order
 *
 * indexed by byte value in debug_strxfrm() to produce the
 * transformed byte stored for that character
 * entries 0..31 and 128..255 map to themselves
 */

static unsigned char debug_order[] =
{
	  0,   1,   2,   3,   4,   5,   6,   7,
	  8,   9,  10,  11,  12,  13,  14,  15,
	 16,  17,  18,  19,  20,  21,  22,  23,
	 24,  25,  26,  27,  28,  29,  30,  31,
	 99, 100, 101, 102,  98, 103, 104, 105,
	106, 107, 108,  43, 109,  44,  42, 110,
	 32,  33,  34,  35,  36,  37,  38,  39,
	 40,  41, 111, 112, 113, 114, 115, 116,
	117,  71,  72,  73,  74,  75,  76,  77,
	 78,  79,  80,  81,  82,  83,  84,  85,
	 86,  87,  88,  89,  90,  91,  92,  93,
	 94,  95,  96, 118, 119, 120, 121,  97,
	122,  45,  46,  47,  48,  49,  50,  51,
	 52,  53,  54,  55,  56,  57,  58,  59,
	 60,  61,  62,  63,  64,  65,  66,  67,
	 68,  69,  70, 123, 124, 125, 126, 127,
	128, 129, 130, 131, 132, 133, 134, 135,
	136, 137, 138, 139, 140, 141, 142, 143,
	144, 145, 146, 147, 148, 149, 150, 151,
	152, 153, 154, 155, 156, 157, 158, 159,
	160, 161, 162, 163, 164, 165, 166, 167,
	168, 169, 170, 171, 172, 173, 174, 175,
	176, 177, 178, 179, 180, 181, 182, 183,
	184, 185, 186, 187, 188, 189, 190, 191,
	192, 193, 194, 195, 196, 197, 198, 199,
	200, 201, 202, 203, 204, 205, 206, 207,
	208, 209, 210, 211, 212, 213, 214, 215,
	216, 217, 218, 219, 220, 221, 222, 223,
	224, 225, 226, 227, 228, 229, 230, 231,
	232, 233, 234, 235, 236, 237, 238, 239,
	240, 241, 242, 243, 244, 245, 246, 247,
	248, 249, 250, 251, 252, 253, 254, 255,
};
261
262static int
263debug_mbtowc(register wchar_t* p, register const char* s, size_t n)
264{
265	register const char*	q;
266	register const char*	r;
267	register int		w;
268	register int		dr;
269	wchar_t			c;
270
271	if (n < 1)
272		return -1;
273	if (!s || !*s)
274		return 0;
275	switch (((unsigned char*)s)[0])
276	{
277	case DL0:
278		dr = DR0;
279		break;
280	case DL1:
281		dr = DR1;
282		break;
283	default:
284		if (p)
285			*p = ((unsigned char*)s)[0] & ((1<<DC)-1);
286		return 1;
287	}
288	if (n < 2)
289		return -1;
290	if ((w = ((unsigned char*)s)[1]) == ((unsigned char*)s)[0])
291	{
292		if (p)
293			*p = w;
294		return 2;
295	}
296	if (w < '0' || w > ('0' + DX))
297		return -1;
298	if ((w -= '0' - DD) > n)
299		return -1;
300	r = s + w - 1;
301	q = s += 2;
302	while (q < r && *q)
303		q++;
304	if (q != r || *((unsigned char*)q) != dr)
305		return -1;
306	if (p)
307	{
308		c = 0;
309		while (--q >= s)
310		{
311			c <<= DC;
312			c |= *((unsigned char*)q);
313		}
314		c <<= DZ;
315		c |= w - DD;
316		*p = c;
317	}
318	return w;
319}
320
321static int
322debug_wctomb(char* s, wchar_t c)
323{
324	int	w;
325	int	i;
326	int	k;
327
328	w = 0;
329	if (c >= 0 && c <= UCHAR_MAX)
330	{
331		w++;
332		if (s)
333			*s = c;
334	}
335	else if ((i = c & ((1<<DZ)-1)) > DX)
336		return -1;
337	else
338	{
339		w++;
340		if (s)
341			*s++ = DL1;
342		c >>= DZ;
343		w++;
344		if (s)
345			*s++ = i + '0';
346		while (i--)
347		{
348			w++;
349			if (s)
350				*s++ = (k = c & ((1<<DC)-1)) ? k : '?';
351			c >>= DC;
352		}
353		w++;
354		if (s)
355			*s++ = DR1;
356	}
357	return w;
358}
359
360static int
361debug_mblen(const char* s, size_t n)
362{
363	return debug_mbtowc(NiL, s, n);
364}
365
366static int
367debug_wcwidth(wchar_t c)
368{
369	if (c >= 0 && c <= UCHAR_MAX)
370		return 1;
371	if ((c &= ((1<<DZ)-1)) > DX)
372		return -1;
373	return c + DD;
374}
375
376static size_t
377debug_strxfrm(register char* t, register const char* s, size_t n)
378{
379	register const char*	q;
380	register const char*	r;
381	register char*		e;
382	char*			o;
383	register size_t		z;
384	register int		w;
385
386	o = t;
387	z = 0;
388	if (e = t)
389		e += n;
390	while (s[0])
391	{
392		if ((((unsigned char*)s)[0] == DL0 || ((unsigned char*)s)[0] == DL1) && (w = s[1]) >= '0' && w <= ('0' + DC))
393		{
394			w -= '0';
395			q = s + 2;
396			r = q + w;
397			while (q < r && *q)
398				q++;
399			if (*((unsigned char*)q) == DR0 || *((unsigned char*)q) == DR1)
400			{
401				if (t)
402				{
403					for (q = s + 2; q < r; q++)
404						if (t < e)
405							*t++ = debug_order[*q];
406					while (w++ < DX)
407						if (t < e)
408							*t++ = 1;
409				}
410				s = r + 1;
411				z += DX;
412				continue;
413			}
414		}
415		if ((s[0] == 'c' || s[0] == 'C') && (s[1] == 'h' || s[1] == 'H'))
416		{
417			if (t)
418			{
419				if (t < e)
420					*t++ = debug_order[s[0]];
421				if (t < e)
422					*t++ = debug_order[s[1]];
423				if (t < e)
424					*t++ = 1;
425				if (t < e)
426					*t++ = 1;
427			}
428			s += 2;
429			z += DX;
430			continue;
431		}
432		if ((s[0] == 's' || s[0] == 'S') && (s[1] == 's' || s[1] == 'S') && (s[2] == 't' || s[2] == 'T'))
433		{
434			if (t)
435			{
436				if (t < e)
437					*t++ = debug_order[s[0]];
438				if (t < e)
439					*t++ = debug_order[s[1]];
440				if (t < e)
441					*t++ = debug_order[s[2]];
442				if (t < e)
443					*t++ = 1;
444			}
445			s += 3;
446			z += DX;
447			continue;
448		}
449		if (t)
450		{
451			if (t < e)
452				*t++ = debug_order[s[0]];
453			if (t < e)
454				*t++ = 1;
455			if (t < e)
456				*t++ = 1;
457			if (t < e)
458				*t++ = 1;
459		}
460		s++;
461		z += DX;
462	}
463	if (!t)
464		return z;
465	if (t < e)
466		*t = 0;
467	return t - o;
468}
469
/*
 * debug locale strcoll()
 * compares the debug_strxfrm() transforms of a and b
 * each transform is limited to a fixed size local buffer
 */

static int
debug_strcoll(const char* a, const char* b)
{
	char	xa[1024];
	char	xb[1024];

	/*
	 * the last byte is reserved for the terminator
	 */

	debug_strxfrm(xa, a, sizeof(xa) - 1);
	debug_strxfrm(xb, b, sizeof(xb) - 1);
	xa[sizeof(xa) - 1] = 0;
	xb[sizeof(xb) - 1] = 0;
	return strcmp(xa, xb);
}
482
/*
 * default locale wcwidth()
 * width 1 for non-control values in 0..255, -1 otherwise
 */

static int
default_wcwidth(wchar_t w)
{
	if (w < 0 || w > 255)
		return -1;
	if (iscntrl(w))
		return -1;
	return 1;
}
492
493/*
494 * called when LC_COLLATE initialized or changes
495 */
496
497static int
498set_collate(Lc_category_t* cp)
499{
500	if (locales[cp->internal]->flags & LC_debug)
501	{
502		ast.collate = debug_strcoll;
503		ast.mb_xfrm = debug_strxfrm;
504	}
505	else if (locales[cp->internal]->flags & LC_default)
506	{
507		ast.collate = strcmp;
508		ast.mb_xfrm = 0;
509	}
510	else
511	{
512		ast.collate = strcoll;
513		ast.mb_xfrm = strxfrm;
514	}
515	return 0;
516}
517
518/*
519 * workaround the interesting sjis that translates unshifted 7 bit ascii!
520 */
521
522#if _hdr_wchar && _typ_mbstate_t && _lib_mbrtowc
523
524#define mb_state_zero	((mbstate_t*)&ast.pad[sizeof(ast.pad)-2*sizeof(mbstate_t)])
525#define mb_state	((mbstate_t*)&ast.pad[sizeof(ast.pad)-sizeof(mbstate_t)])
526
527static int
528sjis_mbtowc(register wchar_t* p, register const char* s, size_t n)
529{
530	if (n && p && s && (*s == '\\' || *s == '~') && !memcmp(mb_state, mb_state_zero, sizeof(mbstate_t)))
531	{
532		*p = *s;
533		return 1;
534	}
535	return mbrtowc(p, s, n, mb_state);
536}
537
538#endif
539
540#define utf8_wctomb	wctomb
541
/*
 * indexed by the sequence length from utf8tab[]
 * utf8_mbtowc() rejects a decoded multibyte value that has
 * none of the bits in utf8mask[length] set
 */

static const uint32_t		utf8mask[] =
{
	0x00000000,
	0x00000000,
	0xffffff80,
	0xfffff800,
	0xffff0000,
	0xffe00000,
	0xfc000000,
};
552
/*
 * indexed by the first byte of a sequence
 * a positive entry is the sequence length in bytes that
 * utf8_mbtowc() expects; bytes with a 0 or -1 entry are not
 * accepted as the start of a sequence
 */

static const signed char	utf8tab[256] =
{
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6,-1,-1,
};
572
573static int
574utf8_mbtowc(wchar_t* wp, const char* str, size_t n)
575{
576	register unsigned char*	sp = (unsigned char*)str;
577	register int		m;
578	register int		i;
579	register int		c;
580	register wchar_t	w = 0;
581
582	if (!sp || !n)
583		return 0;
584	if ((m = utf8tab[*sp]) > 0)
585	{
586		if (m > n)
587			return -1;
588		if (wp)
589		{
590			if (m == 1)
591			{
592				*wp = *sp;
593				return 1;
594			}
595			w = *sp & ((1<<(8-m))-1);
596			for (i = m - 1; i > 0; i--)
597			{
598				c = *++sp;
599				if ((c&0xc0) != 0x80)
600					goto invalid;
601				w = (w<<6) | (c&0x3f);
602			}
603			if (!(utf8mask[m] & w) || w >= 0xd800 && (w <= 0xdfff || w >= 0xfffe && w <= 0xffff))
604				goto invalid;
605			*wp = w;
606		}
607		return m;
608	}
609	if (!*sp)
610		return 0;
611 invalid:
612#ifdef EILSEQ
613	errno = EILSEQ;
614#endif
615	ast.mb_sync = (const char*)sp - str;
616	return -1;
617}
618
/*
 * private utf8 mblen()
 * decodes into a scratch wide character so that
 * utf8_mbtowc() does its full validation
 */

static int
utf8_mblen(const char* str, size_t n)
{
	wchar_t		scratch;
	int		len;

	len = utf8_mbtowc(&scratch, str, n);
	return len;
}
626
/*
 * called when LC_CTYPE initialized or changes
 *
 * selects ast.mb_cur_max and the ast.mb_len, ast.mb_towc,
 * ast.mb_width and ast.mb_conv functions from the flags of the
 * locale now in effect for the category
 * always returns 0
 */

static int
set_ctype(Lc_category_t* cp)
{
	ast.mb_sync = 0;
	if (locales[cp->internal]->flags & LC_debug)
	{
		/*
		 * private debug multibyte encoding
		 */

		ast.mb_cur_max = DEBUG_MB_CUR_MAX;
		ast.mb_len = debug_mblen;
		ast.mb_towc = debug_mbtowc;
		ast.mb_width = debug_wcwidth;
		ast.mb_conv = debug_wctomb;
	}

	/*
	 * NOTE: this condition assigns ast.mb_cur_max, ast.mb_len and
	 * ast.mb_towc as it is evaluated; when it is false those native
	 * values stay in effect for the branches below
	 * mblen and mbtowc may be defined as 0 earlier in this file
	 */

	else if ((locales[cp->internal]->flags & LC_default) || (ast.mb_cur_max = MB_CUR_MAX) <= 1 || !(ast.mb_len = mblen) || !(ast.mb_towc = mbtowc))
	{
		/*
		 * single byte
		 */

		ast.mb_cur_max = 1;
		ast.mb_len = 0;
		ast.mb_towc = 0;
		ast.mb_width = default_wcwidth;
		ast.mb_conv = 0;
	}
	else if ((locales[cp->internal]->flags & LC_utf8) && !(ast.locale.set & AST_LC_test))
	{
		/*
		 * private utf8 decoder unless the test option is set
		 * wcwidth may be defined as 0 earlier in this file
		 */

		ast.mb_cur_max = 6;
		ast.mb_len = utf8_mblen;
		ast.mb_towc = utf8_mbtowc;
		if (!(ast.mb_width = wcwidth))
			ast.mb_width = default_wcwidth;
		ast.mb_conv = utf8_wctomb;
	}
	else
	{
		/*
		 * native multibyte functions
		 */

		if (!(ast.mb_width = wcwidth))
			ast.mb_width = default_wcwidth;
		ast.mb_conv = wctomb;
#ifdef mb_state
		{
			/*
			 * check for sjis that translates unshifted 7 bit ascii!
			 * if '\\' does not convert to itself then reset the
			 * conversion state and install sjis_mbtowc
			 */

			char*	s;
			char	buf[2];

			mbinit();
			buf[1] = 0;
			*(s = buf) = '\\';
			if (mbchar(s) != buf[0])
			{
				memcpy(mb_state, mb_state_zero, sizeof(mbstate_t));
				ast.mb_towc = sjis_mbtowc;
			}
		}
#endif
	}

	/*
	 * trace the selection when the debug or setlocale option is set
	 */

	if (ast.locale.set & (AST_LC_debug|AST_LC_setlocale))
		sfprintf(sfstderr, "locale info %17s MB_CUR_MAX=%d%s%s%s%s\n"
			, cp->name
			, ast.mb_cur_max
			, ast.mb_len == debug_mblen ? " debug_mblen" : ast.mb_len == mblen ? " mblen" : ""
			, ast.mb_towc == debug_mbtowc ? " debug_mbtowc" : ast.mb_towc == mbtowc ? " mbtowc"
#ifdef mb_state
				: ast.mb_towc == sjis_mbtowc ? " sjis_mbtowc"
#endif
				: ""
			, ast.mb_width == debug_wcwidth ? " debug_wcwidth" : ast.mb_width == wcwidth ? " wcwidth" : ast.mb_width == default_wcwidth ? " default_wcwidth" : ""
			, ast.mb_conv == debug_wctomb ? " debug_wctomb" : ast.mb_conv == wctomb ? " wctomb" : ""
			);
	return 0;
}
700
701/*
702 * called when LC_NUMERIC initialized or changes
703 */
704
705static int
706set_numeric(Lc_category_t* cp)
707{
708	register int		category = cp->internal;
709	struct lconv*		lp;
710	Lc_numeric_t*		dp;
711
712	static Lc_numeric_t	default_numeric = { '.', -1 };
713
714	if (!LCINFO(category)->data)
715	{
716		if ((lp = localeconv()) && (dp = newof(0, Lc_numeric_t, 1, 0)))
717		{
718			dp->decimal = lp->decimal_point && *lp->decimal_point ? *(unsigned char*)lp->decimal_point : '.';
719			dp->thousand = lp->thousands_sep && *lp->thousands_sep ? *(unsigned char*)lp->thousands_sep : -1;
720		}
721		else
722			dp = &default_numeric;
723		LCINFO(category)->data = (void*)dp;
724		if (ast.locale.set & (AST_LC_debug|AST_LC_setlocale))
725			sfprintf(sfstderr, "locale info %17s decimal '%c' thousands '%c'\n", lc_categories[category].name, dp->decimal, dp->thousand >= 0 ? dp->thousand : 'X');
726	}
727	return 0;
728}
729
/*
 * this table is indexed by AST_LC_[A-Z]*
 *
 * the initializers are, in order: the category name, the LC_*
 * value, the AST_LC_* value, and the function called by single()
 * after the category locale changes (0 if none)
 */

Lc_category_t		lc_categories[] =
{
{ "LC_ALL",           LC_ALL,           AST_LC_ALL,           0               },
{ "LC_COLLATE",       LC_COLLATE,       AST_LC_COLLATE,       set_collate     },
{ "LC_CTYPE",         LC_CTYPE,         AST_LC_CTYPE,         set_ctype       },
{ "LC_MESSAGES",      LC_MESSAGES,      AST_LC_MESSAGES,      0               },
{ "LC_MONETARY",      LC_MONETARY,      AST_LC_MONETARY,      0               },
{ "LC_NUMERIC",       LC_NUMERIC,       AST_LC_NUMERIC,       set_numeric     },
{ "LC_TIME",          LC_TIME,          AST_LC_TIME,          0               },
{ "LC_IDENTIFICATION",LC_IDENTIFICATION,AST_LC_IDENTIFICATION,0               },
{ "LC_ADDRESS",       LC_ADDRESS,       AST_LC_ADDRESS,       0               },
{ "LC_NAME",          LC_NAME,          AST_LC_NAME,          0               },
{ "LC_TELEPHONE",     LC_TELEPHONE,     AST_LC_TELEPHONE,     0               },
{ "LC_XLITERATE",     LC_XLITERATE,     AST_LC_XLITERATE,     0               },
{ "LC_MEASUREMENT",   LC_MEASUREMENT,   AST_LC_MEASUREMENT,   0               },
{ "LC_PAPER",         LC_PAPER,         AST_LC_PAPER,         0               },
};
751
/*
 * lang is the LANG locale and lc_all the LC_ALL locale
 * both are set by _ast_setlocale() and are 0 when unset
 */

static Lc_t*		lang;
static Lc_t*		lc_all;
754
/*
 * option name and the ast.locale.set bit(s) it controls
 */

typedef struct Unamval_s
{
	char*		name;
	unsigned int	value;
} Unamval_t;
760
/*
 * LC_OPTIONS names passed to stropt() by _ast_setlocale()
 * the list ends with a 0 name
 */

static const Unamval_t	options[] =
{
	"debug",		AST_LC_debug,
	"find",			AST_LC_find,
	"setlocale",		AST_LC_setlocale,
	"test",			AST_LC_test,
	"translate",		AST_LC_translate,
	0,			0
};
770
771/*
772 * called by stropt() to set options
773 */
774
775static int
776setopt(void* a, const void* p, int n, const char* v)
777{
778	if (p)
779	{
780		if (n)
781			ast.locale.set |= ((Unamval_t*)p)->value;
782		else
783			ast.locale.set &= ~((Unamval_t*)p)->value;
784	}
785	return 0;
786}
787
788#if !_lib_setlocale
789
790#define setlocale(c,l)		default_setlocale(c,l)
791
792static char*
793default_setlocale(int category, const char* locale)
794{
795	Lc_t*		lc;
796
797	if (locale)
798	{
799		if (!(lc = lcmake(locale)) || !(lc->flags & LC_default))
800			return 0;
801		locales[0]->flags &= ~lc->flags;
802		locales[1]->flags &= ~lc->flags;
803		return lc->name;
804	}
805	return (locales[1]->flags & (1<<category)) ? locales[1]->name : locales[0]->name;
806}
807
808#endif
809
/*
 * set a single AST_LC_* locale category
 * the caller must validate category
 * lc==0 restores the previous state
 *
 * flags is 0 or one of LC_setenv, LC_setlocale and records
 * how the category is being set
 *
 * returns the name of the locale in effect for category,
 * 0 if the locale could not be set
 */

static char*
single(int category, Lc_t* lc, unsigned int flags)
{
	const char*	sys;
	int		i;

	if (flags & (LC_setenv|LC_setlocale))
	{
		/*
		 * remember the request unless this is an internal call
		 */

		if (!(ast.locale.set & AST_LC_internal))
			lc_categories[category].prev = lc;

		/*
		 * an LC_setenv request changes nothing while lc_all is set
		 */

		if ((flags & LC_setenv) && lc_all && locales[category])
			return (char*)locales[category]->name;
	}

	/*
	 * lc==0 picks the first one set of: the category prev (only if
	 * the category has LC_setlocale set), lc_all, the category prev,
	 * lang, and finally lcmake(NiL)
	 */

	if (!lc && (!(lc_categories[category].flags & LC_setlocale) || !(lc = lc_categories[category].prev)) && !(lc = lc_all) && !(lc = lc_categories[category].prev) && !(lc = lang))
		lc = lcmake(NiL);
	sys = 0;
	if (locales[category] != lc)
	{
		/*
		 * NOTE(review): external == -internal presumably marks a
		 * category with no native counterpart -- confirm in lclib.h
		 * such a category is accepted only if some other category
		 * already uses lc
		 */

		if (lc_categories[category].external == -lc_categories[category].internal)
		{
			for (i = 1; i < AST_LC_COUNT; i++)
				if (locales[i] == lc)
				{
					sys = (char*)lc->name;
					break;
				}
		}

		/*
		 * debug and local locales use the lcmake(NiL) locale natively
		 */

		else if (lc->flags & (LC_debug|LC_local))
			sys = setlocale(lc_categories[category].external, lcmake(NiL)->name);

		/*
		 * try the locale name, then its code, then its language code
		 */

		else if (!(sys = setlocale(lc_categories[category].external, lc->name)) &&
			 (streq(lc->name, lc->code) || !(sys = setlocale(lc_categories[category].external, lc->code))) &&
			 !streq(lc->code, lc->language->code))
				sys = setlocale(lc_categories[category].external, lc->language->code);
		if (!sys)
		{
			/*
			 * check for local override
			 * currently this means an LC_MESSAGES dir exists
			 * the check is done once per locale (LC_checked)
			 */

			if (!(lc->flags & LC_checked))
			{
				char	path[PATH_MAX];

				if (mcfind(path, lc->code, NiL, LC_MESSAGES, 0))
					lc->flags |= LC_local;
				lc->flags |= LC_checked;
			}
			if (!(lc->flags & LC_local))
				return 0;
			if (lc_categories[category].external != -lc_categories[category].internal)
				setlocale(lc_categories[category].external, lcmake(NiL)->name);
		}
		locales[category] = lc;

		/*
		 * let the category react to the change
		 * a non-zero return puts the category prev back in locales[]
		 */

		if (lc_categories[category].setf && (*lc_categories[category].setf)(&lc_categories[category]))
		{
			locales[category] = lc_categories[category].prev;
			return 0;
		}

		/*
		 * maintain the bitmask of non-default categories
		 * en and en_U* LC_MESSAGES count as default
		 */

		if ((lc->flags & LC_default) || category == AST_LC_MESSAGES && lc->name[0] == 'e' && lc->name[1] == 'n' && (lc->name[2] == 0 || lc->name[2] == '_' && lc->name[3] == 'U'))
			ast.locale.set &= ~(1<<category);
		else
			ast.locale.set |= (1<<category);
	}
	else if (lc_categories[category].flags ^ flags)
	{
		/*
		 * same locale but set a different way: update the flags
		 */

		lc_categories[category].flags &= ~(LC_setenv|LC_setlocale);
		lc_categories[category].flags |= flags;
	}
	else
		return (char*)lc->name;
	if ((ast.locale.set & (AST_LC_debug|AST_LC_setlocale)) && !(ast.locale.set & AST_LC_internal))
		sfprintf(sfstderr, "locale set  %17s %16s %16s %16s %s%s\n", lc_categories[category].name, lc->name, sys, lc_categories[category].prev ? lc_categories[category].prev->name : NiL, (lc_categories[category].flags & LC_setlocale) ? "[setlocale]" : "", (lc_categories[category].flags & LC_setenv) ? "[setenv]" : "");
	return (char*)lc->name;
}
891
892/*
893 * set composite AST_LC_ALL locale categories
894 * return <0:composite-error 0:not-composite >0:composite-ok
895 */
896
897static int
898composite(register const char* s, int initialize)
899{
900	register const char*	t;
901	register int		i;
902	register int		j;
903	register int		k;
904	int			n;
905	int			m;
906	const char*		w;
907	Lc_t*			p;
908	int			cat[AST_LC_COUNT];
909	int			stk[AST_LC_COUNT];
910	char			buf[PATH_MAX / 2];
911
912	k = n = 0;
913	while (s[0] == 'L' && s[1] == 'C' && s[2] == '_')
914	{
915		n++;
916		j = 0;
917		w = s;
918		for (i = 1; i < AST_LC_COUNT; i++)
919		{
920			s = w;
921			t = lc_categories[i].name;
922			while (*t && *s++ == *t++);
923			if (!*t && *s++ == '=')
924			{
925				cat[j++] = i;
926				if (s[0] != 'L' || s[1] != 'C' || s[2] != '_')
927					break;
928				w = s;
929				i = -1;
930			}
931		}
932		for (s = w; *s && *s != '='; s++);
933		if (!*s)
934		{
935			for (i = 0; i < k; i++)
936				single(stk[i], NiL, 0);
937			return -1;
938		}
939		w = ++s;
940		for (;;)
941		{
942			if (!*s)
943			{
944				p = lcmake(w);
945				break;
946			}
947			else if (*s++ == ';')
948			{
949				if ((m = s - w - 1) >= sizeof(buf))
950					m = sizeof(buf) - 1;
951				memcpy(buf, w, m);
952				buf[m] = 0;
953				p = lcmake(buf);
954				break;
955			}
956		}
957		for (i = 0; i < j; i++)
958			if (!initialize)
959			{
960				if (!single(cat[i], p, 0))
961				{
962					for (i = 0; i < k; i++)
963						single(stk[i], NiL, 0);
964					return -1;
965				}
966				stk[k++] = cat[i];
967			}
968			else if (!lc_categories[cat[i]].prev && !(ast.locale.set & AST_LC_internal))
969				lc_categories[cat[i]].prev = p;
970	}
971	while (s[0] == '/' && s[1] && n < (AST_LC_COUNT - 1))
972	{
973		n++;
974		for (w = ++s; *s && *s != '/'; s++);
975		if (!*s)
976			p = lcmake(w);
977		else
978		{
979			if ((j = s - w - 1) >= sizeof(buf))
980				j = sizeof(buf) - 1;
981			memcpy(buf, w, j);
982			buf[j] = 0;
983			p = lcmake(buf);
984		}
985		if (!initialize)
986		{
987			if (!single(n, p, 0))
988			{
989				for (i = 1; i < n; i++)
990					single(i, NiL, 0);
991				return -1;
992			}
993		}
994		else if (!lc_categories[n].prev && !(ast.locale.set & AST_LC_internal))
995			lc_categories[n].prev = p;
996	}
997	return n;
998}
999
/*
 * setlocale() intercept
 *
 * locale:
 *	0	query
 *	""	initialize from environment (if LC_ALL)
 *	""	AST_LC_setenv: value unset (defer to LANG)
 *	"*"	AST_LC_setenv: value set (defer to LC_ALL)
 *	*	set (override LC_ALL)
 *
 * returns the name of the locale in effect for category,
 * 0 if category is not valid or the locale could not be set
 * for AST_LC_ALL and AST_LC_LANG the name may be a composite
 * LC_x=name;... string built in a static string stream
 */

char*
_ast_setlocale(int category, const char* locale)
{
	register char*		s;
	register int		i;
	register int		j;
	int			k;
	int			f;
	Lc_t*			p;
	int			cat[AST_LC_COUNT];

	static Sfio_t*		sp;
	static int		initialized;
	static const char	local[] = "local";

	if ((category = lcindex(category, 0)) < 0)
		return 0;
	if (!locale)
	{
		/*
		 * return the current state
		 */

	compose:
		if (category != AST_LC_ALL && category != AST_LC_LANG)
			return (char*)locales[category]->name;
		if (!sp && !(sp = sfstropen()))
			return 0;

		/*
		 * group the categories that share a locale:
		 * cat[i] is the lowest category index with the same locale
		 * and k counts the distinct locales
		 */

		for (i = 1; i < AST_LC_COUNT; i++)
			cat[i] = -1;
		for (i = 1, k = 0; i < AST_LC_COUNT; i++)
			if (cat[i] < 0)
			{
				k++;
				cat[i] = i;
				for (j = i + 1; j < AST_LC_COUNT; j++)
					if (locales[j] == locales[i])
						cat[j] = i;
			}

		/*
		 * one locale for all categories needs no composite
		 */

		if (k == 1)
			return (char*)locales[1]->name;

		/*
		 * emit LC_x=[LC_y=...]name groups separated by ';'
		 * groups using an LC_default locale are omitted
		 */

		for (i = 1; i < AST_LC_COUNT; i++)
			if (cat[i] >= 0 && !(locales[i]->flags & LC_default))
			{
				if (sfstrtell(sp))
					sfprintf(sp, ";");
				for (j = i, k = cat[i]; j < AST_LC_COUNT; j++)
					if (cat[j] == k)
					{
						cat[j] = -1;
						sfprintf(sp, "%s=", lc_categories[j].name);
					}
				sfprintf(sp, "%s", locales[i]->name);
			}
		if (!sfstrtell(sp))
			return (char*)locales[0]->name;
		return sfstruse(sp);
	}

	/*
	 * the first non-query call reads the LC_OPTIONS options
	 */

	if (!ast.locale.serial++)
	{
		stropt(getenv("LC_OPTIONS"), options, sizeof(*options), setopt, NiL);
		initialized = 0;
	}
	if ((ast.locale.set & (AST_LC_debug|AST_LC_setlocale)) && !(ast.locale.set & AST_LC_internal))
		sfprintf(sfstderr, "locale user %17s %16s  %s%s\n", category == AST_LC_LANG ? "LANG" : lc_categories[category].name, locale && !*locale ? "''" : locale, initialized ? "" : "[initial]", (ast.locale.set & AST_LC_setenv) ? "[setenv]" : "");

	/*
	 * determine the requested locale p and how it was requested f
	 */

	if (ast.locale.set & AST_LC_setenv)
	{
		f = LC_setenv;
		p = *locale ? lcmake(locale) : (Lc_t*)0;
	}
	else if (*locale)
	{
		f = LC_setlocale;
		p = lcmake(locale);
	}
	else if (category == AST_LC_ALL)
	{
		if (!initialized)
		{
			char*	u;
			char	tmp[256];

			/*
			 * initialize from the environment
			 * precedence determined by X/Open
			 * the value "local" is replaced by the native_locale()
			 * name when one is available; u caches that name
			 */

			u = 0;
			if ((s = getenv("LANG")) && *s)
			{
				if (streq(s, local) && (u || (u = native_locale(locale, tmp, sizeof(tmp)))))
					s = u;
				lang = lcmake(s);
			}
			else
				lang = 0;
			if ((s = getenv("LC_ALL")) && *s)
			{
				if (streq(s, local) && (u || (u = native_locale(locale, tmp, sizeof(tmp)))))
					s = u;
				lc_all = lcmake(s);
			}
			else
				lc_all = 0;
			for (i = 1; i < AST_LC_COUNT; i++)
				if (lc_categories[i].flags & LC_setlocale)
					/* explicitly set by setlocale() */;
				else if ((s = getenv(lc_categories[i].name)) && *s)
				{
					if (streq(s, local) && (u || (u = native_locale(locale, tmp, sizeof(tmp)))))
						s = u;
					lc_categories[i].prev = lcmake(s);
				}
				else
					lc_categories[i].prev = 0;

			/*
			 * lc_all overrides every category not explicitly
			 * set by setlocale(); a failure restores the
			 * categories below the one that failed
			 */

			for (i = 1; i < AST_LC_COUNT; i++)
				if (!single(i, lc_all && !(lc_categories[i].flags & LC_setlocale) ? lc_all : lc_categories[i].prev, 0))
				{
					while (i--)
						single(i, NiL, 0);
					return 0;
				}
			if (ast.locale.set & AST_LC_debug)
				for (i = 1; i < AST_LC_COUNT; i++)
					sfprintf(sfstderr, "locale env  %17s %16s %16s %16s\n", lc_categories[i].name, locales[i]->name, "", lc_categories[i].prev ? lc_categories[i].prev->name : (char*)0);
			initialized = 1;
		}
		goto compose;
	}
	else if (category == AST_LC_LANG || !(p = lc_categories[category].prev))
	{
		/*
		 * "" for LANG, or for a category with no prev, means "C"
		 */

		f = 0;
		p = lcmake("C");
	}
	else
		f = 0;
	if (category == AST_LC_LANG)
	{
		/*
		 * a LANG change is applied to the categories
		 * only while lc_all is not set
		 */

		if (lang != p)
		{
			lang = p;
			if (!lc_all)
				for (i = 1; i < AST_LC_COUNT; i++)
					if (!single(i, lc_categories[i].prev, 0))
					{
						while (i--)
							single(i, NiL, 0);
						return 0;
					}
		}
	}
	else if (category != AST_LC_ALL)
	{
		/*
		 * single category: set it now if explicitly requested or
		 * if lc_all is not set, otherwise just remember the request
		 */

		if (f || !lc_all)
			return single(category, p, f);
		if (p && !(ast.locale.set & AST_LC_internal))
			lc_categories[category].prev = p;
		return (char*)locales[category]->name;
	}
	else if (composite(locale, 0) < 0)
		return 0;
	else if (lc_all != p)
	{
		/*
		 * new LC_ALL value: apply it to every category
		 * not explicitly set by setlocale()
		 */

		lc_all = p;
		for (i = 1; i < AST_LC_COUNT; i++)
			if (!single(i, lc_all && !(lc_categories[i].flags & LC_setlocale) ? lc_all : lc_categories[i].prev, 0))
			{
				while (i--)
					single(i, NiL, 0);
				return 0;
			}
	}
	goto compose;
}
1185