1/* shmbutil.h -- utility functions for multibyte characters. */
2
3/* Copyright (C) 2002-2004 Free Software Foundation, Inc.
4
5   This file is part of GNU Bash, the Bourne Again SHell.
6
7   Bash is free software: you can redistribute it and/or modify
8   it under the terms of the GNU General Public License as published by
9   the Free Software Foundation, either version 3 of the License, or
10   (at your option) any later version.
11
12   Bash is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   GNU General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with Bash.  If not, see <http://www.gnu.org/licenses/>.
19*/
20
21#if !defined (_SH_MBUTIL_H_)
22#define _SH_MBUTIL_H_
23
24#include "stdc.h"
25
26/* Include config.h for HANDLE_MULTIBYTE */
27#include <config.h>
28
#if !defined (HANDLE_MULTIBYTE)

/* Single-byte build.  Force both multibyte limits to 1 so that any
   `MB_CUR_MAX > 1' test written against this header is constant-false. */
#undef MB_CUR_MAX
#undef MB_LEN_MAX

#define MB_CUR_MAX	1
#define MB_LEN_MAX	1

/* A plain byte search replaces the multibyte-aware xstrchr. */
#undef xstrchr
#define xstrchr(s, c)	strchr(s, c)

/* Neither test can ever succeed in a single-byte build.  Both macros sit
   behind one guard keyed on MB_INVALIDCH, so a definition of MB_INVALIDCH
   made before this point suppresses both. */
#ifndef MB_INVALIDCH
#define MB_NULLWCH(x)		(0)
#define MB_INVALIDCH(x)		(0)
#endif

/* STRLEN is not defined in this header; it must be supplied by the
   including file. */
#define MB_STRLEN(s)		(STRLEN(s))

/* Every character is reported as exactly one byte long. */
#define MBRLEN(s, n, p)		1
#define MBLEN(s, n)		1

/* Provide a stand-in unless a macro named wchar_t already exists. */
#ifndef wchar_t
#  define wchar_t	int
#endif

#else /* HANDLE_MULTIBYTE */

/* Multibyte-aware helpers implemented outside this header. */
extern char *xstrchr __P((const char *, int));

extern size_t mbstrlen __P((const char *));

extern size_t xdupmbstowcs __P((wchar_t **, char ***, const char *));
extern size_t xmbsrtowcs __P((wchar_t *, const char **, size_t, mbstate_t *));

/* MB_INVALIDCH(x) is true when X is (size_t)-1 or (size_t)-2;
   MB_NULLWCH(x) is true when X is 0.  Both macros sit behind one guard
   keyed on MB_INVALIDCH. */
#ifndef MB_INVALIDCH
#define MB_NULLWCH(x)		((x) == 0)
#define MB_INVALIDCH(x)		((x) == (size_t)-1 || (x) == (size_t)-2)
#endif

/* MBLEN and MBRLEN call mblen/mbrlen only when the locale is multibyte;
   otherwise they evaluate to 1 without looking at their arguments. */
#define MBRLEN(s, n, p)	((MB_CUR_MAX > 1) ? mbrlen ((s), (n), (p)) : 1)
#define MBLEN(s, n)	((MB_CUR_MAX > 1) ? mblen ((s), (n)) : 1)

/* MBSLEN yields 0 for a null or empty S and 1 for a one-byte S, and calls
   mbstrlen only for longer strings.  S is evaluated more than once.
   MB_STRLEN picks MBSLEN or STRLEN (defined elsewhere) by locale. */
#define MBSLEN(s)	(((s) && (s)[0]) ? ((s)[1] ? mbstrlen (s) : 1) : 0)
#define MB_STRLEN(s)	((MB_CUR_MAX > 1) ? MBSLEN (s) : STRLEN (s))

#endif /* HANDLE_MULTIBYTE */
75
/* Declare a multibyte conversion state named `state' and clear it to all
   zero bytes.  The expansion is a declaration followed by a statement with
   no trailing semicolon, so the use must be terminated with `;'.  When
   HANDLE_MULTIBYTE is not defined the macro expands to nothing. */
#if !defined (HANDLE_MULTIBYTE)
#  define DECLARE_MBSTATE
#else
#  define DECLARE_MBSTATE \
	mbstate_t state; \
	memset (&state, 0, sizeof (state))
#endif  /* HANDLE_MULTIBYTE */
85
/* Reset an already-declared conversion state named `state' to all zero
   bytes.  The expansion has no trailing semicolon, so the use must be
   terminated with `;'.  When HANDLE_MULTIBYTE is not defined the macro
   expands to nothing. */
#if !defined (HANDLE_MULTIBYTE)
#  define INITIALIZE_MBSTATE
#else
#  define INITIALIZE_MBSTATE memset (&state, 0, sizeof (state))
#endif  /* HANDLE_MULTIBYTE */
93
/* Move index _I past the character that starts at _STR[_I], where _STRSIZE
   is the length of _STR.  A conversion state named `state' must already be
   declared in the enclosing scope.

   In a multibyte locale the character length comes from mbrlen.  If mbrlen
   reports (size_t)-1 or (size_t)-2 the previous `state' is restored and _I
   moves by one byte; a length of 0 also moves _I by one byte.  In a
   single-byte locale, or without HANDLE_MULTIBYTE, _I is simply
   incremented.  _I is evaluated more than once. */
#if !defined (HANDLE_MULTIBYTE)
#  define ADVANCE_CHAR(_str, _strsize, _i)	(_i)++
#else
#  define ADVANCE_CHAR(_str, _strsize, _i) \
    do \
      { \
        if (MB_CUR_MAX <= 1) \
          (_i)++; \
        else \
          { \
            mbstate_t _mb_saved = state; \
            size_t _mb_step; \
\
            _mb_step = mbrlen ((_str) + (_i), (_strsize) - (_i), &state); \
\
            if (_mb_step == (size_t)-1 || _mb_step == (size_t)-2) \
              { \
                /* Unusable sequence: undo the state change, skip a byte. */ \
                state = _mb_saved; \
                _mb_step = 1; \
              } \
            else if (_mb_step == 0) \
              _mb_step = 1;	/* a NUL still occupies one byte */ \
\
            (_i) += _mb_step; \
          } \
      } \
    while (0)
#endif  /* HANDLE_MULTIBYTE */
125
/* Move pointer _STR to the LAST byte of the character it points at, given
   that _STRSIZE bytes are available from _STR.
   SPECIAL: the caller is expected to increment _STR by 1 afterwards, which
   is why the pointer moves by one less than the character length.

   A conversion state named `state' must be in scope.  If mbrlen reports
   (size_t)-1 or (size_t)-2 the previous `state' is restored and _STR is
   left alone.  In a single-byte locale, or without HANDLE_MULTIBYTE, the
   macro does nothing. */
#if !defined (HANDLE_MULTIBYTE)
#  define ADVANCE_CHAR_P(_str, _strsize)
#else
#  define ADVANCE_CHAR_P(_str, _strsize) \
    do \
      { \
        if (MB_CUR_MAX > 1) \
          { \
            mbstate_t _mb_saved = state; \
            size_t _mb_len = mbrlen ((_str), (_strsize), &state); \
\
            if (_mb_len == (size_t)-1 || _mb_len == (size_t)-2) \
              state = _mb_saved; \
            else \
              (_str) += (_mb_len > 1) ? (_mb_len - 1) : 0; \
          } \
      } \
    while (0)
#endif  /* HANDLE_MULTIBYTE */
154
/* Set index _I to the start of the character preceding position _I in
   _STR, where _STRSIZE is the length of _STR.  A conversion state named
   `state' must already be declared in the enclosing scope.

   In a multibyte locale the string is rescanned from index 0 with mbrlen,
   remembering where the most recent successfully measured character began;
   _I receives that position (0 if none was seen).  Bytes for which mbrlen
   reports (size_t)-1, (size_t)-2 or 0 are stepped over one at a time
   without being remembered, and `state' is restored after an error.  In a
   single-byte locale, or without HANDLE_MULTIBYTE, _I is simply
   decremented. */
#if !defined (HANDLE_MULTIBYTE)
#  define BACKUP_CHAR(_str, _strsize, _i)	(_i)--
#else
#  define BACKUP_CHAR(_str, _strsize, _i) \
    do \
      { \
        if (MB_CUR_MAX <= 1) \
          (_i)--; \
        else \
          { \
            mbstate_t _mb_saved; \
            size_t _mb_len; \
            int _mb_pos, _mb_prev; /* scan index, start of previous char */ \
\
            for (_mb_pos = _mb_prev = 0; _mb_pos < (_i); ) \
              { \
                _mb_saved = state; \
                _mb_len = mbrlen ((_str) + (_mb_pos), (_strsize) - (_mb_pos), &state); \
\
                if (_mb_len == (size_t)-1 || _mb_len == (size_t)-2) \
                  { \
                    state = _mb_saved; \
                    _mb_len = 0;	/* handled like a zero-length result */ \
                  } \
\
                if (_mb_len == 0) \
                  _mb_pos++; \
                else \
                  { \
                    _mb_prev = _mb_pos; \
                    _mb_pos += _mb_len; \
                  } \
              } \
            (_i) = _mb_prev; \
          } \
      } \
    while (0)
#endif  /* HANDLE_MULTIBYTE */
195
/* Back up one (possibly multibyte) character in the string _BASE of length
   _STRSIZE starting at _STR (_BASE <= _STR <= (_BASE + _STRSIZE) ).
   SPECIAL: DO NOT assume that _STR will be decremented by 1 after this call.

   A conversion state named `state' must already be declared in the
   enclosing scope.  In a multibyte locale the string is rescanned from
   _BASE with mbrlen, remembering where the most recent successfully
   measured character began; _STR receives that pointer (_BASE if none was
   seen).  Bytes for which mbrlen reports (size_t)-1, (size_t)-2 or 0 are
   stepped over one at a time without being remembered, and `state' is
   restored after an error.  In a single-byte locale, or without
   HANDLE_MULTIBYTE, _STR is simply decremented.

   Both scan variables are pointers, and the byte count handed to mbrlen is
   the number of bytes remaining between the scan pointer and the end of
   the string (_STRSIZE less the offset from _BASE). */
#if defined (HANDLE_MULTIBYTE)
#  define BACKUP_CHAR_P(_base, _strsize, _str) \
    do \
      { \
        if (MB_CUR_MAX > 1) \
          { \
            mbstate_t state_bak; \
            size_t mblength; \
            char *_x, *_p; /* _x == temp pointer into string, _p == prev pointer */ \
\
            /* The cast lets _BASE be either `char *' or `const char *'. */ \
            _x = _p = (char *)(_base); \
            while (_x < (_str)) \
              { \
                state_bak = state; \
                mblength = mbrlen (_x, (_strsize) - (size_t)(_x - (char *)(_base)), &state); \
\
                if (mblength == (size_t)-2 || mblength == (size_t)-1) \
                  { \
                    state = state_bak; \
                    _x++; \
                  } \
                else if (mblength == 0) \
                  _x++; \
                else \
                  { \
                    _p = _x; /* _p == start of prev mbchar */ \
                    _x += mblength; \
                  } \
              } \
            (_str) = _p; \
          } \
        else \
          (_str)--; \
      } \
    while (0)
#else
#  define BACKUP_CHAR_P(_base, _strsize, _str) (_str)--
#endif  /* !HANDLE_MULTIBYTE */
237
/* Copy one character from pointer _SRC to pointer _DST, advancing both
   past the bytes copied.  _SRCEND points to the end of the source string.
   A conversion state named `state' must be in scope.

   In a multibyte locale the number of bytes comes from mbrlen; if mbrlen
   reports (size_t)-1 or (size_t)-2 the previous `state' is restored and a
   single byte is copied, and a length of 0 also copies a single byte.  In
   a single-byte locale, or without HANDLE_MULTIBYTE, exactly one byte is
   copied. */
#if !defined (HANDLE_MULTIBYTE)
#  define COPY_CHAR_P(_dst, _src, _srcend)	*(_dst)++ = *(_src)++
#else
#  define COPY_CHAR_P(_dst, _src, _srcend) \
    do \
      { \
        if (MB_CUR_MAX <= 1) \
          *(_dst)++ = *(_src)++; \
        else \
          { \
            mbstate_t _mb_saved = state; \
            size_t _mb_left; \
\
            _mb_left = mbrlen ((_src), (_srcend) - (_src), &state); \
            if (_mb_left == (size_t)-1 || _mb_left == (size_t)-2) \
              { \
                state = _mb_saved; \
                _mb_left = 1; \
              } \
            else if (_mb_left == 0) \
              _mb_left = 1; \
\
            while (_mb_left-- > 0) \
              *(_dst)++ = *(_src)++; \
          } \
      } \
    while (0)
#endif  /* HANDLE_MULTIBYTE */
270
/* Copy one character from _SRC at index _SI to _DST at index _DI,
   advancing both indices past the bytes copied.  _SRCEND points to the end
   of the source string.  A conversion state named `state' must be in
   scope.

   In a multibyte locale the number of bytes comes from mbrlen; if mbrlen
   reports (size_t)-1 or (size_t)-2 the previous `state' is restored and a
   single byte is copied, and a length of 0 also copies a single byte.  In
   a single-byte locale, or without HANDLE_MULTIBYTE, exactly one byte is
   copied.  _DST, _DI, _SRC and _SI are not parenthesized in the copy
   itself, so pass simple names. */
#if !defined (HANDLE_MULTIBYTE)
#  define COPY_CHAR_I(_dst, _di, _src, _srcend, _si)	_dst[_di++] = _src[_si++]
#else
#  define COPY_CHAR_I(_dst, _di, _src, _srcend, _si) \
    do \
      { \
        if (MB_CUR_MAX <= 1) \
          _dst[_di++] = _src[_si++]; \
        else \
          { \
            mbstate_t _mb_saved = state; \
            size_t _mb_left; \
\
            _mb_left = mbrlen ((_src) + (_si), (_srcend) - ((_src)+(_si)), &state); \
            if (_mb_left == (size_t)-1 || _mb_left == (size_t)-2) \
              { \
                state = _mb_saved; \
                _mb_left = 1; \
              } \
            else if (_mb_left == 0) \
              _mb_left = 1; \
\
            while (_mb_left-- > 0) \
              _dst[_di++] = _src[_si++]; \
          } \
      } \
    while (0)
#endif  /* HANDLE_MULTIBYTE */
303
304/****************************************************************
305 *								*
306 * The following are only guaranteed to work in subst.c		*
307 *								*
308 ****************************************************************/
309
/* Produce the escape character _ESCCHAR followed by one source character.

   Multibyte locale: the character starting at _SRC[_SI] is measured with
   mbrlen (_SLEN is the length of _SRC).  If mbrlen reports (size_t)-1 or
   (size_t)-2 the previous `state' is restored and one byte is taken; a
   length of 0 also takes one byte.  A buffer of that length plus 2 is
   obtained from xmalloc and stored in the caller's variable `temp', filled
   with _ESCCHAR, the character's bytes (advancing _SI) and a terminating
   NUL, and control then jumps to the caller's label `add_string'.

   Single-byte locale, or without HANDLE_MULTIBYTE: _DST[0] and _DST[1] are
   set to _ESCCHAR and _SC; _SI is not changed and no jump is taken.

   The macro relies on `state', `temp', `xmalloc' and the label
   `add_string' being available at the point of use.  Both variants are
   wrapped in do { } while (0) so that the macro is a single statement and
   is safe as the body of an unbraced `if' or loop. */
#if defined (HANDLE_MULTIBYTE)
#  define SCOPY_CHAR_I(_dst, _escchar, _sc, _src, _si, _slen) \
    do \
      { \
        if (MB_CUR_MAX > 1) \
          { \
            mbstate_t state_bak; \
            size_t mblength; \
            size_t _mbi; \
\
            state_bak = state; \
            mblength = mbrlen ((_src) + (_si), (_slen) - (_si), &state); \
            if (mblength == (size_t)-2 || mblength == (size_t)-1) \
              { \
                state = state_bak; \
                mblength = 1; \
              } \
            else \
              mblength = (mblength < 1) ? 1 : mblength; \
\
            temp = xmalloc (mblength + 2); \
            temp[0] = (_escchar); \
            for (_mbi = 0; _mbi < mblength; _mbi++) \
              temp[_mbi + 1] = (_src)[(_si)++]; \
            temp[mblength + 1] = '\0'; \
\
            goto add_string; \
          } \
        else \
          { \
            (_dst)[0] = (_escchar); \
            (_dst)[1] = (_sc); \
          } \
      } \
    while (0)
#else
#  define SCOPY_CHAR_I(_dst, _escchar, _sc, _src, _si, _slen) \
    do \
      { \
        (_dst)[0] = (_escchar); \
        (_dst)[1] = (_sc); \
      } \
    while (0)
#endif  /* !HANDLE_MULTIBYTE */
350
/* Copy one character from _SRC at index _SI to pointer _DST, advancing
   _DST and _SI past the bytes copied.  _SRCEND points to the end of the
   source string.

   Multibyte locale: the character is measured with mbrlen.  If mbrlen
   reports (size_t)-1 or (size_t)-2 the previous `state' is restored and
   one byte is copied; a length of 0 also copies one byte.  The bytes are
   transferred with FASTCOPY (source, destination, length), which is not
   defined in this header.

   Single-byte locale, or without HANDLE_MULTIBYTE: exactly one byte is
   copied.

   The macro relies on `state' being in scope.  Both variants are wrapped
   in do { } while (0) so that the macro is a single statement and is safe
   as the body of an unbraced `if' or loop. */
#if defined (HANDLE_MULTIBYTE)
#  define SCOPY_CHAR_M(_dst, _src, _srcend, _si) \
    do \
      { \
        if (MB_CUR_MAX > 1) \
          { \
            mbstate_t state_bak; \
            size_t mblength; \
\
            state_bak = state; \
            mblength = mbrlen ((_src) + (_si), (_srcend) - ((_src) + (_si)), &state); \
            if (mblength == (size_t)-2 || mblength == (size_t)-1) \
              { \
                state = state_bak; \
                mblength = 1; \
              } \
            else \
              mblength = (mblength < 1) ? 1 : mblength; \
\
            FASTCOPY(((_src) + (_si)), (_dst), mblength); \
\
            (_dst) += mblength; \
            (_si) += mblength; \
          } \
        else \
          { \
            *(_dst)++ = (_src)[(_si)]; \
            (_si)++; \
          } \
      } \
    while (0)
#else
#  define SCOPY_CHAR_M(_dst, _src, _srcend, _si) \
    do \
      { \
        *(_dst)++ = (_src)[(_si)]; \
        (_si)++; \
      } \
    while (0)
#endif  /* !HANDLE_MULTIBYTE */
387
/* In a multibyte locale, copy the character starting at _SRC[_SI] into a
   freshly allocated, NUL-terminated string stored in _DST, advance _SI
   past it, and jump to the caller's label `add_string'.  _SRCSIZE is the
   length of _SRC.

   The character is measured with mbrlen.  If mbrlen reports (size_t)-1 or
   (size_t)-2 the previous `state' is restored and one byte is taken; a
   length of 0 also takes one byte.  In a single-byte locale, or without
   HANDLE_MULTIBYTE, the macro does nothing and control continues after it.

   The macro relies on `state', `xmalloc' and the label `add_string' being
   available at the point of use.  The loop counter has a reserved-looking
   name so that an argument spelled `i' is not captured by it. */
#if defined (HANDLE_MULTIBYTE)
#  define SADD_MBCHAR(_dst, _src, _si, _srcsize) \
    do \
      { \
        if (MB_CUR_MAX > 1) \
          { \
            size_t _mbi; \
            mbstate_t state_bak; \
            size_t mblength; \
\
            state_bak = state; \
            mblength = mbrlen ((_src) + (_si), (_srcsize) - (_si), &state); \
            if (mblength == (size_t)-1 || mblength == (size_t)-2) \
              { \
                state = state_bak; \
                mblength = 1; \
              } \
            if (mblength < 1) \
              mblength = 1; \
\
            (_dst) = (char *)xmalloc (mblength + 1); \
            for (_mbi = 0; _mbi < mblength; _mbi++) \
              (_dst)[_mbi] = (_src)[(_si)++]; \
            (_dst)[mblength] = '\0'; \
\
            goto add_string; \
          } \
      } \
    while (0)

#else
#  define SADD_MBCHAR(_dst, _src, _si, _srcsize)
#endif
421
/* Watch out when using this -- it's just straight textual substitution.

   The expansion is a sequence of declarations and statements with no
   enclosing braces and no trailing semicolon, so it must be placed at the
   start of a block the caller provides.  It measures the character
   starting at _SRC[_SI] with mbrlen (_SRCSIZE is the length of _SRC).  If
   mbrlen reports (size_t)-1 or (size_t)-2 the previous `state' is restored
   and one byte is taken; a length of 0 also takes one byte.  It stores in
   _DST a freshly allocated string holding CTLESC, the character's bytes
   and a terminating NUL, advances _SI past the character, and jumps to the
   caller's label `add_string'.

   The macro relies on `state', `xmalloc', `CTLESC' and the label
   `add_string' being available at the point of use; unlike the other
   macros here it does not test MB_CUR_MAX itself.  The loop counter has a
   reserved-looking name so that an argument spelled `i' is not captured
   by it. */
#if defined (HANDLE_MULTIBYTE)
#  define SADD_MBQCHAR_BODY(_dst, _src, _si, _srcsize) \
\
            size_t _mbi; \
            mbstate_t state_bak; \
            size_t mblength; \
\
            state_bak = state; \
            mblength = mbrlen ((_src) + (_si), (_srcsize) - (_si), &state); \
            if (mblength == (size_t)-1 || mblength == (size_t)-2) \
              { \
                state = state_bak; \
                mblength = 1; \
              } \
            if (mblength < 1) \
              mblength = 1; \
\
            (_dst) = (char *)xmalloc (mblength + 2); \
            (_dst)[0] = CTLESC; \
            for (_mbi = 0; _mbi < mblength; _mbi++) \
              (_dst)[_mbi+1] = (_src)[(_si)++]; \
            (_dst)[mblength+1] = '\0'; \
\
            goto add_string

#endif /* HANDLE_MULTIBYTE */
449#endif /* _SH_MBUTIL_H_ */
450