1/* shmbutil.h -- utility functions for multibyte characters. */
2
3/* Copyright (C) 2002-2004 Free Software Foundation, Inc.
4
5   This file is part of GNU Bash, the Bourne Again SHell.
6
7   Bash is free software; you can redistribute it and/or modify it under
8   the terms of the GNU General Public License as published by the Free
9   Software Foundation; either version 2, or (at your option) any later
10   version.
11
12   Bash is distributed in the hope that it will be useful, but WITHOUT ANY
13   WARRANTY; without even the implied warranty of MERCHANTABILITY or
14   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15   for more details.
16
17   You should have received a copy of the GNU General Public License along
18   with Bash; see the file COPYING.  If not, write to the Free Software
19   Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
20
21#if !defined (_SH_MBUTIL_H_)
22#define _SH_MBUTIL_H_
23
24#include "stdc.h"
25
26/* Include config.h for HANDLE_MULTIBYTE */
27#include <config.h>
28
#ifdef HANDLE_MULTIBYTE

/* Helpers that are only declared in this header. */
extern size_t xmbsrtowcs __P((wchar_t *, const char **, size_t, mbstate_t *));
extern size_t xdupmbstowcs __P((wchar_t **, char ***, const char *));
extern size_t mbstrlen __P((const char *));
extern char *xstrchr __P((const char *, int));

/* MB_INVALIDCH(x) is true when X is (size_t)-1 or (size_t)-2;
   MB_NULLWCH(x) is true when X is 0.  Neither is defined here if
   MB_INVALIDCH already exists. */
#if !defined (MB_INVALIDCH)
#  define MB_INVALIDCH(x)	((x) == (size_t)-1 || (x) == (size_t)-2)
#  define MB_NULLWCH(x)		((x) == 0)
#endif

/* MBSLEN(s): 0 for a null or empty S, 1 when S holds exactly one byte,
   otherwise the value returned by mbstrlen. */
#define MBSLEN(s)	(((s) && (s)[0]) ? ((s)[1] ? mbstrlen (s) : 1) : 0)

/* MB_STRLEN(s): MBSLEN when MB_CUR_MAX > 1, STRLEN otherwise. */
#define MB_STRLEN(s)	((MB_CUR_MAX > 1) ? MBSLEN (s) : STRLEN (s))

/* MBLEN and MBRLEN call mblen and mbrlen only when MB_CUR_MAX > 1;
   otherwise they evaluate to 1. */
#define MBLEN(s, n)	((MB_CUR_MAX > 1) ? mblen ((s), (n)) : 1)
#define MBRLEN(s, n, p)	((MB_CUR_MAX > 1) ? mbrlen ((s), (n), (p)) : 1)

#else /* !HANDLE_MULTIBYTE */

/* Without multibyte support every character is treated as one byte. */
#undef MB_LEN_MAX
#define MB_LEN_MAX	1

#undef MB_CUR_MAX
#define MB_CUR_MAX	1

/* xstrchr is plain strchr in this configuration. */
#undef xstrchr
#define xstrchr(s, c)	strchr(s, c)

/* No result can be invalid or a null wide character here. */
#if !defined (MB_INVALIDCH)
#  define MB_INVALIDCH(x)	(0)
#  define MB_NULLWCH(x)		(0)
#endif

#define MB_STRLEN(s)		(STRLEN(s))

#define MBLEN(s, n)		1
#define MBRLEN(s, n, p)		1

#endif /* !HANDLE_MULTIBYTE */
71
/* Define a local multibyte conversion state named `state' and clear it to
   all zero bytes.  Expands to nothing when multibyte support is not
   compiled in.  The invocation must be followed by `;'. */
#if !defined (HANDLE_MULTIBYTE)
#  define DECLARE_MBSTATE
#else
#  define DECLARE_MBSTATE \
	mbstate_t state; \
	memset (&state, '\0', sizeof (state))
#endif  /* HANDLE_MULTIBYTE */
81
/* Reset an existing multibyte conversion state named `state' to all zero
   bytes.  Expands to nothing when multibyte support is not compiled in.
   The invocation must be followed by `;'. */
#if !defined (HANDLE_MULTIBYTE)
#  define INITIALIZE_MBSTATE
#else
#  define INITIALIZE_MBSTATE \
	memset (&state, '\0', sizeof (mbstate_t))
#endif  /* HANDLE_MULTIBYTE */
89
/* Move index _I past the character of _STR (total length _STRSIZE) that
   starts at _I.  With multibyte support a variable `state' of type
   mbstate_t must be in scope.  A byte that mbrlen reports as invalid or
   incomplete, and the null character, each advance _I by exactly one;
   after a failed conversion `state' is restored to its previous value. */
#if !defined (HANDLE_MULTIBYTE)
#  define ADVANCE_CHAR(_str, _strsize, _i)	(_i)++
#else
#  define ADVANCE_CHAR(_str, _strsize, _i) \
    do \
      { \
	if (MB_CUR_MAX <= 1) \
	  (_i)++; \
	else \
	  { \
	    mbstate_t _adv_saved; \
	    size_t _adv_len; \
\
	    _adv_saved = state; \
	    _adv_len = mbrlen ((_str) + (_i), (_strsize) - (_i), &state); \
\
	    if (_adv_len == (size_t)-1 || _adv_len == (size_t)-2) \
	      { \
		state = _adv_saved; /* undo the failed conversion */ \
		_adv_len = 1; \
	      } \
	    else if (_adv_len == 0) \
	      _adv_len = 1; \
\
	    (_i) += _adv_len; \
	  } \
      } \
    while (0)
#endif  /* HANDLE_MULTIBYTE */
121
/* Pointer flavour of ADVANCE_CHAR: _STR points at a character and _STRSIZE
   is the number of bytes available from there.
   SPECIAL: the caller is expected to add 1 to _STR afterwards, so this
   macro only moves _STR by (character length - 1).  _STR is left alone in
   a single-byte locale, for a null character, and when mbrlen reports an
   invalid or incomplete sequence (in which case `state' is restored). */
#if !defined (HANDLE_MULTIBYTE)
#  define ADVANCE_CHAR_P(_str, _strsize)
#else
#  define ADVANCE_CHAR_P(_str, _strsize) \
    do \
      { \
	if (MB_CUR_MAX > 1) \
	  { \
	    mbstate_t _adv_saved; \
	    size_t _adv_len; \
\
	    _adv_saved = state; \
	    _adv_len = mbrlen ((_str), (_strsize), &state); \
\
	    if (_adv_len == (size_t)-1 || _adv_len == (size_t)-2) \
	      state = _adv_saved; /* undo the failed conversion */ \
	    else \
	      (_str) += (_adv_len > 1) ? (_adv_len - 1) : 0; \
	  } \
      } \
    while (0)
#endif  /* HANDLE_MULTIBYTE */
150
/* Set index _I to the start of the character of _STR (total length
   _STRSIZE) that precedes index _I.  With multibyte support a variable
   `state' of type mbstate_t must be in scope.  In a multibyte locale the
   string is rescanned from index 0 and _I becomes the start of the last
   character that mbrlen converted successfully before reaching _I; bytes
   that fail to convert and null characters are stepped over one at a time
   without being recorded.  In a single-byte locale _I is decremented. */
#if !defined (HANDLE_MULTIBYTE)
#  define BACKUP_CHAR(_str, _strsize, _i)	(_i)--
#else
#  define BACKUP_CHAR(_str, _strsize, _i) \
    do \
      { \
	if (MB_CUR_MAX <= 1) \
	  (_i)--; \
	else \
	  { \
	    mbstate_t _bk_saved; \
	    size_t _bk_len; \
	    int _bk_scan, _bk_last; /* scan index, start of last good char */ \
\
	    _bk_last = 0; \
	    for (_bk_scan = 0; _bk_scan < (_i); ) \
	      { \
		_bk_saved = state; \
		_bk_len = mbrlen ((_str) + (_bk_scan), (_strsize) - (_bk_scan), &state); \
\
		if (_bk_len == (size_t)-1 || _bk_len == (size_t)-2) \
		  { \
		    state = _bk_saved; /* undo the failed conversion */ \
		    _bk_scan++; \
		    continue; \
		  } \
		if (_bk_len == 0) \
		  { \
		    _bk_scan++; \
		    continue; \
		  } \
		_bk_last = _bk_scan; \
		_bk_scan += _bk_len; \
	      } \
	    (_i) = _bk_last; \
	  } \
      } \
    while (0)
#endif  /* HANDLE_MULTIBYTE */
191
/* Back up one (possibly multibyte) character in the string _BASE of length
   _STRSIZE starting at _STR (_BASE <= _STR <= (_BASE + _STRSIZE) ).
   SPECIAL: DO NOT assume that _STR will be decremented by 1 after this call.

   With multibyte support a variable `state' of type mbstate_t must be in
   scope.  In a multibyte locale the string is rescanned from _BASE and _STR
   is set to the start of the last character that mbrlen converted
   successfully before reaching _STR; bytes that fail to convert and null
   characters are stepped over one at a time.  In a single-byte locale _STR
   is simply decremented. */
#if defined (HANDLE_MULTIBYTE)
#  define BACKUP_CHAR_P(_base, _strsize, _str) \
    do \
      { \
	if (MB_CUR_MAX > 1) \
	  { \
	    mbstate_t state_bak; \
	    size_t mblength; \
	    char *_x, *_p; /* _x == temp pointer into string, _p == prev pointer */ \
\
	    _x = _p = (char *)(_base); \
	    while (_x < (_str)) \
	      { \
	        state_bak = state; \
		/* bytes left == total length minus bytes already scanned */ \
	        mblength = mbrlen (_x, (_strsize) - (_x - (char *)(_base)), &state); \
\
		if (mblength == (size_t)-2 || mblength == (size_t)-1) \
		  { \
		    state = state_bak; \
		    _x++; \
		  } \
		else if (mblength == 0) \
		  _x++; \
		else \
		  { \
		    _p = _x; /* _p == start of prev mbchar */ \
		    _x += mblength; \
		  } \
	      } \
	    (_str) = _p; \
	  } \
	else \
	  (_str)--; \
      } \
    while (0)
#else
#  define BACKUP_CHAR_P(_base, _strsize, _str) (_str)--
#endif  /* !HANDLE_MULTIBYTE */
233
/* Copy one (possibly multibyte) character from _SRC to _DST, advancing both
   pointers past the bytes copied.  _SRCEND points to the end of _SRC and
   bounds how many bytes mbrlen may examine.  With multibyte support a
   variable `state' of type mbstate_t must be in scope.  Exactly one byte is
   copied in a single-byte locale, for a null character, and when mbrlen
   reports an invalid or incomplete sequence (`state' is then restored). */
#if !defined (HANDLE_MULTIBYTE)
#  define COPY_CHAR_P(_dst, _src, _srcend)	*(_dst)++ = *(_src)++
#else
#  define COPY_CHAR_P(_dst, _src, _srcend) \
    do \
      { \
	if (MB_CUR_MAX <= 1) \
	  *(_dst)++ = *(_src)++; \
	else \
	  { \
	    mbstate_t _cp_saved; \
	    size_t _cp_left; /* bytes still to copy */ \
\
	    _cp_saved = state; \
	    _cp_left = mbrlen ((_src), (_srcend) - (_src), &state); \
	    if (_cp_left == (size_t)-1 || _cp_left == (size_t)-2) \
	      { \
		state = _cp_saved; /* undo the failed conversion */ \
		_cp_left = 1; \
	      } \
	    else if (_cp_left == 0) \
	      _cp_left = 1; \
\
	    while (_cp_left-- > 0) \
	      *(_dst)++ = *(_src)++; \
	  } \
      } \
    while (0)
#endif  /* HANDLE_MULTIBYTE */
266
/* Copy a single (possibly multibyte) character from the string _SRC at
   index _SI to the string _DST at index _DI, advancing both indices past
   the bytes copied.  _SRCEND is a pointer to the end of _SRC and bounds how
   many bytes mbrlen may examine.  With multibyte support a variable `state'
   of type mbstate_t must be in scope.  Exactly one byte is copied in a
   single-byte locale, for a null character, and when mbrlen reports an
   invalid or incomplete sequence (`state' is then restored).

   Every argument is parenthesized in the expansion so that expressions
   such as `buf + 1' may be passed for _DST or _SRC. */
#if defined (HANDLE_MULTIBYTE)
#  define COPY_CHAR_I(_dst, _di, _src, _srcend, _si) \
    do \
      { \
	if (MB_CUR_MAX > 1) \
	  { \
	    mbstate_t state_bak; \
	    size_t mblength; \
	    size_t _k; \
\
	    state_bak = state; \
	    mblength = mbrlen ((_src) + (_si), (_srcend) - ((_src)+(_si)), &state); \
	    if (mblength == (size_t)-2 || mblength == (size_t)-1) \
	      { \
		state = state_bak; \
		mblength = 1; \
	      } \
	    else \
	      mblength = (mblength < 1) ? 1 : mblength; \
\
	    for (_k = 0; _k < mblength; _k++) \
	      (_dst)[(_di)++] = (_src)[(_si)++]; \
	  } \
	else \
	  (_dst)[(_di)++] = (_src)[(_si)++]; \
      } \
    while (0)
#else
#  define COPY_CHAR_I(_dst, _di, _src, _srcend, _si) \
	(_dst)[(_di)++] = (_src)[(_si)++]
#endif  /* !HANDLE_MULTIBYTE */
299
300/****************************************************************
301 *								*
302 * The following are only guaranteed to work in subst.c		*
303 *								*
304 ****************************************************************/
305
/* Store the escape character _ESCCHAR followed by one character.

   In a multibyte locale the (possibly multibyte) character of _SRC at index
   _SI (string length _SLEN) is copied, preceded by _ESCCHAR and followed by
   a null byte, into a buffer obtained from xmalloc and assigned to `temp';
   _SI is advanced past the bytes copied and control jumps to the label
   `add_string'.  `state', `temp', xmalloc and `add_string' must all be
   provided by the code that uses the macro.

   Otherwise _DST[0] receives _ESCCHAR and _DST[1] receives _SC; _SI is not
   changed and there is no jump.

   The single-byte fallback is wrapped in do/while (0) so that it remains a
   single statement after an unbraced `if' or `else'. */
#if defined (HANDLE_MULTIBYTE)
#  define SCOPY_CHAR_I(_dst, _escchar, _sc, _src, _si, _slen) \
    do \
      { \
	if (MB_CUR_MAX > 1) \
	  { \
	    mbstate_t state_bak; \
	    size_t mblength; \
	    size_t _k; \
\
	    state_bak = state; \
	    mblength = mbrlen ((_src) + (_si), (_slen) - (_si), &state); \
	    if (mblength == (size_t)-2 || mblength == (size_t)-1) \
	      { \
		state = state_bak; \
		mblength = 1; \
	      } \
	    else \
	      mblength = (mblength < 1) ? 1 : mblength; \
\
	    temp = xmalloc (mblength + 2); \
	    temp[0] = (_escchar); \
	    for (_k = 0; _k < mblength; _k++) \
	      temp[_k + 1] = (_src)[(_si)++]; \
	    temp[mblength + 1] = '\0'; \
\
	    goto add_string; \
	  } \
	else \
	  { \
	    (_dst)[0] = (_escchar); \
	    (_dst)[1] = (_sc); \
	  } \
      } \
    while (0)
#else
#  define SCOPY_CHAR_I(_dst, _escchar, _sc, _src, _si, _slen) \
    do \
      { \
	(_dst)[0] = (_escchar); \
	(_dst)[1] = (_sc); \
      } \
    while (0)
#endif  /* !HANDLE_MULTIBYTE */
346
/* Copy one (possibly multibyte) character from _SRC at index _SI to the
   pointer _DST, advancing _DST and _SI past the bytes copied.  _SRCEND
   points to the end of _SRC and bounds how many bytes mbrlen may examine.

   In a multibyte locale the bytes are moved with FASTCOPY (not defined in
   this header; invoked here as source, destination, byte count) and a
   variable `state' of type mbstate_t must be in scope.  One byte is copied
   in a single-byte locale, for a null character, and when mbrlen reports an
   invalid or incomplete sequence (`state' is then restored).

   The single-byte fallback is wrapped in do/while (0) so that it remains a
   single statement after an unbraced `if' or `else'. */
#if defined (HANDLE_MULTIBYTE)
#  define SCOPY_CHAR_M(_dst, _src, _srcend, _si) \
    do \
      { \
	if (MB_CUR_MAX > 1) \
	  { \
	    mbstate_t state_bak; \
	    size_t mblength; \
\
	    state_bak = state; \
	    mblength = mbrlen ((_src) + (_si), (_srcend) - ((_src) + (_si)), &state); \
	    if (mblength == (size_t)-2 || mblength == (size_t)-1) \
	      { \
		state = state_bak; \
		mblength = 1; \
	      } \
	    else \
	      mblength = (mblength < 1) ? 1 : mblength; \
\
	    FASTCOPY(((_src) + (_si)), (_dst), mblength); \
\
	    (_dst) += mblength; \
	    (_si) += mblength; \
	  } \
	else \
	  { \
	    *(_dst)++ = (_src)[(_si)]; \
	    (_si)++; \
	  } \
      } \
    while (0)
#else
#  define SCOPY_CHAR_M(_dst, _src, _srcend, _si) \
    do \
      { \
	*(_dst)++ = (_src)[(_si)]; \
	(_si)++; \
      } \
    while (0)
#endif  /* !HANDLE_MULTIBYTE */
383
/* In a multibyte locale (MB_CUR_MAX > 1), copy the (possibly multibyte)
   character of _SRC at index _SI (string length _SRCSIZE) into a
   null-terminated buffer obtained from xmalloc, assign that buffer to _DST,
   advance _SI past the bytes copied, and jump to the label `add_string'.
   One byte is copied for a null character or when mbrlen reports an invalid
   or incomplete sequence (`state' is then restored).  In a single-byte
   locale, or without multibyte support, the macro does nothing.

   `state', xmalloc and `add_string' must be provided by the code that uses
   the macro.  The temporaries use underscore-prefixed names so that an
   argument such as `i' is not captured by a local of the same name. */
#if defined (HANDLE_MULTIBYTE)
#  define SADD_MBCHAR(_dst, _src, _si, _srcsize) \
    do \
      { \
	if (MB_CUR_MAX > 1) \
	  { \
	    size_t _sadd_k; \
	    mbstate_t _sadd_saved; \
	    size_t _sadd_len; \
\
	    _sadd_saved = state; \
	    _sadd_len = mbrlen ((_src) + (_si), (_srcsize) - (_si), &state); \
	    if (_sadd_len == (size_t)-1 || _sadd_len == (size_t)-2) \
	      { \
		state = _sadd_saved; \
		_sadd_len = 1; \
	      } \
	    if (_sadd_len < 1) \
	      _sadd_len = 1; \
\
	    (_dst) = (char *)xmalloc (_sadd_len + 1); \
	    for (_sadd_k = 0; _sadd_k < _sadd_len; _sadd_k++) \
	      (_dst)[_sadd_k] = (_src)[(_si)++]; \
	    (_dst)[_sadd_len] = '\0'; \
\
	    goto add_string; \
	  } \
      } \
    while (0)

#else
#  define SADD_MBCHAR(_dst, _src, _si, _srcsize)
#endif
417
/* Watch out when using this -- it's just straight textual substitution.
   The expansion begins with declarations and is not wrapped in a block, so
   it is meant to be placed at the start of a block supplied by the caller.

   Copies the (possibly multibyte) character of _SRC at index _SI (string
   length _SRCSIZE), preceded by CTLESC and followed by a null byte, into a
   buffer obtained from xmalloc, assigns that buffer to _DST, advances _SI
   past the bytes copied, and jumps to the label `add_string'.  One byte is
   copied for a null character or when mbrlen reports an invalid or
   incomplete sequence (`state' is then restored).

   `state', CTLESC, xmalloc and `add_string' must be provided by the code
   that uses the macro.  The temporaries use underscore-prefixed names so
   that an argument such as `i' is not captured by a local of the same
   name.  There is no definition when multibyte support is not compiled
   in. */
#if defined (HANDLE_MULTIBYTE)
#  define SADD_MBQCHAR_BODY(_dst, _src, _si, _srcsize) \
\
	    size_t _sadd_k; \
	    mbstate_t _sadd_saved; \
	    size_t _sadd_len; \
\
	    _sadd_saved = state; \
	    _sadd_len = mbrlen ((_src) + (_si), (_srcsize) - (_si), &state); \
	    if (_sadd_len == (size_t)-1 || _sadd_len == (size_t)-2) \
	      { \
		state = _sadd_saved; \
		_sadd_len = 1; \
	      } \
	    if (_sadd_len < 1) \
	      _sadd_len = 1; \
\
	    (_dst) = (char *)xmalloc (_sadd_len + 2); \
	    (_dst)[0] = CTLESC; \
	    for (_sadd_k = 0; _sadd_k < _sadd_len; _sadd_k++) \
	      (_dst)[_sadd_k + 1] = (_src)[(_si)++]; \
	    (_dst)[_sadd_len + 1] = '\0'; \
\
	    goto add_string

#endif /* HANDLE_MULTIBYTE */
445#endif /* _SH_MBUTIL_H_ */
446