tc.str.c revision 316957
1/* $Header: /p/tcsh/cvsroot/tcsh/tc.str.c,v 3.47 2015/06/06 21:19:08 christos Exp $ */
2/*
3 * tc.str.c: Short string package
4 * 	     This has been a lesson of how to write buggy code!
5 */
6/*-
7 * Copyright (c) 1980, 1991 The Regents of the University of California.
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34#include "sh.h"
35
36#include <assert.h>
37#include <limits.h>
38
39RCSID("$tcsh: tc.str.c,v 3.47 2015/06/06 21:19:08 christos Exp $")
40
/* Number of bytes by which the static conversion buffers are grown. */
#define MALLOC_INCR	128

/*
 * Extra bytes allocated beyond the nominal buffer size, so that a whole
 * converted character can be written before the buffer has to be grown.
 */
#if defined(WIDE_STRINGS)
# define MALLOC_SURPLUS	MB_LEN_MAX /* Space for one multibyte character */
#else
# define MALLOC_SURPLUS	0
#endif
47
48#ifdef WIDE_STRINGS
49size_t
50one_mbtowc(Char *pwc, const char *s, size_t n)
51{
52    int len;
53
54    len = rt_mbtowc(pwc, s, n);
55    if (len == -1) {
56        reset_mbtowc();
57	*pwc = (unsigned char)*s | INVALID_BYTE;
58    }
59    if (len <= 0)
60	len = 1;
61    return len;
62}
63
64size_t
65one_wctomb(char *s, Char wchar)
66{
67    int len;
68
69#if INVALID_BYTE != 0
70    if ((wchar & INVALID_BYTE) == INVALID_BYTE) {    /* wchar >= INVALID_BYTE */
71	/* invalid char
72	 * exmaple)
73	 * if wchar = f0000090(=90|INVALID_BYTE), then *s = ffffff90 */
74	*s = (char)wchar;
75	len = 1;
76#else
77    if (wchar & (CHAR & INVALID_BYTE)) {
78	s[0] = wchar & (CHAR & 0xFF);
79	len = 1;
80#endif
81    } else {
82#if INVALID_BYTE != 0
83	wchar &= MAX_UTF32;
84#else
85	wchar &= CHAR;
86#endif
87#ifdef UTF16_STRINGS
88	if (wchar >= 0x10000) {
89	    /* UTF-16 systems can't handle these values directly in calls to
90	       wctomb.  Convert value to UTF-16 surrogate and call wcstombs to
91	       convert the "string" to the correct multibyte representation,
92	       if any. */
93	    wchar_t ws[3];
94	    wchar -= 0x10000;
95	    ws[0] = 0xd800 | (wchar >> 10);
96	    ws[1] = 0xdc00 | (wchar & 0x3ff);
97	    ws[2] = 0;
98	    /* The return value of wcstombs excludes the trailing 0, so len is
99	       the correct number of multibytes for the Unicode char. */
100	    len = wcstombs (s, ws, MB_CUR_MAX + 1);
101	} else
102#endif
103	len = wctomb(s, (wchar_t) wchar);
104	if (len == -1)
105	    s[0] = wchar;
106	if (len <= 0)
107	    len = 1;
108    }
109    return len;
110}
111
112int
113rt_mbtowc(Char *pwc, const char *s, size_t n)
114{
115    int ret;
116    char back[MB_LEN_MAX];
117    wchar_t tmp;
118#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
119# if defined(AUTOSET_KANJI)
120    static mbstate_t mb_zero, mb;
121    /*
122     * Workaround the Shift-JIS endcoding that translates unshifted 7 bit ASCII!
123     */
124    if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') &&
125	!memcmp(&mb, &mb_zero, sizeof(mb)))
126    {
127	*pwc = *s;
128	return 1;
129    }
130# else
131    mbstate_t mb;
132# endif
133
134    memset (&mb, 0, sizeof mb);
135    ret = mbrtowc(&tmp, s, n, &mb);
136#else
137    ret = mbtowc(&tmp, s, n);
138#endif
139    if (ret > 0) {
140	*pwc = tmp;
141#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
142	if (tmp >= 0xd800 && tmp <= 0xdbff) {
143	    /* UTF-16 surrogate pair.  Fetch second half and compute
144	       UTF-32 value.  Dispense with the inverse test in this case. */
145	    size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb);
146	    if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2)
147		ret = -1;
148	    else {
149		*pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000;
150		ret += n2;
151	    }
152	} else
153#endif
154      	if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0)
155	    ret = -1;
156
157    } else if (ret == -2)
158	ret = -1;
159    else if (ret == 0)
160	*pwc = '\0';
161
162    return ret;
163}
164#endif
165
166#ifdef SHORT_STRINGS
167Char  **
168blk2short(char **src)
169{
170    size_t     n;
171    Char **sdst, **dst;
172
173    /*
174     * Count
175     */
176    for (n = 0; src[n] != NULL; n++)
177	continue;
178    sdst = dst = xmalloc((n + 1) * sizeof(Char *));
179
180    for (; *src != NULL; src++)
181	*dst++ = SAVE(*src);
182    *dst = NULL;
183    return (sdst);
184}
185
186char  **
187short2blk(Char **src)
188{
189    size_t     n;
190    char **sdst, **dst;
191
192    /*
193     * Count
194     */
195    for (n = 0; src[n] != NULL; n++)
196	continue;
197    sdst = dst = xmalloc((n + 1) * sizeof(char *));
198
199    for (; *src != NULL; src++)
200	*dst++ = strsave(short2str(*src));
201    *dst = NULL;
202    return (sdst);
203}
204
205Char   *
206str2short(const char *src)
207{
208    static struct Strbuf buf; /* = Strbuf_INIT; */
209
210    if (src == NULL)
211	return (NULL);
212
213    buf.len = 0;
214    while (*src) {
215	Char wc;
216
217	src += one_mbtowc(&wc, src, MB_LEN_MAX);
218	Strbuf_append1(&buf, wc);
219    }
220    Strbuf_terminate(&buf);
221    return buf.s;
222}
223
224char   *
225short2str(const Char *src)
226{
227    static char *sdst = NULL;
228    static size_t dstsize = 0;
229    char *dst, *edst;
230
231    if (src == NULL)
232	return (NULL);
233
234    if (sdst == NULL) {
235	dstsize = MALLOC_INCR;
236	sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
237    }
238    dst = sdst;
239    edst = &dst[dstsize];
240    while (*src) {
241	dst += one_wctomb(dst, *src);
242	src++;
243	if (dst >= edst) {
244	    char *wdst = dst;
245	    char *wedst = edst;
246
247	    dstsize += MALLOC_INCR;
248	    sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
249	    edst = &sdst[dstsize];
250	    dst = &edst[-MALLOC_INCR];
251	    while (wdst > wedst) {
252		dst++;
253		wdst--;
254	    }
255	}
256    }
257    *dst = 0;
258    return (sdst);
259}
260
261#if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS)
262Char   *
263s_strcpy(Char *dst, const Char *src)
264{
265    Char *sdst;
266
267    sdst = dst;
268    while ((*dst++ = *src++) != '\0')
269	continue;
270    return (sdst);
271}
272
273Char   *
274s_strncpy(Char *dst, const Char *src, size_t n)
275{
276    Char *sdst;
277
278    if (n == 0)
279	return(dst);
280
281    sdst = dst;
282    do
283	if ((*dst++ = *src++) == '\0') {
284	    while (--n != 0)
285		*dst++ = '\0';
286	    return(sdst);
287	}
288    while (--n != 0);
289    return (sdst);
290}
291
292Char   *
293s_strcat(Char *dst, const Char *src)
294{
295    Strcpy(Strend(dst), src);
296    return dst;
297}
298
299#ifdef NOTUSED
300Char   *
301s_strncat(Char *dst, const Char *src, size_t n)
302{
303    Char *sdst;
304
305    if (n == 0)
306	return (dst);
307
308    sdst = dst;
309
310    while (*dst)
311	dst++;
312
313    do
314	if ((*dst++ = *src++) == '\0')
315	    return(sdst);
316    while (--n != 0)
317	continue;
318
319    *dst = '\0';
320    return (sdst);
321}
322
323#endif
324
325Char   *
326s_strchr(const Char *str, int ch)
327{
328    do
329	if (*str == ch)
330	    return ((Char *)(intptr_t)str);
331    while (*str++);
332    return (NULL);
333}
334
335Char   *
336s_strrchr(const Char *str, int ch)
337{
338    const Char *rstr;
339
340    rstr = NULL;
341    do
342	if (*str == ch)
343	    rstr = str;
344    while (*str++);
345    return ((Char *)(intptr_t)rstr);
346}
347
348size_t
349s_strlen(const Char *str)
350{
351    size_t n;
352
353    for (n = 0; *str++; n++)
354	continue;
355    return (n);
356}
357
358int
359s_strcmp(const Char *str1, const Char *str2)
360{
361    for (; *str1 && *str1 == *str2; str1++, str2++)
362	continue;
363    /*
364     * The following case analysis is necessary so that characters which look
365     * negative collate low against normal characters but high against the
366     * end-of-string NUL.
367     */
368    if (*str1 == '\0' && *str2 == '\0')
369	return (0);
370    else if (*str1 == '\0')
371	return (-1);
372    else if (*str2 == '\0')
373	return (1);
374    else
375	return (*str1 - *str2);
376}
377
378int
379s_strncmp(const Char *str1, const Char *str2, size_t n)
380{
381    if (n == 0)
382	return (0);
383    do {
384	if (*str1 != *str2) {
385	    /*
386	     * The following case analysis is necessary so that characters
387	     * which look negative collate low against normal characters
388	     * but high against the end-of-string NUL.
389	     */
390	    if (*str1 == '\0')
391		return (-1);
392	    else if (*str2 == '\0')
393		return (1);
394	    else
395		return (*str1 - *str2);
396	}
397        if (*str1 == '\0')
398	    return(0);
399	str1++, str2++;
400    } while (--n != 0);
401    return(0);
402}
403#endif /* not WIDE_STRINGS */
404
405int
406s_strcasecmp(const Char *str1, const Char *str2)
407{
408#ifdef WIDE_STRINGS
409    wint_t l1 = 0, l2 = 0;
410    for (; *str1; str1++, str2++)
411	if (*str1 == *str2)
412	    l1 = l2 = 0;
413	else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2)))
414	    break;
415#else
416    unsigned char l1 = 0, l2 = 0;
417    for (; *str1; str1++, str2++)
418	if (*str1 == *str2)
419		l1 = l2 = 0;
420	else if ((l1 = tolower((unsigned char)*str1)) !=
421	    (l2 = tolower((unsigned char)*str2)))
422	    break;
423#endif
424    /*
425     * The following case analysis is necessary so that characters which look
426     * negative collate low against normal characters but high against the
427     * end-of-string NUL.
428     */
429    if (*str1 == '\0' && *str2 == '\0')
430	return (0);
431    else if (*str1 == '\0')
432	return (-1);
433    else if (*str2 == '\0')
434	return (1);
435    else if (l1 == l2)	/* They are zero when they are equal */
436	return (*str1 - *str2);
437    else
438	return (l1 - l2);
439}
440
441Char   *
442s_strnsave(const Char *s, size_t len)
443{
444    Char *n;
445
446    n = xmalloc((len + 1) * sizeof (*n));
447    memcpy(n, s, len * sizeof (*n));
448    n[len] = '\0';
449    return n;
450}
451
452Char   *
453s_strsave(const Char *s)
454{
455    Char   *n;
456    size_t size;
457
458    if (s == NULL)
459	s = STRNULL;
460    size = (Strlen(s) + 1) * sizeof(*n);
461    n = xmalloc(size);
462    memcpy(n, s, size);
463    return (n);
464}
465
466Char   *
467s_strspl(const Char *cp, const Char *dp)
468{
469    Char *res, *ep;
470    const Char *p, *q;
471
472    if (!cp)
473	cp = STRNULL;
474    if (!dp)
475	dp = STRNULL;
476    for (p = cp; *p++;)
477	continue;
478    for (q = dp; *q++;)
479	continue;
480    res = xmalloc(((p - cp) + (q - dp) - 1) * sizeof(Char));
481    for (ep = res, q = cp; (*ep++ = *q++) != '\0';)
482	continue;
483    for (ep--, q = dp; (*ep++ = *q++) != '\0';)
484	continue;
485    return (res);
486}
487
488Char   *
489s_strend(const Char *cp)
490{
491    if (!cp)
492	return ((Char *)(intptr_t) cp);
493    while (*cp)
494	cp++;
495    return ((Char *)(intptr_t) cp);
496}
497
498Char   *
499s_strstr(const Char *s, const Char *t)
500{
501    do {
502	const Char *ss = s;
503	const Char *tt = t;
504
505	do
506	    if (*tt == '\0')
507		return ((Char *)(intptr_t) s);
508	while (*ss++ == *tt++);
509    } while (*s++ != '\0');
510    return (NULL);
511}
512
513#else /* !SHORT_STRINGS */
/*
 * Copy s into a static buffer, run strip() on the copy and return it.
 * Returns NULL for a NULL s.  The buffer only ever grows and is reused by
 * later calls, so the result is valid until the next call.
 */
char *
caching_strip(const char *s)
{
    static char *buf = NULL;
    static size_t buf_size = 0;
    size_t need;

    if (s == NULL)
      return NULL;

    need = strlen(s) + 1;	/* terminator included */
    if (need > buf_size) {
	buf = xrealloc(buf, need);
	buf_size = need;
    }
    memcpy(buf, s, need);
    strip(buf);
    return buf;
}
532#endif
533
534char   *
535short2qstr(const Char *src)
536{
537    static char *sdst = NULL;
538    static size_t dstsize = 0;
539    char *dst, *edst;
540
541    if (src == NULL)
542	return (NULL);
543
544    if (sdst == NULL) {
545	dstsize = MALLOC_INCR;
546	sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
547    }
548    dst = sdst;
549    edst = &dst[dstsize];
550    while (*src) {
551	if (*src & QUOTE) {
552	    *dst++ = '\\';
553	    if (dst == edst) {
554		dstsize += MALLOC_INCR;
555		sdst = xrealloc(sdst,
556				(dstsize + MALLOC_SURPLUS) * sizeof(char));
557		edst = &sdst[dstsize];
558		dst = &edst[-MALLOC_INCR];
559	    }
560	}
561	dst += one_wctomb(dst, *src);
562	src++;
563	if (dst >= edst) {
564	    ptrdiff_t i = dst - edst;
565	    dstsize += MALLOC_INCR;
566	    sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
567	    edst = &sdst[dstsize];
568	    dst = &edst[-MALLOC_INCR + i];
569	}
570    }
571    *dst = 0;
572    return (sdst);
573}
574
575struct blk_buf *
576bb_alloc(void)
577{
578    return xcalloc(1, sizeof(struct blk_buf));
579}
580
581static void
582bb_store(struct blk_buf *bb, Char *str)
583{
584    if (bb->len == bb->size) { /* Keep space for terminating NULL */
585	if (bb->size == 0)
586	    bb->size = 16; /* Arbitrary */
587	else
588	    bb->size *= 2;
589	bb->vec = xrealloc(bb->vec, bb->size * sizeof (*bb->vec));
590    }
591    bb->vec[bb->len] = str;
592}
593
594void
595bb_append(struct blk_buf *bb, Char *str)
596{
597    bb_store(bb, str);
598    bb->len++;
599}
600
601void
602bb_cleanup(void *xbb)
603{
604    struct blk_buf *bb;
605    size_t i;
606
607    bb = (struct blk_buf *)xbb;
608    if (bb->vec) {
609	for (i = 0; i < bb->len; i++)
610	    xfree(bb->vec[i]);
611	xfree(bb->vec);
612    }
613    bb->vec = NULL;
614    bb->len = 0;
615}
616
/*
 * Release the contents of the struct blk_buf that bb points to (through
 * bb_cleanup()) and then the structure itself.
 */
void
bb_free(void *bb)
{
    void *whole = bb;

    bb_cleanup(whole);
    xfree(whole);
}
623
624Char **
625bb_finish(struct blk_buf *bb)
626{
627    bb_store(bb, NULL);
628    return xrealloc(bb->vec, (bb->len + 1) * sizeof (*bb->vec));
629}
630
631#define DO_STRBUF(STRBUF, CHAR, STRLEN)				\
632								\
633struct STRBUF *							\
634STRBUF##_alloc(void)						\
635{								\
636    return xcalloc(1, sizeof(struct STRBUF));			\
637}								\
638								\
639static void							\
640STRBUF##_store1(struct STRBUF *buf, CHAR c)			\
641{								\
642    if (buf->size == buf->len) {				\
643	if (buf->size == 0)					\
644	    buf->size = 64; /* Arbitrary */			\
645	else							\
646	    buf->size *= 2;					\
647	buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s));	\
648    }								\
649    assert(buf->s);						\
650    buf->s[buf->len] = c;					\
651}								\
652								\
653/* Like strbuf_append1(buf, '\0'), but don't advance len */	\
654void								\
655STRBUF##_terminate(struct STRBUF *buf)				\
656{								\
657    STRBUF##_store1(buf, '\0');					\
658}								\
659								\
660void								\
661STRBUF##_append1(struct STRBUF *buf, CHAR c)			\
662{								\
663    STRBUF##_store1(buf, c);					\
664    buf->len++;							\
665}								\
666								\
667void								\
668STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len)	\
669{								\
670    if (buf->size < buf->len + len) {				\
671	if (buf->size == 0)					\
672	    buf->size = 64; /* Arbitrary */			\
673	while (buf->size < buf->len + len)			\
674	    buf->size *= 2;					\
675	buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s));	\
676    }								\
677    memcpy(buf->s + buf->len, s, len * sizeof(*buf->s));	\
678    buf->len += len;						\
679}								\
680								\
681void								\
682STRBUF##_append(struct STRBUF *buf, const CHAR *s)		\
683{								\
684    STRBUF##_appendn(buf, s, STRLEN(s));			\
685}								\
686								\
687CHAR *								\
688STRBUF##_finish(struct STRBUF *buf)				\
689{								\
690    STRBUF##_append1(buf, 0);					\
691    return xrealloc(buf->s, buf->len * sizeof(*buf->s));	\
692}								\
693								\
694void								\
695STRBUF##_cleanup(void *xbuf)					\
696{								\
697    struct STRBUF *buf;						\
698								\
699    buf = xbuf;							\
700    xfree(buf->s);						\
701}								\
702								\
703void								\
704STRBUF##_free(void *xbuf)					\
705{								\
706    STRBUF##_cleanup(xbuf);					\
707    xfree(xbuf);						\
708}								\
709								\
710const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */
711
712DO_STRBUF(strbuf, char, strlen);
713DO_STRBUF(Strbuf, Char, Strlen);
714