1/*
2 * tc.str.c: Short string package
3 * 	     This has been a lesson of how to write buggy code!
4 */
5/*-
6 * Copyright (c) 1980, 1991 The Regents of the University of California.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33#include "sh.h"
34
35#include <assert.h>
36#include <limits.h>
37
/* Step, in bytes, by which the static conversion buffers below are grown. */
#define MALLOC_INCR	128

/*
 * Slack allocated past the nominal buffer size so that one complete
 * multibyte character can be stored before the bounds check is made.
 */
#if defined(WIDE_STRINGS)
# define MALLOC_SURPLUS	MB_LEN_MAX
#else
# define MALLOC_SURPLUS	0
#endif
44
45#ifdef WIDE_STRINGS
46size_t
47one_mbtowc(Char *pwc, const char *s, size_t n)
48{
49    int len;
50
51    len = rt_mbtowc(pwc, s, n);
52    if (len == -1) {
53        reset_mbtowc();
54	*pwc = (unsigned char)*s | INVALID_BYTE;
55    }
56    if (len <= 0)
57	len = 1;
58    return len;
59}
60
61size_t
62one_wctomb(char *s, Char wchar)
63{
64    int len;
65
66#if INVALID_BYTE != 0
67    if ((wchar & INVALID_BYTE) == INVALID_BYTE) {    /* wchar >= INVALID_BYTE */
68	/* invalid char
69	 * exmaple)
70	 * if wchar = f0000090(=90|INVALID_BYTE), then *s = ffffff90 */
71	*s = (char)wchar;
72	len = 1;
73#else
74    if (wchar & (CHAR & INVALID_BYTE)) {
75	s[0] = wchar & (CHAR & 0xFF);
76	len = 1;
77#endif
78    } else {
79#if INVALID_BYTE != 0
80	wchar &= MAX_UTF32;
81#else
82	wchar &= CHAR;
83#endif
84#ifdef UTF16_STRINGS
85	if (wchar >= 0x10000) {
86	    /* UTF-16 systems can't handle these values directly in calls to
87	       wctomb.  Convert value to UTF-16 surrogate and call wcstombs to
88	       convert the "string" to the correct multibyte representation,
89	       if any. */
90	    wchar_t ws[3];
91	    wchar -= 0x10000;
92	    ws[0] = 0xd800 | (wchar >> 10);
93	    ws[1] = 0xdc00 | (wchar & 0x3ff);
94	    ws[2] = 0;
95	    /* The return value of wcstombs excludes the trailing 0, so len is
96	       the correct number of multibytes for the Unicode char. */
97	    len = wcstombs (s, ws, MB_CUR_MAX + 1);
98	} else
99#endif
100	len = wctomb(s, (wchar_t) wchar);
101	if (len == -1)
102	    s[0] = wchar;
103	if (len <= 0)
104	    len = 1;
105    }
106    return len;
107}
108
109int
110rt_mbtowc(Char *pwc, const char *s, size_t n)
111{
112    int ret;
113    char back[MB_LEN_MAX];
114    wchar_t tmp;
115#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
116# if defined(AUTOSET_KANJI)
117    static mbstate_t mb_zero, mb;
118    /*
119     * Workaround the Shift-JIS endcoding that translates unshifted 7 bit ASCII!
120     */
121    if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') &&
122	!memcmp(&mb, &mb_zero, sizeof(mb)))
123    {
124	*pwc = *s;
125	return 1;
126    }
127# else
128    mbstate_t mb;
129# endif
130
131    memset (&mb, 0, sizeof mb);
132    ret = mbrtowc(&tmp, s, n, &mb);
133#else
134    ret = mbtowc(&tmp, s, n);
135#endif
136    if (ret > 0) {
137	*pwc = tmp;
138#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
139	if (tmp >= 0xd800 && tmp <= 0xdbff) {
140	    /* UTF-16 surrogate pair.  Fetch second half and compute
141	       UTF-32 value.  Dispense with the inverse test in this case. */
142	    size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb);
143	    if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2)
144		ret = -1;
145	    else {
146		*pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000;
147		ret += n2;
148	    }
149	} else
150#endif
151      	if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0)
152	    ret = -1;
153
154    } else if (ret == -2)
155	ret = -1;
156    else if (ret == 0)
157	*pwc = '\0';
158
159    return ret;
160}
161#endif
162
163#ifdef SHORT_STRINGS
164Char  **
165blk2short(char **src)
166{
167    size_t     n;
168    Char **sdst, **dst;
169
170    /*
171     * Count
172     */
173    for (n = 0; src[n] != NULL; n++)
174	continue;
175    sdst = dst = xmalloc((n + 1) * sizeof(Char *));
176
177    for (; *src != NULL; src++)
178	*dst++ = SAVE(*src);
179    *dst = NULL;
180    return (sdst);
181}
182
183char  **
184short2blk(Char **src)
185{
186    size_t     n;
187    char **sdst, **dst;
188
189    /*
190     * Count
191     */
192    for (n = 0; src[n] != NULL; n++)
193	continue;
194    sdst = dst = xmalloc((n + 1) * sizeof(char *));
195
196    for (; *src != NULL; src++)
197	*dst++ = strsave(short2str(*src));
198    *dst = NULL;
199    return (sdst);
200}
201
202Char   *
203str2short(const char *src)
204{
205    static struct Strbuf buf; /* = Strbuf_INIT; */
206
207    if (src == NULL)
208	return (NULL);
209
210    buf.len = 0;
211    while (*src) {
212	Char wc;
213
214	src += one_mbtowc(&wc, src, MB_LEN_MAX);
215	Strbuf_append1(&buf, wc);
216    }
217    Strbuf_terminate(&buf);
218    return buf.s;
219}
220
221char   *
222short2str(const Char *src)
223{
224    static char *sdst = NULL;
225    static size_t dstsize = 0;
226    char *dst, *edst;
227
228    if (src == NULL)
229	return (NULL);
230
231    if (sdst == NULL) {
232	dstsize = MALLOC_INCR;
233	sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
234    }
235    dst = sdst;
236    edst = &dst[dstsize];
237    while (*src) {
238	dst += one_wctomb(dst, *src);
239	src++;
240	if (dst >= edst) {
241	    char *wdst = dst;
242	    char *wedst = edst;
243
244	    dstsize += MALLOC_INCR;
245	    sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
246	    edst = &sdst[dstsize];
247	    dst = &edst[-MALLOC_INCR];
248	    while (wdst > wedst) {
249		dst++;
250		wdst--;
251	    }
252	}
253    }
254    *dst = 0;
255    return (sdst);
256}
257
258#if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS)
259Char   *
260s_strcpy(Char *dst, const Char *src)
261{
262    Char *sdst;
263
264    sdst = dst;
265    while ((*dst++ = *src++) != '\0')
266	continue;
267    return (sdst);
268}
269
270Char   *
271s_strncpy(Char *dst, const Char *src, size_t n)
272{
273    Char *sdst;
274
275    if (n == 0)
276	return(dst);
277
278    sdst = dst;
279    do
280	if ((*dst++ = *src++) == '\0') {
281	    while (--n != 0)
282		*dst++ = '\0';
283	    return(sdst);
284	}
285    while (--n != 0);
286    return (sdst);
287}
288
289Char   *
290s_strcat(Char *dst, const Char *src)
291{
292    Strcpy(Strend(dst), src);
293    return dst;
294}
295
296#ifdef NOTUSED
297Char   *
298s_strncat(Char *dst, const Char *src, size_t n)
299{
300    Char *sdst;
301
302    if (n == 0)
303	return (dst);
304
305    sdst = dst;
306
307    while (*dst)
308	dst++;
309
310    do
311	if ((*dst++ = *src++) == '\0')
312	    return(sdst);
313    while (--n != 0)
314	continue;
315
316    *dst = '\0';
317    return (sdst);
318}
319
320#endif
321
322Char   *
323s_strchr(const Char *str, int ch)
324{
325    do
326	if (*str == ch)
327	    return ((Char *)(intptr_t)str);
328    while (*str++);
329    return (NULL);
330}
331
332Char   *
333s_strrchr(const Char *str, int ch)
334{
335    const Char *rstr;
336
337    rstr = NULL;
338    do
339	if (*str == ch)
340	    rstr = str;
341    while (*str++);
342    return ((Char *)(intptr_t)rstr);
343}
344
345size_t
346s_strlen(const Char *str)
347{
348    size_t n;
349
350    for (n = 0; *str++; n++)
351	continue;
352    return (n);
353}
354
355int
356s_strcmp(const Char *str1, const Char *str2)
357{
358    for (; *str1 && *str1 == *str2; str1++, str2++)
359	continue;
360    /*
361     * The following case analysis is necessary so that characters which look
362     * negative collate low against normal characters but high against the
363     * end-of-string NUL.
364     */
365    if (*str1 == '\0' && *str2 == '\0')
366	return (0);
367    else if (*str1 == '\0')
368	return (-1);
369    else if (*str2 == '\0')
370	return (1);
371    else
372	return (*str1 - *str2);
373}
374
375int
376s_strncmp(const Char *str1, const Char *str2, size_t n)
377{
378    if (n == 0)
379	return (0);
380    do {
381	if (*str1 != *str2) {
382	    /*
383	     * The following case analysis is necessary so that characters
384	     * which look negative collate low against normal characters
385	     * but high against the end-of-string NUL.
386	     */
387	    if (*str1 == '\0')
388		return (-1);
389	    else if (*str2 == '\0')
390		return (1);
391	    else
392		return (*str1 - *str2);
393	}
394        if (*str1 == '\0')
395	    return(0);
396	str1++, str2++;
397    } while (--n != 0);
398    return(0);
399}
400#endif /* not WIDE_STRINGS */
401
402int
403s_strcasecmp(const Char *str1, const Char *str2)
404{
405#ifdef WIDE_STRINGS
406    wint_t l1 = 0, l2 = 0;
407    for (; *str1; str1++, str2++)
408	if (*str1 == *str2)
409	    l1 = l2 = 0;
410	else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2)))
411	    break;
412#else
413    unsigned char l1 = 0, l2 = 0;
414    for (; *str1; str1++, str2++)
415	if (*str1 == *str2)
416		l1 = l2 = 0;
417	else if ((l1 = tolower((unsigned char)*str1)) !=
418	    (l2 = tolower((unsigned char)*str2)))
419	    break;
420#endif
421    /*
422     * The following case analysis is necessary so that characters which look
423     * negative collate low against normal characters but high against the
424     * end-of-string NUL.
425     */
426    if (*str1 == '\0' && *str2 == '\0')
427	return (0);
428    else if (*str1 == '\0')
429	return (-1);
430    else if (*str2 == '\0')
431	return (1);
432    else if (l1 == l2)	/* They are zero when they are equal */
433	return (*str1 - *str2);
434    else
435	return (l1 - l2);
436}
437
438Char   *
439s_strnsave(const Char *s, size_t len)
440{
441    Char *n;
442
443    n = xmalloc((len + 1) * sizeof (*n));
444    memcpy(n, s, len * sizeof (*n));
445    n[len] = '\0';
446    return n;
447}
448
449Char   *
450s_strsave(const Char *s)
451{
452    Char   *n;
453    size_t size;
454
455    if (s == NULL)
456	s = STRNULL;
457    size = (Strlen(s) + 1) * sizeof(*n);
458    n = xmalloc(size);
459    memcpy(n, s, size);
460    return (n);
461}
462
463Char   *
464s_strspl(const Char *cp, const Char *dp)
465{
466    Char *res, *ep;
467    const Char *p, *q;
468
469    if (!cp)
470	cp = STRNULL;
471    if (!dp)
472	dp = STRNULL;
473    for (p = cp; *p++;)
474	continue;
475    for (q = dp; *q++;)
476	continue;
477    res = xmalloc(((p - cp) + (q - dp) - 1) * sizeof(Char));
478    for (ep = res, q = cp; (*ep++ = *q++) != '\0';)
479	continue;
480    for (ep--, q = dp; (*ep++ = *q++) != '\0';)
481	continue;
482    return (res);
483}
484
485Char   *
486s_strend(const Char *cp)
487{
488    if (!cp)
489	return ((Char *)(intptr_t) cp);
490    while (*cp)
491	cp++;
492    return ((Char *)(intptr_t) cp);
493}
494
495Char   *
496s_strstr(const Char *s, const Char *t)
497{
498    do {
499	const Char *ss = s;
500	const Char *tt = t;
501
502	do
503	    if (*tt == '\0')
504		return ((Char *)(intptr_t) s);
505	while (*ss++ == *tt++);
506    } while (*s++ != '\0');
507    return (NULL);
508}
509
510#else /* !SHORT_STRINGS */
/*
 * Copy s into a buffer that is static to this function, run strip() on
 * the copy and return it.  Returns NULL when s is NULL.  The buffer is
 * reused, and grown when needed, by later calls.
 */
char *
caching_strip(const char *s)
{
    static char *cache = NULL;
    static size_t cache_size = 0;
    size_t needed;

    if (!s)
	return NULL;

    /* Room for the string and its terminating NUL. */
    needed = strlen(s) + 1;
    if (needed > cache_size) {
	cache = xrealloc(cache, needed);
	cache_size = needed;
    }
    memcpy(cache, s, needed);
    strip(cache);
    return cache;
}
529#endif
530
531char   *
532short2qstr(const Char *src)
533{
534    static char *sdst = NULL;
535    static size_t dstsize = 0;
536    char *dst, *edst;
537
538    if (src == NULL)
539	return (NULL);
540
541    if (sdst == NULL) {
542	dstsize = MALLOC_INCR;
543	sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
544    }
545    dst = sdst;
546    edst = &dst[dstsize];
547    while (*src) {
548	if (*src & QUOTE) {
549	    *dst++ = '\\';
550	    if (dst == edst) {
551		dstsize += MALLOC_INCR;
552		sdst = xrealloc(sdst,
553				(dstsize + MALLOC_SURPLUS) * sizeof(char));
554		edst = &sdst[dstsize];
555		dst = &edst[-MALLOC_INCR];
556	    }
557	}
558	dst += one_wctomb(dst, *src);
559	src++;
560	if (dst >= edst) {
561	    ptrdiff_t i = dst - edst;
562	    dstsize += MALLOC_INCR;
563	    sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
564	    edst = &sdst[dstsize];
565	    dst = &edst[-MALLOC_INCR + i];
566	}
567    }
568    *dst = 0;
569    return (sdst);
570}
571
572struct blk_buf *
573bb_alloc(void)
574{
575    return xcalloc(1, sizeof(struct blk_buf));
576}
577
578static void
579bb_store(struct blk_buf *bb, Char *str)
580{
581    if (bb->len == bb->size) { /* Keep space for terminating NULL */
582	if (bb->size == 0)
583	    bb->size = 16; /* Arbitrary */
584	else
585	    bb->size *= 2;
586	bb->vec = xrealloc(bb->vec, bb->size * sizeof (*bb->vec));
587    }
588    bb->vec[bb->len] = str;
589}
590
591void
592bb_append(struct blk_buf *bb, Char *str)
593{
594    bb_store(bb, str);
595    bb->len++;
596}
597
598void
599bb_cleanup(void *xbb)
600{
601    struct blk_buf *bb;
602    size_t i;
603
604    bb = (struct blk_buf *)xbb;
605    if (bb->vec) {
606	for (i = 0; i < bb->len; i++)
607	    xfree(bb->vec[i]);
608	xfree(bb->vec);
609    }
610    bb->vec = NULL;
611    bb->len = 0;
612}
613
/*
 * Release the contents of the blk_buf bb with bb_cleanup() and then free
 * the blk_buf object itself.
 */
void
bb_free(void *bb)
{
    /* Contents first, then the container. */
    bb_cleanup(bb);
    xfree(bb);
}
620
621Char **
622bb_finish(struct blk_buf *bb)
623{
624    bb_store(bb, NULL);
625    return xrealloc(bb->vec, (bb->len + 1) * sizeof (*bb->vec));
626}
627
628#define DO_STRBUF(STRBUF, CHAR, STRLEN)				\
629								\
630struct STRBUF *							\
631STRBUF##_alloc(void)						\
632{								\
633    return xcalloc(1, sizeof(struct STRBUF));			\
634}								\
635								\
636static void							\
637STRBUF##_store1(struct STRBUF *buf, CHAR c)			\
638{								\
639    if (buf->size == buf->len) {				\
640	if (buf->size == 0)					\
641	    buf->size = 64; /* Arbitrary */			\
642	else							\
643	    buf->size *= 2;					\
644	buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s));	\
645    }								\
646    assert(buf->s);						\
647    buf->s[buf->len] = c;					\
648}								\
649								\
650/* Like strbuf_append1(buf, '\0'), but don't advance len */	\
651void								\
652STRBUF##_terminate(struct STRBUF *buf)				\
653{								\
654    STRBUF##_store1(buf, '\0');					\
655}								\
656								\
657void								\
658STRBUF##_append1(struct STRBUF *buf, CHAR c)			\
659{								\
660    STRBUF##_store1(buf, c);					\
661    buf->len++;							\
662}								\
663								\
664void								\
665STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len)	\
666{								\
667    if (buf->size < buf->len + len) {				\
668	if (buf->size == 0)					\
669	    buf->size = 64; /* Arbitrary */			\
670	while (buf->size < buf->len + len)			\
671	    buf->size *= 2;					\
672	buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s));	\
673    }								\
674    memcpy(buf->s + buf->len, s, len * sizeof(*buf->s));	\
675    buf->len += len;						\
676}								\
677								\
678void								\
679STRBUF##_append(struct STRBUF *buf, const CHAR *s)		\
680{								\
681    STRBUF##_appendn(buf, s, STRLEN(s));			\
682}								\
683								\
684CHAR *								\
685STRBUF##_finish(struct STRBUF *buf)				\
686{								\
687    STRBUF##_append1(buf, 0);					\
688    return xrealloc(buf->s, buf->len * sizeof(*buf->s));	\
689}								\
690								\
691void								\
692STRBUF##_cleanup(void *xbuf)					\
693{								\
694    struct STRBUF *buf;						\
695								\
696    buf = xbuf;							\
697    xfree(buf->s);						\
698}								\
699								\
700void								\
701STRBUF##_free(void *xbuf)					\
702{								\
703    STRBUF##_cleanup(xbuf);					\
704    xfree(xbuf);						\
705}								\
706								\
707const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */
708
709DO_STRBUF(strbuf, char, strlen);
710DO_STRBUF(Strbuf, Char, Strlen);
711