1/* $Header: /p/tcsh/cvsroot/tcsh/tc.str.c,v 3.42 2012/01/10 21:34:31 christos Exp $ */
2/*
3 * tc.str.c: Short string package
4 * 	     This has been a lesson of how to write buggy code!
5 */
6/*-
7 * Copyright (c) 1980, 1991 The Regents of the University of California.
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34#include "sh.h"
35
36#include <assert.h>
37#include <limits.h>
38
39RCSID("$tcsh: tc.str.c,v 3.42 2012/01/10 21:34:31 christos Exp $")
40
/* Number of bytes by which the static conversion buffers grow each time. */
#define MALLOC_INCR	128

/*
 * Extra bytes allocated past the nominal buffer size, so that one
 * multibyte character can be written before the buffer is enlarged.
 */
#if defined(WIDE_STRINGS)
# define MALLOC_SURPLUS	MB_LEN_MAX /* Space for one multibyte character */
#else
# define MALLOC_SURPLUS	0
#endif
47
48#ifdef WIDE_STRINGS
49size_t
50one_mbtowc(Char *pwc, const char *s, size_t n)
51{
52    int len;
53
54    len = rt_mbtowc(pwc, s, n);
55    if (len == -1) {
56        reset_mbtowc();
57	*pwc = (unsigned char)*s | INVALID_BYTE;
58    }
59    if (len <= 0)
60	len = 1;
61    return len;
62}
63
64size_t
65one_wctomb(char *s, Char wchar)
66{
67    int len;
68
69    if (wchar & INVALID_BYTE) {
70	s[0] = wchar & 0xFF;
71	len = 1;
72    } else {
73#ifdef UTF16_STRINGS
74	if (wchar >= 0x10000) {
75	    /* UTF-16 systems can't handle these values directly in calls to
76	       wctomb.  Convert value to UTF-16 surrogate and call wcstombs to
77	       convert the "string" to the correct multibyte representation,
78	       if any. */
79	    wchar_t ws[3];
80	    wchar -= 0x10000;
81	    ws[0] = 0xd800 | (wchar >> 10);
82	    ws[1] = 0xdc00 | (wchar & 0x3ff);
83	    ws[2] = 0;
84	    /* The return value of wcstombs excludes the trailing 0, so len is
85	       the correct number of multibytes for the Unicode char. */
86	    len = wcstombs (s, ws, MB_CUR_MAX + 1);
87	} else
88#endif
89	len = wctomb(s, (wchar_t) wchar);
90	if (len == -1)
91	    s[0] = wchar;
92	if (len <= 0)
93	    len = 1;
94    }
95    return len;
96}
97
98int
99rt_mbtowc(Char *pwc, const char *s, size_t n)
100{
101    int ret;
102    char back[MB_LEN_MAX];
103    wchar_t tmp;
104#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
105# if defined(AUTOSET_KANJI)
106    static mbstate_t mb_zero, mb;
107    /*
108     * Workaround the Shift-JIS endcoding that translates unshifted 7 bit ASCII!
109     */
110    if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') &&
111	!memcmp(&mb, &mb_zero, sizeof(mb)))
112    {
113	*pwc = *s;
114	return 1;
115    }
116# else
117    mbstate_t mb;
118# endif
119
120    memset (&mb, 0, sizeof mb);
121    ret = mbrtowc(&tmp, s, n, &mb);
122#else
123    ret = mbtowc(&tmp, s, n);
124#endif
125    if (ret > 0) {
126	*pwc = tmp;
127#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
128	if (tmp >= 0xd800 && tmp <= 0xdbff) {
129	    /* UTF-16 surrogate pair.  Fetch second half and compute
130	       UTF-32 value.  Dispense with the inverse test in this case. */
131	    size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb);
132	    if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2)
133		ret = -1;
134	    else {
135		*pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000;
136		ret += n2;
137	    }
138	} else
139#endif
140      	if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0)
141	    ret = -1;
142
143    } else if (ret == -2)
144	ret = -1;
145    else if (ret == 0)
146	*pwc = '\0';
147
148    return ret;
149}
150#endif
151
152#ifdef SHORT_STRINGS
153Char  **
154blk2short(char **src)
155{
156    size_t     n;
157    Char **sdst, **dst;
158
159    /*
160     * Count
161     */
162    for (n = 0; src[n] != NULL; n++)
163	continue;
164    sdst = dst = xmalloc((n + 1) * sizeof(Char *));
165
166    for (; *src != NULL; src++)
167	*dst++ = SAVE(*src);
168    *dst = NULL;
169    return (sdst);
170}
171
172char  **
173short2blk(Char **src)
174{
175    size_t     n;
176    char **sdst, **dst;
177
178    /*
179     * Count
180     */
181    for (n = 0; src[n] != NULL; n++)
182	continue;
183    sdst = dst = xmalloc((n + 1) * sizeof(char *));
184
185    for (; *src != NULL; src++)
186	*dst++ = strsave(short2str(*src));
187    *dst = NULL;
188    return (sdst);
189}
190
191Char   *
192str2short(const char *src)
193{
194    static struct Strbuf buf; /* = Strbuf_INIT; */
195
196    if (src == NULL)
197	return (NULL);
198
199    buf.len = 0;
200    while (*src) {
201	Char wc;
202
203	src += one_mbtowc(&wc, src, MB_LEN_MAX);
204	Strbuf_append1(&buf, wc);
205    }
206    Strbuf_terminate(&buf);
207    return buf.s;
208}
209
210char   *
211short2str(const Char *src)
212{
213    static char *sdst = NULL;
214    static size_t dstsize = 0;
215    char *dst, *edst;
216
217    if (src == NULL)
218	return (NULL);
219
220    if (sdst == NULL) {
221	dstsize = MALLOC_INCR;
222	sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
223    }
224    dst = sdst;
225    edst = &dst[dstsize];
226    while (*src) {
227	dst += one_wctomb(dst, *src & CHAR);
228	src++;
229	if (dst >= edst) {
230	    char *wdst = dst;
231	    char *wedst = edst;
232
233	    dstsize += MALLOC_INCR;
234	    sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
235	    edst = &sdst[dstsize];
236	    dst = &edst[-MALLOC_INCR];
237	    while (wdst > wedst) {
238		dst++;
239		wdst--;
240	    }
241	}
242    }
243    *dst = 0;
244    return (sdst);
245}
246
247#if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS)
248Char   *
249s_strcpy(Char *dst, const Char *src)
250{
251    Char *sdst;
252
253    sdst = dst;
254    while ((*dst++ = *src++) != '\0')
255	continue;
256    return (sdst);
257}
258
259Char   *
260s_strncpy(Char *dst, const Char *src, size_t n)
261{
262    Char *sdst;
263
264    if (n == 0)
265	return(dst);
266
267    sdst = dst;
268    do
269	if ((*dst++ = *src++) == '\0') {
270	    while (--n != 0)
271		*dst++ = '\0';
272	    return(sdst);
273	}
274    while (--n != 0);
275    return (sdst);
276}
277
278Char   *
279s_strcat(Char *dst, const Char *src)
280{
281    Strcpy(Strend(dst), src);
282    return dst;
283}
284
285#ifdef NOTUSED
286Char   *
287s_strncat(Char *dst, const Char *src, size_t n)
288{
289    Char *sdst;
290
291    if (n == 0)
292	return (dst);
293
294    sdst = dst;
295
296    while (*dst)
297	dst++;
298
299    do
300	if ((*dst++ = *src++) == '\0')
301	    return(sdst);
302    while (--n != 0)
303	continue;
304
305    *dst = '\0';
306    return (sdst);
307}
308
309#endif
310
311Char   *
312s_strchr(const Char *str, int ch)
313{
314    do
315	if (*str == ch)
316	    return ((Char *)(intptr_t)str);
317    while (*str++);
318    return (NULL);
319}
320
321Char   *
322s_strrchr(const Char *str, int ch)
323{
324    const Char *rstr;
325
326    rstr = NULL;
327    do
328	if (*str == ch)
329	    rstr = str;
330    while (*str++);
331    return ((Char *)(intptr_t)rstr);
332}
333
334size_t
335s_strlen(const Char *str)
336{
337    size_t n;
338
339    for (n = 0; *str++; n++)
340	continue;
341    return (n);
342}
343
344int
345s_strcmp(const Char *str1, const Char *str2)
346{
347    for (; *str1 && *str1 == *str2; str1++, str2++)
348	continue;
349    /*
350     * The following case analysis is necessary so that characters which look
351     * negative collate low against normal characters but high against the
352     * end-of-string NUL.
353     */
354    if (*str1 == '\0' && *str2 == '\0')
355	return (0);
356    else if (*str1 == '\0')
357	return (-1);
358    else if (*str2 == '\0')
359	return (1);
360    else
361	return (*str1 - *str2);
362}
363
364int
365s_strncmp(const Char *str1, const Char *str2, size_t n)
366{
367    if (n == 0)
368	return (0);
369    do {
370	if (*str1 != *str2) {
371	    /*
372	     * The following case analysis is necessary so that characters
373	     * which look negative collate low against normal characters
374	     * but high against the end-of-string NUL.
375	     */
376	    if (*str1 == '\0')
377		return (-1);
378	    else if (*str2 == '\0')
379		return (1);
380	    else
381		return (*str1 - *str2);
382	}
383        if (*str1 == '\0')
384	    return(0);
385	str1++, str2++;
386    } while (--n != 0);
387    return(0);
388}
389#endif /* not WIDE_STRINGS */
390
391int
392s_strcasecmp(const Char *str1, const Char *str2)
393{
394#ifdef WIDE_STRINGS
395    wint_t l1 = 0, l2 = 0;
396    for (; *str1; str1++, str2++)
397	if (*str1 == *str2)
398	    l1 = l2 = 0;
399	else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2)))
400	    break;
401#else
402    unsigned char l1 = 0, l2 = 0;
403    for (; *str1; str1++, str2++)
404	if (*str1 == *str2)
405		l1 = l2 = 0;
406	else if ((l1 = tolower((unsigned char)*str1)) !=
407	    (l2 = tolower((unsigned char)*str2)))
408	    break;
409#endif
410    /*
411     * The following case analysis is necessary so that characters which look
412     * negative collate low against normal characters but high against the
413     * end-of-string NUL.
414     */
415    if (*str1 == '\0' && *str2 == '\0')
416	return (0);
417    else if (*str1 == '\0')
418	return (-1);
419    else if (*str2 == '\0')
420	return (1);
421    else if (l1 == l2)	/* They are zero when they are equal */
422	return (*str1 - *str2);
423    else
424	return (l1 - l2);
425}
426
427Char   *
428s_strnsave(const Char *s, size_t len)
429{
430    Char *n;
431
432    n = xmalloc((len + 1) * sizeof (*n));
433    memcpy(n, s, len * sizeof (*n));
434    n[len] = '\0';
435    return n;
436}
437
438Char   *
439s_strsave(const Char *s)
440{
441    Char   *n;
442    size_t size;
443
444    if (s == NULL)
445	s = STRNULL;
446    size = (Strlen(s) + 1) * sizeof(*n);
447    n = xmalloc(size);
448    memcpy(n, s, size);
449    return (n);
450}
451
452Char   *
453s_strspl(const Char *cp, const Char *dp)
454{
455    Char *res, *ep;
456    const Char *p, *q;
457
458    if (!cp)
459	cp = STRNULL;
460    if (!dp)
461	dp = STRNULL;
462    for (p = cp; *p++;)
463	continue;
464    for (q = dp; *q++;)
465	continue;
466    res = xmalloc(((p - cp) + (q - dp) - 1) * sizeof(Char));
467    for (ep = res, q = cp; (*ep++ = *q++) != '\0';)
468	continue;
469    for (ep--, q = dp; (*ep++ = *q++) != '\0';)
470	continue;
471    return (res);
472}
473
474Char   *
475s_strend(const Char *cp)
476{
477    if (!cp)
478	return ((Char *)(intptr_t) cp);
479    while (*cp)
480	cp++;
481    return ((Char *)(intptr_t) cp);
482}
483
484Char   *
485s_strstr(const Char *s, const Char *t)
486{
487    do {
488	const Char *ss = s;
489	const Char *tt = t;
490
491	do
492	    if (*tt == '\0')
493		return ((Char *)(intptr_t) s);
494	while (*ss++ == *tt++);
495    } while (*s++ != '\0');
496    return (NULL);
497}
498
499#else /* !SHORT_STRINGS */
/*
 * Copy s into a static buffer, apply strip() to the copy and return it.
 * Returns NULL for a NULL argument.  The buffer only ever grows and is
 * rewritten by every call.
 */
char *
caching_strip(const char *s)
{
    static char *cache = NULL;
    static size_t cache_size = 0;
    size_t need;

    if (s == NULL)
      return NULL;

    need = strlen(s) + 1;	/* Terminating NUL included */
    if (need > cache_size) {
	cache = xrealloc(cache, need);
	cache_size = need;
    }
    memcpy(cache, s, need);
    strip(cache);
    return cache;
}
518#endif
519
520char   *
521short2qstr(const Char *src)
522{
523    static char *sdst = NULL;
524    static size_t dstsize = 0;
525    char *dst, *edst;
526
527    if (src == NULL)
528	return (NULL);
529
530    if (sdst == NULL) {
531	dstsize = MALLOC_INCR;
532	sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char));
533    }
534    dst = sdst;
535    edst = &dst[dstsize];
536    while (*src) {
537	if (*src & QUOTE) {
538	    *dst++ = '\\';
539	    if (dst == edst) {
540		dstsize += MALLOC_INCR;
541		sdst = xrealloc(sdst,
542				(dstsize + MALLOC_SURPLUS) * sizeof(char));
543		edst = &sdst[dstsize];
544		dst = &edst[-MALLOC_INCR];
545	    }
546	}
547	dst += one_wctomb(dst, *src & CHAR);
548	src++;
549	if (dst >= edst) {
550	    ptrdiff_t i = dst - edst;
551	    dstsize += MALLOC_INCR;
552	    sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char));
553	    edst = &sdst[dstsize];
554	    dst = &edst[-MALLOC_INCR + i];
555	}
556    }
557    *dst = 0;
558    return (sdst);
559}
560
561struct blk_buf *
562bb_alloc()
563{
564    return xcalloc(1, sizeof(struct blk_buf));
565}
566
567static void
568bb_store(struct blk_buf *bb, Char *str)
569{
570    if (bb->len == bb->size) { /* Keep space for terminating NULL */
571	if (bb->size == 0)
572	    bb->size = 16; /* Arbitrary */
573	else
574	    bb->size *= 2;
575	bb->vec = xrealloc(bb->vec, bb->size * sizeof (*bb->vec));
576    }
577    bb->vec[bb->len] = str;
578}
579
580void
581bb_append(struct blk_buf *bb, Char *str)
582{
583    bb_store(bb, str);
584    bb->len++;
585}
586
587void
588bb_cleanup(void *xbb)
589{
590    struct blk_buf *bb;
591    size_t i;
592
593    bb = xbb;
594    for (i = 0; i < bb->len; i++)
595	xfree(bb->vec[i]);
596    xfree(bb->vec);
597}
598
/*
 * Release the contents of the block buffer bb with bb_cleanup() and then
 * the blk_buf structure itself.
 */
void
bb_free(void *bb)
{
    struct blk_buf *owned = bb;

    bb_cleanup(owned);
    xfree(owned);
}
605
606Char **
607bb_finish(struct blk_buf *bb)
608{
609    bb_store(bb, NULL);
610    return xrealloc(bb->vec, (bb->len + 1) * sizeof (*bb->vec));
611}
612
613#define DO_STRBUF(STRBUF, CHAR, STRLEN)				\
614								\
615struct STRBUF *							\
616STRBUF##_alloc(void)						\
617{								\
618    return xcalloc(1, sizeof(struct STRBUF));			\
619}								\
620								\
621static void							\
622STRBUF##_store1(struct STRBUF *buf, CHAR c)			\
623{								\
624    if (buf->size == buf->len) {				\
625	if (buf->size == 0)					\
626	    buf->size = 64; /* Arbitrary */			\
627	else							\
628	    buf->size *= 2;					\
629	buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s));	\
630    }								\
631    assert(buf->s);						\
632    buf->s[buf->len] = c;					\
633}								\
634								\
635/* Like strbuf_append1(buf, '\0'), but don't advance len */	\
636void								\
637STRBUF##_terminate(struct STRBUF *buf)				\
638{								\
639    STRBUF##_store1(buf, '\0');					\
640}								\
641								\
642void								\
643STRBUF##_append1(struct STRBUF *buf, CHAR c)			\
644{								\
645    STRBUF##_store1(buf, c);					\
646    buf->len++;							\
647}								\
648								\
649void								\
650STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len)	\
651{								\
652    if (buf->size < buf->len + len) {				\
653	if (buf->size == 0)					\
654	    buf->size = 64; /* Arbitrary */			\
655	while (buf->size < buf->len + len)			\
656	    buf->size *= 2;					\
657	buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s));	\
658    }								\
659    memcpy(buf->s + buf->len, s, len * sizeof(*buf->s));	\
660    buf->len += len;						\
661}								\
662								\
663void								\
664STRBUF##_append(struct STRBUF *buf, const CHAR *s)		\
665{								\
666    STRBUF##_appendn(buf, s, STRLEN(s));			\
667}								\
668								\
669CHAR *								\
670STRBUF##_finish(struct STRBUF *buf)				\
671{								\
672    STRBUF##_append1(buf, 0);					\
673    return xrealloc(buf->s, buf->len * sizeof(*buf->s));	\
674}								\
675								\
676void								\
677STRBUF##_cleanup(void *xbuf)					\
678{								\
679    struct STRBUF *buf;						\
680								\
681    buf = xbuf;							\
682    xfree(buf->s);						\
683}								\
684								\
685void								\
686STRBUF##_free(void *xbuf)					\
687{								\
688    STRBUF##_cleanup(xbuf);					\
689    xfree(xbuf);						\
690}								\
691								\
692const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */
693
694DO_STRBUF(strbuf, char, strlen);
695DO_STRBUF(Strbuf, Char, Strlen);
696