1/* 2 * tc.str.c: Short string package 3 * This has been a lesson of how to write buggy code! 4 */ 5/*- 6 * Copyright (c) 1980, 1991 The Regents of the University of California. 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33#include "sh.h" 34 35#include <assert.h> 36#include <limits.h> 37 38#define MALLOC_INCR 128 39#ifdef WIDE_STRINGS 40#define MALLOC_SURPLUS MB_LEN_MAX /* Space for one multibyte character */ 41#else 42#define MALLOC_SURPLUS 0 43#endif 44 45#ifdef WIDE_STRINGS 46size_t 47one_mbtowc(Char *pwc, const char *s, size_t n) 48{ 49 int len; 50 51 len = rt_mbtowc(pwc, s, n); 52 if (len == -1) { 53 reset_mbtowc(); 54 *pwc = (unsigned char)*s | INVALID_BYTE; 55 } 56 if (len <= 0) 57 len = 1; 58 return len; 59} 60 61size_t 62one_wctomb(char *s, Char wchar) 63{ 64 int len; 65 66#if INVALID_BYTE != 0 67 if ((wchar & INVALID_BYTE) == INVALID_BYTE) { /* wchar >= INVALID_BYTE */ 68 /* invalid char 69 * exmaple) 70 * if wchar = f0000090(=90|INVALID_BYTE), then *s = ffffff90 */ 71 *s = (char)wchar; 72 len = 1; 73#else 74 if (wchar & (CHAR & INVALID_BYTE)) { 75 s[0] = wchar & (CHAR & 0xFF); 76 len = 1; 77#endif 78 } else { 79#if INVALID_BYTE != 0 80 wchar &= MAX_UTF32; 81#else 82 wchar &= CHAR; 83#endif 84#ifdef UTF16_STRINGS 85 if (wchar >= 0x10000) { 86 /* UTF-16 systems can't handle these values directly in calls to 87 wctomb. Convert value to UTF-16 surrogate and call wcstombs to 88 convert the "string" to the correct multibyte representation, 89 if any. */ 90 wchar_t ws[3]; 91 wchar -= 0x10000; 92 ws[0] = 0xd800 | (wchar >> 10); 93 ws[1] = 0xdc00 | (wchar & 0x3ff); 94 ws[2] = 0; 95 /* The return value of wcstombs excludes the trailing 0, so len is 96 the correct number of multibytes for the Unicode char. */ 97 len = wcstombs (s, ws, MB_CUR_MAX + 1); 98 } else 99#endif 100 len = wctomb(s, (wchar_t) wchar); 101 if (len == -1) 102 s[0] = wchar; 103 if (len <= 0) 104 len = 1; 105 } 106 return len; 107} 108 109int 110rt_mbtowc(Char *pwc, const char *s, size_t n) 111{ 112 int ret; 113 char back[MB_LEN_MAX]; 114 wchar_t tmp; 115#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC) 116# if defined(AUTOSET_KANJI) 117 static mbstate_t mb_zero, mb; 118 /* 119 * Workaround the Shift-JIS endcoding that translates unshifted 7 bit ASCII! 120 */ 121 if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') && 122 !memcmp(&mb, &mb_zero, sizeof(mb))) 123 { 124 *pwc = *s; 125 return 1; 126 } 127# else 128 mbstate_t mb; 129# endif 130 131 memset (&mb, 0, sizeof mb); 132 ret = mbrtowc(&tmp, s, n, &mb); 133#else 134 ret = mbtowc(&tmp, s, n); 135#endif 136 if (ret > 0) { 137 *pwc = tmp; 138#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC) 139 if (tmp >= 0xd800 && tmp <= 0xdbff) { 140 /* UTF-16 surrogate pair. Fetch second half and compute 141 UTF-32 value. Dispense with the inverse test in this case. */ 142 size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb); 143 if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2) 144 ret = -1; 145 else { 146 *pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000; 147 ret += n2; 148 } 149 } else 150#endif 151 if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0) 152 ret = -1; 153 154 } else if (ret == -2) 155 ret = -1; 156 else if (ret == 0) 157 *pwc = '\0'; 158 159 return ret; 160} 161#endif 162 163#ifdef SHORT_STRINGS 164Char ** 165blk2short(char **src) 166{ 167 size_t n; 168 Char **sdst, **dst; 169 170 /* 171 * Count 172 */ 173 for (n = 0; src[n] != NULL; n++) 174 continue; 175 sdst = dst = xmalloc((n + 1) * sizeof(Char *)); 176 177 for (; *src != NULL; src++) 178 *dst++ = SAVE(*src); 179 *dst = NULL; 180 return (sdst); 181} 182 183char ** 184short2blk(Char **src) 185{ 186 size_t n; 187 char **sdst, **dst; 188 189 /* 190 * Count 191 */ 192 for (n = 0; src[n] != NULL; n++) 193 continue; 194 sdst = dst = xmalloc((n + 1) * sizeof(char *)); 195 196 for (; *src != NULL; src++) 197 *dst++ = strsave(short2str(*src)); 198 *dst = NULL; 199 return (sdst); 200} 201 202Char * 203str2short(const char *src) 204{ 205 static struct Strbuf buf; /* = Strbuf_INIT; */ 206 207 if (src == NULL) 208 return (NULL); 209 210 buf.len = 0; 211 while (*src) { 212 Char wc; 213 214 src += one_mbtowc(&wc, src, MB_LEN_MAX); 215 Strbuf_append1(&buf, wc); 216 } 217 Strbuf_terminate(&buf); 218 return buf.s; 219} 220 221char * 222short2str(const Char *src) 223{ 224 static char *sdst = NULL; 225 static size_t dstsize = 0; 226 char *dst, *edst; 227 228 if (src == NULL) 229 return (NULL); 230 231 if (sdst == NULL) { 232 dstsize = MALLOC_INCR; 233 sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char)); 234 } 235 dst = sdst; 236 edst = &dst[dstsize]; 237 while (*src) { 238 dst += one_wctomb(dst, *src); 239 src++; 240 if (dst >= edst) { 241 char *wdst = dst; 242 char *wedst = edst; 243 244 dstsize += MALLOC_INCR; 245 sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char)); 246 edst = &sdst[dstsize]; 247 dst = &edst[-MALLOC_INCR]; 248 while (wdst > wedst) { 249 dst++; 250 wdst--; 251 } 252 } 253 } 254 *dst = 0; 255 return (sdst); 256} 257 258#if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS) 259Char * 260s_strcpy(Char *dst, const Char *src) 261{ 262 Char *sdst; 263 264 sdst = dst; 265 while ((*dst++ = *src++) != '\0') 266 continue; 267 return (sdst); 268} 269 270Char * 271s_strncpy(Char *dst, const Char *src, size_t n) 272{ 273 Char *sdst; 274 275 if (n == 0) 276 return(dst); 277 278 sdst = dst; 279 do 280 if ((*dst++ = *src++) == '\0') { 281 while (--n != 0) 282 *dst++ = '\0'; 283 return(sdst); 284 } 285 while (--n != 0); 286 return (sdst); 287} 288 289Char * 290s_strcat(Char *dst, const Char *src) 291{ 292 Strcpy(Strend(dst), src); 293 return dst; 294} 295 296#ifdef NOTUSED 297Char * 298s_strncat(Char *dst, const Char *src, size_t n) 299{ 300 Char *sdst; 301 302 if (n == 0) 303 return (dst); 304 305 sdst = dst; 306 307 while (*dst) 308 dst++; 309 310 do 311 if ((*dst++ = *src++) == '\0') 312 return(sdst); 313 while (--n != 0) 314 continue; 315 316 *dst = '\0'; 317 return (sdst); 318} 319 320#endif 321 322Char * 323s_strchr(const Char *str, int ch) 324{ 325 do 326 if (*str == ch) 327 return ((Char *)(intptr_t)str); 328 while (*str++); 329 return (NULL); 330} 331 332Char * 333s_strrchr(const Char *str, int ch) 334{ 335 const Char *rstr; 336 337 rstr = NULL; 338 do 339 if (*str == ch) 340 rstr = str; 341 while (*str++); 342 return ((Char *)(intptr_t)rstr); 343} 344 345size_t 346s_strlen(const Char *str) 347{ 348 size_t n; 349 350 for (n = 0; *str++; n++) 351 continue; 352 return (n); 353} 354 355int 356s_strcmp(const Char *str1, const Char *str2) 357{ 358 for (; *str1 && *str1 == *str2; str1++, str2++) 359 continue; 360 /* 361 * The following case analysis is necessary so that characters which look 362 * negative collate low against normal characters but high against the 363 * end-of-string NUL. 364 */ 365 if (*str1 == '\0' && *str2 == '\0') 366 return (0); 367 else if (*str1 == '\0') 368 return (-1); 369 else if (*str2 == '\0') 370 return (1); 371 else 372 return (*str1 - *str2); 373} 374 375int 376s_strncmp(const Char *str1, const Char *str2, size_t n) 377{ 378 if (n == 0) 379 return (0); 380 do { 381 if (*str1 != *str2) { 382 /* 383 * The following case analysis is necessary so that characters 384 * which look negative collate low against normal characters 385 * but high against the end-of-string NUL. 386 */ 387 if (*str1 == '\0') 388 return (-1); 389 else if (*str2 == '\0') 390 return (1); 391 else 392 return (*str1 - *str2); 393 } 394 if (*str1 == '\0') 395 return(0); 396 str1++, str2++; 397 } while (--n != 0); 398 return(0); 399} 400#endif /* not WIDE_STRINGS */ 401 402int 403s_strcasecmp(const Char *str1, const Char *str2) 404{ 405#ifdef WIDE_STRINGS 406 wint_t l1 = 0, l2 = 0; 407 for (; *str1; str1++, str2++) 408 if (*str1 == *str2) 409 l1 = l2 = 0; 410 else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2))) 411 break; 412#else 413 unsigned char l1 = 0, l2 = 0; 414 for (; *str1; str1++, str2++) 415 if (*str1 == *str2) 416 l1 = l2 = 0; 417 else if ((l1 = tolower((unsigned char)*str1)) != 418 (l2 = tolower((unsigned char)*str2))) 419 break; 420#endif 421 /* 422 * The following case analysis is necessary so that characters which look 423 * negative collate low against normal characters but high against the 424 * end-of-string NUL. 425 */ 426 if (*str1 == '\0' && *str2 == '\0') 427 return (0); 428 else if (*str1 == '\0') 429 return (-1); 430 else if (*str2 == '\0') 431 return (1); 432 else if (l1 == l2) /* They are zero when they are equal */ 433 return (*str1 - *str2); 434 else 435 return (l1 - l2); 436} 437 438Char * 439s_strnsave(const Char *s, size_t len) 440{ 441 Char *n; 442 443 n = xmalloc((len + 1) * sizeof (*n)); 444 memcpy(n, s, len * sizeof (*n)); 445 n[len] = '\0'; 446 return n; 447} 448 449Char * 450s_strsave(const Char *s) 451{ 452 Char *n; 453 size_t size; 454 455 if (s == NULL) 456 s = STRNULL; 457 size = (Strlen(s) + 1) * sizeof(*n); 458 n = xmalloc(size); 459 memcpy(n, s, size); 460 return (n); 461} 462 463Char * 464s_strspl(const Char *cp, const Char *dp) 465{ 466 Char *res, *ep; 467 const Char *p, *q; 468 469 if (!cp) 470 cp = STRNULL; 471 if (!dp) 472 dp = STRNULL; 473 for (p = cp; *p++;) 474 continue; 475 for (q = dp; *q++;) 476 continue; 477 res = xmalloc(((p - cp) + (q - dp) - 1) * sizeof(Char)); 478 for (ep = res, q = cp; (*ep++ = *q++) != '\0';) 479 continue; 480 for (ep--, q = dp; (*ep++ = *q++) != '\0';) 481 continue; 482 return (res); 483} 484 485Char * 486s_strend(const Char *cp) 487{ 488 if (!cp) 489 return ((Char *)(intptr_t) cp); 490 while (*cp) 491 cp++; 492 return ((Char *)(intptr_t) cp); 493} 494 495Char * 496s_strstr(const Char *s, const Char *t) 497{ 498 do { 499 const Char *ss = s; 500 const Char *tt = t; 501 502 do 503 if (*tt == '\0') 504 return ((Char *)(intptr_t) s); 505 while (*ss++ == *tt++); 506 } while (*s++ != '\0'); 507 return (NULL); 508} 509 510#else /* !SHORT_STRINGS */ 511char * 512caching_strip(const char *s) 513{ 514 static char *buf = NULL; 515 static size_t buf_size = 0; 516 size_t size; 517 518 if (s == NULL) 519 return NULL; 520 size = strlen(s) + 1; 521 if (buf_size < size) { 522 buf = xrealloc(buf, size); 523 buf_size = size; 524 } 525 memcpy(buf, s, size); 526 strip(buf); 527 return buf; 528} 529#endif 530 531char * 532short2qstr(const Char *src) 533{ 534 static char *sdst = NULL; 535 static size_t dstsize = 0; 536 char *dst, *edst; 537 538 if (src == NULL) 539 return (NULL); 540 541 if (sdst == NULL) { 542 dstsize = MALLOC_INCR; 543 sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char)); 544 } 545 dst = sdst; 546 edst = &dst[dstsize]; 547 while (*src) { 548 if (*src & QUOTE) { 549 *dst++ = '\\'; 550 if (dst == edst) { 551 dstsize += MALLOC_INCR; 552 sdst = xrealloc(sdst, 553 (dstsize + MALLOC_SURPLUS) * sizeof(char)); 554 edst = &sdst[dstsize]; 555 dst = &edst[-MALLOC_INCR]; 556 } 557 } 558 dst += one_wctomb(dst, *src); 559 src++; 560 if (dst >= edst) { 561 ptrdiff_t i = dst - edst; 562 dstsize += MALLOC_INCR; 563 sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char)); 564 edst = &sdst[dstsize]; 565 dst = &edst[-MALLOC_INCR + i]; 566 } 567 } 568 *dst = 0; 569 return (sdst); 570} 571 572struct blk_buf * 573bb_alloc(void) 574{ 575 return xcalloc(1, sizeof(struct blk_buf)); 576} 577 578static void 579bb_store(struct blk_buf *bb, Char *str) 580{ 581 if (bb->len == bb->size) { /* Keep space for terminating NULL */ 582 if (bb->size == 0) 583 bb->size = 16; /* Arbitrary */ 584 else 585 bb->size *= 2; 586 bb->vec = xrealloc(bb->vec, bb->size * sizeof (*bb->vec)); 587 } 588 bb->vec[bb->len] = str; 589} 590 591void 592bb_append(struct blk_buf *bb, Char *str) 593{ 594 bb_store(bb, str); 595 bb->len++; 596} 597 598void 599bb_cleanup(void *xbb) 600{ 601 struct blk_buf *bb; 602 size_t i; 603 604 bb = (struct blk_buf *)xbb; 605 if (bb->vec) { 606 for (i = 0; i < bb->len; i++) 607 xfree(bb->vec[i]); 608 xfree(bb->vec); 609 } 610 bb->vec = NULL; 611 bb->len = 0; 612} 613 614void 615bb_free(void *bb) 616{ 617 bb_cleanup(bb); 618 xfree(bb); 619} 620 621Char ** 622bb_finish(struct blk_buf *bb) 623{ 624 bb_store(bb, NULL); 625 return xrealloc(bb->vec, (bb->len + 1) * sizeof (*bb->vec)); 626} 627 628#define DO_STRBUF(STRBUF, CHAR, STRLEN) \ 629 \ 630struct STRBUF * \ 631STRBUF##_alloc(void) \ 632{ \ 633 return xcalloc(1, sizeof(struct STRBUF)); \ 634} \ 635 \ 636static void \ 637STRBUF##_store1(struct STRBUF *buf, CHAR c) \ 638{ \ 639 if (buf->size == buf->len) { \ 640 if (buf->size == 0) \ 641 buf->size = 64; /* Arbitrary */ \ 642 else \ 643 buf->size *= 2; \ 644 buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \ 645 } \ 646 assert(buf->s); \ 647 buf->s[buf->len] = c; \ 648} \ 649 \ 650/* Like strbuf_append1(buf, '\0'), but don't advance len */ \ 651void \ 652STRBUF##_terminate(struct STRBUF *buf) \ 653{ \ 654 STRBUF##_store1(buf, '\0'); \ 655} \ 656 \ 657void \ 658STRBUF##_append1(struct STRBUF *buf, CHAR c) \ 659{ \ 660 STRBUF##_store1(buf, c); \ 661 buf->len++; \ 662} \ 663 \ 664void \ 665STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len) \ 666{ \ 667 if (buf->size < buf->len + len) { \ 668 if (buf->size == 0) \ 669 buf->size = 64; /* Arbitrary */ \ 670 while (buf->size < buf->len + len) \ 671 buf->size *= 2; \ 672 buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \ 673 } \ 674 memcpy(buf->s + buf->len, s, len * sizeof(*buf->s)); \ 675 buf->len += len; \ 676} \ 677 \ 678void \ 679STRBUF##_append(struct STRBUF *buf, const CHAR *s) \ 680{ \ 681 STRBUF##_appendn(buf, s, STRLEN(s)); \ 682} \ 683 \ 684CHAR * \ 685STRBUF##_finish(struct STRBUF *buf) \ 686{ \ 687 STRBUF##_append1(buf, 0); \ 688 return xrealloc(buf->s, buf->len * sizeof(*buf->s)); \ 689} \ 690 \ 691void \ 692STRBUF##_cleanup(void *xbuf) \ 693{ \ 694 struct STRBUF *buf; \ 695 \ 696 buf = xbuf; \ 697 xfree(buf->s); \ 698} \ 699 \ 700void \ 701STRBUF##_free(void *xbuf) \ 702{ \ 703 STRBUF##_cleanup(xbuf); \ 704 xfree(xbuf); \ 705} \ 706 \ 707const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */ 708 709DO_STRBUF(strbuf, char, strlen); 710DO_STRBUF(Strbuf, Char, Strlen); 711