1/* $Header: /p/tcsh/cvsroot/tcsh/tc.str.c,v 3.42 2012/01/10 21:34:31 christos Exp $ */ 2/* 3 * tc.str.c: Short string package 4 * This has been a lesson of how to write buggy code! 5 */ 6/*- 7 * Copyright (c) 1980, 1991 The Regents of the University of California. 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34#include "sh.h" 35 36#include <assert.h> 37#include <limits.h> 38 39RCSID("$tcsh: tc.str.c,v 3.42 2012/01/10 21:34:31 christos Exp $") 40 41#define MALLOC_INCR 128 42#ifdef WIDE_STRINGS 43#define MALLOC_SURPLUS MB_LEN_MAX /* Space for one multibyte character */ 44#else 45#define MALLOC_SURPLUS 0 46#endif 47 48#ifdef WIDE_STRINGS 49size_t 50one_mbtowc(Char *pwc, const char *s, size_t n) 51{ 52 int len; 53 54 len = rt_mbtowc(pwc, s, n); 55 if (len == -1) { 56 reset_mbtowc(); 57 *pwc = (unsigned char)*s | INVALID_BYTE; 58 } 59 if (len <= 0) 60 len = 1; 61 return len; 62} 63 64size_t 65one_wctomb(char *s, Char wchar) 66{ 67 int len; 68 69 if (wchar & INVALID_BYTE) { 70 s[0] = wchar & 0xFF; 71 len = 1; 72 } else { 73#ifdef UTF16_STRINGS 74 if (wchar >= 0x10000) { 75 /* UTF-16 systems can't handle these values directly in calls to 76 wctomb. Convert value to UTF-16 surrogate and call wcstombs to 77 convert the "string" to the correct multibyte representation, 78 if any. */ 79 wchar_t ws[3]; 80 wchar -= 0x10000; 81 ws[0] = 0xd800 | (wchar >> 10); 82 ws[1] = 0xdc00 | (wchar & 0x3ff); 83 ws[2] = 0; 84 /* The return value of wcstombs excludes the trailing 0, so len is 85 the correct number of multibytes for the Unicode char. */ 86 len = wcstombs (s, ws, MB_CUR_MAX + 1); 87 } else 88#endif 89 len = wctomb(s, (wchar_t) wchar); 90 if (len == -1) 91 s[0] = wchar; 92 if (len <= 0) 93 len = 1; 94 } 95 return len; 96} 97 98int 99rt_mbtowc(Char *pwc, const char *s, size_t n) 100{ 101 int ret; 102 char back[MB_LEN_MAX]; 103 wchar_t tmp; 104#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC) 105# if defined(AUTOSET_KANJI) 106 static mbstate_t mb_zero, mb; 107 /* 108 * Workaround the Shift-JIS endcoding that translates unshifted 7 bit ASCII! 109 */ 110 if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') && 111 !memcmp(&mb, &mb_zero, sizeof(mb))) 112 { 113 *pwc = *s; 114 return 1; 115 } 116# else 117 mbstate_t mb; 118# endif 119 120 memset (&mb, 0, sizeof mb); 121 ret = mbrtowc(&tmp, s, n, &mb); 122#else 123 ret = mbtowc(&tmp, s, n); 124#endif 125 if (ret > 0) { 126 *pwc = tmp; 127#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC) 128 if (tmp >= 0xd800 && tmp <= 0xdbff) { 129 /* UTF-16 surrogate pair. Fetch second half and compute 130 UTF-32 value. Dispense with the inverse test in this case. */ 131 size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb); 132 if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2) 133 ret = -1; 134 else { 135 *pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000; 136 ret += n2; 137 } 138 } else 139#endif 140 if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0) 141 ret = -1; 142 143 } else if (ret == -2) 144 ret = -1; 145 else if (ret == 0) 146 *pwc = '\0'; 147 148 return ret; 149} 150#endif 151 152#ifdef SHORT_STRINGS 153Char ** 154blk2short(char **src) 155{ 156 size_t n; 157 Char **sdst, **dst; 158 159 /* 160 * Count 161 */ 162 for (n = 0; src[n] != NULL; n++) 163 continue; 164 sdst = dst = xmalloc((n + 1) * sizeof(Char *)); 165 166 for (; *src != NULL; src++) 167 *dst++ = SAVE(*src); 168 *dst = NULL; 169 return (sdst); 170} 171 172char ** 173short2blk(Char **src) 174{ 175 size_t n; 176 char **sdst, **dst; 177 178 /* 179 * Count 180 */ 181 for (n = 0; src[n] != NULL; n++) 182 continue; 183 sdst = dst = xmalloc((n + 1) * sizeof(char *)); 184 185 for (; *src != NULL; src++) 186 *dst++ = strsave(short2str(*src)); 187 *dst = NULL; 188 return (sdst); 189} 190 191Char * 192str2short(const char *src) 193{ 194 static struct Strbuf buf; /* = Strbuf_INIT; */ 195 196 if (src == NULL) 197 return (NULL); 198 199 buf.len = 0; 200 while (*src) { 201 Char wc; 202 203 src += one_mbtowc(&wc, src, MB_LEN_MAX); 204 Strbuf_append1(&buf, wc); 205 } 206 Strbuf_terminate(&buf); 207 return buf.s; 208} 209 210char * 211short2str(const Char *src) 212{ 213 static char *sdst = NULL; 214 static size_t dstsize = 0; 215 char *dst, *edst; 216 217 if (src == NULL) 218 return (NULL); 219 220 if (sdst == NULL) { 221 dstsize = MALLOC_INCR; 222 sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char)); 223 } 224 dst = sdst; 225 edst = &dst[dstsize]; 226 while (*src) { 227 dst += one_wctomb(dst, *src & CHAR); 228 src++; 229 if (dst >= edst) { 230 char *wdst = dst; 231 char *wedst = edst; 232 233 dstsize += MALLOC_INCR; 234 sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char)); 235 edst = &sdst[dstsize]; 236 dst = &edst[-MALLOC_INCR]; 237 while (wdst > wedst) { 238 dst++; 239 wdst--; 240 } 241 } 242 } 243 *dst = 0; 244 return (sdst); 245} 246 247#if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS) 248Char * 249s_strcpy(Char *dst, const Char *src) 250{ 251 Char *sdst; 252 253 sdst = dst; 254 while ((*dst++ = *src++) != '\0') 255 continue; 256 return (sdst); 257} 258 259Char * 260s_strncpy(Char *dst, const Char *src, size_t n) 261{ 262 Char *sdst; 263 264 if (n == 0) 265 return(dst); 266 267 sdst = dst; 268 do 269 if ((*dst++ = *src++) == '\0') { 270 while (--n != 0) 271 *dst++ = '\0'; 272 return(sdst); 273 } 274 while (--n != 0); 275 return (sdst); 276} 277 278Char * 279s_strcat(Char *dst, const Char *src) 280{ 281 Strcpy(Strend(dst), src); 282 return dst; 283} 284 285#ifdef NOTUSED 286Char * 287s_strncat(Char *dst, const Char *src, size_t n) 288{ 289 Char *sdst; 290 291 if (n == 0) 292 return (dst); 293 294 sdst = dst; 295 296 while (*dst) 297 dst++; 298 299 do 300 if ((*dst++ = *src++) == '\0') 301 return(sdst); 302 while (--n != 0) 303 continue; 304 305 *dst = '\0'; 306 return (sdst); 307} 308 309#endif 310 311Char * 312s_strchr(const Char *str, int ch) 313{ 314 do 315 if (*str == ch) 316 return ((Char *)(intptr_t)str); 317 while (*str++); 318 return (NULL); 319} 320 321Char * 322s_strrchr(const Char *str, int ch) 323{ 324 const Char *rstr; 325 326 rstr = NULL; 327 do 328 if (*str == ch) 329 rstr = str; 330 while (*str++); 331 return ((Char *)(intptr_t)rstr); 332} 333 334size_t 335s_strlen(const Char *str) 336{ 337 size_t n; 338 339 for (n = 0; *str++; n++) 340 continue; 341 return (n); 342} 343 344int 345s_strcmp(const Char *str1, const Char *str2) 346{ 347 for (; *str1 && *str1 == *str2; str1++, str2++) 348 continue; 349 /* 350 * The following case analysis is necessary so that characters which look 351 * negative collate low against normal characters but high against the 352 * end-of-string NUL. 353 */ 354 if (*str1 == '\0' && *str2 == '\0') 355 return (0); 356 else if (*str1 == '\0') 357 return (-1); 358 else if (*str2 == '\0') 359 return (1); 360 else 361 return (*str1 - *str2); 362} 363 364int 365s_strncmp(const Char *str1, const Char *str2, size_t n) 366{ 367 if (n == 0) 368 return (0); 369 do { 370 if (*str1 != *str2) { 371 /* 372 * The following case analysis is necessary so that characters 373 * which look negative collate low against normal characters 374 * but high against the end-of-string NUL. 375 */ 376 if (*str1 == '\0') 377 return (-1); 378 else if (*str2 == '\0') 379 return (1); 380 else 381 return (*str1 - *str2); 382 } 383 if (*str1 == '\0') 384 return(0); 385 str1++, str2++; 386 } while (--n != 0); 387 return(0); 388} 389#endif /* not WIDE_STRINGS */ 390 391int 392s_strcasecmp(const Char *str1, const Char *str2) 393{ 394#ifdef WIDE_STRINGS 395 wint_t l1 = 0, l2 = 0; 396 for (; *str1; str1++, str2++) 397 if (*str1 == *str2) 398 l1 = l2 = 0; 399 else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2))) 400 break; 401#else 402 unsigned char l1 = 0, l2 = 0; 403 for (; *str1; str1++, str2++) 404 if (*str1 == *str2) 405 l1 = l2 = 0; 406 else if ((l1 = tolower((unsigned char)*str1)) != 407 (l2 = tolower((unsigned char)*str2))) 408 break; 409#endif 410 /* 411 * The following case analysis is necessary so that characters which look 412 * negative collate low against normal characters but high against the 413 * end-of-string NUL. 414 */ 415 if (*str1 == '\0' && *str2 == '\0') 416 return (0); 417 else if (*str1 == '\0') 418 return (-1); 419 else if (*str2 == '\0') 420 return (1); 421 else if (l1 == l2) /* They are zero when they are equal */ 422 return (*str1 - *str2); 423 else 424 return (l1 - l2); 425} 426 427Char * 428s_strnsave(const Char *s, size_t len) 429{ 430 Char *n; 431 432 n = xmalloc((len + 1) * sizeof (*n)); 433 memcpy(n, s, len * sizeof (*n)); 434 n[len] = '\0'; 435 return n; 436} 437 438Char * 439s_strsave(const Char *s) 440{ 441 Char *n; 442 size_t size; 443 444 if (s == NULL) 445 s = STRNULL; 446 size = (Strlen(s) + 1) * sizeof(*n); 447 n = xmalloc(size); 448 memcpy(n, s, size); 449 return (n); 450} 451 452Char * 453s_strspl(const Char *cp, const Char *dp) 454{ 455 Char *res, *ep; 456 const Char *p, *q; 457 458 if (!cp) 459 cp = STRNULL; 460 if (!dp) 461 dp = STRNULL; 462 for (p = cp; *p++;) 463 continue; 464 for (q = dp; *q++;) 465 continue; 466 res = xmalloc(((p - cp) + (q - dp) - 1) * sizeof(Char)); 467 for (ep = res, q = cp; (*ep++ = *q++) != '\0';) 468 continue; 469 for (ep--, q = dp; (*ep++ = *q++) != '\0';) 470 continue; 471 return (res); 472} 473 474Char * 475s_strend(const Char *cp) 476{ 477 if (!cp) 478 return ((Char *)(intptr_t) cp); 479 while (*cp) 480 cp++; 481 return ((Char *)(intptr_t) cp); 482} 483 484Char * 485s_strstr(const Char *s, const Char *t) 486{ 487 do { 488 const Char *ss = s; 489 const Char *tt = t; 490 491 do 492 if (*tt == '\0') 493 return ((Char *)(intptr_t) s); 494 while (*ss++ == *tt++); 495 } while (*s++ != '\0'); 496 return (NULL); 497} 498 499#else /* !SHORT_STRINGS */ 500char * 501caching_strip(const char *s) 502{ 503 static char *buf = NULL; 504 static size_t buf_size = 0; 505 size_t size; 506 507 if (s == NULL) 508 return NULL; 509 size = strlen(s) + 1; 510 if (buf_size < size) { 511 buf = xrealloc(buf, size); 512 buf_size = size; 513 } 514 memcpy(buf, s, size); 515 strip(buf); 516 return buf; 517} 518#endif 519 520char * 521short2qstr(const Char *src) 522{ 523 static char *sdst = NULL; 524 static size_t dstsize = 0; 525 char *dst, *edst; 526 527 if (src == NULL) 528 return (NULL); 529 530 if (sdst == NULL) { 531 dstsize = MALLOC_INCR; 532 sdst = xmalloc((dstsize + MALLOC_SURPLUS) * sizeof(char)); 533 } 534 dst = sdst; 535 edst = &dst[dstsize]; 536 while (*src) { 537 if (*src & QUOTE) { 538 *dst++ = '\\'; 539 if (dst == edst) { 540 dstsize += MALLOC_INCR; 541 sdst = xrealloc(sdst, 542 (dstsize + MALLOC_SURPLUS) * sizeof(char)); 543 edst = &sdst[dstsize]; 544 dst = &edst[-MALLOC_INCR]; 545 } 546 } 547 dst += one_wctomb(dst, *src & CHAR); 548 src++; 549 if (dst >= edst) { 550 ptrdiff_t i = dst - edst; 551 dstsize += MALLOC_INCR; 552 sdst = xrealloc(sdst, (dstsize + MALLOC_SURPLUS) * sizeof(char)); 553 edst = &sdst[dstsize]; 554 dst = &edst[-MALLOC_INCR + i]; 555 } 556 } 557 *dst = 0; 558 return (sdst); 559} 560 561struct blk_buf * 562bb_alloc() 563{ 564 return xcalloc(1, sizeof(struct blk_buf)); 565} 566 567static void 568bb_store(struct blk_buf *bb, Char *str) 569{ 570 if (bb->len == bb->size) { /* Keep space for terminating NULL */ 571 if (bb->size == 0) 572 bb->size = 16; /* Arbitrary */ 573 else 574 bb->size *= 2; 575 bb->vec = xrealloc(bb->vec, bb->size * sizeof (*bb->vec)); 576 } 577 bb->vec[bb->len] = str; 578} 579 580void 581bb_append(struct blk_buf *bb, Char *str) 582{ 583 bb_store(bb, str); 584 bb->len++; 585} 586 587void 588bb_cleanup(void *xbb) 589{ 590 struct blk_buf *bb; 591 size_t i; 592 593 bb = xbb; 594 for (i = 0; i < bb->len; i++) 595 xfree(bb->vec[i]); 596 xfree(bb->vec); 597} 598 599void 600bb_free(void *bb) 601{ 602 bb_cleanup(bb); 603 xfree(bb); 604} 605 606Char ** 607bb_finish(struct blk_buf *bb) 608{ 609 bb_store(bb, NULL); 610 return xrealloc(bb->vec, (bb->len + 1) * sizeof (*bb->vec)); 611} 612 613#define DO_STRBUF(STRBUF, CHAR, STRLEN) \ 614 \ 615struct STRBUF * \ 616STRBUF##_alloc(void) \ 617{ \ 618 return xcalloc(1, sizeof(struct STRBUF)); \ 619} \ 620 \ 621static void \ 622STRBUF##_store1(struct STRBUF *buf, CHAR c) \ 623{ \ 624 if (buf->size == buf->len) { \ 625 if (buf->size == 0) \ 626 buf->size = 64; /* Arbitrary */ \ 627 else \ 628 buf->size *= 2; \ 629 buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \ 630 } \ 631 assert(buf->s); \ 632 buf->s[buf->len] = c; \ 633} \ 634 \ 635/* Like strbuf_append1(buf, '\0'), but don't advance len */ \ 636void \ 637STRBUF##_terminate(struct STRBUF *buf) \ 638{ \ 639 STRBUF##_store1(buf, '\0'); \ 640} \ 641 \ 642void \ 643STRBUF##_append1(struct STRBUF *buf, CHAR c) \ 644{ \ 645 STRBUF##_store1(buf, c); \ 646 buf->len++; \ 647} \ 648 \ 649void \ 650STRBUF##_appendn(struct STRBUF *buf, const CHAR *s, size_t len) \ 651{ \ 652 if (buf->size < buf->len + len) { \ 653 if (buf->size == 0) \ 654 buf->size = 64; /* Arbitrary */ \ 655 while (buf->size < buf->len + len) \ 656 buf->size *= 2; \ 657 buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \ 658 } \ 659 memcpy(buf->s + buf->len, s, len * sizeof(*buf->s)); \ 660 buf->len += len; \ 661} \ 662 \ 663void \ 664STRBUF##_append(struct STRBUF *buf, const CHAR *s) \ 665{ \ 666 STRBUF##_appendn(buf, s, STRLEN(s)); \ 667} \ 668 \ 669CHAR * \ 670STRBUF##_finish(struct STRBUF *buf) \ 671{ \ 672 STRBUF##_append1(buf, 0); \ 673 return xrealloc(buf->s, buf->len * sizeof(*buf->s)); \ 674} \ 675 \ 676void \ 677STRBUF##_cleanup(void *xbuf) \ 678{ \ 679 struct STRBUF *buf; \ 680 \ 681 buf = xbuf; \ 682 xfree(buf->s); \ 683} \ 684 \ 685void \ 686STRBUF##_free(void *xbuf) \ 687{ \ 688 STRBUF##_cleanup(xbuf); \ 689 xfree(xbuf); \ 690} \ 691 \ 692const struct STRBUF STRBUF##_init /* = STRBUF##_INIT; */ 693 694DO_STRBUF(strbuf, char, strlen); 695DO_STRBUF(Strbuf, Char, Strlen); 696