1/* 2 * tclStringObj.c -- 3 * 4 * This file contains procedures that implement string operations on Tcl 5 * objects. Some string operations work with UTF strings and others 6 * require Unicode format. Functions that require knowledge of the width 7 * of each character, such as indexing, operate on Unicode data. 8 * 9 * A Unicode string is an internationalized string. Conceptually, a 10 * Unicode string is an array of 16-bit quantities organized as a sequence 11 * of properly formed UTF-8 characters. There is a one-to-one map between 12 * Unicode and UTF characters. Because Unicode characters have a fixed 13 * width, operations such as indexing operate on Unicode data. The String 14 * object is optimized for the case where each UTF char in a string is 15 * only one byte. In this case, we store the value of numChars, but we 16 * don't store the Unicode data (unless Tcl_GetUnicode is explicitly 17 * called). 18 * 19 * The String object type stores one or both formats. The default 20 * behavior is to store UTF. Once Unicode is calculated by a function, it 21 * is stored in the internal rep for future access (without an additional 22 * O(n) cost). 23 * 24 * To allow many appends to be done to an object without constantly 25 * reallocating the space for the string or Unicode representation, we 26 * allocate double the space for the string or Unicode and use the 27 * internal representation to keep track of how much space is used 28 * vs. allocated. 29 * 30 * Copyright (c) 1995-1997 Sun Microsystems, Inc. 31 * Copyright (c) 1999 by Scriptics Corporation. 32 * 33 * See the file "license.terms" for information on usage and redistribution 34 * of this file, and for a DISCLAIMER OF ALL WARRANTIES. 35 * 36 * RCS: @(#) $Id: tclStringObj.c,v 1.32.2.2 2006/09/24 21:15:11 msofer Exp $ */ 37 38#include "tclInt.h" 39 40/* 41 * Prototypes for procedures defined later in this file: 42 */ 43 44static void AppendUnicodeToUnicodeRep _ANSI_ARGS_(( 45 Tcl_Obj *objPtr, CONST Tcl_UniChar *unicode, 46 int appendNumChars)); 47static void AppendUnicodeToUtfRep _ANSI_ARGS_(( 48 Tcl_Obj *objPtr, CONST Tcl_UniChar *unicode, 49 int numChars)); 50static void AppendUtfToUnicodeRep _ANSI_ARGS_((Tcl_Obj *objPtr, 51 CONST char *bytes, int numBytes)); 52static void AppendUtfToUtfRep _ANSI_ARGS_((Tcl_Obj *objPtr, 53 CONST char *bytes, int numBytes)); 54static void DupStringInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr, 55 Tcl_Obj *copyPtr)); 56static void FillUnicodeRep _ANSI_ARGS_((Tcl_Obj *objPtr)); 57static void FreeStringInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr)); 58static void GrowUnicodeBuffer _ANSI_ARGS_((Tcl_Obj *objPtr, 59 int needed)); 60static int SetStringFromAny _ANSI_ARGS_((Tcl_Interp *interp, 61 Tcl_Obj *objPtr)); 62static void SetUnicodeObj(Tcl_Obj *objPtr, 63 CONST Tcl_UniChar *unicode, int numChars); 64static int UnicodeLength _ANSI_ARGS_((CONST Tcl_UniChar *unicode)); 65static void UpdateStringOfString _ANSI_ARGS_((Tcl_Obj *objPtr)); 66 67/* 68 * The structure below defines the string Tcl object type by means of 69 * procedures that can be invoked by generic object code. 70 */ 71 72Tcl_ObjType tclStringType = { 73 "string", /* name */ 74 FreeStringInternalRep, /* freeIntRepPro */ 75 DupStringInternalRep, /* dupIntRepProc */ 76 UpdateStringOfString, /* updateStringProc */ 77 SetStringFromAny /* setFromAnyProc */ 78}; 79 80/* 81 * The following structure is the internal rep for a String object. 82 * It keeps track of how much memory has been used and how much has been 83 * allocated for the Unicode and UTF string to enable growing and 84 * shrinking of the UTF and Unicode reps of the String object with fewer 85 * mallocs. To optimize string length and indexing operations, this 86 * structure also stores the number of characters (same of UTF and Unicode!) 87 * once that value has been computed. 88 */ 89 90typedef struct String { 91 int numChars; /* The number of chars in the string. 92 * -1 means this value has not been 93 * calculated. >= 0 means that there is a 94 * valid Unicode rep, or that the number 95 * of UTF bytes == the number of chars. */ 96 size_t allocated; /* The amount of space actually allocated 97 * for the UTF string (minus 1 byte for 98 * the termination char). */ 99 size_t uallocated; /* The amount of space actually allocated 100 * for the Unicode string (minus 2 bytes for 101 * the termination char). */ 102 int hasUnicode; /* Boolean determining whether the string 103 * has a Unicode representation. */ 104 Tcl_UniChar unicode[2]; /* The array of Unicode chars. The actual 105 * size of this field depends on the 106 * 'uallocated' field above. */ 107} String; 108 109#define STRING_MAXCHARS \ 110 (1 + (int)(((size_t)UINT_MAX - sizeof(String))/sizeof(Tcl_UniChar))) 111#define STRING_UALLOC(numChars) \ 112 ((numChars) * sizeof(Tcl_UniChar)) 113#define STRING_SIZE(ualloc) \ 114 ((unsigned) ((ualloc) \ 115 ? (sizeof(String) - sizeof(Tcl_UniChar) + (ualloc)) \ 116 : sizeof(String))) 117#define stringCheckLimits(numChars) \ 118 if ((numChars) < 0 || (numChars) > STRING_MAXCHARS) { \ 119 Tcl_Panic("max length for a Tcl unicode value (%d chars) exceeded", \ 120 STRING_MAXCHARS); \ 121 } 122#define stringRealloc(ptr, numChars) \ 123 (String *) ckrealloc((char *) ptr, \ 124 (unsigned) STRING_SIZE(STRING_UALLOC(numChars)) ) 125#define stringAttemptRealloc(ptr, numChars) \ 126 (String *) attemptckrealloc((char *) ptr, \ 127 (unsigned) STRING_SIZE(STRING_UALLOC(numChars)) ) 128#define GET_STRING(objPtr) \ 129 ((String *) (objPtr)->internalRep.otherValuePtr) 130#define SET_STRING(objPtr, stringPtr) \ 131 (objPtr)->internalRep.otherValuePtr = (VOID *) (stringPtr) 132 133/* 134 * TCL STRING GROWTH ALGORITHM 135 * 136 * When growing strings (during an append, for example), the following growth 137 * algorithm is used: 138 * 139 * Attempt to allocate 2 * (originalLength + appendLength) 140 * On failure: 141 * attempt to allocate originalLength + 2*appendLength + 142 * TCL_GROWTH_MIN_ALLOC 143 * 144 * This algorithm allows very good performance, as it rapidly increases the 145 * memory allocated for a given string, which minimizes the number of 146 * reallocations that must be performed. However, using only the doubling 147 * algorithm can lead to a significant waste of memory. In particular, it 148 * may fail even when there is sufficient memory available to complete the 149 * append request (but there is not 2 * totalLength memory available). So when 150 * the doubling fails (because there is not enough memory available), the 151 * algorithm requests a smaller amount of memory, which is still enough to 152 * cover the request, but which hopefully will be less than the total available 153 * memory. 154 * 155 * The addition of TCL_GROWTH_MIN_ALLOC allows for efficient handling 156 * of very small appends. Without this extra slush factor, a sequence 157 * of several small appends would cause several memory allocations. 158 * As long as TCL_GROWTH_MIN_ALLOC is a reasonable size, we can 159 * avoid that behavior. 160 * 161 * The growth algorithm can be tuned by adjusting the following parameters: 162 * 163 * TCL_GROWTH_MIN_ALLOC Additional space, in bytes, to allocate when 164 * the double allocation has failed. 165 * Default is 1024 (1 kilobyte). 166 */ 167#ifndef TCL_GROWTH_MIN_ALLOC 168#define TCL_GROWTH_MIN_ALLOC 1024 169#endif 170 171static void 172GrowUnicodeBuffer( 173 Tcl_Obj *objPtr, 174 int needed) 175{ 176 /* Pre-conditions: 177 * objPtr->typePtr == &tclStringType 178 * STRING_UALLOC(needed) > stringPtr->uallocated 179 * needed < STRING_MAXCHARS 180 */ 181 String *ptr = NULL, *stringPtr = GET_STRING(objPtr); 182 int attempt; 183 184 if (stringPtr->uallocated > 0) { 185 /* Subsequent appends - apply the growth algorithm. */ 186 attempt = 2 * needed; 187 if (attempt >= 0 && attempt <= STRING_MAXCHARS) { 188 ptr = stringAttemptRealloc(stringPtr, attempt); 189 } 190 if (ptr == NULL) { 191 /* 192 * Take care computing the amount of modest growth to avoid 193 * overflow into invalid argument values for attempt. 194 */ 195 unsigned int limit = STRING_MAXCHARS - needed; 196 unsigned int extra = needed - stringPtr->numChars 197 + TCL_GROWTH_MIN_ALLOC/sizeof(Tcl_UniChar); 198 int growth = (int) ((extra > limit) ? limit : extra); 199 attempt = needed + growth; 200 ptr = stringAttemptRealloc(stringPtr, attempt); 201 } 202 } 203 if (ptr == NULL) { 204 /* First allocation - just big enough; or last chance fallback. */ 205 attempt = needed; 206 ptr = stringRealloc(stringPtr, attempt); 207 } 208 stringPtr = ptr; 209 stringPtr->uallocated = STRING_UALLOC(attempt); 210 SET_STRING(objPtr, stringPtr); 211} 212 213 214/* 215 *---------------------------------------------------------------------- 216 * 217 * Tcl_NewStringObj -- 218 * 219 * This procedure is normally called when not debugging: i.e., when 220 * TCL_MEM_DEBUG is not defined. It creates a new string object and 221 * initializes it from the byte pointer and length arguments. 222 * 223 * When TCL_MEM_DEBUG is defined, this procedure just returns the 224 * result of calling the debugging version Tcl_DbNewStringObj. 225 * 226 * Results: 227 * A newly created string object is returned that has ref count zero. 228 * 229 * Side effects: 230 * The new object's internal string representation will be set to a 231 * copy of the length bytes starting at "bytes". If "length" is 232 * negative, use bytes up to the first NULL byte; i.e., assume "bytes" 233 * points to a C-style NULL-terminated string. The object's type is set 234 * to NULL. An extra NULL is added to the end of the new object's byte 235 * array. 236 * 237 *---------------------------------------------------------------------- 238 */ 239 240#ifdef TCL_MEM_DEBUG 241#undef Tcl_NewStringObj 242 243Tcl_Obj * 244Tcl_NewStringObj(bytes, length) 245 CONST char *bytes; /* Points to the first of the length bytes 246 * used to initialize the new object. */ 247 int length; /* The number of bytes to copy from "bytes" 248 * when initializing the new object. If 249 * negative, use bytes up to the first 250 * NULL byte. */ 251{ 252 return Tcl_DbNewStringObj(bytes, length, "unknown", 0); 253} 254 255#else /* if not TCL_MEM_DEBUG */ 256 257Tcl_Obj * 258Tcl_NewStringObj(bytes, length) 259 CONST char *bytes; /* Points to the first of the length bytes 260 * used to initialize the new object. */ 261 int length; /* The number of bytes to copy from "bytes" 262 * when initializing the new object. If 263 * negative, use bytes up to the first 264 * NULL byte. */ 265{ 266 register Tcl_Obj *objPtr; 267 268 if (length < 0) { 269 length = (bytes? strlen(bytes) : 0); 270 } 271 TclNewObj(objPtr); 272 TclInitStringRep(objPtr, bytes, length); 273 return objPtr; 274} 275#endif /* TCL_MEM_DEBUG */ 276 277/* 278 *---------------------------------------------------------------------- 279 * 280 * Tcl_DbNewStringObj -- 281 * 282 * This procedure is normally called when debugging: i.e., when 283 * TCL_MEM_DEBUG is defined. It creates new string objects. It is the 284 * same as the Tcl_NewStringObj procedure above except that it calls 285 * Tcl_DbCkalloc directly with the file name and line number from its 286 * caller. This simplifies debugging since then the [memory active] 287 * command will report the correct file name and line number when 288 * reporting objects that haven't been freed. 289 * 290 * When TCL_MEM_DEBUG is not defined, this procedure just returns the 291 * result of calling Tcl_NewStringObj. 292 * 293 * Results: 294 * A newly created string object is returned that has ref count zero. 295 * 296 * Side effects: 297 * The new object's internal string representation will be set to a 298 * copy of the length bytes starting at "bytes". If "length" is 299 * negative, use bytes up to the first NULL byte; i.e., assume "bytes" 300 * points to a C-style NULL-terminated string. The object's type is set 301 * to NULL. An extra NULL is added to the end of the new object's byte 302 * array. 303 * 304 *---------------------------------------------------------------------- 305 */ 306 307#ifdef TCL_MEM_DEBUG 308 309Tcl_Obj * 310Tcl_DbNewStringObj(bytes, length, file, line) 311 CONST char *bytes; /* Points to the first of the length bytes 312 * used to initialize the new object. */ 313 int length; /* The number of bytes to copy from "bytes" 314 * when initializing the new object. If 315 * negative, use bytes up to the first 316 * NULL byte. */ 317 CONST char *file; /* The name of the source file calling this 318 * procedure; used for debugging. */ 319 int line; /* Line number in the source file; used 320 * for debugging. */ 321{ 322 register Tcl_Obj *objPtr; 323 324 if (length < 0) { 325 length = (bytes? strlen(bytes) : 0); 326 } 327 TclDbNewObj(objPtr, file, line); 328 TclInitStringRep(objPtr, bytes, length); 329 return objPtr; 330} 331 332#else /* if not TCL_MEM_DEBUG */ 333 334Tcl_Obj * 335Tcl_DbNewStringObj(bytes, length, file, line) 336 CONST char *bytes; /* Points to the first of the length bytes 337 * used to initialize the new object. */ 338 register int length; /* The number of bytes to copy from "bytes" 339 * when initializing the new object. If 340 * negative, use bytes up to the first 341 * NULL byte. */ 342 CONST char *file; /* The name of the source file calling this 343 * procedure; used for debugging. */ 344 int line; /* Line number in the source file; used 345 * for debugging. */ 346{ 347 return Tcl_NewStringObj(bytes, length); 348} 349#endif /* TCL_MEM_DEBUG */ 350 351/* 352 *--------------------------------------------------------------------------- 353 * 354 * Tcl_NewUnicodeObj -- 355 * 356 * This procedure is creates a new String object and initializes 357 * it from the given Unicode String. If the Utf String is the same size 358 * as the Unicode string, don't duplicate the data. 359 * 360 * Results: 361 * The newly created object is returned. This object will have no 362 * initial string representation. The returned object has a ref count 363 * of 0. 364 * 365 * Side effects: 366 * Memory allocated for new object and copy of Unicode argument. 367 * 368 *--------------------------------------------------------------------------- 369 */ 370 371Tcl_Obj * 372Tcl_NewUnicodeObj(unicode, numChars) 373 CONST Tcl_UniChar *unicode; /* The unicode string used to initialize 374 * the new object. */ 375 int numChars; /* Number of characters in the unicode 376 * string. */ 377{ 378 Tcl_Obj *objPtr; 379 380 TclNewObj(objPtr); 381 SetUnicodeObj(objPtr, unicode, numChars); 382 return objPtr; 383} 384 385/* 386 *---------------------------------------------------------------------- 387 * 388 * Tcl_GetCharLength -- 389 * 390 * Get the length of the Unicode string from the Tcl object. 391 * 392 * Results: 393 * Pointer to unicode string representing the unicode object. 394 * 395 * Side effects: 396 * Frees old internal rep. Allocates memory for new "String" 397 * internal rep. 398 * 399 *---------------------------------------------------------------------- 400 */ 401 402int 403Tcl_GetCharLength(objPtr) 404 Tcl_Obj *objPtr; /* The String object to get the num chars of. */ 405{ 406 String *stringPtr; 407 408 SetStringFromAny(NULL, objPtr); 409 stringPtr = GET_STRING(objPtr); 410 411 /* 412 * If numChars is unknown, then calculate the number of characaters 413 * while populating the Unicode string. 414 */ 415 416 if (stringPtr->numChars == -1) { 417 register int i = objPtr->length; 418 register unsigned char *str = (unsigned char *) objPtr->bytes; 419 420 /* 421 * This is a speed sensitive function, so run specially over the 422 * string to count continuous ascii characters before resorting 423 * to the Tcl_NumUtfChars call. This is a long form of: 424 stringPtr->numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length); 425 */ 426 427 while (i && (*str < 0xC0)) { i--; str++; } 428 stringPtr->numChars = objPtr->length - i; 429 if (i) { 430 stringPtr->numChars += Tcl_NumUtfChars(objPtr->bytes 431 + (objPtr->length - i), i); 432 } 433 434 if (stringPtr->numChars == objPtr->length) { 435 436 /* 437 * Since we've just calculated the number of chars, and all 438 * UTF chars are 1-byte long, we don't need to store the 439 * unicode string. 440 */ 441 442 stringPtr->hasUnicode = 0; 443 444 } else { 445 446 /* 447 * Since we've just calucalated the number of chars, and not 448 * all UTF chars are 1-byte long, go ahead and populate the 449 * unicode string. 450 */ 451 452 FillUnicodeRep(objPtr); 453 454 /* 455 * We need to fetch the pointer again because we have just 456 * reallocated the structure to make room for the Unicode data. 457 */ 458 459 stringPtr = GET_STRING(objPtr); 460 } 461 } 462 return stringPtr->numChars; 463} 464 465/* 466 *---------------------------------------------------------------------- 467 * 468 * Tcl_GetUniChar -- 469 * 470 * Get the index'th Unicode character from the String object. The 471 * index is assumed to be in the appropriate range. 472 * 473 * Results: 474 * Returns the index'th Unicode character in the Object. 475 * 476 * Side effects: 477 * Fills unichar with the index'th Unicode character. 478 * 479 *---------------------------------------------------------------------- 480 */ 481 482Tcl_UniChar 483Tcl_GetUniChar(objPtr, index) 484 Tcl_Obj *objPtr; /* The object to get the Unicode charater from. */ 485 int index; /* Get the index'th Unicode character. */ 486{ 487 Tcl_UniChar unichar; 488 String *stringPtr; 489 490 SetStringFromAny(NULL, objPtr); 491 stringPtr = GET_STRING(objPtr); 492 493 if (stringPtr->numChars == -1) { 494 495 /* 496 * We haven't yet calculated the length, so we don't have the 497 * Unicode str. We need to know the number of chars before we 498 * can do indexing. 499 */ 500 501 Tcl_GetCharLength(objPtr); 502 503 /* 504 * We need to fetch the pointer again because we may have just 505 * reallocated the structure. 506 */ 507 508 stringPtr = GET_STRING(objPtr); 509 } 510 if (stringPtr->hasUnicode == 0) { 511 512 /* 513 * All of the characters in the Utf string are 1 byte chars, 514 * so we don't store the unicode char. We get the Utf string 515 * and convert the index'th byte to a Unicode character. 516 */ 517 518 unichar = (Tcl_UniChar) objPtr->bytes[index]; 519 } else { 520 unichar = stringPtr->unicode[index]; 521 } 522 return unichar; 523} 524 525/* 526 *---------------------------------------------------------------------- 527 * 528 * Tcl_GetUnicode -- 529 * 530 * Get the Unicode form of the String object. If 531 * the object is not already a String object, it will be converted 532 * to one. If the String object does not have a Unicode rep, then 533 * one is create from the UTF string format. 534 * 535 * Results: 536 * Returns a pointer to the object's internal Unicode string. 537 * 538 * Side effects: 539 * Converts the object to have the String internal rep. 540 * 541 *---------------------------------------------------------------------- 542 */ 543 544Tcl_UniChar * 545Tcl_GetUnicode(objPtr) 546 Tcl_Obj *objPtr; /* The object to find the unicode string for. */ 547{ 548 String *stringPtr; 549 550 SetStringFromAny(NULL, objPtr); 551 stringPtr = GET_STRING(objPtr); 552 553 if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) { 554 555 /* 556 * We haven't yet calculated the length, or all of the characters 557 * in the Utf string are 1 byte chars (so we didn't store the 558 * unicode str). Since this function must return a unicode string, 559 * and one has not yet been stored, force the Unicode to be 560 * calculated and stored now. 561 */ 562 563 FillUnicodeRep(objPtr); 564 565 /* 566 * We need to fetch the pointer again because we have just 567 * reallocated the structure to make room for the Unicode data. 568 */ 569 570 stringPtr = GET_STRING(objPtr); 571 } 572 return stringPtr->unicode; 573} 574 575/* 576 *---------------------------------------------------------------------- 577 * 578 * Tcl_GetUnicodeFromObj -- 579 * 580 * Get the Unicode form of the String object with length. If 581 * the object is not already a String object, it will be converted 582 * to one. If the String object does not have a Unicode rep, then 583 * one is create from the UTF string format. 584 * 585 * Results: 586 * Returns a pointer to the object's internal Unicode string. 587 * 588 * Side effects: 589 * Converts the object to have the String internal rep. 590 * 591 *---------------------------------------------------------------------- 592 */ 593 594Tcl_UniChar * 595Tcl_GetUnicodeFromObj(objPtr, lengthPtr) 596 Tcl_Obj *objPtr; /* The object to find the unicode string for. */ 597 int *lengthPtr; /* If non-NULL, the location where the 598 * string rep's unichar length should be 599 * stored. If NULL, no length is stored. */ 600{ 601 String *stringPtr; 602 603 SetStringFromAny(NULL, objPtr); 604 stringPtr = GET_STRING(objPtr); 605 606 if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) { 607 608 /* 609 * We haven't yet calculated the length, or all of the characters 610 * in the Utf string are 1 byte chars (so we didn't store the 611 * unicode str). Since this function must return a unicode string, 612 * and one has not yet been stored, force the Unicode to be 613 * calculated and stored now. 614 */ 615 616 FillUnicodeRep(objPtr); 617 618 /* 619 * We need to fetch the pointer again because we have just 620 * reallocated the structure to make room for the Unicode data. 621 */ 622 623 stringPtr = GET_STRING(objPtr); 624 } 625 626 if (lengthPtr != NULL) { 627 *lengthPtr = stringPtr->numChars; 628 } 629 return stringPtr->unicode; 630} 631 632/* 633 *---------------------------------------------------------------------- 634 * 635 * Tcl_GetRange -- 636 * 637 * Create a Tcl Object that contains the chars between first and last 638 * of the object indicated by "objPtr". If the object is not already 639 * a String object, convert it to one. The first and last indices 640 * are assumed to be in the appropriate range. 641 * 642 * Results: 643 * Returns a new Tcl Object of the String type. 644 * 645 * Side effects: 646 * Changes the internal rep of "objPtr" to the String type. 647 * 648 *---------------------------------------------------------------------- 649 */ 650 651Tcl_Obj * 652Tcl_GetRange(objPtr, first, last) 653 Tcl_Obj *objPtr; /* The Tcl object to find the range of. */ 654 int first; /* First index of the range. */ 655 int last; /* Last index of the range. */ 656{ 657 Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ 658 String *stringPtr; 659 660 SetStringFromAny(NULL, objPtr); 661 stringPtr = GET_STRING(objPtr); 662 663 if (stringPtr->numChars == -1) { 664 665 /* 666 * We haven't yet calculated the length, so we don't have the 667 * Unicode str. We need to know the number of chars before we 668 * can do indexing. 669 */ 670 671 Tcl_GetCharLength(objPtr); 672 673 /* 674 * We need to fetch the pointer again because we may have just 675 * reallocated the structure. 676 */ 677 678 stringPtr = GET_STRING(objPtr); 679 } 680 681 if (objPtr->bytes && stringPtr->numChars == objPtr->length) { 682 char *str = Tcl_GetString(objPtr); 683 684 /* 685 * All of the characters in the Utf string are 1 byte chars, 686 * so we don't store the unicode char. Create a new string 687 * object containing the specified range of chars. 688 */ 689 690 newObjPtr = Tcl_NewStringObj(&str[first], last-first+1); 691 692 /* 693 * Since we know the new string only has 1-byte chars, we 694 * can set it's numChars field. 695 */ 696 697 SetStringFromAny(NULL, newObjPtr); 698 stringPtr = GET_STRING(newObjPtr); 699 stringPtr->numChars = last-first+1; 700 } else { 701 newObjPtr = Tcl_NewUnicodeObj(stringPtr->unicode + first, 702 last-first+1); 703 } 704 return newObjPtr; 705} 706 707/* 708 *---------------------------------------------------------------------- 709 * 710 * Tcl_SetStringObj -- 711 * 712 * Modify an object to hold a string that is a copy of the bytes 713 * indicated by the byte pointer and length arguments. 714 * 715 * Results: 716 * None. 717 * 718 * Side effects: 719 * The object's string representation will be set to a copy of 720 * the "length" bytes starting at "bytes". If "length" is negative, use 721 * bytes up to the first NULL byte; i.e., assume "bytes" points to a 722 * C-style NULL-terminated string. The object's old string and internal 723 * representations are freed and the object's type is set NULL. 724 * 725 *---------------------------------------------------------------------- 726 */ 727 728void 729Tcl_SetStringObj(objPtr, bytes, length) 730 register Tcl_Obj *objPtr; /* Object whose internal rep to init. */ 731 CONST char *bytes; /* Points to the first of the length bytes 732 * used to initialize the object. */ 733 register int length; /* The number of bytes to copy from "bytes" 734 * when initializing the object. If 735 * negative, use bytes up to the first 736 * NULL byte.*/ 737{ 738 register Tcl_ObjType *oldTypePtr = objPtr->typePtr; 739 740 if (Tcl_IsShared(objPtr)) { 741 panic("Tcl_SetStringObj called with shared object"); 742 } 743 744 /* 745 * Set the type to NULL and free any internal rep for the old type. 746 */ 747 748 if ((oldTypePtr != NULL) && (oldTypePtr->freeIntRepProc != NULL)) { 749 oldTypePtr->freeIntRepProc(objPtr); 750 } 751 objPtr->typePtr = NULL; 752 753 /* 754 * Free any old string rep, then set the string rep to a copy of 755 * the length bytes starting at "bytes". 756 */ 757 758 Tcl_InvalidateStringRep(objPtr); 759 if (length < 0) { 760 length = (bytes? strlen(bytes) : 0); 761 } 762 TclInitStringRep(objPtr, bytes, length); 763} 764 765/* 766 *---------------------------------------------------------------------- 767 * 768 * Tcl_SetObjLength -- 769 * 770 * This procedure changes the length of the string representation 771 * of an object. 772 * 773 * Results: 774 * None. 775 * 776 * Side effects: 777 * If the size of objPtr's string representation is greater than 778 * length, then it is reduced to length and a new terminating null 779 * byte is stored in the strength. If the length of the string 780 * representation is greater than length, the storage space is 781 * reallocated to the given length; a null byte is stored at the 782 * end, but other bytes past the end of the original string 783 * representation are undefined. The object's internal 784 * representation is changed to "expendable string". 785 * 786 *---------------------------------------------------------------------- 787 */ 788 789void 790Tcl_SetObjLength(objPtr, length) 791 register Tcl_Obj *objPtr; /* Pointer to object. This object must 792 * not currently be shared. */ 793 register int length; /* Number of bytes desired for string 794 * representation of object, not including 795 * terminating null byte. */ 796{ 797 String *stringPtr; 798 799 if (length < 0) { 800 /* 801 * Setting to a negative length is nonsense. This is probably the 802 * result of overflowing the signed integer range. 803 */ 804 Tcl_Panic("Tcl_SetObjLength: negative length requested: " 805 "%d (integer overflow?)", length); 806 } 807 if (Tcl_IsShared(objPtr)) { 808 panic("Tcl_SetObjLength called with shared object"); 809 } 810 SetStringFromAny(NULL, objPtr); 811 812 stringPtr = GET_STRING(objPtr); 813 814 /* Check that we're not extending a pure unicode string */ 815 816 if ((size_t)length > stringPtr->allocated && 817 (objPtr->bytes != NULL || stringPtr->hasUnicode == 0)) { 818 char *new; 819 820 /* 821 * Not enough space in current string. Reallocate the string 822 * space and free the old string. 823 */ 824 if (objPtr->bytes != tclEmptyStringRep && objPtr->bytes != NULL) { 825 new = (char *) ckrealloc((char *)objPtr->bytes, 826 (unsigned)(length+1)); 827 } else { 828 new = (char *) ckalloc((unsigned) (length+1)); 829 if (objPtr->bytes != NULL && objPtr->length != 0) { 830 memcpy((VOID *) new, (VOID *) objPtr->bytes, 831 (size_t) objPtr->length); 832 Tcl_InvalidateStringRep(objPtr); 833 } 834 } 835 objPtr->bytes = new; 836 stringPtr->allocated = length; 837 /* Invalidate the unicode data. */ 838 stringPtr->hasUnicode = 0; 839 } 840 841 if (objPtr->bytes != NULL) { 842 objPtr->length = length; 843 if (objPtr->bytes != tclEmptyStringRep) { 844 /* Ensure the string is NULL-terminated */ 845 objPtr->bytes[length] = 0; 846 } 847 /* Invalidate the unicode data. */ 848 stringPtr->numChars = -1; 849 stringPtr->hasUnicode = 0; 850 } else { 851 /* Changing length of pure unicode string */ 852 size_t uallocated = STRING_UALLOC(length); 853 854 stringCheckLimits(length); 855 if (uallocated > stringPtr->uallocated) { 856 stringPtr = stringRealloc(stringPtr, length); 857 SET_STRING(objPtr, stringPtr); 858 stringPtr->uallocated = uallocated; 859 } 860 stringPtr->numChars = length; 861 stringPtr->hasUnicode = (length > 0); 862 /* Ensure the string is NULL-terminated */ 863 stringPtr->unicode[length] = 0; 864 stringPtr->allocated = 0; 865 objPtr->length = 0; 866 } 867} 868 869/* 870 *---------------------------------------------------------------------- 871 * 872 * Tcl_AttemptSetObjLength -- 873 * 874 * This procedure changes the length of the string representation 875 * of an object. It uses the attempt* (non-panic'ing) memory allocators. 876 * 877 * Results: 878 * 1 if the requested memory was allocated, 0 otherwise. 879 * 880 * Side effects: 881 * If the size of objPtr's string representation is greater than 882 * length, then it is reduced to length and a new terminating null 883 * byte is stored in the strength. If the length of the string 884 * representation is greater than length, the storage space is 885 * reallocated to the given length; a null byte is stored at the 886 * end, but other bytes past the end of the original string 887 * representation are undefined. The object's internal 888 * representation is changed to "expendable string". 889 * 890 *---------------------------------------------------------------------- 891 */ 892 893int 894Tcl_AttemptSetObjLength(objPtr, length) 895 register Tcl_Obj *objPtr; /* Pointer to object. This object must 896 * not currently be shared. */ 897 register int length; /* Number of bytes desired for string 898 * representation of object, not including 899 * terminating null byte. */ 900{ 901 String *stringPtr; 902 903 if (length < 0) { 904 /* 905 * Setting to a negative length is nonsense. This is probably the 906 * result of overflowing the signed integer range. 907 */ 908 return 0; 909 } 910 if (Tcl_IsShared(objPtr)) { 911 panic("Tcl_AttemptSetObjLength called with shared object"); 912 } 913 SetStringFromAny(NULL, objPtr); 914 915 stringPtr = GET_STRING(objPtr); 916 917 /* Check that we're not extending a pure unicode string */ 918 919 if (length > (int) stringPtr->allocated && 920 (objPtr->bytes != NULL || stringPtr->hasUnicode == 0)) { 921 char *new; 922 923 /* 924 * Not enough space in current string. Reallocate the string 925 * space and free the old string. 926 */ 927 if (objPtr->bytes != tclEmptyStringRep && objPtr->bytes != NULL) { 928 new = (char *) attemptckrealloc((char *)objPtr->bytes, 929 (unsigned)(length+1)); 930 if (new == NULL) { 931 return 0; 932 } 933 } else { 934 new = (char *) attemptckalloc((unsigned) (length+1)); 935 if (new == NULL) { 936 return 0; 937 } 938 if (objPtr->bytes != NULL && objPtr->length != 0) { 939 memcpy((VOID *) new, (VOID *) objPtr->bytes, 940 (size_t) objPtr->length); 941 Tcl_InvalidateStringRep(objPtr); 942 } 943 } 944 objPtr->bytes = new; 945 stringPtr->allocated = length; 946 /* Invalidate the unicode data. */ 947 stringPtr->hasUnicode = 0; 948 } 949 950 if (objPtr->bytes != NULL) { 951 objPtr->length = length; 952 if (objPtr->bytes != tclEmptyStringRep) { 953 /* Ensure the string is NULL-terminated */ 954 objPtr->bytes[length] = 0; 955 } 956 /* Invalidate the unicode data. */ 957 stringPtr->numChars = -1; 958 stringPtr->hasUnicode = 0; 959 } else { 960 /* Changing length of pure unicode string */ 961 size_t uallocated = STRING_UALLOC(length); 962 if (length > STRING_MAXCHARS) { 963 return 0; 964 } 965 966 if (uallocated > stringPtr->uallocated) { 967 stringPtr = stringAttemptRealloc(stringPtr, length); 968 if (stringPtr == NULL) { 969 return 0; 970 } 971 SET_STRING(objPtr, stringPtr); 972 stringPtr->uallocated = uallocated; 973 } 974 stringPtr->numChars = length; 975 stringPtr->hasUnicode = (length > 0); 976 /* Ensure the string is NULL-terminated */ 977 stringPtr->unicode[length] = 0; 978 stringPtr->allocated = 0; 979 objPtr->length = 0; 980 } 981 return 1; 982} 983 984/* 985 *--------------------------------------------------------------------------- 986 * 987 * Tcl_SetUnicodeObj -- 988 * 989 * Modify an object to hold the Unicode string indicated by "unicode". 990 * 991 * Results: 992 * None. 993 * 994 * Side effects: 995 * Memory allocated for new "String" internal rep. 996 * 997 *--------------------------------------------------------------------------- 998 */ 999 1000void 1001Tcl_SetUnicodeObj(objPtr, unicode, numChars) 1002 Tcl_Obj *objPtr; /* The object to set the string of. */ 1003 CONST Tcl_UniChar *unicode; /* The unicode string used to initialize 1004 * the object. */ 1005 int numChars; /* Number of characters in the unicode 1006 * string. */ 1007{ 1008 Tcl_ObjType *typePtr = objPtr->typePtr; 1009 1010 if (Tcl_IsShared(objPtr)) { 1011 Tcl_Panic("%s called with shared object", "Tcl_SetUnicodeObj"); 1012 } 1013 if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) { 1014 typePtr->freeIntRepProc(objPtr); 1015 } 1016 SetUnicodeObj(objPtr, unicode, numChars); 1017} 1018 1019static int 1020UnicodeLength( 1021 const Tcl_UniChar *unicode) 1022{ 1023 int numChars = 0; 1024 1025 if (unicode) { 1026 while (numChars >= 0 && unicode[numChars] != 0) { 1027 numChars++; 1028 } 1029 } 1030 stringCheckLimits(numChars); 1031 return numChars; 1032} 1033 1034static void 1035SetUnicodeObj(objPtr, unicode, numChars) 1036 Tcl_Obj *objPtr; /* The object to set the string of. */ 1037 CONST Tcl_UniChar *unicode; /* The unicode string used to initialize 1038 * the object. */ 1039 int numChars; /* Number of characters in the unicode 1040 * string. */ 1041{ 1042 String *stringPtr; 1043 size_t uallocated; 1044 1045 if (numChars < 0) { 1046 numChars = UnicodeLength(unicode); 1047 } 1048 1049 /* 1050 * Allocate enough space for the String structure + Unicode string. 1051 */ 1052 1053 stringCheckLimits(numChars); 1054 uallocated = STRING_UALLOC(numChars); 1055 stringPtr = (String *) ckalloc(STRING_SIZE(uallocated)); 1056 1057 stringPtr->numChars = numChars; 1058 stringPtr->uallocated = uallocated; 1059 stringPtr->hasUnicode = (numChars > 0); 1060 stringPtr->allocated = 0; 1061 memcpy((VOID *) stringPtr->unicode, (VOID *) unicode, uallocated); 1062 stringPtr->unicode[numChars] = 0; 1063 1064 Tcl_InvalidateStringRep(objPtr); 1065 objPtr->typePtr = &tclStringType; 1066 SET_STRING(objPtr, stringPtr); 1067} 1068 1069/* 1070 *---------------------------------------------------------------------- 1071 * 1072 * Tcl_AppendToObj -- 1073 * 1074 * This procedure appends a sequence of bytes to an object. 1075 * 1076 * Results: 1077 * None. 1078 * 1079 * Side effects: 1080 * The bytes at *bytes are appended to the string representation 1081 * of objPtr. 1082 * 1083 *---------------------------------------------------------------------- 1084 */ 1085 1086void 1087Tcl_AppendToObj(objPtr, bytes, length) 1088 register Tcl_Obj *objPtr; /* Points to the object to append to. */ 1089 CONST char *bytes; /* Points to the bytes to append to the 1090 * object. */ 1091 register int length; /* The number of bytes to append from 1092 * "bytes". If < 0, then append all bytes 1093 * up to NULL byte. */ 1094{ 1095 String *stringPtr; 1096 1097 if (Tcl_IsShared(objPtr)) { 1098 panic("Tcl_AppendToObj called with shared object"); 1099 } 1100 1101 SetStringFromAny(NULL, objPtr); 1102 1103 if (length < 0) { 1104 length = (bytes ? strlen(bytes) : 0); 1105 } 1106 if (length == 0) { 1107 return; 1108 } 1109 1110 /* 1111 * If objPtr has a valid Unicode rep, then append the Unicode 1112 * conversion of "bytes" to the objPtr's Unicode rep, otherwise 1113 * append "bytes" to objPtr's string rep. 1114 */ 1115 1116 stringPtr = GET_STRING(objPtr); 1117 if (stringPtr->hasUnicode != 0) { 1118 AppendUtfToUnicodeRep(objPtr, bytes, length); 1119 1120 stringPtr = GET_STRING(objPtr); 1121 } else { 1122 AppendUtfToUtfRep(objPtr, bytes, length); 1123 } 1124} 1125 1126/* 1127 *---------------------------------------------------------------------- 1128 * 1129 * Tcl_AppendUnicodeToObj -- 1130 * 1131 * This procedure appends a Unicode string to an object in the 1132 * most efficient manner possible. Length must be >= 0. 1133 * 1134 * Results: 1135 * None. 1136 * 1137 * Side effects: 1138 * Invalidates the string rep and creates a new Unicode string. 1139 * 1140 *---------------------------------------------------------------------- 1141 */ 1142 1143void 1144Tcl_AppendUnicodeToObj(objPtr, unicode, length) 1145 register Tcl_Obj *objPtr; /* Points to the object to append to. */ 1146 CONST Tcl_UniChar *unicode; /* The unicode string to append to the 1147 * object. */ 1148 int length; /* Number of chars in "unicode". */ 1149{ 1150 String *stringPtr; 1151 1152 if (Tcl_IsShared(objPtr)) { 1153 panic("Tcl_AppendUnicodeToObj called with shared object"); 1154 } 1155 1156 if (length == 0) { 1157 return; 1158 } 1159 1160 SetStringFromAny(NULL, objPtr); 1161 stringPtr = GET_STRING(objPtr); 1162 1163 /* 1164 * If objPtr has a valid Unicode rep, then append the "unicode" 1165 * to the objPtr's Unicode rep, otherwise the UTF conversion of 1166 * "unicode" to objPtr's string rep. 1167 */ 1168 1169 if (stringPtr->hasUnicode != 0) { 1170 AppendUnicodeToUnicodeRep(objPtr, unicode, length); 1171 } else { 1172 AppendUnicodeToUtfRep(objPtr, unicode, length); 1173 } 1174} 1175 1176/* 1177 *---------------------------------------------------------------------- 1178 * 1179 * Tcl_AppendObjToObj -- 1180 * 1181 * This procedure appends the string rep of one object to another. 1182 * "objPtr" cannot be a shared object. 1183 * 1184 * Results: 1185 * None. 1186 * 1187 * Side effects: 1188 * The string rep of appendObjPtr is appended to the string 1189 * representation of objPtr. 1190 * 1191 *---------------------------------------------------------------------- 1192 */ 1193 1194void 1195Tcl_AppendObjToObj(objPtr, appendObjPtr) 1196 Tcl_Obj *objPtr; /* Points to the object to append to. */ 1197 Tcl_Obj *appendObjPtr; /* Object to append. */ 1198{ 1199 String *stringPtr; 1200 int length, numChars, allOneByteChars; 1201 char *bytes; 1202 1203 SetStringFromAny(NULL, objPtr); 1204 1205 /* 1206 * If objPtr has a valid Unicode rep, then get a Unicode string 1207 * from appendObjPtr and append it. 1208 */ 1209 1210 stringPtr = GET_STRING(objPtr); 1211 if (stringPtr->hasUnicode != 0) { 1212 1213 /* 1214 * If appendObjPtr is not of the "String" type, don't convert it. 1215 */ 1216 1217 if (appendObjPtr->typePtr == &tclStringType) { 1218 stringPtr = GET_STRING(appendObjPtr); 1219 if ((stringPtr->numChars == -1) 1220 || (stringPtr->hasUnicode == 0)) { 1221 1222 /* 1223 * If appendObjPtr is a string obj with no valid Unicode 1224 * rep, then fill its unicode rep. 1225 */ 1226 1227 FillUnicodeRep(appendObjPtr); 1228 stringPtr = GET_STRING(appendObjPtr); 1229 } 1230 AppendUnicodeToUnicodeRep(objPtr, stringPtr->unicode, 1231 stringPtr->numChars); 1232 } else { 1233 bytes = Tcl_GetStringFromObj(appendObjPtr, &length); 1234 AppendUtfToUnicodeRep(objPtr, bytes, length); 1235 } 1236 return; 1237 } 1238 1239 /* 1240 * Append to objPtr's UTF string rep. If we know the number of 1241 * characters in both objects before appending, then set the combined 1242 * number of characters in the final (appended-to) object. 1243 */ 1244 1245 bytes = Tcl_GetStringFromObj(appendObjPtr, &length); 1246 1247 allOneByteChars = 0; 1248 numChars = stringPtr->numChars; 1249 if ((numChars >= 0) && (appendObjPtr->typePtr == &tclStringType)) { 1250 stringPtr = GET_STRING(appendObjPtr); 1251 if ((stringPtr->numChars >= 0) && (stringPtr->numChars == length)) { 1252 numChars += stringPtr->numChars; 1253 allOneByteChars = 1; 1254 } 1255 } 1256 1257 AppendUtfToUtfRep(objPtr, bytes, length); 1258 1259 if (allOneByteChars) { 1260 stringPtr = GET_STRING(objPtr); 1261 stringPtr->numChars = numChars; 1262 } 1263} 1264 1265/* 1266 *---------------------------------------------------------------------- 1267 * 1268 * AppendUnicodeToUnicodeRep -- 1269 * 1270 * This procedure appends the contents of "unicode" to the Unicode 1271 * rep of "objPtr". objPtr must already have a valid Unicode rep. 1272 * 1273 * Results: 1274 * None. 1275 * 1276 * Side effects: 1277 * objPtr's internal rep is reallocated. 1278 * 1279 *---------------------------------------------------------------------- 1280 */ 1281 1282static void 1283AppendUnicodeToUnicodeRep(objPtr, unicode, appendNumChars) 1284 Tcl_Obj *objPtr; /* Points to the object to append to. */ 1285 CONST Tcl_UniChar *unicode; /* String to append. */ 1286 int appendNumChars; /* Number of chars of "unicode" to append. */ 1287{ 1288 String *stringPtr; 1289 size_t numChars; 1290 1291 if (appendNumChars < 0) { 1292 appendNumChars = UnicodeLength(unicode); 1293 } 1294 if (appendNumChars == 0) { 1295 return; 1296 } 1297 1298 SetStringFromAny(NULL, objPtr); 1299 stringPtr = GET_STRING(objPtr); 1300 1301 /* 1302 * If not enough space has been allocated for the unicode rep, 1303 * reallocate the internal rep object with additional space. First 1304 * try to double the required allocation; if that fails, try a more 1305 * modest increase. See the "TCL STRING GROWTH ALGORITHM" comment at 1306 * the top of this file for an explanation of this growth algorithm. 1307 */ 1308 1309 numChars = stringPtr->numChars + appendNumChars; 1310 stringCheckLimits(numChars); 1311 1312 if (STRING_UALLOC(numChars) >= stringPtr->uallocated) { 1313 /* 1314 * Protect against case where unicode points into the existing 1315 * stringPtr->unicode array. Force it to follow any relocations 1316 * due to the reallocs below. 1317 */ 1318 int offset = -1; 1319 if (unicode >= stringPtr->unicode && unicode <= stringPtr->unicode 1320 + 1 + stringPtr->uallocated / sizeof(Tcl_UniChar)) { 1321 offset = unicode - stringPtr->unicode; 1322 } 1323 1324 GrowUnicodeBuffer(objPtr, numChars); 1325 stringPtr = GET_STRING(objPtr); 1326 1327 /* Relocate unicode if needed; see above. */ 1328 if (offset >= 0) { 1329 unicode = stringPtr->unicode + offset; 1330 } 1331 } 1332 1333 /* 1334 * Copy the new string onto the end of the old string, then add the 1335 * trailing null. 1336 */ 1337 1338 memcpy((VOID*) (stringPtr->unicode + stringPtr->numChars), unicode, 1339 appendNumChars * sizeof(Tcl_UniChar)); 1340 stringPtr->unicode[numChars] = 0; 1341 stringPtr->numChars = numChars; 1342 stringPtr->allocated = 0; 1343 1344 Tcl_InvalidateStringRep(objPtr); 1345} 1346 1347/* 1348 *---------------------------------------------------------------------- 1349 * 1350 * AppendUnicodeToUtfRep -- 1351 * 1352 * This procedure converts the contents of "unicode" to UTF and 1353 * appends the UTF to the string rep of "objPtr". 1354 * 1355 * Results: 1356 * None. 1357 * 1358 * Side effects: 1359 * objPtr's internal rep is reallocated. 1360 * 1361 *---------------------------------------------------------------------- 1362 */ 1363 1364static void 1365AppendUnicodeToUtfRep(objPtr, unicode, numChars) 1366 Tcl_Obj *objPtr; /* Points to the object to append to. */ 1367 CONST Tcl_UniChar *unicode; /* String to convert to UTF. */ 1368 int numChars; /* Number of chars of "unicode" to convert. */ 1369{ 1370 Tcl_DString dsPtr; 1371 CONST char *bytes; 1372 1373 if (numChars < 0) { 1374 numChars = UnicodeLength(unicode); 1375 } 1376 if (numChars == 0) { 1377 return; 1378 } 1379 1380 Tcl_DStringInit(&dsPtr); 1381 bytes = Tcl_UniCharToUtfDString(unicode, numChars, &dsPtr); 1382 AppendUtfToUtfRep(objPtr, bytes, Tcl_DStringLength(&dsPtr)); 1383 Tcl_DStringFree(&dsPtr); 1384} 1385 1386/* 1387 *---------------------------------------------------------------------- 1388 * 1389 * AppendUtfToUnicodeRep -- 1390 * 1391 * This procedure converts the contents of "bytes" to Unicode and 1392 * appends the Unicode to the Unicode rep of "objPtr". objPtr must 1393 * already have a valid Unicode rep. 1394 * 1395 * Results: 1396 * None. 1397 * 1398 * Side effects: 1399 * objPtr's internal rep is reallocated. 1400 * 1401 *---------------------------------------------------------------------- 1402 */ 1403 1404static void 1405AppendUtfToUnicodeRep(objPtr, bytes, numBytes) 1406 Tcl_Obj *objPtr; /* Points to the object to append to. */ 1407 CONST char *bytes; /* String to convert to Unicode. */ 1408 int numBytes; /* Number of bytes of "bytes" to convert. */ 1409{ 1410 Tcl_DString dsPtr; 1411 int numChars; 1412 Tcl_UniChar *unicode; 1413 1414 if (numBytes < 0) { 1415 numBytes = (bytes ? strlen(bytes) : 0); 1416 } 1417 if (numBytes == 0) { 1418 return; 1419 } 1420 1421 Tcl_DStringInit(&dsPtr); 1422 numChars = Tcl_NumUtfChars(bytes, numBytes); 1423 unicode = (Tcl_UniChar *)Tcl_UtfToUniCharDString(bytes, numBytes, &dsPtr); 1424 AppendUnicodeToUnicodeRep(objPtr, unicode, numChars); 1425 Tcl_DStringFree(&dsPtr); 1426} 1427 1428/* 1429 *---------------------------------------------------------------------- 1430 * 1431 * AppendUtfToUtfRep -- 1432 * 1433 * This procedure appends "numBytes" bytes of "bytes" to the UTF string 1434 * rep of "objPtr". objPtr must already have a valid String rep. 1435 * 1436 * Results: 1437 * None. 1438 * 1439 * Side effects: 1440 * objPtr's internal rep is reallocated. 1441 * 1442 *---------------------------------------------------------------------- 1443 */ 1444 1445static void 1446AppendUtfToUtfRep(objPtr, bytes, numBytes) 1447 Tcl_Obj *objPtr; /* Points to the object to append to. */ 1448 CONST char *bytes; /* String to append. */ 1449 int numBytes; /* Number of bytes of "bytes" to append. */ 1450{ 1451 String *stringPtr; 1452 int newLength, oldLength; 1453 1454 if (numBytes < 0) { 1455 numBytes = (bytes ? strlen(bytes) : 0); 1456 } 1457 if (numBytes == 0) { 1458 return; 1459 } 1460 1461 /* 1462 * Copy the new string onto the end of the old string, then add the 1463 * trailing null. 1464 */ 1465 1466 oldLength = objPtr->length; 1467 newLength = numBytes + oldLength; 1468 if (newLength < 0) { 1469 Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX); 1470 } 1471 1472 stringPtr = GET_STRING(objPtr); 1473 if (newLength > (int) stringPtr->allocated) { 1474 /* 1475 * Protect against case where unicode points into the existing 1476 * stringPtr->unicode array. Force it to follow any relocations 1477 * due to the reallocs below. 1478 */ 1479 int offset = -1; 1480 if (bytes >= objPtr->bytes 1481 && bytes <= objPtr->bytes + objPtr->length) { 1482 offset = bytes - objPtr->bytes; 1483 } 1484 1485 /* 1486 * There isn't currently enough space in the string representation 1487 * so allocate additional space. First, try to double the length 1488 * required. If that fails, try a more modest allocation. See the 1489 * "TCL STRING GROWTH ALGORITHM" comment at the top of this file for an 1490 * explanation of this growth algorithm. 1491 */ 1492 1493 if (Tcl_AttemptSetObjLength(objPtr, 2 * newLength) == 0) { 1494 /* 1495 * Take care computing the amount of modest growth to avoid 1496 * overflow into invalid argument values for Tcl_SetObjLength. 1497 */ 1498 unsigned int limit = INT_MAX - newLength; 1499 unsigned int extra = numBytes + TCL_GROWTH_MIN_ALLOC; 1500 int growth = (int) ((extra > limit) ? limit : extra); 1501 1502 Tcl_SetObjLength(objPtr, newLength + growth); 1503 } 1504 1505 /* Relocate bytes if needed; see above. */ 1506 if (offset >=0) { 1507 bytes = objPtr->bytes + offset; 1508 } 1509 } 1510 1511 /* 1512 * Invalidate the unicode data. 1513 */ 1514 1515 stringPtr->numChars = -1; 1516 stringPtr->hasUnicode = 0; 1517 1518 memcpy((VOID *) (objPtr->bytes + oldLength), (VOID *) bytes, 1519 (size_t) numBytes); 1520 objPtr->bytes[newLength] = 0; 1521 objPtr->length = newLength; 1522} 1523 1524/* 1525 *---------------------------------------------------------------------- 1526 * 1527 * Tcl_AppendStringsToObjVA -- 1528 * 1529 * This procedure appends one or more null-terminated strings 1530 * to an object. 1531 * 1532 * Results: 1533 * None. 1534 * 1535 * Side effects: 1536 * The contents of all the string arguments are appended to the 1537 * string representation of objPtr. 1538 * 1539 *---------------------------------------------------------------------- 1540 */ 1541 1542void 1543Tcl_AppendStringsToObjVA (objPtr, argList) 1544 Tcl_Obj *objPtr; /* Points to the object to append to. */ 1545 va_list argList; /* Variable argument list. */ 1546{ 1547#define STATIC_LIST_SIZE 16 1548 String *stringPtr; 1549 int newLength, oldLength, attemptLength; 1550 register char *string, *dst; 1551 char *static_list[STATIC_LIST_SIZE]; 1552 char **args = static_list; 1553 int nargs_space = STATIC_LIST_SIZE; 1554 int nargs, i; 1555 1556 if (Tcl_IsShared(objPtr)) { 1557 panic("Tcl_AppendStringsToObj called with shared object"); 1558 } 1559 1560 SetStringFromAny(NULL, objPtr); 1561 1562 /* 1563 * Force the existence of a string rep. so we avoid crashes operating 1564 * on a pure unicode value. [Bug 2597185] 1565 */ 1566 1567 (void) Tcl_GetStringFromObj(objPtr, &oldLength); 1568 1569 /* 1570 * Figure out how much space is needed for all the strings, and 1571 * expand the string representation if it isn't big enough. If no 1572 * bytes would be appended, just return. Note that on some platforms 1573 * (notably OS/390) the argList is an array so we need to use memcpy. 1574 */ 1575 1576 nargs = 0; 1577 newLength = 0; 1578 while (1) { 1579 string = va_arg(argList, char *); 1580 if (string == NULL) { 1581 break; 1582 } 1583 if (nargs >= nargs_space) { 1584 /* 1585 * Expand the args buffer 1586 */ 1587 nargs_space += STATIC_LIST_SIZE; 1588 if (args == static_list) { 1589 args = (void *)ckalloc(nargs_space * sizeof(char *)); 1590 for (i = 0; i < nargs; ++i) { 1591 args[i] = static_list[i]; 1592 } 1593 } else { 1594 args = (void *)ckrealloc((void *)args, 1595 nargs_space * sizeof(char *)); 1596 } 1597 } 1598 newLength += strlen(string); 1599 args[nargs++] = string; 1600 } 1601 if (newLength == 0) { 1602 goto done; 1603 } 1604 1605 stringPtr = GET_STRING(objPtr); 1606 if (oldLength + newLength > (int) stringPtr->allocated) { 1607 1608 /* 1609 * There isn't currently enough space in the string 1610 * representation, so allocate additional space. If the current 1611 * string representation isn't empty (i.e. it looks like we're 1612 * doing a series of appends) then try to allocate extra space to 1613 * accomodate future growth: first try to double the required memory; 1614 * if that fails, try a more modest allocation. See the "TCL STRING 1615 * GROWTH ALGORITHM" comment at the top of this file for an explanation 1616 * of this growth algorithm. Otherwise, if the current string 1617 * representation is empty, exactly enough memory is allocated. 1618 */ 1619 1620 if (oldLength == 0) { 1621 Tcl_SetObjLength(objPtr, newLength); 1622 } else { 1623 attemptLength = 2 * (oldLength + newLength); 1624 if (Tcl_AttemptSetObjLength(objPtr, attemptLength) == 0) { 1625 attemptLength = oldLength + (2 * newLength) + 1626 TCL_GROWTH_MIN_ALLOC; 1627 Tcl_SetObjLength(objPtr, attemptLength); 1628 } 1629 } 1630 } 1631 1632 /* 1633 * Make a second pass through the arguments, appending all the 1634 * strings to the object. 1635 */ 1636 1637 dst = objPtr->bytes + oldLength; 1638 for (i = 0; i < nargs; ++i) { 1639 string = args[i]; 1640 if (string == NULL) { 1641 break; 1642 } 1643 while (*string != 0) { 1644 *dst = *string; 1645 dst++; 1646 string++; 1647 } 1648 } 1649 1650 /* 1651 * Add a null byte to terminate the string. However, be careful: 1652 * it's possible that the object is totally empty (if it was empty 1653 * originally and there was nothing to append). In this case dst is 1654 * NULL; just leave everything alone. 1655 */ 1656 1657 if (dst != NULL) { 1658 *dst = 0; 1659 } 1660 objPtr->length = oldLength + newLength; 1661 1662 done: 1663 /* 1664 * If we had to allocate a buffer from the heap, 1665 * free it now. 1666 */ 1667 1668 if (args != static_list) { 1669 ckfree((void *)args); 1670 } 1671#undef STATIC_LIST_SIZE 1672} 1673 1674/* 1675 *---------------------------------------------------------------------- 1676 * 1677 * Tcl_AppendStringsToObj -- 1678 * 1679 * This procedure appends one or more null-terminated strings 1680 * to an object. 1681 * 1682 * Results: 1683 * None. 1684 * 1685 * Side effects: 1686 * The contents of all the string arguments are appended to the 1687 * string representation of objPtr. 1688 * 1689 *---------------------------------------------------------------------- 1690 */ 1691 1692void 1693Tcl_AppendStringsToObj TCL_VARARGS_DEF(Tcl_Obj *,arg1) 1694{ 1695 register Tcl_Obj *objPtr; 1696 va_list argList; 1697 1698 objPtr = TCL_VARARGS_START(Tcl_Obj *,arg1,argList); 1699 Tcl_AppendStringsToObjVA(objPtr, argList); 1700 va_end(argList); 1701} 1702 1703/* 1704 *--------------------------------------------------------------------------- 1705 * 1706 * FillUnicodeRep -- 1707 * 1708 * Populate the Unicode internal rep with the Unicode form of its string 1709 * rep. The object must alread have a "String" internal rep. 1710 * 1711 * Results: 1712 * None. 1713 * 1714 * Side effects: 1715 * Reallocates the String internal rep. 1716 * 1717 *--------------------------------------------------------------------------- 1718 */ 1719 1720static void 1721FillUnicodeRep(objPtr) 1722 Tcl_Obj *objPtr; /* The object in which to fill the unicode rep. */ 1723{ 1724 String *stringPtr; 1725 size_t uallocated; 1726 char *src, *srcEnd; 1727 Tcl_UniChar *dst; 1728 src = objPtr->bytes; 1729 1730 stringPtr = GET_STRING(objPtr); 1731 if (stringPtr->numChars == -1) { 1732 stringPtr->numChars = Tcl_NumUtfChars(src, objPtr->length); 1733 } 1734 stringPtr->hasUnicode = (stringPtr->numChars > 0); 1735 1736 stringCheckLimits(stringPtr->numChars); 1737 uallocated = STRING_UALLOC(stringPtr->numChars); 1738 if (uallocated > stringPtr->uallocated) { 1739 GrowUnicodeBuffer(objPtr, stringPtr->numChars); 1740 stringPtr = GET_STRING(objPtr); 1741 } 1742 1743 /* 1744 * Convert src to Unicode and store the coverted data in "unicode". 1745 */ 1746 1747 srcEnd = src + objPtr->length; 1748 for (dst = stringPtr->unicode; src < srcEnd; dst++) { 1749 src += TclUtfToUniChar(src, dst); 1750 } 1751 *dst = 0; 1752 1753 SET_STRING(objPtr, stringPtr); 1754} 1755 1756/* 1757 *---------------------------------------------------------------------- 1758 * 1759 * DupStringInternalRep -- 1760 * 1761 * Initialize the internal representation of a new Tcl_Obj to a 1762 * copy of the internal representation of an existing string object. 1763 * 1764 * Results: 1765 * None. 1766 * 1767 * Side effects: 1768 * copyPtr's internal rep is set to a copy of srcPtr's internal 1769 * representation. 1770 * 1771 *---------------------------------------------------------------------- 1772 */ 1773 1774static void 1775DupStringInternalRep(srcPtr, copyPtr) 1776 register Tcl_Obj *srcPtr; /* Object with internal rep to copy. Must 1777 * have an internal rep of type "String". */ 1778 register Tcl_Obj *copyPtr; /* Object with internal rep to set. Must 1779 * not currently have an internal rep.*/ 1780{ 1781 String *srcStringPtr = GET_STRING(srcPtr); 1782 String *copyStringPtr = NULL; 1783 1784 /* 1785 * If the src obj is a string of 1-byte Utf chars, then copy the 1786 * string rep of the source object and create an "empty" Unicode 1787 * internal rep for the new object. Otherwise, copy Unicode 1788 * internal rep, and invalidate the string rep of the new object. 1789 */ 1790 1791 if (srcStringPtr->hasUnicode == 0) { 1792 copyStringPtr = (String *) ckalloc(sizeof(String)); 1793 copyStringPtr->uallocated = 0; 1794 } else { 1795 copyStringPtr = (String *) ckalloc( 1796 STRING_SIZE(srcStringPtr->uallocated)); 1797 copyStringPtr->uallocated = srcStringPtr->uallocated; 1798 1799 memcpy((VOID *) copyStringPtr->unicode, 1800 (VOID *) srcStringPtr->unicode, 1801 (size_t) srcStringPtr->numChars * sizeof(Tcl_UniChar)); 1802 copyStringPtr->unicode[srcStringPtr->numChars] = 0; 1803 } 1804 copyStringPtr->numChars = srcStringPtr->numChars; 1805 copyStringPtr->hasUnicode = srcStringPtr->hasUnicode; 1806 copyStringPtr->allocated = srcStringPtr->allocated; 1807 1808 /* 1809 * Tricky point: the string value was copied by generic object 1810 * management code, so it doesn't contain any extra bytes that 1811 * might exist in the source object. 1812 */ 1813 1814 copyStringPtr->allocated = copyPtr->length; 1815 1816 SET_STRING(copyPtr, copyStringPtr); 1817 copyPtr->typePtr = &tclStringType; 1818} 1819 1820/* 1821 *---------------------------------------------------------------------- 1822 * 1823 * SetStringFromAny -- 1824 * 1825 * Create an internal representation of type "String" for an object. 1826 * 1827 * Results: 1828 * This operation always succeeds and returns TCL_OK. 1829 * 1830 * Side effects: 1831 * Any old internal reputation for objPtr is freed and the 1832 * internal representation is set to "String". 1833 * 1834 *---------------------------------------------------------------------- 1835 */ 1836 1837static int 1838SetStringFromAny(interp, objPtr) 1839 Tcl_Interp *interp; /* Used for error reporting if not NULL. */ 1840 register Tcl_Obj *objPtr; /* The object to convert. */ 1841{ 1842 /* 1843 * The Unicode object is optimized for the case where each UTF char 1844 * in a string is only one byte. In this case, we store the value of 1845 * numChars, but we don't copy the bytes to the unicodeObj->unicode. 1846 */ 1847 1848 if (objPtr->typePtr != &tclStringType) { 1849 String *stringPtr; 1850 1851 if (objPtr->typePtr != NULL) { 1852 if (objPtr->bytes == NULL) { 1853 objPtr->typePtr->updateStringProc(objPtr); 1854 } 1855 if ((objPtr->typePtr->freeIntRepProc) != NULL) { 1856 (*objPtr->typePtr->freeIntRepProc)(objPtr); 1857 } 1858 } 1859 objPtr->typePtr = &tclStringType; 1860 1861 /* 1862 * Allocate enough space for the basic String structure. 1863 */ 1864 1865 stringPtr = (String *) ckalloc(sizeof(String)); 1866 stringPtr->numChars = -1; 1867 stringPtr->uallocated = 0; 1868 stringPtr->hasUnicode = 0; 1869 1870 if (objPtr->bytes != NULL) { 1871 stringPtr->allocated = objPtr->length; 1872 if (objPtr->bytes != tclEmptyStringRep) { 1873 objPtr->bytes[objPtr->length] = 0; 1874 } 1875 } else { 1876 objPtr->length = 0; 1877 } 1878 SET_STRING(objPtr, stringPtr); 1879 } 1880 return TCL_OK; 1881} 1882 1883/* 1884 *---------------------------------------------------------------------- 1885 * 1886 * UpdateStringOfString -- 1887 * 1888 * Update the string representation for an object whose internal 1889 * representation is "String". 1890 * 1891 * Results: 1892 * None. 1893 * 1894 * Side effects: 1895 * The object's string may be set by converting its Unicode 1896 * represention to UTF format. 1897 * 1898 *---------------------------------------------------------------------- 1899 */ 1900 1901static void 1902UpdateStringOfString(objPtr) 1903 Tcl_Obj *objPtr; /* Object with string rep to update. */ 1904{ 1905 int i, size; 1906 Tcl_UniChar *unicode; 1907 char dummy[TCL_UTF_MAX]; 1908 char *dst; 1909 String *stringPtr; 1910 1911 stringPtr = GET_STRING(objPtr); 1912 if ((objPtr->bytes == NULL) || (stringPtr->allocated == 0)) { 1913 1914 if (stringPtr->numChars <= 0) { 1915 1916 /* 1917 * If there is no Unicode rep, or the string has 0 chars, 1918 * then set the string rep to an empty string. 1919 */ 1920 1921 objPtr->bytes = tclEmptyStringRep; 1922 objPtr->length = 0; 1923 return; 1924 } 1925 1926 unicode = stringPtr->unicode; 1927 1928 /* 1929 * Translate the Unicode string to UTF. "size" will hold the 1930 * amount of space the UTF string needs. 1931 */ 1932 1933 if (stringPtr->numChars <= INT_MAX/TCL_UTF_MAX 1934 && stringPtr->allocated >= (size_t) (stringPtr->numChars * TCL_UTF_MAX)) { 1935 goto copyBytes; 1936 } 1937 1938 size = 0; 1939 for (i = 0; i < stringPtr->numChars && size >= 0; i++) { 1940 size += Tcl_UniCharToUtf((int) unicode[i], dummy); 1941 } 1942 if (size < 0) { 1943 Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX); 1944 } 1945 1946 objPtr->bytes = (char *) ckalloc((unsigned) (size + 1)); 1947 objPtr->length = size; 1948 stringPtr->allocated = size; 1949 1950 copyBytes: 1951 dst = objPtr->bytes; 1952 for (i = 0; i < stringPtr->numChars; i++) { 1953 dst += Tcl_UniCharToUtf(unicode[i], dst); 1954 } 1955 *dst = '\0'; 1956 } 1957 return; 1958} 1959 1960/* 1961 *---------------------------------------------------------------------- 1962 * 1963 * FreeStringInternalRep -- 1964 * 1965 * Deallocate the storage associated with a String data object's 1966 * internal representation. 1967 * 1968 * Results: 1969 * None. 1970 * 1971 * Side effects: 1972 * Frees memory. 1973 * 1974 *---------------------------------------------------------------------- 1975 */ 1976 1977static void 1978FreeStringInternalRep(objPtr) 1979 Tcl_Obj *objPtr; /* Object with internal rep to free. */ 1980{ 1981 ckfree((char *) GET_STRING(objPtr)); 1982} 1983