1/*
2 * tclStringObj.c --
3 *
4 *	This file contains procedures that implement string operations on Tcl
5 *	objects.  Some string operations work with UTF strings and others
6 *	require Unicode format.  Functions that require knowledge of the width
7 *	of each character, such as indexing, operate on Unicode data.
8 *
9 *	A Unicode string is an internationalized string.  Conceptually, a
10 *	Unicode string is an array of 16-bit quantities organized as a sequence
11 *	of properly formed UTF-8 characters.  There is a one-to-one map between
12 *	Unicode and UTF characters.  Because Unicode characters have a fixed
13 *	width, operations such as indexing operate on Unicode data.  The String
14 *	object is optimized for the case where each UTF char in a string is
15 *	only one byte.  In this case, we store the value of numChars, but we
16 *	don't store the Unicode data (unless Tcl_GetUnicode is explicitly
17 *	called).
18 *
19 *	The String object type stores one or both formats.  The default
20 *	behavior is to store UTF.  Once Unicode is calculated by a function, it
21 *	is stored in the internal rep for future access (without an additional
22 *	O(n) cost).
23 *
24 *	To allow many appends to be done to an object without constantly
25 *	reallocating the space for the string or Unicode representation, we
26 *	allocate double the space for the string or Unicode and use the
27 *	internal representation to keep track of how much space is used
28 *	vs. allocated.
29 *
30 * Copyright (c) 1995-1997 Sun Microsystems, Inc.
31 * Copyright (c) 1999 by Scriptics Corporation.
32 *
33 * See the file "license.terms" for information on usage and redistribution
34 * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
35 *
36 * RCS: @(#) $Id: tclStringObj.c,v 1.32.2.2 2006/09/24 21:15:11 msofer Exp $ */
37
38#include "tclInt.h"
39
40/*
41 * Prototypes for procedures defined later in this file:
42 */
43
44static void		AppendUnicodeToUnicodeRep _ANSI_ARGS_((
45    			    Tcl_Obj *objPtr, CONST Tcl_UniChar *unicode,
46			    int appendNumChars));
47static void		AppendUnicodeToUtfRep _ANSI_ARGS_((
48    			    Tcl_Obj *objPtr, CONST Tcl_UniChar *unicode,
49			    int numChars));
50static void		AppendUtfToUnicodeRep _ANSI_ARGS_((Tcl_Obj *objPtr,
51    			    CONST char *bytes, int numBytes));
52static void		AppendUtfToUtfRep _ANSI_ARGS_((Tcl_Obj *objPtr,
53    			    CONST char *bytes, int numBytes));
54static void		DupStringInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr,
55			    Tcl_Obj *copyPtr));
56static void		FillUnicodeRep _ANSI_ARGS_((Tcl_Obj *objPtr));
57static void		FreeStringInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
58static void		GrowUnicodeBuffer _ANSI_ARGS_((Tcl_Obj *objPtr,
59			    int needed));
60static int		SetStringFromAny _ANSI_ARGS_((Tcl_Interp *interp,
61			    Tcl_Obj *objPtr));
62static void		SetUnicodeObj(Tcl_Obj *objPtr,
63			    CONST Tcl_UniChar *unicode, int numChars);
64static int		UnicodeLength _ANSI_ARGS_((CONST Tcl_UniChar *unicode));
65static void		UpdateStringOfString _ANSI_ARGS_((Tcl_Obj *objPtr));
66
67/*
68 * The structure below defines the string Tcl object type by means of
69 * procedures that can be invoked by generic object code.
70 */
71
72Tcl_ObjType tclStringType = {
73    "string",				/* name */
74    FreeStringInternalRep,		/* freeIntRepPro */
75    DupStringInternalRep,		/* dupIntRepProc */
76    UpdateStringOfString,		/* updateStringProc */
77    SetStringFromAny			/* setFromAnyProc */
78};
79
80/*
81 * The following structure is the internal rep for a String object.
82 * It keeps track of how much memory has been used and how much has been
83 * allocated for the Unicode and UTF string to enable growing and
84 * shrinking of the UTF and Unicode reps of the String object with fewer
85 * mallocs.  To optimize string length and indexing operations, this
86 * structure also stores the number of characters (same of UTF and Unicode!)
87 * once that value has been computed.
88 */
89
90typedef struct String {
91    int numChars;		/* The number of chars in the string.
92				 * -1 means this value has not been
93				 * calculated. >= 0 means that there is a
94				 * valid Unicode rep, or that the number
95				 * of UTF bytes == the number of chars. */
96    size_t allocated;		/* The amount of space actually allocated
97				 * for the UTF string (minus 1 byte for
98				 * the termination char). */
99    size_t uallocated;		/* The amount of space actually allocated
100				 * for the Unicode string (minus 2 bytes for
101				 * the termination char). */
102    int hasUnicode;		/* Boolean determining whether the string
103				 * has a Unicode representation. */
104    Tcl_UniChar unicode[2];	/* The array of Unicode chars.  The actual
105				 * size of this field depends on the
106				 * 'uallocated' field above. */
107} String;
108
109#define STRING_MAXCHARS \
110	(1 + (int)(((size_t)UINT_MAX - sizeof(String))/sizeof(Tcl_UniChar)))
111#define STRING_UALLOC(numChars)	\
112	((numChars) * sizeof(Tcl_UniChar))
113#define STRING_SIZE(ualloc) \
114    ((unsigned) ((ualloc) \
115	? (sizeof(String) - sizeof(Tcl_UniChar) + (ualloc)) \
116	: sizeof(String)))
117#define stringCheckLimits(numChars) \
118    if ((numChars) < 0 || (numChars) > STRING_MAXCHARS) { \
119	Tcl_Panic("max length for a Tcl unicode value (%d chars) exceeded", \
120		STRING_MAXCHARS); \
121    }
122#define stringRealloc(ptr, numChars) \
123	(String *) ckrealloc((char *) ptr, \
124		(unsigned) STRING_SIZE(STRING_UALLOC(numChars)) )
125#define stringAttemptRealloc(ptr, numChars) \
126	(String *) attemptckrealloc((char *) ptr, \
127		(unsigned) STRING_SIZE(STRING_UALLOC(numChars)) )
128#define GET_STRING(objPtr) \
129		((String *) (objPtr)->internalRep.otherValuePtr)
130#define SET_STRING(objPtr, stringPtr) \
131		(objPtr)->internalRep.otherValuePtr = (VOID *) (stringPtr)
132
133/*
134 * TCL STRING GROWTH ALGORITHM
135 *
136 * When growing strings (during an append, for example), the following growth
137 * algorithm is used:
138 *
139 *   Attempt to allocate 2 * (originalLength + appendLength)
140 *   On failure:
141 *	attempt to allocate originalLength + 2*appendLength +
142 *			TCL_GROWTH_MIN_ALLOC
143 *
144 * This algorithm allows very good performance, as it rapidly increases the
145 * memory allocated for a given string, which minimizes the number of
146 * reallocations that must be performed.  However, using only the doubling
147 * algorithm can lead to a significant waste of memory.  In particular, it
148 * may fail even when there is sufficient memory available to complete the
149 * append request (but there is not 2 * totalLength memory available).  So when
150 * the doubling fails (because there is not enough memory available), the
151 * algorithm requests a smaller amount of memory, which is still enough to
152 * cover the request, but which hopefully will be less than the total available
153 * memory.
154 *
155 * The addition of TCL_GROWTH_MIN_ALLOC allows for efficient handling
156 * of very small appends.  Without this extra slush factor, a sequence
157 * of several small appends would cause several memory allocations.
158 * As long as TCL_GROWTH_MIN_ALLOC is a reasonable size, we can
159 * avoid that behavior.
160 *
161 * The growth algorithm can be tuned by adjusting the following parameters:
162 *
163 * TCL_GROWTH_MIN_ALLOC		Additional space, in bytes, to allocate when
164 *				the double allocation has failed.
165 *				Default is 1024 (1 kilobyte).
166 */
167#ifndef TCL_GROWTH_MIN_ALLOC
168#define TCL_GROWTH_MIN_ALLOC	1024
169#endif
170
171static void
172GrowUnicodeBuffer(
173    Tcl_Obj *objPtr,
174    int needed)
175{
176    /* Pre-conditions:
177     *  objPtr->typePtr == &tclStringType
178     *  STRING_UALLOC(needed) > stringPtr->uallocated
179     *  needed < STRING_MAXCHARS
180     */
181    String *ptr = NULL, *stringPtr = GET_STRING(objPtr);
182    int attempt;
183
184    if (stringPtr->uallocated > 0) {
185	/* Subsequent appends - apply the growth algorithm. */
186	attempt = 2 * needed;
187	if (attempt >= 0 && attempt <= STRING_MAXCHARS) {
188	    ptr = stringAttemptRealloc(stringPtr, attempt);
189	}
190	if (ptr == NULL) {
191	    /*
192	     * Take care computing the amount of modest growth to avoid
193	     * overflow into invalid argument values for attempt.
194	     */
195	    unsigned int limit = STRING_MAXCHARS - needed;
196	    unsigned int extra = needed - stringPtr->numChars
197		    + TCL_GROWTH_MIN_ALLOC/sizeof(Tcl_UniChar);
198	    int growth = (int) ((extra > limit) ? limit : extra);
199	    attempt = needed + growth;
200	    ptr = stringAttemptRealloc(stringPtr, attempt);
201	}
202    }
203    if (ptr == NULL) {
204	/* First allocation - just big enough; or last chance fallback. */
205	attempt = needed;
206	ptr = stringRealloc(stringPtr, attempt);
207    }
208    stringPtr = ptr;
209    stringPtr->uallocated = STRING_UALLOC(attempt);
210    SET_STRING(objPtr, stringPtr);
211}
212
213
214/*
215 *----------------------------------------------------------------------
216 *
217 * Tcl_NewStringObj --
218 *
219 *	This procedure is normally called when not debugging: i.e., when
220 *	TCL_MEM_DEBUG is not defined. It creates a new string object and
221 *	initializes it from the byte pointer and length arguments.
222 *
223 *	When TCL_MEM_DEBUG is defined, this procedure just returns the
224 *	result of calling the debugging version Tcl_DbNewStringObj.
225 *
226 * Results:
227 *	A newly created string object is returned that has ref count zero.
228 *
229 * Side effects:
230 *	The new object's internal string representation will be set to a
231 *	copy of the length bytes starting at "bytes". If "length" is
232 *	negative, use bytes up to the first NULL byte; i.e., assume "bytes"
233 *	points to a C-style NULL-terminated string. The object's type is set
234 *	to NULL. An extra NULL is added to the end of the new object's byte
235 *	array.
236 *
237 *----------------------------------------------------------------------
238 */
239
240#ifdef TCL_MEM_DEBUG
241#undef Tcl_NewStringObj
242
243Tcl_Obj *
244Tcl_NewStringObj(bytes, length)
245    CONST char *bytes;		/* Points to the first of the length bytes
246				 * used to initialize the new object. */
247    int length;			/* The number of bytes to copy from "bytes"
248				 * when initializing the new object. If
249				 * negative, use bytes up to the first
250				 * NULL byte. */
251{
252    return Tcl_DbNewStringObj(bytes, length, "unknown", 0);
253}
254
255#else /* if not TCL_MEM_DEBUG */
256
257Tcl_Obj *
258Tcl_NewStringObj(bytes, length)
259    CONST char *bytes;		/* Points to the first of the length bytes
260				 * used to initialize the new object. */
261    int length;			/* The number of bytes to copy from "bytes"
262				 * when initializing the new object. If
263				 * negative, use bytes up to the first
264				 * NULL byte. */
265{
266    register Tcl_Obj *objPtr;
267
268    if (length < 0) {
269	length = (bytes? strlen(bytes) : 0);
270    }
271    TclNewObj(objPtr);
272    TclInitStringRep(objPtr, bytes, length);
273    return objPtr;
274}
275#endif /* TCL_MEM_DEBUG */
276
277/*
278 *----------------------------------------------------------------------
279 *
280 * Tcl_DbNewStringObj --
281 *
282 *	This procedure is normally called when debugging: i.e., when
283 *	TCL_MEM_DEBUG is defined. It creates new string objects. It is the
284 *	same as the Tcl_NewStringObj procedure above except that it calls
285 *	Tcl_DbCkalloc directly with the file name and line number from its
286 *	caller. This simplifies debugging since then the [memory active]
287 *	command	will report the correct file name and line number when
288 *	reporting objects that haven't been freed.
289 *
290 *	When TCL_MEM_DEBUG is not defined, this procedure just returns the
291 *	result of calling Tcl_NewStringObj.
292 *
293 * Results:
294 *	A newly created string object is returned that has ref count zero.
295 *
296 * Side effects:
297 *	The new object's internal string representation will be set to a
298 *	copy of the length bytes starting at "bytes". If "length" is
299 *	negative, use bytes up to the first NULL byte; i.e., assume "bytes"
300 *	points to a C-style NULL-terminated string. The object's type is set
301 *	to NULL. An extra NULL is added to the end of the new object's byte
302 *	array.
303 *
304 *----------------------------------------------------------------------
305 */
306
307#ifdef TCL_MEM_DEBUG
308
309Tcl_Obj *
310Tcl_DbNewStringObj(bytes, length, file, line)
311    CONST char *bytes;		/* Points to the first of the length bytes
312				 * used to initialize the new object. */
313    int length;			/* The number of bytes to copy from "bytes"
314				 * when initializing the new object. If
315				 * negative, use bytes up to the first
316				 * NULL byte. */
317    CONST char *file;		/* The name of the source file calling this
318				 * procedure; used for debugging. */
319    int line;			/* Line number in the source file; used
320				 * for debugging. */
321{
322    register Tcl_Obj *objPtr;
323
324    if (length < 0) {
325	length = (bytes? strlen(bytes) : 0);
326    }
327    TclDbNewObj(objPtr, file, line);
328    TclInitStringRep(objPtr, bytes, length);
329    return objPtr;
330}
331
332#else /* if not TCL_MEM_DEBUG */
333
334Tcl_Obj *
335Tcl_DbNewStringObj(bytes, length, file, line)
336    CONST char *bytes;		/* Points to the first of the length bytes
337				 * used to initialize the new object. */
338    register int length;	/* The number of bytes to copy from "bytes"
339				 * when initializing the new object. If
340				 * negative, use bytes up to the first
341				 * NULL byte. */
342    CONST char *file;		/* The name of the source file calling this
343				 * procedure; used for debugging. */
344    int line;			/* Line number in the source file; used
345				 * for debugging. */
346{
347    return Tcl_NewStringObj(bytes, length);
348}
349#endif /* TCL_MEM_DEBUG */
350
351/*
352 *---------------------------------------------------------------------------
353 *
354 * Tcl_NewUnicodeObj --
355 *
356 *	This procedure is creates a new String object and initializes
357 *	it from the given Unicode String.  If the Utf String is the same size
358 *	as the Unicode string, don't duplicate the data.
359 *
360 * Results:
361 *	The newly created object is returned.  This object will have no
362 *	initial string representation.  The returned object has a ref count
363 *	of 0.
364 *
365 * Side effects:
366 *	Memory allocated for new object and copy of Unicode argument.
367 *
368 *---------------------------------------------------------------------------
369 */
370
371Tcl_Obj *
372Tcl_NewUnicodeObj(unicode, numChars)
373    CONST Tcl_UniChar *unicode;	/* The unicode string used to initialize
374				 * the new object. */
375    int numChars;		/* Number of characters in the unicode
376				 * string. */
377{
378    Tcl_Obj *objPtr;
379
380    TclNewObj(objPtr);
381    SetUnicodeObj(objPtr, unicode, numChars);
382    return objPtr;
383}
384
385/*
386 *----------------------------------------------------------------------
387 *
388 * Tcl_GetCharLength --
389 *
390 *	Get the length of the Unicode string from the Tcl object.
391 *
392 * Results:
393 *	Pointer to unicode string representing the unicode object.
394 *
395 * Side effects:
396 *	Frees old internal rep.  Allocates memory for new "String"
397 *	internal rep.
398 *
399 *----------------------------------------------------------------------
400 */
401
402int
403Tcl_GetCharLength(objPtr)
404    Tcl_Obj *objPtr;	/* The String object to get the num chars of. */
405{
406    String *stringPtr;
407
408    SetStringFromAny(NULL, objPtr);
409    stringPtr = GET_STRING(objPtr);
410
411    /*
412     * If numChars is unknown, then calculate the number of characaters
413     * while populating the Unicode string.
414     */
415
416    if (stringPtr->numChars == -1) {
417	register int i = objPtr->length;
418	register unsigned char *str = (unsigned char *) objPtr->bytes;
419
420	/*
421	 * This is a speed sensitive function, so run specially over the
422	 * string to count continuous ascii characters before resorting
423	 * to the Tcl_NumUtfChars call.  This is a long form of:
424	 stringPtr->numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length);
425	*/
426
427	while (i && (*str < 0xC0)) { i--; str++; }
428	stringPtr->numChars = objPtr->length - i;
429	if (i) {
430	    stringPtr->numChars += Tcl_NumUtfChars(objPtr->bytes
431		    + (objPtr->length - i), i);
432	}
433
434 	if (stringPtr->numChars == objPtr->length) {
435
436	    /*
437	     * Since we've just calculated the number of chars, and all
438	     * UTF chars are 1-byte long, we don't need to store the
439	     * unicode string.
440	     */
441
442	    stringPtr->hasUnicode = 0;
443
444	} else {
445
446	    /*
447	     * Since we've just calucalated the number of chars, and not
448	     * all UTF chars are 1-byte long, go ahead and populate the
449	     * unicode string.
450	     */
451
452	    FillUnicodeRep(objPtr);
453
454	    /*
455	     * We need to fetch the pointer again because we have just
456	     * reallocated the structure to make room for the Unicode data.
457	     */
458
459	    stringPtr = GET_STRING(objPtr);
460	}
461    }
462    return stringPtr->numChars;
463}
464
465/*
466 *----------------------------------------------------------------------
467 *
468 * Tcl_GetUniChar --
469 *
470 *	Get the index'th Unicode character from the String object.  The
471 *	index is assumed to be in the appropriate range.
472 *
473 * Results:
474 *	Returns the index'th Unicode character in the Object.
475 *
476 * Side effects:
477 *	Fills unichar with the index'th Unicode character.
478 *
479 *----------------------------------------------------------------------
480 */
481
482Tcl_UniChar
483Tcl_GetUniChar(objPtr, index)
484    Tcl_Obj *objPtr;	/* The object to get the Unicode charater from. */
485    int index;		/* Get the index'th Unicode character. */
486{
487    Tcl_UniChar unichar;
488    String *stringPtr;
489
490    SetStringFromAny(NULL, objPtr);
491    stringPtr = GET_STRING(objPtr);
492
493    if (stringPtr->numChars == -1) {
494
495	/*
496	 * We haven't yet calculated the length, so we don't have the
497	 * Unicode str.  We need to know the number of chars before we
498	 * can do indexing.
499	 */
500
501	Tcl_GetCharLength(objPtr);
502
503	/*
504	 * We need to fetch the pointer again because we may have just
505	 * reallocated the structure.
506	 */
507
508	stringPtr = GET_STRING(objPtr);
509    }
510    if (stringPtr->hasUnicode == 0) {
511
512	/*
513	 * All of the characters in the Utf string are 1 byte chars,
514	 * so we don't store the unicode char.  We get the Utf string
515	 * and convert the index'th byte to a Unicode character.
516	 */
517
518	unichar = (Tcl_UniChar) objPtr->bytes[index];
519    } else {
520	unichar = stringPtr->unicode[index];
521    }
522    return unichar;
523}
524
525/*
526 *----------------------------------------------------------------------
527 *
528 * Tcl_GetUnicode --
529 *
530 *	Get the Unicode form of the String object.  If
531 *	the object is not already a String object, it will be converted
532 *	to one.  If the String object does not have a Unicode rep, then
533 *	one is create from the UTF string format.
534 *
535 * Results:
536 *	Returns a pointer to the object's internal Unicode string.
537 *
538 * Side effects:
539 *	Converts the object to have the String internal rep.
540 *
541 *----------------------------------------------------------------------
542 */
543
544Tcl_UniChar *
545Tcl_GetUnicode(objPtr)
546    Tcl_Obj *objPtr;	/* The object to find the unicode string for. */
547{
548    String *stringPtr;
549
550    SetStringFromAny(NULL, objPtr);
551    stringPtr = GET_STRING(objPtr);
552
553    if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) {
554
555	/*
556	 * We haven't yet calculated the length, or all of the characters
557	 * in the Utf string are 1 byte chars (so we didn't store the
558	 * unicode str).  Since this function must return a unicode string,
559	 * and one has not yet been stored, force the Unicode to be
560	 * calculated and stored now.
561	 */
562
563	FillUnicodeRep(objPtr);
564
565	/*
566	 * We need to fetch the pointer again because we have just
567	 * reallocated the structure to make room for the Unicode data.
568	 */
569
570	stringPtr = GET_STRING(objPtr);
571    }
572    return stringPtr->unicode;
573}
574
575/*
576 *----------------------------------------------------------------------
577 *
578 * Tcl_GetUnicodeFromObj --
579 *
580 *	Get the Unicode form of the String object with length.  If
581 *	the object is not already a String object, it will be converted
582 *	to one.  If the String object does not have a Unicode rep, then
583 *	one is create from the UTF string format.
584 *
585 * Results:
586 *	Returns a pointer to the object's internal Unicode string.
587 *
588 * Side effects:
589 *	Converts the object to have the String internal rep.
590 *
591 *----------------------------------------------------------------------
592 */
593
594Tcl_UniChar *
595Tcl_GetUnicodeFromObj(objPtr, lengthPtr)
596    Tcl_Obj *objPtr;	/* The object to find the unicode string for. */
597    int *lengthPtr;	/* If non-NULL, the location where the
598			 * string rep's unichar length should be
599			 * stored. If NULL, no length is stored. */
600{
601    String *stringPtr;
602
603    SetStringFromAny(NULL, objPtr);
604    stringPtr = GET_STRING(objPtr);
605
606    if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) {
607
608	/*
609	 * We haven't yet calculated the length, or all of the characters
610	 * in the Utf string are 1 byte chars (so we didn't store the
611	 * unicode str).  Since this function must return a unicode string,
612	 * and one has not yet been stored, force the Unicode to be
613	 * calculated and stored now.
614	 */
615
616	FillUnicodeRep(objPtr);
617
618	/*
619	 * We need to fetch the pointer again because we have just
620	 * reallocated the structure to make room for the Unicode data.
621	 */
622
623	stringPtr = GET_STRING(objPtr);
624    }
625
626    if (lengthPtr != NULL) {
627	*lengthPtr = stringPtr->numChars;
628    }
629    return stringPtr->unicode;
630}
631
632/*
633 *----------------------------------------------------------------------
634 *
635 * Tcl_GetRange --
636 *
637 *	Create a Tcl Object that contains the chars between first and last
638 *	of the object indicated by "objPtr".  If the object is not already
639 *	a String object, convert it to one.  The first and last indices
640 *	are assumed to be in the appropriate range.
641 *
642 * Results:
643 *	Returns a new Tcl Object of the String type.
644 *
645 * Side effects:
646 *	Changes the internal rep of "objPtr" to the String type.
647 *
648 *----------------------------------------------------------------------
649 */
650
651Tcl_Obj *
652Tcl_GetRange(objPtr, first, last)
653    Tcl_Obj *objPtr;		/* The Tcl object to find the range of. */
654    int first;			/* First index of the range. */
655    int last;			/* Last index of the range. */
656{
657    Tcl_Obj *newObjPtr;		/* The Tcl object to find the range of. */
658    String *stringPtr;
659
660    SetStringFromAny(NULL, objPtr);
661    stringPtr = GET_STRING(objPtr);
662
663    if (stringPtr->numChars == -1) {
664
665	/*
666	 * We haven't yet calculated the length, so we don't have the
667	 * Unicode str.  We need to know the number of chars before we
668	 * can do indexing.
669	 */
670
671	Tcl_GetCharLength(objPtr);
672
673	/*
674	 * We need to fetch the pointer again because we may have just
675	 * reallocated the structure.
676	 */
677
678	stringPtr = GET_STRING(objPtr);
679    }
680
681    if (objPtr->bytes && stringPtr->numChars == objPtr->length) {
682	char *str = Tcl_GetString(objPtr);
683
684	/*
685	 * All of the characters in the Utf string are 1 byte chars,
686	 * so we don't store the unicode char.  Create a new string
687	 * object containing the specified range of chars.
688	 */
689
690	newObjPtr = Tcl_NewStringObj(&str[first], last-first+1);
691
692	/*
693	 * Since we know the new string only has 1-byte chars, we
694	 * can set it's numChars field.
695	 */
696
697	SetStringFromAny(NULL, newObjPtr);
698	stringPtr = GET_STRING(newObjPtr);
699	stringPtr->numChars = last-first+1;
700    } else {
701	newObjPtr = Tcl_NewUnicodeObj(stringPtr->unicode + first,
702		last-first+1);
703    }
704    return newObjPtr;
705}
706
707/*
708 *----------------------------------------------------------------------
709 *
710 * Tcl_SetStringObj --
711 *
712 *	Modify an object to hold a string that is a copy of the bytes
713 *	indicated by the byte pointer and length arguments.
714 *
715 * Results:
716 *	None.
717 *
718 * Side effects:
719 *	The object's string representation will be set to a copy of
720 *	the "length" bytes starting at "bytes". If "length" is negative, use
721 *	bytes up to the first NULL byte; i.e., assume "bytes" points to a
722 *	C-style NULL-terminated string. The object's old string and internal
723 *	representations are freed and the object's type is set NULL.
724 *
725 *----------------------------------------------------------------------
726 */
727
728void
729Tcl_SetStringObj(objPtr, bytes, length)
730    register Tcl_Obj *objPtr;	/* Object whose internal rep to init. */
731    CONST char *bytes;		/* Points to the first of the length bytes
732				 * used to initialize the object. */
733    register int length;	/* The number of bytes to copy from "bytes"
734				 * when initializing the object. If
735				 * negative, use bytes up to the first
736				 * NULL byte.*/
737{
738    register Tcl_ObjType *oldTypePtr = objPtr->typePtr;
739
740    if (Tcl_IsShared(objPtr)) {
741	panic("Tcl_SetStringObj called with shared object");
742    }
743
744    /*
745     * Set the type to NULL and free any internal rep for the old type.
746     */
747
748    if ((oldTypePtr != NULL) && (oldTypePtr->freeIntRepProc != NULL)) {
749	oldTypePtr->freeIntRepProc(objPtr);
750    }
751    objPtr->typePtr = NULL;
752
753    /*
754     * Free any old string rep, then set the string rep to a copy of
755     * the length bytes starting at "bytes".
756     */
757
758    Tcl_InvalidateStringRep(objPtr);
759    if (length < 0) {
760	length = (bytes? strlen(bytes) : 0);
761    }
762    TclInitStringRep(objPtr, bytes, length);
763}
764
765/*
766 *----------------------------------------------------------------------
767 *
768 * Tcl_SetObjLength --
769 *
770 *	This procedure changes the length of the string representation
771 *	of an object.
772 *
773 * Results:
774 *	None.
775 *
776 * Side effects:
777 *	If the size of objPtr's string representation is greater than
778 *	length, then it is reduced to length and a new terminating null
779 *	byte is stored in the strength.  If the length of the string
780 *	representation is greater than length, the storage space is
781 *	reallocated to the given length; a null byte is stored at the
782 *	end, but other bytes past the end of the original string
783 *	representation are undefined.  The object's internal
784 *	representation is changed to "expendable string".
785 *
786 *----------------------------------------------------------------------
787 */
788
789void
790Tcl_SetObjLength(objPtr, length)
791    register Tcl_Obj *objPtr;	/* Pointer to object.  This object must
792				 * not currently be shared. */
793    register int length;	/* Number of bytes desired for string
794				 * representation of object, not including
795				 * terminating null byte. */
796{
797    String *stringPtr;
798
799    if (length < 0) {
800	/*
801	 * Setting to a negative length is nonsense.  This is probably the
802	 * result of overflowing the signed integer range.
803	 */
804	Tcl_Panic("Tcl_SetObjLength: negative length requested: "
805		"%d (integer overflow?)", length);
806    }
807    if (Tcl_IsShared(objPtr)) {
808	panic("Tcl_SetObjLength called with shared object");
809    }
810    SetStringFromAny(NULL, objPtr);
811
812    stringPtr = GET_STRING(objPtr);
813
814    /* Check that we're not extending a pure unicode string */
815
816    if ((size_t)length > stringPtr->allocated &&
817	    (objPtr->bytes != NULL || stringPtr->hasUnicode == 0)) {
818	char *new;
819
820	/*
821	 * Not enough space in current string. Reallocate the string
822	 * space and free the old string.
823	 */
824	if (objPtr->bytes != tclEmptyStringRep && objPtr->bytes != NULL) {
825	    new = (char *) ckrealloc((char *)objPtr->bytes,
826		    (unsigned)(length+1));
827	} else {
828	    new = (char *) ckalloc((unsigned) (length+1));
829	    if (objPtr->bytes != NULL && objPtr->length != 0) {
830		memcpy((VOID *) new, (VOID *) objPtr->bytes,
831			(size_t) objPtr->length);
832		Tcl_InvalidateStringRep(objPtr);
833	    }
834	}
835	objPtr->bytes = new;
836	stringPtr->allocated = length;
837	/* Invalidate the unicode data. */
838	stringPtr->hasUnicode = 0;
839    }
840
841    if (objPtr->bytes != NULL) {
842        objPtr->length = length;
843        if (objPtr->bytes != tclEmptyStringRep) {
844            /* Ensure the string is NULL-terminated */
845            objPtr->bytes[length] = 0;
846        }
847        /* Invalidate the unicode data. */
848        stringPtr->numChars = -1;
849        stringPtr->hasUnicode = 0;
850    } else {
851        /* Changing length of pure unicode string */
852        size_t uallocated = STRING_UALLOC(length);
853
854	stringCheckLimits(length);
855        if (uallocated > stringPtr->uallocated) {
856	    stringPtr = stringRealloc(stringPtr, length);
857            SET_STRING(objPtr, stringPtr);
858            stringPtr->uallocated = uallocated;
859        }
860        stringPtr->numChars = length;
861        stringPtr->hasUnicode = (length > 0);
862        /* Ensure the string is NULL-terminated */
863        stringPtr->unicode[length] = 0;
864        stringPtr->allocated = 0;
865        objPtr->length = 0;
866    }
867}
868
869/*
870 *----------------------------------------------------------------------
871 *
872 * Tcl_AttemptSetObjLength --
873 *
874 *	This procedure changes the length of the string representation
875 *	of an object.  It uses the attempt* (non-panic'ing) memory allocators.
876 *
877 * Results:
878 *	1 if the requested memory was allocated, 0 otherwise.
879 *
880 * Side effects:
881 *	If the size of objPtr's string representation is greater than
882 *	length, then it is reduced to length and a new terminating null
883 *	byte is stored in the strength.  If the length of the string
884 *	representation is greater than length, the storage space is
885 *	reallocated to the given length; a null byte is stored at the
886 *	end, but other bytes past the end of the original string
887 *	representation are undefined.  The object's internal
888 *	representation is changed to "expendable string".
889 *
890 *----------------------------------------------------------------------
891 */
892
893int
894Tcl_AttemptSetObjLength(objPtr, length)
895    register Tcl_Obj *objPtr;	/* Pointer to object.  This object must
896				 * not currently be shared. */
897    register int length;	/* Number of bytes desired for string
898				 * representation of object, not including
899				 * terminating null byte. */
900{
901    String *stringPtr;
902
903    if (length < 0) {
904	/*
905	 * Setting to a negative length is nonsense.  This is probably the
906	 * result of overflowing the signed integer range.
907	 */
908	return 0;
909    }
910    if (Tcl_IsShared(objPtr)) {
911	panic("Tcl_AttemptSetObjLength called with shared object");
912    }
913    SetStringFromAny(NULL, objPtr);
914
915    stringPtr = GET_STRING(objPtr);
916
917    /* Check that we're not extending a pure unicode string */
918
919    if (length > (int) stringPtr->allocated &&
920	    (objPtr->bytes != NULL || stringPtr->hasUnicode == 0)) {
921	char *new;
922
923	/*
924	 * Not enough space in current string. Reallocate the string
925	 * space and free the old string.
926	 */
927	if (objPtr->bytes != tclEmptyStringRep && objPtr->bytes != NULL) {
928	    new = (char *) attemptckrealloc((char *)objPtr->bytes,
929		    (unsigned)(length+1));
930	    if (new == NULL) {
931		return 0;
932	    }
933	} else {
934	    new = (char *) attemptckalloc((unsigned) (length+1));
935	    if (new == NULL) {
936		return 0;
937	    }
938	    if (objPtr->bytes != NULL && objPtr->length != 0) {
939 	    	memcpy((VOID *) new, (VOID *) objPtr->bytes,
940 		    	(size_t) objPtr->length);
941 	    	Tcl_InvalidateStringRep(objPtr);
942	    }
943	}
944	objPtr->bytes = new;
945	stringPtr->allocated = length;
946	/* Invalidate the unicode data. */
947	stringPtr->hasUnicode = 0;
948    }
949
950    if (objPtr->bytes != NULL) {
951	objPtr->length = length;
952	if (objPtr->bytes != tclEmptyStringRep) {
953	    /* Ensure the string is NULL-terminated */
954	    objPtr->bytes[length] = 0;
955	}
956	/* Invalidate the unicode data. */
957	stringPtr->numChars = -1;
958	stringPtr->hasUnicode = 0;
959    } else {
960	/* Changing length of pure unicode string */
961	size_t uallocated = STRING_UALLOC(length);
962	if (length > STRING_MAXCHARS) {
963	    return 0;
964	}
965
966	if (uallocated > stringPtr->uallocated) {
967	    stringPtr = stringAttemptRealloc(stringPtr, length);
968	    if (stringPtr == NULL) {
969	        return 0;
970	    }
971	    SET_STRING(objPtr, stringPtr);
972	    stringPtr->uallocated = uallocated;
973	}
974	stringPtr->numChars = length;
975	stringPtr->hasUnicode = (length > 0);
976	/* Ensure the string is NULL-terminated */
977	stringPtr->unicode[length] = 0;
978	stringPtr->allocated = 0;
979	objPtr->length = 0;
980    }
981    return 1;
982}
983
984/*
985 *---------------------------------------------------------------------------
986 *
987 * Tcl_SetUnicodeObj --
988 *
989 *	Modify an object to hold the Unicode string indicated by "unicode".
990 *
991 * Results:
992 *	None.
993 *
994 * Side effects:
995 *	Memory allocated for new "String" internal rep.
996 *
997 *---------------------------------------------------------------------------
998 */
999
1000void
1001Tcl_SetUnicodeObj(objPtr, unicode, numChars)
1002    Tcl_Obj *objPtr;		/* The object to set the string of. */
1003    CONST Tcl_UniChar *unicode;	/* The unicode string used to initialize
1004				 * the object. */
1005    int numChars;		/* Number of characters in the unicode
1006				 * string. */
1007{
1008    Tcl_ObjType *typePtr = objPtr->typePtr;
1009
1010    if (Tcl_IsShared(objPtr)) {
1011	Tcl_Panic("%s called with shared object", "Tcl_SetUnicodeObj");
1012    }
1013    if ((typePtr != NULL) && (typePtr->freeIntRepProc != NULL)) {
1014	typePtr->freeIntRepProc(objPtr);
1015    }
1016    SetUnicodeObj(objPtr, unicode, numChars);
1017}
1018
1019static int
1020UnicodeLength(
1021    const Tcl_UniChar *unicode)
1022{
1023    int numChars = 0;
1024
1025    if (unicode) {
1026	while (numChars >= 0 && unicode[numChars] != 0) {
1027	    numChars++;
1028	}
1029    }
1030    stringCheckLimits(numChars);
1031    return numChars;
1032}
1033
1034static void
1035SetUnicodeObj(objPtr, unicode, numChars)
1036    Tcl_Obj *objPtr;		/* The object to set the string of. */
1037    CONST Tcl_UniChar *unicode;	/* The unicode string used to initialize
1038				 * the object. */
1039    int numChars;		/* Number of characters in the unicode
1040				 * string. */
1041{
1042    String *stringPtr;
1043    size_t uallocated;
1044
1045    if (numChars < 0) {
1046	numChars = UnicodeLength(unicode);
1047    }
1048
1049    /*
1050     * Allocate enough space for the String structure + Unicode string.
1051     */
1052
1053    stringCheckLimits(numChars);
1054    uallocated = STRING_UALLOC(numChars);
1055    stringPtr = (String *) ckalloc(STRING_SIZE(uallocated));
1056
1057    stringPtr->numChars = numChars;
1058    stringPtr->uallocated = uallocated;
1059    stringPtr->hasUnicode = (numChars > 0);
1060    stringPtr->allocated = 0;
1061    memcpy((VOID *) stringPtr->unicode, (VOID *) unicode, uallocated);
1062    stringPtr->unicode[numChars] = 0;
1063
1064    Tcl_InvalidateStringRep(objPtr);
1065    objPtr->typePtr = &tclStringType;
1066    SET_STRING(objPtr, stringPtr);
1067}
1068
1069/*
1070 *----------------------------------------------------------------------
1071 *
1072 * Tcl_AppendToObj --
1073 *
1074 *	This procedure appends a sequence of bytes to an object.
1075 *
1076 * Results:
1077 *	None.
1078 *
1079 * Side effects:
1080 *	The bytes at *bytes are appended to the string representation
1081 *	of objPtr.
1082 *
1083 *----------------------------------------------------------------------
1084 */
1085
1086void
1087Tcl_AppendToObj(objPtr, bytes, length)
1088    register Tcl_Obj *objPtr;	/* Points to the object to append to. */
1089    CONST char *bytes;		/* Points to the bytes to append to the
1090				 * object. */
1091    register int length;	/* The number of bytes to append from
1092				 * "bytes". If < 0, then append all bytes
1093				 * up to NULL byte. */
1094{
1095    String *stringPtr;
1096
1097    if (Tcl_IsShared(objPtr)) {
1098	panic("Tcl_AppendToObj called with shared object");
1099    }
1100
1101    SetStringFromAny(NULL, objPtr);
1102
1103    if (length < 0) {
1104	length = (bytes ? strlen(bytes) : 0);
1105    }
1106    if (length == 0) {
1107	return;
1108    }
1109
1110    /*
1111     * If objPtr has a valid Unicode rep, then append the Unicode
1112     * conversion of "bytes" to the objPtr's Unicode rep, otherwise
1113     * append "bytes" to objPtr's string rep.
1114     */
1115
1116    stringPtr = GET_STRING(objPtr);
1117    if (stringPtr->hasUnicode != 0) {
1118	AppendUtfToUnicodeRep(objPtr, bytes, length);
1119
1120	stringPtr = GET_STRING(objPtr);
1121    } else {
1122	AppendUtfToUtfRep(objPtr, bytes, length);
1123    }
1124}
1125
1126/*
1127 *----------------------------------------------------------------------
1128 *
1129 * Tcl_AppendUnicodeToObj --
1130 *
1131 *	This procedure appends a Unicode string to an object in the
1132 *	most efficient manner possible.  Length must be >= 0.
1133 *
1134 * Results:
1135 *	None.
1136 *
1137 * Side effects:
1138 *	Invalidates the string rep and creates a new Unicode string.
1139 *
1140 *----------------------------------------------------------------------
1141 */
1142
1143void
1144Tcl_AppendUnicodeToObj(objPtr, unicode, length)
1145    register Tcl_Obj *objPtr;	/* Points to the object to append to. */
1146    CONST Tcl_UniChar *unicode;	/* The unicode string to append to the
1147			         * object. */
1148    int length;			/* Number of chars in "unicode". */
1149{
1150    String *stringPtr;
1151
1152    if (Tcl_IsShared(objPtr)) {
1153	panic("Tcl_AppendUnicodeToObj called with shared object");
1154    }
1155
1156    if (length == 0) {
1157	return;
1158    }
1159
1160    SetStringFromAny(NULL, objPtr);
1161    stringPtr = GET_STRING(objPtr);
1162
1163    /*
1164     * If objPtr has a valid Unicode rep, then append the "unicode"
1165     * to the objPtr's Unicode rep, otherwise the UTF conversion of
1166     * "unicode" to objPtr's string rep.
1167     */
1168
1169    if (stringPtr->hasUnicode != 0) {
1170	AppendUnicodeToUnicodeRep(objPtr, unicode, length);
1171    } else {
1172	AppendUnicodeToUtfRep(objPtr, unicode, length);
1173    }
1174}
1175
1176/*
1177 *----------------------------------------------------------------------
1178 *
1179 * Tcl_AppendObjToObj --
1180 *
1181 *	This procedure appends the string rep of one object to another.
1182 *	"objPtr" cannot be a shared object.
1183 *
1184 * Results:
1185 *	None.
1186 *
1187 * Side effects:
1188 *	The string rep of appendObjPtr is appended to the string
1189 *	representation of objPtr.
1190 *
1191 *----------------------------------------------------------------------
1192 */
1193
1194void
1195Tcl_AppendObjToObj(objPtr, appendObjPtr)
1196    Tcl_Obj *objPtr;		/* Points to the object to append to. */
1197    Tcl_Obj *appendObjPtr;	/* Object to append. */
1198{
1199    String *stringPtr;
1200    int length, numChars, allOneByteChars;
1201    char *bytes;
1202
1203    SetStringFromAny(NULL, objPtr);
1204
1205    /*
1206     * If objPtr has a valid Unicode rep, then get a Unicode string
1207     * from appendObjPtr and append it.
1208     */
1209
1210    stringPtr = GET_STRING(objPtr);
1211    if (stringPtr->hasUnicode != 0) {
1212
1213	/*
1214	 * If appendObjPtr is not of the "String" type, don't convert it.
1215	 */
1216
1217	if (appendObjPtr->typePtr == &tclStringType) {
1218	    stringPtr = GET_STRING(appendObjPtr);
1219	    if ((stringPtr->numChars == -1)
1220		    || (stringPtr->hasUnicode == 0)) {
1221
1222		/*
1223		 * If appendObjPtr is a string obj with no valid Unicode
1224		 * rep, then fill its unicode rep.
1225		 */
1226
1227		FillUnicodeRep(appendObjPtr);
1228		stringPtr = GET_STRING(appendObjPtr);
1229	    }
1230	    AppendUnicodeToUnicodeRep(objPtr, stringPtr->unicode,
1231		    stringPtr->numChars);
1232	} else {
1233	    bytes = Tcl_GetStringFromObj(appendObjPtr, &length);
1234	    AppendUtfToUnicodeRep(objPtr, bytes, length);
1235	}
1236	return;
1237    }
1238
1239    /*
1240     * Append to objPtr's UTF string rep.  If we know the number of
1241     * characters in both objects before appending, then set the combined
1242     * number of characters in the final (appended-to) object.
1243     */
1244
1245    bytes = Tcl_GetStringFromObj(appendObjPtr, &length);
1246
1247    allOneByteChars = 0;
1248    numChars = stringPtr->numChars;
1249    if ((numChars >= 0) && (appendObjPtr->typePtr == &tclStringType)) {
1250	stringPtr = GET_STRING(appendObjPtr);
1251	if ((stringPtr->numChars >= 0) && (stringPtr->numChars == length)) {
1252	    numChars += stringPtr->numChars;
1253	    allOneByteChars = 1;
1254	}
1255    }
1256
1257    AppendUtfToUtfRep(objPtr, bytes, length);
1258
1259    if (allOneByteChars) {
1260	stringPtr = GET_STRING(objPtr);
1261	stringPtr->numChars = numChars;
1262    }
1263}
1264
1265/*
1266 *----------------------------------------------------------------------
1267 *
1268 * AppendUnicodeToUnicodeRep --
1269 *
1270 *	This procedure appends the contents of "unicode" to the Unicode
1271 *	rep of "objPtr".  objPtr must already have a valid Unicode rep.
1272 *
1273 * Results:
1274 *	None.
1275 *
1276 * Side effects:
1277 *	objPtr's internal rep is reallocated.
1278 *
1279 *----------------------------------------------------------------------
1280 */
1281
1282static void
1283AppendUnicodeToUnicodeRep(objPtr, unicode, appendNumChars)
1284    Tcl_Obj *objPtr;	        /* Points to the object to append to. */
1285    CONST Tcl_UniChar *unicode; /* String to append. */
1286    int appendNumChars;	        /* Number of chars of "unicode" to append. */
1287{
1288    String *stringPtr;
1289    size_t numChars;
1290
1291    if (appendNumChars < 0) {
1292	appendNumChars = UnicodeLength(unicode);
1293    }
1294    if (appendNumChars == 0) {
1295	return;
1296    }
1297
1298    SetStringFromAny(NULL, objPtr);
1299    stringPtr = GET_STRING(objPtr);
1300
1301    /*
1302     * If not enough space has been allocated for the unicode rep,
1303     * reallocate the internal rep object with additional space.  First
1304     * try to double the required allocation; if that fails, try a more
1305     * modest increase.  See the "TCL STRING GROWTH ALGORITHM" comment at
1306     * the top of this file for an explanation of this growth algorithm.
1307     */
1308
1309    numChars = stringPtr->numChars + appendNumChars;
1310    stringCheckLimits(numChars);
1311
1312    if (STRING_UALLOC(numChars) >= stringPtr->uallocated) {
1313	/*
1314	 * Protect against case where unicode points into the existing
1315	 * stringPtr->unicode array.  Force it to follow any relocations
1316	 * due to the reallocs below.
1317	 */
1318	int offset = -1;
1319	if (unicode >= stringPtr->unicode && unicode <= stringPtr->unicode
1320		+ 1 + stringPtr->uallocated / sizeof(Tcl_UniChar)) {
1321	    offset = unicode - stringPtr->unicode;
1322	}
1323
1324	GrowUnicodeBuffer(objPtr, numChars);
1325	stringPtr = GET_STRING(objPtr);
1326
1327	/* Relocate unicode if needed; see above. */
1328	if (offset >= 0) {
1329	    unicode = stringPtr->unicode + offset;
1330	}
1331    }
1332
1333    /*
1334     * Copy the new string onto the end of the old string, then add the
1335     * trailing null.
1336     */
1337
1338    memcpy((VOID*) (stringPtr->unicode + stringPtr->numChars), unicode,
1339	    appendNumChars * sizeof(Tcl_UniChar));
1340    stringPtr->unicode[numChars] = 0;
1341    stringPtr->numChars = numChars;
1342    stringPtr->allocated = 0;
1343
1344    Tcl_InvalidateStringRep(objPtr);
1345}
1346
1347/*
1348 *----------------------------------------------------------------------
1349 *
1350 * AppendUnicodeToUtfRep --
1351 *
1352 *	This procedure converts the contents of "unicode" to UTF and
1353 *	appends the UTF to the string rep of "objPtr".
1354 *
1355 * Results:
1356 *	None.
1357 *
1358 * Side effects:
1359 *	objPtr's internal rep is reallocated.
1360 *
1361 *----------------------------------------------------------------------
1362 */
1363
1364static void
1365AppendUnicodeToUtfRep(objPtr, unicode, numChars)
1366    Tcl_Obj *objPtr;	        /* Points to the object to append to. */
1367    CONST Tcl_UniChar *unicode; /* String to convert to UTF. */
1368    int numChars;	        /* Number of chars of "unicode" to convert. */
1369{
1370    Tcl_DString dsPtr;
1371    CONST char *bytes;
1372
1373    if (numChars < 0) {
1374	numChars = UnicodeLength(unicode);
1375    }
1376    if (numChars == 0) {
1377	return;
1378    }
1379
1380    Tcl_DStringInit(&dsPtr);
1381    bytes = Tcl_UniCharToUtfDString(unicode, numChars, &dsPtr);
1382    AppendUtfToUtfRep(objPtr, bytes, Tcl_DStringLength(&dsPtr));
1383    Tcl_DStringFree(&dsPtr);
1384}
1385
1386/*
1387 *----------------------------------------------------------------------
1388 *
1389 * AppendUtfToUnicodeRep --
1390 *
1391 *	This procedure converts the contents of "bytes" to Unicode and
1392 *	appends the Unicode to the Unicode rep of "objPtr".  objPtr must
1393 *	already have a valid Unicode rep.
1394 *
1395 * Results:
1396 *	None.
1397 *
1398 * Side effects:
1399 *	objPtr's internal rep is reallocated.
1400 *
1401 *----------------------------------------------------------------------
1402 */
1403
1404static void
1405AppendUtfToUnicodeRep(objPtr, bytes, numBytes)
1406    Tcl_Obj *objPtr;	/* Points to the object to append to. */
1407    CONST char *bytes;	/* String to convert to Unicode. */
1408    int numBytes;	/* Number of bytes of "bytes" to convert. */
1409{
1410    Tcl_DString dsPtr;
1411    int numChars;
1412    Tcl_UniChar *unicode;
1413
1414    if (numBytes < 0) {
1415	numBytes = (bytes ? strlen(bytes) : 0);
1416    }
1417    if (numBytes == 0) {
1418	return;
1419    }
1420
1421    Tcl_DStringInit(&dsPtr);
1422    numChars = Tcl_NumUtfChars(bytes, numBytes);
1423    unicode = (Tcl_UniChar *)Tcl_UtfToUniCharDString(bytes, numBytes, &dsPtr);
1424    AppendUnicodeToUnicodeRep(objPtr, unicode, numChars);
1425    Tcl_DStringFree(&dsPtr);
1426}
1427
1428/*
1429 *----------------------------------------------------------------------
1430 *
1431 * AppendUtfToUtfRep --
1432 *
1433 *	This procedure appends "numBytes" bytes of "bytes" to the UTF string
1434 *	rep of "objPtr".  objPtr must already have a valid String rep.
1435 *
1436 * Results:
1437 *	None.
1438 *
1439 * Side effects:
1440 *	objPtr's internal rep is reallocated.
1441 *
1442 *----------------------------------------------------------------------
1443 */
1444
1445static void
1446AppendUtfToUtfRep(objPtr, bytes, numBytes)
1447    Tcl_Obj *objPtr;	/* Points to the object to append to. */
1448    CONST char *bytes;	/* String to append. */
1449    int numBytes;	/* Number of bytes of "bytes" to append. */
1450{
1451    String *stringPtr;
1452    int newLength, oldLength;
1453
1454    if (numBytes < 0) {
1455	numBytes = (bytes ? strlen(bytes) : 0);
1456    }
1457    if (numBytes == 0) {
1458	return;
1459    }
1460
1461    /*
1462     * Copy the new string onto the end of the old string, then add the
1463     * trailing null.
1464     */
1465
1466    oldLength = objPtr->length;
1467    newLength = numBytes + oldLength;
1468    if (newLength < 0) {
1469	Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
1470    }
1471
1472    stringPtr = GET_STRING(objPtr);
1473    if (newLength > (int) stringPtr->allocated) {
1474        /*
1475	 * Protect against case where unicode points into the existing
1476	 * stringPtr->unicode array.  Force it to follow any relocations
1477	 * due to the reallocs below.
1478	 */
1479	int offset = -1;
1480	if (bytes >= objPtr->bytes
1481		&& bytes <= objPtr->bytes + objPtr->length) {
1482	    offset = bytes - objPtr->bytes;
1483	}
1484
1485	/*
1486	 * There isn't currently enough space in the string representation
1487	 * so allocate additional space.  First, try to double the length
1488	 * required.  If that fails, try a more modest allocation.  See the
1489	 * "TCL STRING GROWTH ALGORITHM" comment at the top of this file for an
1490	 * explanation of this growth algorithm.
1491	 */
1492
1493	if (Tcl_AttemptSetObjLength(objPtr, 2 * newLength) == 0) {
1494	    /*
1495	     * Take care computing the amount of modest growth to avoid
1496	     * overflow into invalid argument values for Tcl_SetObjLength.
1497	     */
1498	    unsigned int limit = INT_MAX - newLength;
1499	    unsigned int extra = numBytes + TCL_GROWTH_MIN_ALLOC;
1500	    int growth = (int) ((extra > limit) ? limit : extra);
1501
1502	    Tcl_SetObjLength(objPtr, newLength + growth);
1503	}
1504
1505	/* Relocate bytes if needed; see above. */
1506	if (offset >=0) {
1507	    bytes = objPtr->bytes + offset;
1508	}
1509    }
1510
1511    /*
1512     * Invalidate the unicode data.
1513     */
1514
1515    stringPtr->numChars = -1;
1516    stringPtr->hasUnicode = 0;
1517
1518    memcpy((VOID *) (objPtr->bytes + oldLength), (VOID *) bytes,
1519	    (size_t) numBytes);
1520    objPtr->bytes[newLength] = 0;
1521    objPtr->length = newLength;
1522}
1523
1524/*
1525 *----------------------------------------------------------------------
1526 *
1527 * Tcl_AppendStringsToObjVA --
1528 *
1529 *	This procedure appends one or more null-terminated strings
1530 *	to an object.
1531 *
1532 * Results:
1533 *	None.
1534 *
1535 * Side effects:
1536 *	The contents of all the string arguments are appended to the
1537 *	string representation of objPtr.
1538 *
1539 *----------------------------------------------------------------------
1540 */
1541
1542void
1543Tcl_AppendStringsToObjVA (objPtr, argList)
1544    Tcl_Obj *objPtr;		/* Points to the object to append to. */
1545    va_list argList;		/* Variable argument list. */
1546{
1547#define STATIC_LIST_SIZE 16
1548    String *stringPtr;
1549    int newLength, oldLength, attemptLength;
1550    register char *string, *dst;
1551    char *static_list[STATIC_LIST_SIZE];
1552    char **args = static_list;
1553    int nargs_space = STATIC_LIST_SIZE;
1554    int nargs, i;
1555
1556    if (Tcl_IsShared(objPtr)) {
1557	panic("Tcl_AppendStringsToObj called with shared object");
1558    }
1559
1560    SetStringFromAny(NULL, objPtr);
1561
1562    /*
1563     * Force the existence of a string rep. so we avoid crashes operating
1564     * on a pure unicode value.  [Bug 2597185]
1565     */
1566
1567    (void) Tcl_GetStringFromObj(objPtr, &oldLength);
1568
1569    /*
1570     * Figure out how much space is needed for all the strings, and
1571     * expand the string representation if it isn't big enough. If no
1572     * bytes would be appended, just return.  Note that on some platforms
1573     * (notably OS/390) the argList is an array so we need to use memcpy.
1574     */
1575
1576    nargs = 0;
1577    newLength = 0;
1578    while (1) {
1579	string = va_arg(argList, char *);
1580	if (string == NULL) {
1581	    break;
1582	}
1583 	if (nargs >= nargs_space) {
1584 	    /*
1585 	     * Expand the args buffer
1586 	     */
1587 	    nargs_space += STATIC_LIST_SIZE;
1588 	    if (args == static_list) {
1589 	    	args = (void *)ckalloc(nargs_space * sizeof(char *));
1590 		for (i = 0; i < nargs; ++i) {
1591 		    args[i] = static_list[i];
1592 		}
1593 	    } else {
1594 		args = (void *)ckrealloc((void *)args,
1595			nargs_space * sizeof(char *));
1596 	    }
1597 	}
1598	newLength += strlen(string);
1599	args[nargs++] = string;
1600    }
1601    if (newLength == 0) {
1602	goto done;
1603    }
1604
1605    stringPtr = GET_STRING(objPtr);
1606    if (oldLength + newLength > (int) stringPtr->allocated) {
1607
1608	/*
1609	 * There isn't currently enough space in the string
1610	 * representation, so allocate additional space.  If the current
1611	 * string representation isn't empty (i.e. it looks like we're
1612	 * doing a series of appends) then try to allocate extra space to
1613	 * accomodate future growth: first try to double the required memory;
1614	 * if that fails, try a more modest allocation.  See the "TCL STRING
1615	 * GROWTH ALGORITHM" comment at the top of this file for an explanation
1616	 * of this growth algorithm.  Otherwise, if the current string
1617	 * representation is empty, exactly enough memory is allocated.
1618	 */
1619
1620	if (oldLength == 0) {
1621	    Tcl_SetObjLength(objPtr, newLength);
1622	} else {
1623	    attemptLength = 2 * (oldLength + newLength);
1624	    if (Tcl_AttemptSetObjLength(objPtr, attemptLength) == 0) {
1625		attemptLength = oldLength + (2 * newLength) +
1626		    TCL_GROWTH_MIN_ALLOC;
1627		Tcl_SetObjLength(objPtr, attemptLength);
1628	    }
1629	}
1630    }
1631
1632    /*
1633     * Make a second pass through the arguments, appending all the
1634     * strings to the object.
1635     */
1636
1637    dst = objPtr->bytes + oldLength;
1638    for (i = 0; i < nargs; ++i) {
1639 	string = args[i];
1640	if (string == NULL) {
1641	    break;
1642	}
1643	while (*string != 0) {
1644	    *dst = *string;
1645	    dst++;
1646	    string++;
1647	}
1648    }
1649
1650    /*
1651     * Add a null byte to terminate the string.  However, be careful:
1652     * it's possible that the object is totally empty (if it was empty
1653     * originally and there was nothing to append).  In this case dst is
1654     * NULL; just leave everything alone.
1655     */
1656
1657    if (dst != NULL) {
1658	*dst = 0;
1659    }
1660    objPtr->length = oldLength + newLength;
1661
1662    done:
1663    /*
1664     * If we had to allocate a buffer from the heap,
1665     * free it now.
1666     */
1667
1668    if (args != static_list) {
1669     	ckfree((void *)args);
1670    }
1671#undef STATIC_LIST_SIZE
1672}
1673
1674/*
1675 *----------------------------------------------------------------------
1676 *
1677 * Tcl_AppendStringsToObj --
1678 *
1679 *	This procedure appends one or more null-terminated strings
1680 *	to an object.
1681 *
1682 * Results:
1683 *	None.
1684 *
1685 * Side effects:
1686 *	The contents of all the string arguments are appended to the
1687 *	string representation of objPtr.
1688 *
1689 *----------------------------------------------------------------------
1690 */
1691
1692void
1693Tcl_AppendStringsToObj TCL_VARARGS_DEF(Tcl_Obj *,arg1)
1694{
1695    register Tcl_Obj *objPtr;
1696    va_list argList;
1697
1698    objPtr = TCL_VARARGS_START(Tcl_Obj *,arg1,argList);
1699    Tcl_AppendStringsToObjVA(objPtr, argList);
1700    va_end(argList);
1701}
1702
1703/*
1704 *---------------------------------------------------------------------------
1705 *
1706 * FillUnicodeRep --
1707 *
1708 *	Populate the Unicode internal rep with the Unicode form of its string
1709 *	rep.  The object must alread have a "String" internal rep.
1710 *
1711 * Results:
1712 *	None.
1713 *
1714 * Side effects:
1715 *	Reallocates the String internal rep.
1716 *
1717 *---------------------------------------------------------------------------
1718 */
1719
1720static void
1721FillUnicodeRep(objPtr)
1722    Tcl_Obj *objPtr;	/* The object in which to fill the unicode rep. */
1723{
1724    String *stringPtr;
1725    size_t uallocated;
1726    char *src, *srcEnd;
1727    Tcl_UniChar *dst;
1728    src = objPtr->bytes;
1729
1730    stringPtr = GET_STRING(objPtr);
1731    if (stringPtr->numChars == -1) {
1732	stringPtr->numChars = Tcl_NumUtfChars(src, objPtr->length);
1733    }
1734    stringPtr->hasUnicode = (stringPtr->numChars > 0);
1735
1736    stringCheckLimits(stringPtr->numChars);
1737    uallocated = STRING_UALLOC(stringPtr->numChars);
1738    if (uallocated > stringPtr->uallocated) {
1739	GrowUnicodeBuffer(objPtr, stringPtr->numChars);
1740	stringPtr = GET_STRING(objPtr);
1741    }
1742
1743    /*
1744     * Convert src to Unicode and store the coverted data in "unicode".
1745     */
1746
1747    srcEnd = src + objPtr->length;
1748    for (dst = stringPtr->unicode; src < srcEnd; dst++) {
1749	src += TclUtfToUniChar(src, dst);
1750    }
1751    *dst = 0;
1752
1753    SET_STRING(objPtr, stringPtr);
1754}
1755
1756/*
1757 *----------------------------------------------------------------------
1758 *
1759 * DupStringInternalRep --
1760 *
1761 *	Initialize the internal representation of a new Tcl_Obj to a
1762 *	copy of the internal representation of an existing string object.
1763 *
1764 * Results:
1765 *	None.
1766 *
1767 * Side effects:
1768 *	copyPtr's internal rep is set to a copy of srcPtr's internal
1769 *	representation.
1770 *
1771 *----------------------------------------------------------------------
1772 */
1773
1774static void
1775DupStringInternalRep(srcPtr, copyPtr)
1776    register Tcl_Obj *srcPtr;	/* Object with internal rep to copy.  Must
1777				 * have an internal rep of type "String". */
1778    register Tcl_Obj *copyPtr;	/* Object with internal rep to set.  Must
1779				 * not currently have an internal rep.*/
1780{
1781    String *srcStringPtr = GET_STRING(srcPtr);
1782    String *copyStringPtr = NULL;
1783
1784    /*
1785     * If the src obj is a string of 1-byte Utf chars, then copy the
1786     * string rep of the source object and create an "empty" Unicode
1787     * internal rep for the new object.  Otherwise, copy Unicode
1788     * internal rep, and invalidate the string rep of the new object.
1789     */
1790
1791    if (srcStringPtr->hasUnicode == 0) {
1792    	copyStringPtr = (String *) ckalloc(sizeof(String));
1793	copyStringPtr->uallocated = 0;
1794    } else {
1795	copyStringPtr = (String *) ckalloc(
1796	    STRING_SIZE(srcStringPtr->uallocated));
1797	copyStringPtr->uallocated = srcStringPtr->uallocated;
1798
1799	memcpy((VOID *) copyStringPtr->unicode,
1800		(VOID *) srcStringPtr->unicode,
1801		(size_t) srcStringPtr->numChars * sizeof(Tcl_UniChar));
1802	copyStringPtr->unicode[srcStringPtr->numChars] = 0;
1803    }
1804    copyStringPtr->numChars = srcStringPtr->numChars;
1805    copyStringPtr->hasUnicode = srcStringPtr->hasUnicode;
1806    copyStringPtr->allocated = srcStringPtr->allocated;
1807
1808    /*
1809     * Tricky point: the string value was copied by generic object
1810     * management code, so it doesn't contain any extra bytes that
1811     * might exist in the source object.
1812     */
1813
1814    copyStringPtr->allocated = copyPtr->length;
1815
1816    SET_STRING(copyPtr, copyStringPtr);
1817    copyPtr->typePtr = &tclStringType;
1818}
1819
1820/*
1821 *----------------------------------------------------------------------
1822 *
1823 * SetStringFromAny --
1824 *
1825 *	Create an internal representation of type "String" for an object.
1826 *
1827 * Results:
1828 *	This operation always succeeds and returns TCL_OK.
1829 *
1830 * Side effects:
1831 *	Any old internal reputation for objPtr is freed and the
1832 *	internal representation is set to "String".
1833 *
1834 *----------------------------------------------------------------------
1835 */
1836
1837static int
1838SetStringFromAny(interp, objPtr)
1839    Tcl_Interp *interp;		/* Used for error reporting if not NULL. */
1840    register Tcl_Obj *objPtr;	/* The object to convert. */
1841{
1842    /*
1843     * The Unicode object is optimized for the case where each UTF char
1844     * in a string is only one byte.  In this case, we store the value of
1845     * numChars, but we don't copy the bytes to the unicodeObj->unicode.
1846     */
1847
1848    if (objPtr->typePtr != &tclStringType) {
1849	String *stringPtr;
1850
1851	if (objPtr->typePtr != NULL) {
1852	    if (objPtr->bytes == NULL) {
1853		objPtr->typePtr->updateStringProc(objPtr);
1854	    }
1855	    if ((objPtr->typePtr->freeIntRepProc) != NULL) {
1856		(*objPtr->typePtr->freeIntRepProc)(objPtr);
1857	    }
1858	}
1859	objPtr->typePtr = &tclStringType;
1860
1861	/*
1862	 * Allocate enough space for the basic String structure.
1863	 */
1864
1865	stringPtr = (String *) ckalloc(sizeof(String));
1866	stringPtr->numChars = -1;
1867	stringPtr->uallocated = 0;
1868	stringPtr->hasUnicode = 0;
1869
1870	if (objPtr->bytes != NULL) {
1871	    stringPtr->allocated = objPtr->length;
1872	    if (objPtr->bytes != tclEmptyStringRep) {
1873		objPtr->bytes[objPtr->length] = 0;
1874	    }
1875	} else {
1876	    objPtr->length = 0;
1877	}
1878	SET_STRING(objPtr, stringPtr);
1879    }
1880    return TCL_OK;
1881}
1882
1883/*
1884 *----------------------------------------------------------------------
1885 *
1886 * UpdateStringOfString --
1887 *
1888 *	Update the string representation for an object whose internal
1889 *	representation is "String".
1890 *
1891 * Results:
1892 *	None.
1893 *
1894 * Side effects:
1895 *	The object's string may be set by converting its Unicode
1896 *	represention to UTF format.
1897 *
1898 *----------------------------------------------------------------------
1899 */
1900
1901static void
1902UpdateStringOfString(objPtr)
1903    Tcl_Obj *objPtr;		/* Object with string rep to update. */
1904{
1905    int i, size;
1906    Tcl_UniChar *unicode;
1907    char dummy[TCL_UTF_MAX];
1908    char *dst;
1909    String *stringPtr;
1910
1911    stringPtr = GET_STRING(objPtr);
1912    if ((objPtr->bytes == NULL) || (stringPtr->allocated == 0)) {
1913
1914	if (stringPtr->numChars <= 0) {
1915
1916	    /*
1917	     * If there is no Unicode rep, or the string has 0 chars,
1918	     * then set the string rep to an empty string.
1919	     */
1920
1921	    objPtr->bytes = tclEmptyStringRep;
1922	    objPtr->length = 0;
1923	    return;
1924	}
1925
1926	unicode = stringPtr->unicode;
1927
1928	/*
1929	 * Translate the Unicode string to UTF.  "size" will hold the
1930	 * amount of space the UTF string needs.
1931	 */
1932
1933	if (stringPtr->numChars <= INT_MAX/TCL_UTF_MAX
1934		&& stringPtr->allocated >= (size_t) (stringPtr->numChars * TCL_UTF_MAX)) {
1935	    goto copyBytes;
1936	}
1937
1938	size = 0;
1939	for (i = 0; i < stringPtr->numChars && size >= 0; i++) {
1940	    size += Tcl_UniCharToUtf((int) unicode[i], dummy);
1941	}
1942	if (size < 0) {
1943	    Tcl_Panic("max size for a Tcl value (%d bytes) exceeded", INT_MAX);
1944	}
1945
1946	objPtr->bytes = (char *) ckalloc((unsigned) (size + 1));
1947	objPtr->length = size;
1948	stringPtr->allocated = size;
1949
1950    copyBytes:
1951	dst = objPtr->bytes;
1952	for (i = 0; i < stringPtr->numChars; i++) {
1953	    dst += Tcl_UniCharToUtf(unicode[i], dst);
1954	}
1955	*dst = '\0';
1956    }
1957    return;
1958}
1959
1960/*
1961 *----------------------------------------------------------------------
1962 *
1963 * FreeStringInternalRep --
1964 *
1965 *	Deallocate the storage associated with a String data object's
1966 *	internal representation.
1967 *
1968 * Results:
1969 *	None.
1970 *
1971 * Side effects:
1972 *	Frees memory.
1973 *
1974 *----------------------------------------------------------------------
1975 */
1976
1977static void
1978FreeStringInternalRep(objPtr)
1979    Tcl_Obj *objPtr;		/* Object with internal rep to free. */
1980{
1981    ckfree((char *) GET_STRING(objPtr));
1982}
1983