1/*
2 * tclCmdMZ.c --
3 *
4 *	This file contains the top-level command routines for most of the Tcl
5 *	built-in commands whose names begin with the letters M to Z. It
6 *	contains only commands in the generic core (i.e. those that don't
7 *	depend much upon UNIX facilities).
8 *
9 * Copyright (c) 1987-1993 The Regents of the University of California.
10 * Copyright (c) 1994-1997 Sun Microsystems, Inc.
11 * Copyright (c) 1998-2000 Scriptics Corporation.
12 * Copyright (c) 2002 ActiveState Corporation.
13 * Copyright (c) 2003 Donal K. Fellows.
14 *
15 * See the file "license.terms" for information on usage and redistribution of
16 * this file, and for a DISCLAIMER OF ALL WARRANTIES.
17 *
18 * RCS: @(#) $Id: tclCmdMZ.c,v 1.163.2.9 2010/08/12 08:55:38 dkf Exp $
19 */
20
21#include "tclInt.h"
22#include "tclRegexp.h"
23
24static int		UniCharIsAscii(int character);
25static int		UniCharIsHexDigit(int character);
26
27/*
28 *----------------------------------------------------------------------
29 *
30 * Tcl_PwdObjCmd --
31 *
32 *	This procedure is invoked to process the "pwd" Tcl command. See the
33 *	user documentation for details on what it does.
34 *
35 * Results:
36 *	A standard Tcl result.
37 *
38 * Side effects:
39 *	See the user documentation.
40 *
41 *----------------------------------------------------------------------
42 */
43
44int
45Tcl_PwdObjCmd(
46    ClientData dummy,		/* Not used. */
47    Tcl_Interp *interp,		/* Current interpreter. */
48    int objc,			/* Number of arguments. */
49    Tcl_Obj *CONST objv[])	/* Argument objects. */
50{
51    Tcl_Obj *retVal;
52
53    if (objc != 1) {
54	Tcl_WrongNumArgs(interp, 1, objv, NULL);
55	return TCL_ERROR;
56    }
57
58    retVal = Tcl_FSGetCwd(interp);
59    if (retVal == NULL) {
60	return TCL_ERROR;
61    }
62    Tcl_SetObjResult(interp, retVal);
63    Tcl_DecrRefCount(retVal);
64    return TCL_OK;
65}
66
67/*
68 *----------------------------------------------------------------------
69 *
70 * Tcl_RegexpObjCmd --
71 *
72 *	This procedure is invoked to process the "regexp" Tcl command. See
73 *	the user documentation for details on what it does.
74 *
75 * Results:
76 *	A standard Tcl result.
77 *
78 * Side effects:
79 *	See the user documentation.
80 *
81 *----------------------------------------------------------------------
82 */
83
84int
85Tcl_RegexpObjCmd(
86    ClientData dummy,		/* Not used. */
87    Tcl_Interp *interp,		/* Current interpreter. */
88    int objc,			/* Number of arguments. */
89    Tcl_Obj *CONST objv[])	/* Argument objects. */
90{
91    int i, indices, match, about, offset, all, doinline, numMatchesSaved;
92    int cflags, eflags, stringLength, matchLength;
93    Tcl_RegExp regExpr;
94    Tcl_Obj *objPtr, *startIndex = NULL, *resultPtr = NULL;
95    Tcl_RegExpInfo info;
96    static CONST char *options[] = {
97	"-all",		"-about",	"-indices",	"-inline",
98	"-expanded",	"-line",	"-linestop",	"-lineanchor",
99	"-nocase",	"-start",	"--",		NULL
100    };
101    enum options {
102	REGEXP_ALL,	REGEXP_ABOUT,	REGEXP_INDICES,	REGEXP_INLINE,
103	REGEXP_EXPANDED,REGEXP_LINE,	REGEXP_LINESTOP,REGEXP_LINEANCHOR,
104	REGEXP_NOCASE,	REGEXP_START,	REGEXP_LAST
105    };
106
107    indices = 0;
108    about = 0;
109    cflags = TCL_REG_ADVANCED;
110    eflags = 0;
111    offset = 0;
112    all = 0;
113    doinline = 0;
114
115    for (i = 1; i < objc; i++) {
116	char *name;
117	int index;
118
119	name = TclGetString(objv[i]);
120	if (name[0] != '-') {
121	    break;
122	}
123	if (Tcl_GetIndexFromObj(interp, objv[i], options, "switch", TCL_EXACT,
124		&index) != TCL_OK) {
125	    goto optionError;
126	}
127	switch ((enum options) index) {
128	case REGEXP_ALL:
129	    all = 1;
130	    break;
131	case REGEXP_INDICES:
132	    indices = 1;
133	    break;
134	case REGEXP_INLINE:
135	    doinline = 1;
136	    break;
137	case REGEXP_NOCASE:
138	    cflags |= TCL_REG_NOCASE;
139	    break;
140	case REGEXP_ABOUT:
141	    about = 1;
142	    break;
143	case REGEXP_EXPANDED:
144	    cflags |= TCL_REG_EXPANDED;
145	    break;
146	case REGEXP_LINE:
147	    cflags |= TCL_REG_NEWLINE;
148	    break;
149	case REGEXP_LINESTOP:
150	    cflags |= TCL_REG_NLSTOP;
151	    break;
152	case REGEXP_LINEANCHOR:
153	    cflags |= TCL_REG_NLANCH;
154	    break;
155	case REGEXP_START: {
156	    int temp;
157	    if (++i >= objc) {
158		goto endOfForLoop;
159	    }
160	    if (TclGetIntForIndexM(interp, objv[i], 0, &temp) != TCL_OK) {
161		goto optionError;
162	    }
163	    if (startIndex) {
164		Tcl_DecrRefCount(startIndex);
165	    }
166	    startIndex = objv[i];
167	    Tcl_IncrRefCount(startIndex);
168	    break;
169	}
170	case REGEXP_LAST:
171	    i++;
172	    goto endOfForLoop;
173	}
174    }
175
176  endOfForLoop:
177    if ((objc - i) < (2 - about)) {
178	Tcl_WrongNumArgs(interp, 1, objv,
179	    "?switches? exp string ?matchVar? ?subMatchVar subMatchVar ...?");
180	goto optionError;
181    }
182    objc -= i;
183    objv += i;
184
185    /*
186     * Check if the user requested -inline, but specified match variables; a
187     * no-no.
188     */
189
190    if (doinline && ((objc - 2) != 0)) {
191	Tcl_AppendResult(interp, "regexp match variables not allowed"
192		" when using -inline", NULL);
193	goto optionError;
194    }
195
196    /*
197     * Handle the odd about case separately.
198     */
199
200    if (about) {
201	regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags);
202	if ((regExpr == NULL) || (TclRegAbout(interp, regExpr) < 0)) {
203	optionError:
204	    if (startIndex) {
205		Tcl_DecrRefCount(startIndex);
206	    }
207	    return TCL_ERROR;
208	}
209	return TCL_OK;
210    }
211
212    /*
213     * Get the length of the string that we are matching against so we can do
214     * the termination test for -all matches. Do this before getting the
215     * regexp to avoid shimmering problems.
216     */
217
218    objPtr = objv[1];
219    stringLength = Tcl_GetCharLength(objPtr);
220
221    if (startIndex) {
222	TclGetIntForIndexM(NULL, startIndex, stringLength, &offset);
223	Tcl_DecrRefCount(startIndex);
224	if (offset < 0) {
225	    offset = 0;
226	}
227    }
228
229    regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags);
230    if (regExpr == NULL) {
231	return TCL_ERROR;
232    }
233
234    objc -= 2;
235    objv += 2;
236
237    if (doinline) {
238	/*
239	 * Save all the subexpressions, as we will return them as a list
240	 */
241
242	numMatchesSaved = -1;
243    } else {
244	/*
245	 * Save only enough subexpressions for matches we want to keep, expect
246	 * in the case of -all, where we need to keep at least one to know
247	 * where to move the offset.
248	 */
249
250	numMatchesSaved = (objc == 0) ? all : objc;
251    }
252
253    /*
254     * The following loop is to handle multiple matches within the same source
255     * string; each iteration handles one match. If "-all" hasn't been
256     * specified then the loop body only gets executed once. We terminate the
257     * loop when the starting offset is past the end of the string.
258     */
259
260    while (1) {
261	/*
262	 * Pass either 0 or TCL_REG_NOTBOL in the eflags. Passing
263	 * TCL_REG_NOTBOL indicates that the character at offset should not be
264	 * considered the start of the line. If for example the pattern {^} is
265	 * passed and -start is positive, then the pattern will not match the
266	 * start of the string unless the previous character is a newline.
267	 */
268
269	if ((offset == 0) || ((offset > 0) &&
270		(Tcl_GetUniChar(objPtr, offset-1) == (Tcl_UniChar)'\n'))) {
271	    eflags = 0;
272	} else {
273	    eflags = TCL_REG_NOTBOL;
274	}
275
276	match = Tcl_RegExpExecObj(interp, regExpr, objPtr, offset,
277		numMatchesSaved, eflags);
278	if (match < 0) {
279	    return TCL_ERROR;
280	}
281
282	if (match == 0) {
283	    /*
284	     * We want to set the value of the intepreter result only when
285	     * this is the first time through the loop.
286	     */
287
288	    if (all <= 1) {
289		/*
290		 * If inlining, the interpreter's object result remains an
291		 * empty list, otherwise set it to an integer object w/ value
292		 * 0.
293		 */
294
295		if (!doinline) {
296		    Tcl_SetObjResult(interp, Tcl_NewIntObj(0));
297		}
298		return TCL_OK;
299	    }
300	    break;
301	}
302
303	/*
304	 * If additional variable names have been specified, return index
305	 * information in those variables.
306	 */
307
308	Tcl_RegExpGetInfo(regExpr, &info);
309	if (doinline) {
310	    /*
311	     * It's the number of substitutions, plus one for the matchVar at
312	     * index 0
313	     */
314
315	    objc = info.nsubs + 1;
316	    if (all <= 1) {
317		resultPtr = Tcl_NewObj();
318	    }
319	}
320	for (i = 0; i < objc; i++) {
321	    Tcl_Obj *newPtr;
322
323	    if (indices) {
324		int start, end;
325		Tcl_Obj *objs[2];
326
327		/*
328		 * Only adjust the match area if there was a match for that
329		 * area. (Scriptics Bug 4391/SF Bug #219232)
330		 */
331
332		if (i <= info.nsubs && info.matches[i].start >= 0) {
333		    start = offset + info.matches[i].start;
334		    end = offset + info.matches[i].end;
335
336		    /*
337		     * Adjust index so it refers to the last character in the
338		     * match instead of the first character after the match.
339		     */
340
341		    if (end >= offset) {
342			end--;
343		    }
344		} else {
345		    start = -1;
346		    end = -1;
347		}
348
349		objs[0] = Tcl_NewLongObj(start);
350		objs[1] = Tcl_NewLongObj(end);
351
352		newPtr = Tcl_NewListObj(2, objs);
353	    } else {
354		if (i <= info.nsubs) {
355		    newPtr = Tcl_GetRange(objPtr,
356			    offset + info.matches[i].start,
357			    offset + info.matches[i].end - 1);
358		} else {
359		    newPtr = Tcl_NewObj();
360		}
361	    }
362	    if (doinline) {
363		if (Tcl_ListObjAppendElement(interp, resultPtr, newPtr)
364			!= TCL_OK) {
365		    Tcl_DecrRefCount(newPtr);
366		    Tcl_DecrRefCount(resultPtr);
367		    return TCL_ERROR;
368		}
369	    } else {
370		Tcl_Obj *valuePtr;
371		valuePtr = Tcl_ObjSetVar2(interp, objv[i], NULL, newPtr, 0);
372		if (valuePtr == NULL) {
373		    Tcl_AppendResult(interp, "couldn't set variable \"",
374			    TclGetString(objv[i]), "\"", NULL);
375		    return TCL_ERROR;
376		}
377	    }
378	}
379
380	if (all == 0) {
381	    break;
382	}
383
384	/*
385	 * Adjust the offset to the character just after the last one in the
386	 * matchVar and increment all to count how many times we are making a
387	 * match. We always increment the offset by at least one to prevent
388	 * endless looping (as in the case: regexp -all {a*} a). Otherwise,
389	 * when we match the NULL string at the end of the input string, we
390	 * will loop indefinately (because the length of the match is 0, so
391	 * offset never changes).
392	 */
393
394	matchLength = info.matches[0].end - info.matches[0].start;
395	offset += info.matches[0].end;
396
397	/*
398	 * A match of length zero could happen for {^} {$} or {.*} and in
399	 * these cases we always want to bump the index up one.
400	 */
401
402	if (matchLength == 0) {
403	    offset++;
404	}
405	all++;
406	if (offset >= stringLength) {
407	    break;
408	}
409    }
410
411    /*
412     * Set the interpreter's object result to an integer object with value 1
413     * if -all wasn't specified, otherwise it's all-1 (the number of times
414     * through the while - 1).
415     */
416
417    if (doinline) {
418	Tcl_SetObjResult(interp, resultPtr);
419    } else {
420	Tcl_SetObjResult(interp, Tcl_NewIntObj(all ? all-1 : 1));
421    }
422    return TCL_OK;
423}
424
425/*
426 *----------------------------------------------------------------------
427 *
428 * Tcl_RegsubObjCmd --
429 *
430 *	This procedure is invoked to process the "regsub" Tcl command. See the
431 *	user documentation for details on what it does.
432 *
433 * Results:
434 *	A standard Tcl result.
435 *
436 * Side effects:
437 *	See the user documentation.
438 *
439 *----------------------------------------------------------------------
440 */
441
442int
443Tcl_RegsubObjCmd(
444    ClientData dummy,		/* Not used. */
445    Tcl_Interp *interp,		/* Current interpreter. */
446    int objc,			/* Number of arguments. */
447    Tcl_Obj *CONST objv[])	/* Argument objects. */
448{
449    int idx, result, cflags, all, wlen, wsublen, numMatches, offset;
450    int start, end, subStart, subEnd, match;
451    Tcl_RegExp regExpr;
452    Tcl_RegExpInfo info;
453    Tcl_Obj *resultPtr, *subPtr, *objPtr, *startIndex = NULL;
454    Tcl_UniChar ch, *wsrc, *wfirstChar, *wstring, *wsubspec, *wend;
455
456    static CONST char *options[] = {
457	"-all",		"-nocase",	"-expanded",
458	"-line",	"-linestop",	"-lineanchor",	"-start",
459	"--",		NULL
460    };
461    enum options {
462	REGSUB_ALL,	REGSUB_NOCASE,	REGSUB_EXPANDED,
463	REGSUB_LINE,	REGSUB_LINESTOP, REGSUB_LINEANCHOR,	REGSUB_START,
464	REGSUB_LAST
465    };
466
467    cflags = TCL_REG_ADVANCED;
468    all = 0;
469    offset = 0;
470    resultPtr = NULL;
471
472    for (idx = 1; idx < objc; idx++) {
473	char *name;
474	int index;
475
476	name = TclGetString(objv[idx]);
477	if (name[0] != '-') {
478	    break;
479	}
480	if (Tcl_GetIndexFromObj(interp, objv[idx], options, "switch",
481		TCL_EXACT, &index) != TCL_OK) {
482	    goto optionError;
483	}
484	switch ((enum options) index) {
485	case REGSUB_ALL:
486	    all = 1;
487	    break;
488	case REGSUB_NOCASE:
489	    cflags |= TCL_REG_NOCASE;
490	    break;
491	case REGSUB_EXPANDED:
492	    cflags |= TCL_REG_EXPANDED;
493	    break;
494	case REGSUB_LINE:
495	    cflags |= TCL_REG_NEWLINE;
496	    break;
497	case REGSUB_LINESTOP:
498	    cflags |= TCL_REG_NLSTOP;
499	    break;
500	case REGSUB_LINEANCHOR:
501	    cflags |= TCL_REG_NLANCH;
502	    break;
503	case REGSUB_START: {
504	    int temp;
505	    if (++idx >= objc) {
506		goto endOfForLoop;
507	    }
508	    if (TclGetIntForIndexM(interp, objv[idx], 0, &temp) != TCL_OK) {
509		goto optionError;
510	    }
511	    if (startIndex) {
512		Tcl_DecrRefCount(startIndex);
513	    }
514	    startIndex = objv[idx];
515	    Tcl_IncrRefCount(startIndex);
516	    break;
517	}
518	case REGSUB_LAST:
519	    idx++;
520	    goto endOfForLoop;
521	}
522    }
523
524  endOfForLoop:
525    if (objc-idx < 3 || objc-idx > 4) {
526	Tcl_WrongNumArgs(interp, 1, objv,
527		"?switches? exp string subSpec ?varName?");
528    optionError:
529	if (startIndex) {
530	    Tcl_DecrRefCount(startIndex);
531	}
532	return TCL_ERROR;
533    }
534
535    objc -= idx;
536    objv += idx;
537
538    if (startIndex) {
539	int stringLength = Tcl_GetCharLength(objv[1]);
540
541	TclGetIntForIndexM(NULL, startIndex, stringLength, &offset);
542	Tcl_DecrRefCount(startIndex);
543	if (offset < 0) {
544	    offset = 0;
545	}
546    }
547
548    if (all && (offset == 0)
549	    && (strpbrk(TclGetString(objv[2]), "&\\") == NULL)
550	    && (strpbrk(TclGetString(objv[0]), "*+?{}()[].\\|^$") == NULL)) {
551	/*
552	 * This is a simple one pair string map situation. We make use of a
553	 * slightly modified version of the one pair STR_MAP code.
554	 */
555
556	int slen, nocase;
557	int (*strCmpFn)(CONST Tcl_UniChar*,CONST Tcl_UniChar*,unsigned long);
558	Tcl_UniChar *p, wsrclc;
559
560	numMatches = 0;
561	nocase = (cflags & TCL_REG_NOCASE);
562	strCmpFn = nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp;
563
564	wsrc = Tcl_GetUnicodeFromObj(objv[0], &slen);
565	wstring = Tcl_GetUnicodeFromObj(objv[1], &wlen);
566	wsubspec = Tcl_GetUnicodeFromObj(objv[2], &wsublen);
567	wend = wstring + wlen - (slen ? slen - 1 : 0);
568	result = TCL_OK;
569
570	if (slen == 0) {
571	    /*
572	     * regsub behavior for "" matches between each character. 'string
573	     * map' skips the "" case.
574	     */
575
576	    if (wstring < wend) {
577		resultPtr = Tcl_NewUnicodeObj(wstring, 0);
578		Tcl_IncrRefCount(resultPtr);
579		for (; wstring < wend; wstring++) {
580		    Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen);
581		    Tcl_AppendUnicodeToObj(resultPtr, wstring, 1);
582		    numMatches++;
583		}
584		wlen = 0;
585	    }
586	} else {
587	    wsrclc = Tcl_UniCharToLower(*wsrc);
588	    for (p = wfirstChar = wstring; wstring < wend; wstring++) {
589		if ((*wstring == *wsrc ||
590			(nocase && Tcl_UniCharToLower(*wstring)==wsrclc)) &&
591			(slen==1 || (strCmpFn(wstring, wsrc,
592				(unsigned long) slen) == 0))) {
593		    if (numMatches == 0) {
594			resultPtr = Tcl_NewUnicodeObj(wstring, 0);
595			Tcl_IncrRefCount(resultPtr);
596		    }
597		    if (p != wstring) {
598			Tcl_AppendUnicodeToObj(resultPtr, p, wstring - p);
599			p = wstring + slen;
600		    } else {
601			p += slen;
602		    }
603		    wstring = p - 1;
604
605		    Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen);
606		    numMatches++;
607		}
608	    }
609	    if (numMatches) {
610		wlen    = wfirstChar + wlen - p;
611		wstring = p;
612	    }
613	}
614	objPtr = NULL;
615	subPtr = NULL;
616	goto regsubDone;
617    }
618
619    regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags);
620    if (regExpr == NULL) {
621	return TCL_ERROR;
622    }
623
624    /*
625     * Make sure to avoid problems where the objects are shared. This can
626     * cause RegExpObj <> UnicodeObj shimmering that causes data corruption.
627     * [Bug #461322]
628     */
629
630    if (objv[1] == objv[0]) {
631	objPtr = Tcl_DuplicateObj(objv[1]);
632    } else {
633	objPtr = objv[1];
634    }
635    wstring = Tcl_GetUnicodeFromObj(objPtr, &wlen);
636    if (objv[2] == objv[0]) {
637	subPtr = Tcl_DuplicateObj(objv[2]);
638    } else {
639	subPtr = objv[2];
640    }
641    wsubspec = Tcl_GetUnicodeFromObj(subPtr, &wsublen);
642
643    result = TCL_OK;
644
645    /*
646     * The following loop is to handle multiple matches within the same source
647     * string; each iteration handles one match and its corresponding
648     * substitution. If "-all" hasn't been specified then the loop body only
649     * gets executed once. We must use 'offset <= wlen' in particular for the
650     * case where the regexp pattern can match the empty string - this is
651     * useful when doing, say, 'regsub -- ^ $str ...' when $str might be
652     * empty.
653     */
654
655    numMatches = 0;
656    for ( ; offset <= wlen; ) {
657
658	/*
659	 * The flags argument is set if string is part of a larger string, so
660	 * that "^" won't match.
661	 */
662
663	match = Tcl_RegExpExecObj(interp, regExpr, objPtr, offset,
664		10 /* matches */, ((offset > 0 &&
665		(wstring[offset-1] != (Tcl_UniChar)'\n'))
666		? TCL_REG_NOTBOL : 0));
667
668	if (match < 0) {
669	    result = TCL_ERROR;
670	    goto done;
671	}
672	if (match == 0) {
673	    break;
674	}
675	if (numMatches == 0) {
676	    resultPtr = Tcl_NewUnicodeObj(wstring, 0);
677	    Tcl_IncrRefCount(resultPtr);
678	    if (offset > 0) {
679		/*
680		 * Copy the initial portion of the string in if an offset was
681		 * specified.
682		 */
683
684		Tcl_AppendUnicodeToObj(resultPtr, wstring, offset);
685	    }
686	}
687	numMatches++;
688
689	/*
690	 * Copy the portion of the source string before the match to the
691	 * result variable.
692	 */
693
694	Tcl_RegExpGetInfo(regExpr, &info);
695	start = info.matches[0].start;
696	end = info.matches[0].end;
697	Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, start);
698
699	/*
700	 * Append the subSpec argument to the variable, making appropriate
701	 * substitutions. This code is a bit hairy because of the backslash
702	 * conventions and because the code saves up ranges of characters in
703	 * subSpec to reduce the number of calls to Tcl_SetVar.
704	 */
705
706	wsrc = wfirstChar = wsubspec;
707	wend = wsubspec + wsublen;
708	for (ch = *wsrc; wsrc != wend; wsrc++, ch = *wsrc) {
709	    if (ch == '&') {
710		idx = 0;
711	    } else if (ch == '\\') {
712		ch = wsrc[1];
713		if ((ch >= '0') && (ch <= '9')) {
714		    idx = ch - '0';
715		} else if ((ch == '\\') || (ch == '&')) {
716		    *wsrc = ch;
717		    Tcl_AppendUnicodeToObj(resultPtr, wfirstChar,
718			    wsrc - wfirstChar + 1);
719		    *wsrc = '\\';
720		    wfirstChar = wsrc + 2;
721		    wsrc++;
722		    continue;
723		} else {
724		    continue;
725		}
726	    } else {
727		continue;
728	    }
729
730	    if (wfirstChar != wsrc) {
731		Tcl_AppendUnicodeToObj(resultPtr, wfirstChar,
732			wsrc - wfirstChar);
733	    }
734
735	    if (idx <= info.nsubs) {
736		subStart = info.matches[idx].start;
737		subEnd = info.matches[idx].end;
738		if ((subStart >= 0) && (subEnd >= 0)) {
739		    Tcl_AppendUnicodeToObj(resultPtr,
740			    wstring + offset + subStart, subEnd - subStart);
741		}
742	    }
743
744	    if (*wsrc == '\\') {
745		wsrc++;
746	    }
747	    wfirstChar = wsrc + 1;
748	}
749
750	if (wfirstChar != wsrc) {
751	    Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar);
752	}
753
754	if (end == 0) {
755	    /*
756	     * Always consume at least one character of the input string in
757	     * order to prevent infinite loops.
758	     */
759
760	    if (offset < wlen) {
761		Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1);
762	    }
763	    offset++;
764	} else {
765	    offset += end;
766	    if (start == end) {
767		/*
768		 * We matched an empty string, which means we must go forward
769		 * one more step so we don't match again at the same spot.
770		 */
771
772		if (offset < wlen) {
773		    Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1);
774		}
775		offset++;
776	    }
777	}
778	if (!all) {
779	    break;
780	}
781    }
782
783    /*
784     * Copy the portion of the source string after the last match to the
785     * result variable.
786     */
787
788  regsubDone:
789    if (numMatches == 0) {
790	/*
791	 * On zero matches, just ignore the offset, since it shouldn't matter
792	 * to us in this case, and the user may have skewed it.
793	 */
794
795	resultPtr = objv[1];
796	Tcl_IncrRefCount(resultPtr);
797    } else if (offset < wlen) {
798	Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset);
799    }
800    if (objc == 4) {
801	if (Tcl_ObjSetVar2(interp, objv[3], NULL, resultPtr, 0) == NULL) {
802	    Tcl_AppendResult(interp, "couldn't set variable \"",
803		    TclGetString(objv[3]), "\"", NULL);
804	    result = TCL_ERROR;
805	} else {
806	    /*
807	     * Set the interpreter's object result to an integer object
808	     * holding the number of matches.
809	     */
810
811	    Tcl_SetObjResult(interp, Tcl_NewIntObj(numMatches));
812	}
813    } else {
814	/*
815	 * No varname supplied, so just return the modified string.
816	 */
817
818	Tcl_SetObjResult(interp, resultPtr);
819    }
820
821  done:
822    if (objPtr && (objv[1] == objv[0])) {
823	Tcl_DecrRefCount(objPtr);
824    }
825    if (subPtr && (objv[2] == objv[0])) {
826	Tcl_DecrRefCount(subPtr);
827    }
828    if (resultPtr) {
829	Tcl_DecrRefCount(resultPtr);
830    }
831    return result;
832}
833
834/*
835 *----------------------------------------------------------------------
836 *
837 * Tcl_RenameObjCmd --
838 *
839 *	This procedure is invoked to process the "rename" Tcl command. See the
840 *	user documentation for details on what it does.
841 *
842 * Results:
843 *	A standard Tcl object result.
844 *
845 * Side effects:
846 *	See the user documentation.
847 *
848 *----------------------------------------------------------------------
849 */
850
851int
852Tcl_RenameObjCmd(
853    ClientData dummy,		/* Arbitrary value passed to the command. */
854    Tcl_Interp *interp,		/* Current interpreter. */
855    int objc,			/* Number of arguments. */
856    Tcl_Obj *CONST objv[])	/* Argument objects. */
857{
858    char *oldName, *newName;
859
860    if (objc != 3) {
861	Tcl_WrongNumArgs(interp, 1, objv, "oldName newName");
862	return TCL_ERROR;
863    }
864
865    oldName = TclGetString(objv[1]);
866    newName = TclGetString(objv[2]);
867    return TclRenameCommand(interp, oldName, newName);
868}
869
870/*
871 *----------------------------------------------------------------------
872 *
873 * Tcl_ReturnObjCmd --
874 *
875 *	This object-based procedure is invoked to process the "return" Tcl
876 *	command. See the user documentation for details on what it does.
877 *
878 * Results:
879 *	A standard Tcl object result.
880 *
881 * Side effects:
882 *	See the user documentation.
883 *
884 *----------------------------------------------------------------------
885 */
886
887int
888Tcl_ReturnObjCmd(
889    ClientData dummy,		/* Not used. */
890    Tcl_Interp *interp,		/* Current interpreter. */
891    int objc,			/* Number of arguments. */
892    Tcl_Obj *CONST objv[])	/* Argument objects. */
893{
894    int code, level;
895    Tcl_Obj *returnOpts;
896
897    /*
898     * General syntax: [return ?-option value ...? ?result?]
899     * An even number of words means an explicit result argument is present.
900     */
901
902    int explicitResult = (0 == (objc % 2));
903    int numOptionWords = objc - 1 - explicitResult;
904
905    if (TCL_ERROR == TclMergeReturnOptions(interp, numOptionWords, objv+1,
906	    &returnOpts, &code, &level)) {
907	return TCL_ERROR;
908    }
909
910    code = TclProcessReturn(interp, code, level, returnOpts);
911    if (explicitResult) {
912	Tcl_SetObjResult(interp, objv[objc-1]);
913    }
914    return code;
915}
916
917/*
918 *----------------------------------------------------------------------
919 *
920 * Tcl_SourceObjCmd --
921 *
922 *	This procedure is invoked to process the "source" Tcl command. See the
923 *	user documentation for details on what it does.
924 *
925 * Results:
926 *	A standard Tcl object result.
927 *
928 * Side effects:
929 *	See the user documentation.
930 *
931 *----------------------------------------------------------------------
932 */
933
934int
935Tcl_SourceObjCmd(
936    ClientData dummy,		/* Not used. */
937    Tcl_Interp *interp,		/* Current interpreter. */
938    int objc,			/* Number of arguments. */
939    Tcl_Obj *CONST objv[])	/* Argument objects. */
940{
941    CONST char *encodingName = NULL;
942    Tcl_Obj *fileName;
943
944    if (objc != 2 && objc !=4) {
945	Tcl_WrongNumArgs(interp, 1, objv, "?-encoding name? fileName");
946	return TCL_ERROR;
947    }
948
949    fileName = objv[objc-1];
950
951    if (objc == 4) {
952	static CONST char *options[] = {
953	    "-encoding", NULL
954	};
955	int index;
956
957	if (TCL_ERROR == Tcl_GetIndexFromObj(interp, objv[1], options,
958		"option", TCL_EXACT, &index)) {
959	    return TCL_ERROR;
960	}
961	encodingName = TclGetString(objv[2]);
962    }
963
964    return Tcl_FSEvalFileEx(interp, fileName, encodingName);
965}
966
967/*
968 *----------------------------------------------------------------------
969 *
970 * Tcl_SplitObjCmd --
971 *
972 *	This procedure is invoked to process the "split" Tcl command. See the
973 *	user documentation for details on what it does.
974 *
975 * Results:
976 *	A standard Tcl result.
977 *
978 * Side effects:
979 *	See the user documentation.
980 *
981 *----------------------------------------------------------------------
982 */
983
984int
985Tcl_SplitObjCmd(
986    ClientData dummy,		/* Not used. */
987    Tcl_Interp *interp,		/* Current interpreter. */
988    int objc,			/* Number of arguments. */
989    Tcl_Obj *CONST objv[])	/* Argument objects. */
990{
991    Tcl_UniChar ch;
992    int len;
993    char *splitChars, *stringPtr, *end;
994    int splitCharLen, stringLen;
995    Tcl_Obj *listPtr, *objPtr;
996
997    if (objc == 2) {
998	splitChars = " \n\t\r";
999	splitCharLen = 4;
1000    } else if (objc == 3) {
1001	splitChars = TclGetStringFromObj(objv[2], &splitCharLen);
1002    } else {
1003	Tcl_WrongNumArgs(interp, 1, objv, "string ?splitChars?");
1004	return TCL_ERROR;
1005    }
1006
1007    stringPtr = TclGetStringFromObj(objv[1], &stringLen);
1008    end = stringPtr + stringLen;
1009    listPtr = Tcl_NewObj();
1010
1011    if (stringLen == 0) {
1012	/*
1013	 * Do nothing.
1014	 */
1015    } else if (splitCharLen == 0) {
1016	Tcl_HashTable charReuseTable;
1017	Tcl_HashEntry *hPtr;
1018	int isNew;
1019
1020	/*
1021	 * Handle the special case of splitting on every character.
1022	 *
1023	 * Uses a hash table to ensure that each kind of character has only
1024	 * one Tcl_Obj instance (multiply-referenced) in the final list. This
1025	 * is a *major* win when splitting on a long string (especially in the
1026	 * megabyte range!) - DKF
1027	 */
1028
1029	Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS);
1030
1031	for ( ; stringPtr < end; stringPtr += len) {
1032	    len = TclUtfToUniChar(stringPtr, &ch);
1033
1034	    /*
1035	     * Assume Tcl_UniChar is an integral type...
1036	     */
1037
1038	    hPtr = Tcl_CreateHashEntry(&charReuseTable, (char*)0+ch, &isNew);
1039	    if (isNew) {
1040		TclNewStringObj(objPtr, stringPtr, len);
1041
1042		/*
1043		 * Don't need to fiddle with refcount...
1044		 */
1045
1046		Tcl_SetHashValue(hPtr, (ClientData) objPtr);
1047	    } else {
1048		objPtr = (Tcl_Obj *) Tcl_GetHashValue(hPtr);
1049	    }
1050	    Tcl_ListObjAppendElement(NULL, listPtr, objPtr);
1051	}
1052	Tcl_DeleteHashTable(&charReuseTable);
1053
1054    } else if (splitCharLen == 1) {
1055	char *p;
1056
1057	/*
1058	 * Handle the special case of splitting on a single character. This is
1059	 * only true for the one-char ASCII case, as one unicode char is > 1
1060	 * byte in length.
1061	 */
1062
1063	while (*stringPtr && (p=strchr(stringPtr,(int)*splitChars)) != NULL) {
1064	    objPtr = Tcl_NewStringObj(stringPtr, p - stringPtr);
1065	    Tcl_ListObjAppendElement(NULL, listPtr, objPtr);
1066	    stringPtr = p + 1;
1067	}
1068	TclNewStringObj(objPtr, stringPtr, end - stringPtr);
1069	Tcl_ListObjAppendElement(NULL, listPtr, objPtr);
1070    } else {
1071	char *element, *p, *splitEnd;
1072	int splitLen;
1073	Tcl_UniChar splitChar;
1074
1075	/*
1076	 * Normal case: split on any of a given set of characters. Discard
1077	 * instances of the split characters.
1078	 */
1079
1080	splitEnd = splitChars + splitCharLen;
1081
1082	for (element = stringPtr; stringPtr < end; stringPtr += len) {
1083	    len = TclUtfToUniChar(stringPtr, &ch);
1084	    for (p = splitChars; p < splitEnd; p += splitLen) {
1085		splitLen = TclUtfToUniChar(p, &splitChar);
1086		if (ch == splitChar) {
1087		    TclNewStringObj(objPtr, element, stringPtr - element);
1088		    Tcl_ListObjAppendElement(NULL, listPtr, objPtr);
1089		    element = stringPtr + len;
1090		    break;
1091		}
1092	    }
1093	}
1094
1095	TclNewStringObj(objPtr, element, stringPtr - element);
1096	Tcl_ListObjAppendElement(NULL, listPtr, objPtr);
1097    }
1098    Tcl_SetObjResult(interp, listPtr);
1099    return TCL_OK;
1100}
1101
1102/*
1103 *----------------------------------------------------------------------
1104 *
1105 * StringFirstCmd --
1106 *
1107 *	This procedure is invoked to process the "string first" Tcl command.
1108 *	See the user documentation for details on what it does.
1109 *
1110 * Results:
1111 *	A standard Tcl result.
1112 *
1113 * Side effects:
1114 *	See the user documentation.
1115 *
1116 *----------------------------------------------------------------------
1117 */
1118
1119static int
1120StringFirstCmd(
1121    ClientData dummy,		/* Not used. */
1122    Tcl_Interp *interp,		/* Current interpreter. */
1123    int objc,			/* Number of arguments. */
1124    Tcl_Obj *const objv[])	/* Argument objects. */
1125{
1126    Tcl_UniChar *ustring1, *ustring2;
1127    int match, start, length1, length2;
1128
1129    if (objc < 3 || objc > 4) {
1130	Tcl_WrongNumArgs(interp, 1, objv,
1131		"needleString haystackString ?startIndex?");
1132	return TCL_ERROR;
1133    }
1134
1135    /*
1136     * We are searching string2 for the sequence string1.
1137     */
1138
1139    match = -1;
1140    start = 0;
1141    length2 = -1;
1142
1143    ustring1 = Tcl_GetUnicodeFromObj(objv[1], &length1);
1144    ustring2 = Tcl_GetUnicodeFromObj(objv[2], &length2);
1145
1146    if (objc == 4) {
1147	/*
1148	 * If a startIndex is specified, we will need to fast forward to that
1149	 * point in the string before we think about a match.
1150	 */
1151
1152	if (TclGetIntForIndexM(interp, objv[3], length2-1, &start) != TCL_OK){
1153	    return TCL_ERROR;
1154	}
1155
1156	/*
1157	 * Reread to prevent shimmering problems.
1158	 */
1159
1160	ustring1 = Tcl_GetUnicodeFromObj(objv[1], &length1);
1161	ustring2 = Tcl_GetUnicodeFromObj(objv[2], &length2);
1162
1163	if (start >= length2) {
1164	    goto str_first_done;
1165	} else if (start > 0) {
1166	    ustring2 += start;
1167	    length2 -= start;
1168	} else if (start < 0) {
1169	    /*
1170	     * Invalid start index mapped to string start; Bug #423581
1171	     */
1172
1173	    start = 0;
1174	}
1175    }
1176
1177    /*
1178     * If the length of the needle is more than the length of the haystack, it
1179     * cannot be contained in there so we can avoid searching. [Bug 2960021]
1180     */
1181
1182    if (length1 > 0 && length1 <= length2) {
1183	register Tcl_UniChar *p, *end;
1184
1185	end = ustring2 + length2 - length1 + 1;
1186	for (p = ustring2;  p < end;  p++) {
1187	    /*
1188	     * Scan forward to find the first character.
1189	     */
1190
1191	    if ((*p == *ustring1) && (TclUniCharNcmp(ustring1, p,
1192		    (unsigned long) length1) == 0)) {
1193		match = p - ustring2;
1194		break;
1195	    }
1196	}
1197    }
1198
1199    /*
1200     * Compute the character index of the matching string by counting the
1201     * number of characters before the match.
1202     */
1203
1204    if ((match != -1) && (objc == 4)) {
1205	match += start;
1206    }
1207
1208  str_first_done:
1209    Tcl_SetObjResult(interp, Tcl_NewIntObj(match));
1210    return TCL_OK;
1211}
1212
1213/*
1214 *----------------------------------------------------------------------
1215 *
1216 * StringLastCmd --
1217 *
1218 *	This procedure is invoked to process the "string last" Tcl command.
1219 *	See the user documentation for details on what it does.
1220 *
1221 * Results:
1222 *	A standard Tcl result.
1223 *
1224 * Side effects:
1225 *	See the user documentation.
1226 *
1227 *----------------------------------------------------------------------
1228 */
1229
1230static int
1231StringLastCmd(
1232    ClientData dummy,		/* Not used. */
1233    Tcl_Interp *interp,		/* Current interpreter. */
1234    int objc,			/* Number of arguments. */
1235    Tcl_Obj *const objv[])	/* Argument objects. */
1236{
1237    Tcl_UniChar *ustring1, *ustring2, *p;
1238    int match, start, length1, length2;
1239
1240    if (objc < 3 || objc > 4) {
1241	Tcl_WrongNumArgs(interp, 1, objv,
1242		"needleString haystackString ?startIndex?");
1243	return TCL_ERROR;
1244    }
1245
1246    /*
1247     * We are searching string2 for the sequence string1.
1248     */
1249
1250    match = -1;
1251    start = 0;
1252    length2 = -1;
1253
1254    ustring1 = Tcl_GetUnicodeFromObj(objv[1], &length1);
1255    ustring2 = Tcl_GetUnicodeFromObj(objv[2], &length2);
1256
1257    if (objc == 4) {
1258	/*
1259	 * If a startIndex is specified, we will need to restrict the string
1260	 * range to that char index in the string
1261	 */
1262
1263	if (TclGetIntForIndexM(interp, objv[3], length2-1, &start) != TCL_OK){
1264	    return TCL_ERROR;
1265	}
1266
1267	/*
1268	 * Reread to prevent shimmering problems.
1269	 */
1270
1271	ustring1 = Tcl_GetUnicodeFromObj(objv[1], &length1);
1272	ustring2 = Tcl_GetUnicodeFromObj(objv[2], &length2);
1273
1274	if (start < 0) {
1275	    goto str_last_done;
1276	} else if (start < length2) {
1277	    p = ustring2 + start + 1 - length1;
1278	} else {
1279	    p = ustring2 + length2 - length1;
1280	}
1281    } else {
1282	p = ustring2 + length2 - length1;
1283    }
1284
1285    /*
1286     * If the length of the needle is more than the length of the haystack, it
1287     * cannot be contained in there so we can avoid searching. [Bug 2960021]
1288     */
1289
1290    if (length1 > 0 && length1 <= length2) {
1291	for (; p >= ustring2; p--) {
1292	    /*
1293	     * Scan backwards to find the first character.
1294	     */
1295
1296	    if ((*p == *ustring1) && !memcmp(ustring1, p,
1297		    sizeof(Tcl_UniChar) * (size_t)length1)) {
1298		match = p - ustring2;
1299		break;
1300	    }
1301	}
1302    }
1303
1304  str_last_done:
1305    Tcl_SetObjResult(interp, Tcl_NewIntObj(match));
1306    return TCL_OK;
1307}
1308
1309/*
1310 *----------------------------------------------------------------------
1311 *
1312 * StringIndexCmd --
1313 *
1314 *	This procedure is invoked to process the "string index" Tcl command.
1315 *	See the user documentation for details on what it does. Note that this
1316 *	command only functions correctly on properly formed Tcl UTF strings.
1317 *
1318 * Results:
1319 *	A standard Tcl result.
1320 *
1321 * Side effects:
1322 *	See the user documentation.
1323 *
1324 *----------------------------------------------------------------------
1325 */
1326
1327static int
1328StringIndexCmd(
1329    ClientData dummy,		/* Not used. */
1330    Tcl_Interp *interp,		/* Current interpreter. */
1331    int objc,			/* Number of arguments. */
1332    Tcl_Obj *const objv[])	/* Argument objects. */
1333{
1334    int length, index;
1335
1336    if (objc != 3) {
1337	Tcl_WrongNumArgs(interp, 1, objv, "string charIndex");
1338	return TCL_ERROR;
1339    }
1340
1341    /*
1342     * If we have a ByteArray object, avoid indexing in the Utf string since
1343     * the byte array contains one byte per character. Otherwise, use the
1344     * Unicode string rep to get the index'th char.
1345     */
1346
1347    if (objv[1]->typePtr == &tclByteArrayType) {
1348	const unsigned char *string =
1349		Tcl_GetByteArrayFromObj(objv[1], &length);
1350
1351	if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK){
1352	    return TCL_ERROR;
1353	}
1354	string = Tcl_GetByteArrayFromObj(objv[1], &length);
1355	if ((index >= 0) && (index < length)) {
1356	    Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(string + index, 1));
1357	}
1358    } else {
1359	/*
1360	 * Get Unicode char length to calulate what 'end' means.
1361	 */
1362
1363	length = Tcl_GetCharLength(objv[1]);
1364
1365	if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK){
1366	    return TCL_ERROR;
1367	}
1368	if ((index >= 0) && (index < length)) {
1369	    char buf[TCL_UTF_MAX];
1370	    Tcl_UniChar ch;
1371
1372	    ch = Tcl_GetUniChar(objv[1], index);
1373	    length = Tcl_UniCharToUtf(ch, buf);
1374	    Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, length));
1375	}
1376    }
1377    return TCL_OK;
1378}
1379
1380/*
1381 *----------------------------------------------------------------------
1382 *
1383 * StringIsCmd --
1384 *
1385 *	This procedure is invoked to process the "string is" Tcl command. See
1386 *	the user documentation for details on what it does. Note that this
1387 *	command only functions correctly on properly formed Tcl UTF strings.
1388 *
1389 * Results:
1390 *	A standard Tcl result.
1391 *
1392 * Side effects:
1393 *	See the user documentation.
1394 *
1395 *----------------------------------------------------------------------
1396 */
1397
1398static int
1399StringIsCmd(
1400    ClientData dummy,		/* Not used. */
1401    Tcl_Interp *interp,		/* Current interpreter. */
1402    int objc,			/* Number of arguments. */
1403    Tcl_Obj *const objv[])	/* Argument objects. */
1404{
1405    const char *string1, *end, *stop;
1406    Tcl_UniChar ch;
1407    int (*chcomp)(int) = NULL;	/* The UniChar comparison function. */
1408    int i, failat = 0, result = 1, strict = 0, index, length1, length2;
1409    Tcl_Obj *objPtr, *failVarObj = NULL;
1410    Tcl_WideInt w;
1411
1412    static const char *isClasses[] = {
1413	"alnum",	"alpha",	"ascii",	"control",
1414	"boolean",	"digit",	"double",	"false",
1415	"graph",	"integer",	"list",		"lower",
1416	"print",	"punct",	"space",	"true",
1417	"upper",	"wideinteger",	"wordchar",	"xdigit",
1418	NULL
1419    };
1420    enum isClasses {
1421	STR_IS_ALNUM, STR_IS_ALPHA,	STR_IS_ASCII,  STR_IS_CONTROL,
1422	STR_IS_BOOL,  STR_IS_DIGIT,	STR_IS_DOUBLE, STR_IS_FALSE,
1423	STR_IS_GRAPH, STR_IS_INT,	STR_IS_LIST,   STR_IS_LOWER,
1424	STR_IS_PRINT, STR_IS_PUNCT,	STR_IS_SPACE,  STR_IS_TRUE,
1425	STR_IS_UPPER, STR_IS_WIDE,	STR_IS_WORD,   STR_IS_XDIGIT
1426    };
1427    static const char *isOptions[] = {
1428	"-strict", "-failindex", NULL
1429    };
1430    enum isOptions {
1431	OPT_STRICT, OPT_FAILIDX
1432    };
1433
1434    if (objc < 3 || objc > 6) {
1435	Tcl_WrongNumArgs(interp, 1, objv,
1436		"class ?-strict? ?-failindex var? str");
1437	return TCL_ERROR;
1438    }
1439    if (Tcl_GetIndexFromObj(interp, objv[1], isClasses, "class", 0,
1440	    &index) != TCL_OK) {
1441	return TCL_ERROR;
1442    }
1443
1444    if (objc != 3) {
1445	for (i = 2; i < objc-1; i++) {
1446	    int idx2;
1447
1448	    if (Tcl_GetIndexFromObj(interp, objv[i], isOptions, "option", 0,
1449		    &idx2) != TCL_OK) {
1450		return TCL_ERROR;
1451	    }
1452	    switch ((enum isOptions) idx2) {
1453	    case OPT_STRICT:
1454		strict = 1;
1455		break;
1456	    case OPT_FAILIDX:
1457		if (i+1 >= objc-1) {
1458		    Tcl_WrongNumArgs(interp, 2, objv,
1459			    "?-strict? ?-failindex var? str");
1460		    return TCL_ERROR;
1461		}
1462		failVarObj = objv[++i];
1463		break;
1464	    }
1465	}
1466    }
1467
1468    /*
1469     * We get the objPtr so that we can short-cut for some classes by checking
1470     * the object type (int and double), but we need the string otherwise,
1471     * because we don't want any conversion of type occuring (as, for example,
1472     * Tcl_Get*FromObj would do).
1473     */
1474
1475    objPtr = objv[objc-1];
1476
1477    /*
1478     * When entering here, result == 1 and failat == 0.
1479     */
1480
1481    switch ((enum isClasses) index) {
1482    case STR_IS_ALNUM:
1483	chcomp = Tcl_UniCharIsAlnum;
1484	break;
1485    case STR_IS_ALPHA:
1486	chcomp = Tcl_UniCharIsAlpha;
1487	break;
1488    case STR_IS_ASCII:
1489	chcomp = UniCharIsAscii;
1490	break;
1491    case STR_IS_BOOL:
1492    case STR_IS_TRUE:
1493    case STR_IS_FALSE:
1494	if (TCL_OK != Tcl_ConvertToType(NULL, objPtr, &tclBooleanType)) {
1495	    if (strict) {
1496		result = 0;
1497	    } else {
1498		string1 = TclGetStringFromObj(objPtr, &length1);
1499		result = length1 == 0;
1500	    }
1501	} else if (((index == STR_IS_TRUE) &&
1502		objPtr->internalRep.longValue == 0)
1503	    || ((index == STR_IS_FALSE) &&
1504		objPtr->internalRep.longValue != 0)) {
1505	    result = 0;
1506	}
1507	break;
1508    case STR_IS_CONTROL:
1509	chcomp = Tcl_UniCharIsControl;
1510	break;
1511    case STR_IS_DIGIT:
1512	chcomp = Tcl_UniCharIsDigit;
1513	break;
1514    case STR_IS_DOUBLE: {
1515	/* TODO */
1516	if ((objPtr->typePtr == &tclDoubleType) ||
1517		(objPtr->typePtr == &tclIntType) ||
1518#ifndef NO_WIDE_TYPE
1519		(objPtr->typePtr == &tclWideIntType) ||
1520#endif
1521		(objPtr->typePtr == &tclBignumType)) {
1522	    break;
1523	}
1524	string1 = TclGetStringFromObj(objPtr, &length1);
1525	if (length1 == 0) {
1526	    if (strict) {
1527		result = 0;
1528	    }
1529	    goto str_is_done;
1530	}
1531	end = string1 + length1;
1532	if (TclParseNumber(NULL, objPtr, NULL, NULL, -1,
1533		(const char **) &stop, 0) != TCL_OK) {
1534	    result = 0;
1535	    failat = 0;
1536	} else {
1537	    failat = stop - string1;
1538	    if (stop < end) {
1539		result = 0;
1540		TclFreeIntRep(objPtr);
1541		objPtr->typePtr = NULL;
1542	    }
1543	}
1544	break;
1545    }
1546    case STR_IS_GRAPH:
1547	chcomp = Tcl_UniCharIsGraph;
1548	break;
1549    case STR_IS_INT:
1550	if (TCL_OK == TclGetIntFromObj(NULL, objPtr, &i)) {
1551	    break;
1552	}
1553	goto failedIntParse;
1554    case STR_IS_WIDE:
1555	if (TCL_OK == Tcl_GetWideIntFromObj(NULL, objPtr, &w)) {
1556	    break;
1557	}
1558
1559    failedIntParse:
1560	string1 = TclGetStringFromObj(objPtr, &length1);
1561	if (length1 == 0) {
1562	    if (strict) {
1563		result = 0;
1564	    }
1565	    goto str_is_done;
1566	}
1567	result = 0;
1568	if (failVarObj == NULL) {
1569	    /*
1570	     * Don't bother computing the failure point if we're not going to
1571	     * return it.
1572	     */
1573
1574	    break;
1575	}
1576	end = string1 + length1;
1577	if (TclParseNumber(NULL, objPtr, NULL, NULL, -1,
1578		(const char **) &stop, TCL_PARSE_INTEGER_ONLY) == TCL_OK) {
1579	    if (stop == end) {
1580		/*
1581		 * Entire string parses as an integer, but rejected by
1582		 * Tcl_Get(Wide)IntFromObj() so we must have overflowed the
1583		 * target type, and our convention is to return failure at
1584		 * index -1 in that situation.
1585		 */
1586
1587		failat = -1;
1588	    } else {
1589		/*
1590		 * Some prefix parsed as an integer, but not the whole string,
1591		 * so return failure index as the point where parsing stopped.
1592		 * Clear out the internal rep, since keeping it would leave
1593		 * *objPtr in an inconsistent state.
1594		 */
1595
1596		failat = stop - string1;
1597		TclFreeIntRep(objPtr);
1598		objPtr->typePtr = NULL;
1599	    }
1600	} else {
1601	    /*
1602	     * No prefix is a valid integer. Fail at beginning.
1603	     */
1604
1605	    failat = 0;
1606	}
1607	break;
1608    case STR_IS_LIST:
1609	/*
1610	 * We ignore the strictness here, since empty strings are always
1611	 * well-formed lists.
1612	 */
1613
1614	if (TCL_OK == TclListObjLength(NULL, objPtr, &length2)) {
1615	    break;
1616	}
1617
1618	if (failVarObj != NULL) {
1619	    /*
1620	     * Need to figure out where the list parsing failed, which is
1621	     * fairly expensive. This is adapted from the core of
1622	     * SetListFromAny().
1623	     */
1624
1625	    const char *elemStart, *nextElem;
1626	    int lenRemain, elemSize, hasBrace;
1627	    register const char *p;
1628
1629	    string1 = TclGetStringFromObj(objPtr, &length1);
1630	    end = string1 + length1;
1631	    failat = -1;
1632	    for (p=string1, lenRemain=length1; lenRemain > 0;
1633		    p=nextElem, lenRemain=end-nextElem) {
1634		if (TCL_ERROR == TclFindElement(NULL, p, lenRemain,
1635			&elemStart, &nextElem, &elemSize, &hasBrace)) {
1636		    Tcl_Obj *tmpStr;
1637
1638		    /*
1639		     * This is the simplest way of getting the number of
1640		     * characters parsed. Note that this is not the same as
1641		     * the number of bytes when parsing strings with non-ASCII
1642		     * characters in them.
1643		     *
1644		     * Skip leading spaces first. This is only really an issue
1645		     * if it is the first "element" that has the failure.
1646		     */
1647
1648		    while (isspace(UCHAR(*p))) {		/* INTL: ? */
1649			p++;
1650		    }
1651		    TclNewStringObj(tmpStr, string1, p-string1);
1652		    failat = Tcl_GetCharLength(tmpStr);
1653		    TclDecrRefCount(tmpStr);
1654		    break;
1655		}
1656	    }
1657	}
1658	result = 0;
1659	break;
1660    case STR_IS_LOWER:
1661	chcomp = Tcl_UniCharIsLower;
1662	break;
1663    case STR_IS_PRINT:
1664	chcomp = Tcl_UniCharIsPrint;
1665	break;
1666    case STR_IS_PUNCT:
1667	chcomp = Tcl_UniCharIsPunct;
1668	break;
1669    case STR_IS_SPACE:
1670	chcomp = Tcl_UniCharIsSpace;
1671	break;
1672    case STR_IS_UPPER:
1673	chcomp = Tcl_UniCharIsUpper;
1674	break;
1675    case STR_IS_WORD:
1676	chcomp = Tcl_UniCharIsWordChar;
1677	break;
1678    case STR_IS_XDIGIT:
1679	chcomp = UniCharIsHexDigit;
1680	break;
1681    }
1682
1683    if (chcomp != NULL) {
1684	string1 = TclGetStringFromObj(objPtr, &length1);
1685	if (length1 == 0) {
1686	    if (strict) {
1687		result = 0;
1688	    }
1689	    goto str_is_done;
1690	}
1691	end = string1 + length1;
1692	for (; string1 < end; string1 += length2, failat++) {
1693	    length2 = TclUtfToUniChar(string1, &ch);
1694	    if (!chcomp(ch)) {
1695		result = 0;
1696		break;
1697	    }
1698	}
1699    }
1700
1701    /*
1702     * Only set the failVarObj when we will return 0 and we have indicated a
1703     * valid fail index (>= 0).
1704     */
1705
1706 str_is_done:
1707    if ((result == 0) && (failVarObj != NULL) &&
1708	Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(failat),
1709		TCL_LEAVE_ERR_MSG) == NULL) {
1710	return TCL_ERROR;
1711    }
1712    Tcl_SetObjResult(interp, Tcl_NewBooleanObj(result));
1713    return TCL_OK;
1714}
1715
1716static int
1717UniCharIsAscii(
1718    int character)
1719{
1720    return (character >= 0) && (character < 0x80);
1721}
1722
1723static int
1724UniCharIsHexDigit(
1725    int character)
1726{
1727    return (character >= 0) && (character < 0x80) && isxdigit(character);
1728}
1729
1730/*
1731 *----------------------------------------------------------------------
1732 *
1733 * StringMapCmd --
1734 *
1735 *	This procedure is invoked to process the "string map" Tcl command. See
1736 *	the user documentation for details on what it does. Note that this
1737 *	command only functions correctly on properly formed Tcl UTF strings.
1738 *
1739 * Results:
1740 *	A standard Tcl result.
1741 *
1742 * Side effects:
1743 *	See the user documentation.
1744 *
1745 *----------------------------------------------------------------------
1746 */
1747
1748static int
1749StringMapCmd(
1750    ClientData dummy,		/* Not used. */
1751    Tcl_Interp *interp,		/* Current interpreter. */
1752    int objc,			/* Number of arguments. */
1753    Tcl_Obj *const objv[])	/* Argument objects. */
1754{
1755    int length1, length2, mapElemc, index;
1756    int nocase = 0, mapWithDict = 0, copySource = 0;
1757    Tcl_Obj **mapElemv, *sourceObj, *resultPtr;
1758    Tcl_UniChar *ustring1, *ustring2, *p, *end;
1759    int (*strCmpFn)(const Tcl_UniChar*, const Tcl_UniChar*, unsigned long);
1760
1761    if (objc < 3 || objc > 4) {
1762	Tcl_WrongNumArgs(interp, 1, objv, "?-nocase? charMap string");
1763	return TCL_ERROR;
1764    }
1765
1766    if (objc == 4) {
1767	const char *string = TclGetStringFromObj(objv[1], &length2);
1768
1769	if ((length2 > 1) &&
1770		strncmp(string, "-nocase", (size_t) length2) == 0) {
1771	    nocase = 1;
1772	} else {
1773	    Tcl_AppendResult(interp, "bad option \"", string,
1774		    "\": must be -nocase", NULL);
1775	    return TCL_ERROR;
1776	}
1777    }
1778
1779    /*
1780     * This test is tricky, but has to be that way or you get other strange
1781     * inconsistencies (see test string-10.20 for illustration why!)
1782     */
1783
1784    if (objv[objc-2]->typePtr == &tclDictType && objv[objc-2]->bytes == NULL){
1785	int i, done;
1786	Tcl_DictSearch search;
1787
1788	/*
1789	 * We know the type exactly, so all dict operations will succeed for
1790	 * sure. This shortens this code quite a bit.
1791	 */
1792
1793	Tcl_DictObjSize(interp, objv[objc-2], &mapElemc);
1794	if (mapElemc == 0) {
1795	    /*
1796	     * Empty charMap, just return whatever string was given.
1797	     */
1798
1799	    Tcl_SetObjResult(interp, objv[objc-1]);
1800	    return TCL_OK;
1801	}
1802
1803	mapElemc *= 2;
1804	mapWithDict = 1;
1805
1806	/*
1807	 * Copy the dictionary out into an array; that's the easiest way to
1808	 * adapt this code...
1809	 */
1810
1811	mapElemv = (Tcl_Obj **)
1812		TclStackAlloc(interp, sizeof(Tcl_Obj *) * mapElemc);
1813	Tcl_DictObjFirst(interp, objv[objc-2], &search, mapElemv+0,
1814		mapElemv+1, &done);
1815	for (i=2 ; i<mapElemc ; i+=2) {
1816	    Tcl_DictObjNext(&search, mapElemv+i, mapElemv+i+1, &done);
1817	}
1818	Tcl_DictObjDone(&search);
1819    } else {
1820	if (TclListObjGetElements(interp, objv[objc-2], &mapElemc,
1821		&mapElemv) != TCL_OK) {
1822	    return TCL_ERROR;
1823	}
1824	if (mapElemc == 0) {
1825	    /*
1826	     * empty charMap, just return whatever string was given.
1827	     */
1828
1829	    Tcl_SetObjResult(interp, objv[objc-1]);
1830	    return TCL_OK;
1831	} else if (mapElemc & 1) {
1832	    /*
1833	     * The charMap must be an even number of key/value items.
1834	     */
1835
1836	    Tcl_SetObjResult(interp,
1837		    Tcl_NewStringObj("char map list unbalanced", -1));
1838	    return TCL_ERROR;
1839	}
1840    }
1841
1842    /*
1843     * Take a copy of the source string object if it is the same as the map
1844     * string to cut out nasty sharing crashes. [Bug 1018562]
1845     */
1846
1847    if (objv[objc-2] == objv[objc-1]) {
1848	sourceObj = Tcl_DuplicateObj(objv[objc-1]);
1849	copySource = 1;
1850    } else {
1851	sourceObj = objv[objc-1];
1852    }
1853    ustring1 = Tcl_GetUnicodeFromObj(sourceObj, &length1);
1854    if (length1 == 0) {
1855	/*
1856	 * Empty input string, just stop now.
1857	 */
1858
1859	goto done;
1860    }
1861    end = ustring1 + length1;
1862
1863    strCmpFn = (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp);
1864
1865    /*
1866     * Force result to be Unicode
1867     */
1868
1869    resultPtr = Tcl_NewUnicodeObj(ustring1, 0);
1870
1871    if (mapElemc == 2) {
1872	/*
1873	 * Special case for one map pair which avoids the extra for loop and
1874	 * extra calls to get Unicode data. The algorithm is otherwise
1875	 * identical to the multi-pair case. This will be >30% faster on
1876	 * larger strings.
1877	 */
1878
1879	int mapLen;
1880	Tcl_UniChar *mapString, u2lc;
1881
1882	ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2);
1883	p = ustring1;
1884	if ((length2 > length1) || (length2 == 0)) {
1885	    /*
1886	     * Match string is either longer than input or empty.
1887	     */
1888
1889	    ustring1 = end;
1890	} else {
1891	    mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen);
1892	    u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0);
1893	    for (; ustring1 < end; ustring1++) {
1894		if (((*ustring1 == *ustring2) ||
1895			(nocase&&Tcl_UniCharToLower(*ustring1)==u2lc)) &&
1896			(length2==1 || strCmpFn(ustring1, ustring2,
1897				(unsigned long) length2) == 0)) {
1898		    if (p != ustring1) {
1899			Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p);
1900			p = ustring1 + length2;
1901		    } else {
1902			p += length2;
1903		    }
1904		    ustring1 = p - 1;
1905
1906		    Tcl_AppendUnicodeToObj(resultPtr, mapString, mapLen);
1907		}
1908	    }
1909	}
1910    } else {
1911	Tcl_UniChar **mapStrings, *u2lc = NULL;
1912	int *mapLens;
1913
1914	/*
1915	 * Precompute pointers to the unicode string and length. This saves us
1916	 * repeated function calls later, significantly speeding up the
1917	 * algorithm. We only need the lowercase first char in the nocase
1918	 * case.
1919	 */
1920
1921	mapStrings = (Tcl_UniChar **) TclStackAlloc(interp,
1922		mapElemc * 2 * sizeof(Tcl_UniChar *));
1923	mapLens = (int *) TclStackAlloc(interp, mapElemc * 2 * sizeof(int));
1924	if (nocase) {
1925	    u2lc = (Tcl_UniChar *) TclStackAlloc(interp,
1926		    mapElemc * sizeof(Tcl_UniChar));
1927	}
1928	for (index = 0; index < mapElemc; index++) {
1929	    mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index],
1930		    mapLens+index);
1931	    if (nocase && ((index % 2) == 0)) {
1932		u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]);
1933	    }
1934	}
1935	for (p = ustring1; ustring1 < end; ustring1++) {
1936	    for (index = 0; index < mapElemc; index += 2) {
1937		/*
1938		 * Get the key string to match on.
1939		 */
1940
1941		ustring2 = mapStrings[index];
1942		length2 = mapLens[index];
1943		if ((length2 > 0) && ((*ustring1 == *ustring2) || (nocase &&
1944			(Tcl_UniCharToLower(*ustring1) == u2lc[index/2]))) &&
1945			/* Restrict max compare length. */
1946			(end-ustring1 >= length2) && ((length2 == 1) ||
1947			!strCmpFn(ustring2, ustring1, (unsigned) length2))) {
1948		    if (p != ustring1) {
1949			/*
1950			 * Put the skipped chars onto the result first.
1951			 */
1952
1953			Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p);
1954			p = ustring1 + length2;
1955		    } else {
1956			p += length2;
1957		    }
1958
1959		    /*
1960		     * Adjust len to be full length of matched string.
1961		     */
1962
1963		    ustring1 = p - 1;
1964
1965		    /*
1966		     * Append the map value to the unicode string.
1967		     */
1968
1969		    Tcl_AppendUnicodeToObj(resultPtr,
1970			    mapStrings[index+1], mapLens[index+1]);
1971		    break;
1972		}
1973	    }
1974	}
1975	if (nocase) {
1976	    TclStackFree(interp, u2lc);
1977	}
1978	TclStackFree(interp, mapLens);
1979	TclStackFree(interp, mapStrings);
1980    }
1981    if (p != ustring1) {
1982	/*
1983	 * Put the rest of the unmapped chars onto result.
1984	 */
1985
1986	Tcl_AppendUnicodeToObj(resultPtr, p, ustring1 - p);
1987    }
1988    Tcl_SetObjResult(interp, resultPtr);
1989  done:
1990    if (mapWithDict) {
1991	TclStackFree(interp, mapElemv);
1992    }
1993    if (copySource) {
1994	Tcl_DecrRefCount(sourceObj);
1995    }
1996    return TCL_OK;
1997}
1998
1999/*
2000 *----------------------------------------------------------------------
2001 *
2002 * StringMatchCmd --
2003 *
2004 *	This procedure is invoked to process the "string match" Tcl command.
2005 *	See the user documentation for details on what it does. Note that this
2006 *	command only functions correctly on properly formed Tcl UTF strings.
2007 *
2008 * Results:
2009 *	A standard Tcl result.
2010 *
2011 * Side effects:
2012 *	See the user documentation.
2013 *
2014 *----------------------------------------------------------------------
2015 */
2016
2017static int
2018StringMatchCmd(
2019    ClientData dummy,		/* Not used. */
2020    Tcl_Interp *interp,		/* Current interpreter. */
2021    int objc,			/* Number of arguments. */
2022    Tcl_Obj *const objv[])	/* Argument objects. */
2023{
2024    int nocase = 0;
2025
2026    if (objc < 3 || objc > 4) {
2027	Tcl_WrongNumArgs(interp, 1, objv, "?-nocase? pattern string");
2028	return TCL_ERROR;
2029    }
2030
2031    if (objc == 4) {
2032	int length;
2033	const char *string = TclGetStringFromObj(objv[1], &length);
2034
2035	if ((length > 1) &&
2036	    strncmp(string, "-nocase", (size_t) length) == 0) {
2037	    nocase = TCL_MATCH_NOCASE;
2038	} else {
2039	    Tcl_AppendResult(interp, "bad option \"", string,
2040		    "\": must be -nocase", NULL);
2041	    return TCL_ERROR;
2042	}
2043    }
2044    Tcl_SetObjResult(interp, Tcl_NewBooleanObj(
2045		TclStringMatchObj(objv[objc-1], objv[objc-2], nocase)));
2046    return TCL_OK;
2047}
2048
2049/*
2050 *----------------------------------------------------------------------
2051 *
2052 * StringRangeCmd --
2053 *
2054 *	This procedure is invoked to process the "string range" Tcl command.
2055 *	See the user documentation for details on what it does. Note that this
2056 *	command only functions correctly on properly formed Tcl UTF strings.
2057 *
2058 * Results:
2059 *	A standard Tcl result.
2060 *
2061 * Side effects:
2062 *	See the user documentation.
2063 *
2064 *----------------------------------------------------------------------
2065 */
2066
2067static int
2068StringRangeCmd(
2069    ClientData dummy,		/* Not used. */
2070    Tcl_Interp *interp,		/* Current interpreter. */
2071    int objc,			/* Number of arguments. */
2072    Tcl_Obj *const objv[])	/* Argument objects. */
2073{
2074    const unsigned char *string;
2075    int length, first, last;
2076
2077    if (objc != 4) {
2078	Tcl_WrongNumArgs(interp, 1, objv, "string first last");
2079	return TCL_ERROR;
2080    }
2081
2082    /*
2083     * If we have a ByteArray object, avoid indexing in the Utf string since
2084     * the byte array contains one byte per character. Otherwise, use the
2085     * Unicode string rep to get the range.
2086     */
2087
2088    if (objv[1]->typePtr == &tclByteArrayType) {
2089	string = Tcl_GetByteArrayFromObj(objv[1], &length);
2090	length--;
2091    } else {
2092	/*
2093	 * Get the length in actual characters.
2094	 */
2095
2096	string = NULL;
2097	length = Tcl_GetCharLength(objv[1]) - 1;
2098    }
2099
2100    if (TclGetIntForIndexM(interp, objv[2], length, &first) != TCL_OK ||
2101	    TclGetIntForIndexM(interp, objv[3], length, &last) != TCL_OK) {
2102	return TCL_ERROR;
2103    }
2104
2105    if (first < 0) {
2106	first = 0;
2107    }
2108    if (last >= length) {
2109	last = length;
2110    }
2111    if (last >= first) {
2112	if (string != NULL) {
2113	    /*
2114	     * Reread the string to prevent shimmering nasties.
2115	     */
2116
2117	    string = Tcl_GetByteArrayFromObj(objv[1], &length);
2118	    Tcl_SetObjResult(interp,
2119		    Tcl_NewByteArrayObj(string+first, last - first + 1));
2120	} else {
2121	    Tcl_SetObjResult(interp, Tcl_GetRange(objv[1], first, last));
2122	}
2123    }
2124    return TCL_OK;
2125}
2126
2127/*
2128 *----------------------------------------------------------------------
2129 *
2130 * StringReptCmd --
2131 *
2132 *	This procedure is invoked to process the "string repeat" Tcl command.
2133 *	See the user documentation for details on what it does. Note that this
2134 *	command only functions correctly on properly formed Tcl UTF strings.
2135 *
2136 * Results:
2137 *	A standard Tcl result.
2138 *
2139 * Side effects:
2140 *	See the user documentation.
2141 *
2142 *----------------------------------------------------------------------
2143 */
2144
2145static int
2146StringReptCmd(
2147    ClientData dummy,		/* Not used. */
2148    Tcl_Interp *interp,		/* Current interpreter. */
2149    int objc,			/* Number of arguments. */
2150    Tcl_Obj *const objv[])	/* Argument objects. */
2151{
2152    const char *string1;
2153    char *string2;
2154    int count, index, length1, length2;
2155    Tcl_Obj *resultPtr;
2156
2157    if (objc != 3) {
2158	Tcl_WrongNumArgs(interp, 1, objv, "string count");
2159	return TCL_ERROR;
2160    }
2161
2162    if (TclGetIntFromObj(interp, objv[2], &count) != TCL_OK) {
2163	return TCL_ERROR;
2164    }
2165
2166    /*
2167     * Check for cases that allow us to skip copying stuff.
2168     */
2169
2170    if (count == 1) {
2171	Tcl_SetObjResult(interp, objv[1]);
2172	goto done;
2173    } else if (count < 1) {
2174	goto done;
2175    }
2176    string1 = TclGetStringFromObj(objv[1], &length1);
2177    if (length1 <= 0) {
2178	goto done;
2179    }
2180
2181    /*
2182     * Only build up a string that has data. Instead of building it up with
2183     * repeated appends, we just allocate the necessary space once and copy
2184     * the string value in.
2185     *
2186     * We have to worry about overflow [Bugs 714106, 2561746].
2187     * At this point we know 1 <= length1 <= INT_MAX and 2 <= count <= INT_MAX.
2188     * We need to keep 2 <= length2 <= INT_MAX.
2189     */
2190
2191    if (count > (INT_MAX / length1)) {
2192	Tcl_SetObjResult(interp, Tcl_ObjPrintf(
2193		"result exceeds max size for a Tcl value (%d bytes)", INT_MAX));
2194	return TCL_ERROR;
2195    }
2196    length2 = length1 * count;
2197
2198    /*
2199     * Include space for the NUL.
2200     */
2201
2202    string2 = attemptckalloc((unsigned) length2 + 1);
2203    if (string2 == NULL) {
2204	/*
2205	 * Alloc failed. Note that in this case we try to do an error message
2206	 * since this is a case that's most likely when the alloc is large and
2207	 * that's easy to do with this API. Note that if we fail allocating a
2208	 * short string, this will likely keel over too (and fatally).
2209	 */
2210
2211	Tcl_SetObjResult(interp, Tcl_ObjPrintf(
2212		"string size overflow, out of memory allocating %u bytes",
2213		length2 + 1));
2214	return TCL_ERROR;
2215    }
2216    for (index = 0; index < count; index++) {
2217	memcpy(string2 + (length1 * index), string1, (size_t) length1);
2218    }
2219    string2[length2] = '\0';
2220
2221    /*
2222     * We have to directly assign this instead of using Tcl_SetStringObj (and
2223     * indirectly TclInitStringRep) because that makes another copy of the
2224     * data.
2225     */
2226
2227    TclNewObj(resultPtr);
2228    resultPtr->bytes = string2;
2229    resultPtr->length = length2;
2230    Tcl_SetObjResult(interp, resultPtr);
2231
2232  done:
2233    return TCL_OK;
2234}
2235
2236/*
2237 *----------------------------------------------------------------------
2238 *
2239 * StringRplcCmd --
2240 *
2241 *	This procedure is invoked to process the "string replace" Tcl command.
2242 *	See the user documentation for details on what it does. Note that this
2243 *	command only functions correctly on properly formed Tcl UTF strings.
2244 *
2245 * Results:
2246 *	A standard Tcl result.
2247 *
2248 * Side effects:
2249 *	See the user documentation.
2250 *
2251 *----------------------------------------------------------------------
2252 */
2253
2254static int
2255StringRplcCmd(
2256    ClientData dummy,		/* Not used. */
2257    Tcl_Interp *interp,		/* Current interpreter. */
2258    int objc,			/* Number of arguments. */
2259    Tcl_Obj *const objv[])	/* Argument objects. */
2260{
2261    Tcl_UniChar *ustring;
2262    int first, last, length;
2263
2264    if (objc < 4 || objc > 5) {
2265	Tcl_WrongNumArgs(interp, 1, objv, "string first last ?string?");
2266	return TCL_ERROR;
2267    }
2268
2269    ustring = Tcl_GetUnicodeFromObj(objv[1], &length);
2270    length--;
2271
2272    if (TclGetIntForIndexM(interp, objv[2], length, &first) != TCL_OK ||
2273	    TclGetIntForIndexM(interp, objv[3], length, &last) != TCL_OK){
2274	return TCL_ERROR;
2275    }
2276
2277    if ((last < first) || (last < 0) || (first > length)) {
2278	Tcl_SetObjResult(interp, objv[1]);
2279    } else {
2280	Tcl_Obj *resultPtr;
2281
2282	ustring = Tcl_GetUnicodeFromObj(objv[1], &length);
2283	length--;
2284
2285	if (first < 0) {
2286	    first = 0;
2287	}
2288
2289	resultPtr = Tcl_NewUnicodeObj(ustring, first);
2290	if (objc == 5) {
2291	    Tcl_AppendObjToObj(resultPtr, objv[4]);
2292	}
2293	if (last < length) {
2294	    Tcl_AppendUnicodeToObj(resultPtr, ustring + last + 1,
2295		    length - last);
2296	}
2297	Tcl_SetObjResult(interp, resultPtr);
2298    }
2299    return TCL_OK;
2300}
2301
2302/*
2303 *----------------------------------------------------------------------
2304 *
2305 * StringRevCmd --
2306 *
2307 *	This procedure is invoked to process the "string reverse" Tcl command.
2308 *	See the user documentation for details on what it does. Note that this
2309 *	command only functions correctly on properly formed Tcl UTF strings.
2310 *
2311 * Results:
2312 *	A standard Tcl result.
2313 *
2314 * Side effects:
2315 *	See the user documentation.
2316 *
2317 *----------------------------------------------------------------------
2318 */
2319
2320static int
2321StringRevCmd(
2322    ClientData dummy,		/* Not used. */
2323    Tcl_Interp *interp,		/* Current interpreter. */
2324    int objc,			/* Number of arguments. */
2325    Tcl_Obj *const objv[])	/* Argument objects. */
2326{
2327    if (objc != 2) {
2328	Tcl_WrongNumArgs(interp, 1, objv, "string");
2329	return TCL_ERROR;
2330    }
2331
2332    Tcl_SetObjResult(interp, TclStringObjReverse(objv[1]));
2333    return TCL_OK;
2334}
2335
2336/*
2337 *----------------------------------------------------------------------
2338 *
2339 * StringStartCmd --
2340 *
2341 *	This procedure is invoked to process the "string wordstart" Tcl
2342 *	command. See the user documentation for details on what it does. Note
2343 *	that this command only functions correctly on properly formed Tcl UTF
2344 *	strings.
2345 *
2346 * Results:
2347 *	A standard Tcl result.
2348 *
2349 * Side effects:
2350 *	See the user documentation.
2351 *
2352 *----------------------------------------------------------------------
2353 */
2354
2355static int
2356StringStartCmd(
2357    ClientData dummy,		/* Not used. */
2358    Tcl_Interp *interp,		/* Current interpreter. */
2359    int objc,			/* Number of arguments. */
2360    Tcl_Obj *const objv[])	/* Argument objects. */
2361{
2362    Tcl_UniChar ch;
2363    const char *p, *string;
2364    int cur, index, length, numChars;
2365
2366    if (objc != 3) {
2367	Tcl_WrongNumArgs(interp, 1, objv, "string index");
2368	return TCL_ERROR;
2369    }
2370
2371    string = TclGetStringFromObj(objv[1], &length);
2372    numChars = Tcl_NumUtfChars(string, length);
2373    if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) {
2374	return TCL_ERROR;
2375    }
2376    string = TclGetStringFromObj(objv[1], &length);
2377    if (index >= numChars) {
2378	index = numChars - 1;
2379    }
2380    cur = 0;
2381    if (index > 0) {
2382	p = Tcl_UtfAtIndex(string, index);
2383	for (cur = index; cur >= 0; cur--) {
2384	    TclUtfToUniChar(p, &ch);
2385	    if (!Tcl_UniCharIsWordChar(ch)) {
2386		break;
2387	    }
2388	    p = Tcl_UtfPrev(p, string);
2389	}
2390	if (cur != index) {
2391	    cur += 1;
2392	}
2393    }
2394    Tcl_SetObjResult(interp, Tcl_NewIntObj(cur));
2395    return TCL_OK;
2396}
2397
2398/*
2399 *----------------------------------------------------------------------
2400 *
2401 * StringEndCmd --
2402 *
2403 *	This procedure is invoked to process the "string wordend" Tcl command.
2404 *	See the user documentation for details on what it does. Note that this
2405 *	command only functions correctly on properly formed Tcl UTF strings.
2406 *
2407 * Results:
2408 *	A standard Tcl result.
2409 *
2410 * Side effects:
2411 *	See the user documentation.
2412 *
2413 *----------------------------------------------------------------------
2414 */
2415
2416static int
2417StringEndCmd(
2418    ClientData dummy,		/* Not used. */
2419    Tcl_Interp *interp,		/* Current interpreter. */
2420    int objc,			/* Number of arguments. */
2421    Tcl_Obj *const objv[])	/* Argument objects. */
2422{
2423    Tcl_UniChar ch;
2424    const char *p, *end, *string;
2425    int cur, index, length, numChars;
2426
2427    if (objc != 3) {
2428	Tcl_WrongNumArgs(interp, 1, objv, "string index");
2429	return TCL_ERROR;
2430    }
2431
2432    string = TclGetStringFromObj(objv[1], &length);
2433    numChars = Tcl_NumUtfChars(string, length);
2434    if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) {
2435	return TCL_ERROR;
2436    }
2437    string = TclGetStringFromObj(objv[1], &length);
2438    if (index < 0) {
2439	index = 0;
2440    }
2441    if (index < numChars) {
2442	p = Tcl_UtfAtIndex(string, index);
2443	end = string+length;
2444	for (cur = index; p < end; cur++) {
2445	    p += TclUtfToUniChar(p, &ch);
2446	    if (!Tcl_UniCharIsWordChar(ch)) {
2447		break;
2448	    }
2449	}
2450	if (cur == index) {
2451	    cur++;
2452	}
2453    } else {
2454	cur = numChars;
2455    }
2456    Tcl_SetObjResult(interp, Tcl_NewIntObj(cur));
2457    return TCL_OK;
2458}
2459
2460/*
2461 *----------------------------------------------------------------------
2462 *
2463 * StringEqualCmd --
2464 *
2465 *	This procedure is invoked to process the "string equal" Tcl command.
2466 *	See the user documentation for details on what it does. Note that this
2467 *	command only functions correctly on properly formed Tcl UTF strings.
2468 *
2469 * Results:
2470 *	A standard Tcl result.
2471 *
2472 * Side effects:
2473 *	See the user documentation.
2474 *
2475 *----------------------------------------------------------------------
2476 */
2477
2478static int
2479StringEqualCmd(
2480    ClientData dummy,		/* Not used. */
2481    Tcl_Interp *interp,		/* Current interpreter. */
2482    int objc,			/* Number of arguments. */
2483    Tcl_Obj *const objv[])	/* Argument objects. */
2484{
2485    /*
2486     * Remember to keep code here in some sync with the byte-compiled versions
2487     * in tclExecute.c (INST_STR_EQ, INST_STR_NEQ and INST_STR_CMP as well as
2488     * the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...).
2489     */
2490
2491    char *string1, *string2;
2492    int length1, length2, i, match, length, nocase = 0, reqlength = -1;
2493    typedef int (*strCmpFn_t)(const char *, const char *, unsigned int);
2494    strCmpFn_t strCmpFn;
2495
2496    if (objc < 3 || objc > 6) {
2497    str_cmp_args:
2498	Tcl_WrongNumArgs(interp, 1, objv,
2499		"?-nocase? ?-length int? string1 string2");
2500	return TCL_ERROR;
2501    }
2502
2503    for (i = 1; i < objc-2; i++) {
2504	string2 = TclGetStringFromObj(objv[i], &length2);
2505	if ((length2 > 1) && !strncmp(string2, "-nocase", (size_t)length2)) {
2506	    nocase = 1;
2507	} else if ((length2 > 1)
2508		&& !strncmp(string2, "-length", (size_t)length2)) {
2509	    if (i+1 >= objc-2) {
2510		goto str_cmp_args;
2511	    }
2512	    ++i;
2513	    if (TclGetIntFromObj(interp, objv[i], &reqlength) != TCL_OK) {
2514		return TCL_ERROR;
2515	    }
2516	} else {
2517	    Tcl_AppendResult(interp, "bad option \"", string2,
2518		    "\": must be -nocase or -length", NULL);
2519	    return TCL_ERROR;
2520	}
2521    }
2522
2523    /*
2524     * From now on, we only access the two objects at the end of the argument
2525     * array.
2526     */
2527
2528    objv += objc-2;
2529
2530    if ((reqlength == 0) || (objv[0] == objv[1])) {
2531	/*
2532	 * Always match at 0 chars of if it is the same obj.
2533	 */
2534
2535	Tcl_SetObjResult(interp, Tcl_NewBooleanObj(1));
2536	return TCL_OK;
2537    }
2538
2539    if (!nocase && objv[0]->typePtr == &tclByteArrayType &&
2540	    objv[1]->typePtr == &tclByteArrayType) {
2541	/*
2542	 * Use binary versions of comparisons since that won't cause undue
2543	 * type conversions and it is much faster. Only do this if we're
2544	 * case-sensitive (which is all that really makes sense with byte
2545	 * arrays anyway, and we have no memcasecmp() for some reason... :^)
2546	 */
2547
2548	string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1);
2549	string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2);
2550	strCmpFn = (strCmpFn_t) memcmp;
2551    } else if ((objv[0]->typePtr == &tclStringType)
2552	    && (objv[1]->typePtr == &tclStringType)) {
2553	/*
2554	 * Do a unicode-specific comparison if both of the args are of String
2555	 * type. In benchmark testing this proved the most efficient check
2556	 * between the unicode and string comparison operations.
2557	 */
2558
2559	string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1);
2560	string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2);
2561	strCmpFn = (strCmpFn_t)
2562		(nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp);
2563    } else {
2564	/*
2565	 * As a catch-all we will work with UTF-8. We cannot use memcmp() as
2566	 * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's
2567	 * utf rep). We can use the more efficient TclpUtfNcmp2 if we are
2568	 * case-sensitive and no specific length was requested.
2569	 */
2570
2571	string1 = (char *) TclGetStringFromObj(objv[0], &length1);
2572	string2 = (char *) TclGetStringFromObj(objv[1], &length2);
2573	if ((reqlength < 0) && !nocase) {
2574	    strCmpFn = (strCmpFn_t) TclpUtfNcmp2;
2575	} else {
2576	    length1 = Tcl_NumUtfChars(string1, length1);
2577	    length2 = Tcl_NumUtfChars(string2, length2);
2578	    strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
2579	}
2580    }
2581
2582    if ((reqlength < 0) && (length1 != length2)) {
2583	match = 1;		/* This will be reversed below. */
2584    } else {
2585	length = (length1 < length2) ? length1 : length2;
2586	if (reqlength > 0 && reqlength < length) {
2587	    length = reqlength;
2588	} else if (reqlength < 0) {
2589	    /*
2590	     * The requested length is negative, so we ignore it by setting it
2591	     * to length + 1 so we correct the match var.
2592	     */
2593
2594	    reqlength = length + 1;
2595	}
2596
2597	match = strCmpFn(string1, string2, (unsigned) length);
2598	if ((match == 0) && (reqlength > length)) {
2599	    match = length1 - length2;
2600	}
2601    }
2602
2603    Tcl_SetObjResult(interp, Tcl_NewBooleanObj(match ? 0 : 1));
2604    return TCL_OK;
2605}
2606
2607/*
2608 *----------------------------------------------------------------------
2609 *
2610 * StringCmpCmd --
2611 *
2612 *	This procedure is invoked to process the "string compare" Tcl command.
2613 *	See the user documentation for details on what it does. Note that this
2614 *	command only functions correctly on properly formed Tcl UTF strings.
2615 *
2616 * Results:
2617 *	A standard Tcl result.
2618 *
2619 * Side effects:
2620 *	See the user documentation.
2621 *
2622 *----------------------------------------------------------------------
2623 */
2624
2625static int
2626StringCmpCmd(
2627    ClientData dummy,		/* Not used. */
2628    Tcl_Interp *interp,		/* Current interpreter. */
2629    int objc,			/* Number of arguments. */
2630    Tcl_Obj *const objv[])	/* Argument objects. */
2631{
2632    /*
2633     * Remember to keep code here in some sync with the byte-compiled versions
2634     * in tclExecute.c (INST_STR_EQ, INST_STR_NEQ and INST_STR_CMP as well as
2635     * the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...).
2636     */
2637
2638    char *string1, *string2;
2639    int length1, length2, i, match, length, nocase = 0, reqlength = -1;
2640    typedef int (*strCmpFn_t)(const char *, const char *, unsigned int);
2641    strCmpFn_t strCmpFn;
2642
2643    if (objc < 3 || objc > 6) {
2644    str_cmp_args:
2645	Tcl_WrongNumArgs(interp, 1, objv,
2646		"?-nocase? ?-length int? string1 string2");
2647	return TCL_ERROR;
2648    }
2649
2650    for (i = 1; i < objc-2; i++) {
2651	string2 = TclGetStringFromObj(objv[i], &length2);
2652	if ((length2 > 1) && !strncmp(string2, "-nocase", (size_t)length2)) {
2653	    nocase = 1;
2654	} else if ((length2 > 1)
2655		&& !strncmp(string2, "-length", (size_t)length2)) {
2656	    if (i+1 >= objc-2) {
2657		goto str_cmp_args;
2658	    }
2659	    ++i;
2660	    if (TclGetIntFromObj(interp, objv[i], &reqlength) != TCL_OK) {
2661		return TCL_ERROR;
2662	    }
2663	} else {
2664	    Tcl_AppendResult(interp, "bad option \"", string2,
2665		    "\": must be -nocase or -length", NULL);
2666	    return TCL_ERROR;
2667	}
2668    }
2669
2670    /*
2671     * From now on, we only access the two objects at the end of the argument
2672     * array.
2673     */
2674
2675    objv += objc-2;
2676
2677    if ((reqlength == 0) || (objv[0] == objv[1])) {
2678	/*
2679	 * Always match at 0 chars of if it is the same obj.
2680	 */
2681
2682	Tcl_SetObjResult(interp, Tcl_NewBooleanObj(0));
2683	return TCL_OK;
2684    }
2685
2686    if (!nocase && objv[0]->typePtr == &tclByteArrayType &&
2687	    objv[1]->typePtr == &tclByteArrayType) {
2688	/*
2689	 * Use binary versions of comparisons since that won't cause undue
2690	 * type conversions and it is much faster. Only do this if we're
2691	 * case-sensitive (which is all that really makes sense with byte
2692	 * arrays anyway, and we have no memcasecmp() for some reason... :^)
2693	 */
2694
2695	string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1);
2696	string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2);
2697	strCmpFn = (strCmpFn_t) memcmp;
2698    } else if ((objv[0]->typePtr == &tclStringType)
2699	    && (objv[1]->typePtr == &tclStringType)) {
2700	/*
2701	 * Do a unicode-specific comparison if both of the args are of String
2702	 * type. In benchmark testing this proved the most efficient check
2703	 * between the unicode and string comparison operations.
2704	 */
2705
2706	string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1);
2707	string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2);
2708	strCmpFn = (strCmpFn_t)
2709		(nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp);
2710    } else {
2711	/*
2712	 * As a catch-all we will work with UTF-8. We cannot use memcmp() as
2713	 * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's
2714	 * utf rep). We can use the more efficient TclpUtfNcmp2 if we are
2715	 * case-sensitive and no specific length was requested.
2716	 */
2717
2718	string1 = (char *) TclGetStringFromObj(objv[0], &length1);
2719	string2 = (char *) TclGetStringFromObj(objv[1], &length2);
2720	if ((reqlength < 0) && !nocase) {
2721	    strCmpFn = (strCmpFn_t) TclpUtfNcmp2;
2722	} else {
2723	    length1 = Tcl_NumUtfChars(string1, length1);
2724	    length2 = Tcl_NumUtfChars(string2, length2);
2725	    strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp);
2726	}
2727    }
2728
2729    length = (length1 < length2) ? length1 : length2;
2730    if (reqlength > 0 && reqlength < length) {
2731	length = reqlength;
2732    } else if (reqlength < 0) {
2733	/*
2734	 * The requested length is negative, so we ignore it by setting it to
2735	 * length + 1 so we correct the match var.
2736	 */
2737
2738	reqlength = length + 1;
2739    }
2740
2741    match = strCmpFn(string1, string2, (unsigned) length);
2742    if ((match == 0) && (reqlength > length)) {
2743	match = length1 - length2;
2744    }
2745
2746    Tcl_SetObjResult(interp,
2747	    Tcl_NewIntObj((match > 0) ? 1 : (match < 0) ? -1 : 0));
2748    return TCL_OK;
2749}
2750
2751/*
2752 *----------------------------------------------------------------------
2753 *
2754 * StringBytesCmd --
2755 *
2756 *	This procedure is invoked to process the "string bytelength" Tcl
2757 *	command. See the user documentation for details on what it does. Note
2758 *	that this command only functions correctly on properly formed Tcl UTF
2759 *	strings.
2760 *
2761 * Results:
2762 *	A standard Tcl result.
2763 *
2764 * Side effects:
2765 *	See the user documentation.
2766 *
2767 *----------------------------------------------------------------------
2768 */
2769
2770static int
2771StringBytesCmd(
2772    ClientData dummy,		/* Not used. */
2773    Tcl_Interp *interp,		/* Current interpreter. */
2774    int objc,			/* Number of arguments. */
2775    Tcl_Obj *const objv[])	/* Argument objects. */
2776{
2777    int length;
2778
2779    if (objc != 2) {
2780	Tcl_WrongNumArgs(interp, 1, objv, "string");
2781	return TCL_ERROR;
2782    }
2783
2784    (void) TclGetStringFromObj(objv[1], &length);
2785    Tcl_SetObjResult(interp, Tcl_NewIntObj(length));
2786    return TCL_OK;
2787}
2788
2789/*
2790 *----------------------------------------------------------------------
2791 *
2792 * StringLenCmd --
2793 *
2794 *	This procedure is invoked to process the "string length" Tcl command.
2795 *	See the user documentation for details on what it does. Note that this
2796 *	command only functions correctly on properly formed Tcl UTF strings.
2797 *
2798 * Results:
2799 *	A standard Tcl result.
2800 *
2801 * Side effects:
2802 *	See the user documentation.
2803 *
2804 *----------------------------------------------------------------------
2805 */
2806
2807static int
2808StringLenCmd(
2809    ClientData dummy,		/* Not used. */
2810    Tcl_Interp *interp,		/* Current interpreter. */
2811    int objc,			/* Number of arguments. */
2812    Tcl_Obj *const objv[])	/* Argument objects. */
2813{
2814    int length;
2815
2816    if (objc != 2) {
2817	Tcl_WrongNumArgs(interp, 1, objv, "string");
2818	return TCL_ERROR;
2819    }
2820
2821    /*
2822     * If we have a ByteArray object, avoid recomputing the string since the
2823     * byte array contains one byte per character. Otherwise, use the Unicode
2824     * string rep to calculate the length.
2825     */
2826
2827    if (objv[1]->typePtr == &tclByteArrayType) {
2828	(void) Tcl_GetByteArrayFromObj(objv[1], &length);
2829    } else {
2830	length = Tcl_GetCharLength(objv[1]);
2831    }
2832    Tcl_SetObjResult(interp, Tcl_NewIntObj(length));
2833    return TCL_OK;
2834}
2835
2836/*
2837 *----------------------------------------------------------------------
2838 *
2839 * StringLowerCmd --
2840 *
2841 *	This procedure is invoked to process the "string tolower" Tcl command.
2842 *	See the user documentation for details on what it does. Note that this
2843 *	command only functions correctly on properly formed Tcl UTF strings.
2844 *
2845 * Results:
2846 *	A standard Tcl result.
2847 *
2848 * Side effects:
2849 *	See the user documentation.
2850 *
2851 *----------------------------------------------------------------------
2852 */
2853
2854static int
2855StringLowerCmd(
2856    ClientData dummy,		/* Not used. */
2857    Tcl_Interp *interp,		/* Current interpreter. */
2858    int objc,			/* Number of arguments. */
2859    Tcl_Obj *const objv[])	/* Argument objects. */
2860{
2861    int length1, length2;
2862    char *string1, *string2;
2863
2864    if (objc < 2 || objc > 4) {
2865	Tcl_WrongNumArgs(interp, 1, objv, "string ?first? ?last?");
2866	return TCL_ERROR;
2867    }
2868
2869    string1 = TclGetStringFromObj(objv[1], &length1);
2870
2871    if (objc == 2) {
2872	Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1);
2873
2874	length1 = Tcl_UtfToLower(TclGetString(resultPtr));
2875	Tcl_SetObjLength(resultPtr, length1);
2876	Tcl_SetObjResult(interp, resultPtr);
2877    } else {
2878	int first, last;
2879	const char *start, *end;
2880	Tcl_Obj *resultPtr;
2881
2882	length1 = Tcl_NumUtfChars(string1, length1) - 1;
2883	if (TclGetIntForIndexM(interp,objv[2],length1, &first) != TCL_OK) {
2884	    return TCL_ERROR;
2885	}
2886	if (first < 0) {
2887	    first = 0;
2888	}
2889	last = first;
2890
2891	if ((objc == 4) && (TclGetIntForIndexM(interp, objv[3], length1,
2892		&last) != TCL_OK)) {
2893	    return TCL_ERROR;
2894	}
2895
2896	if (last >= length1) {
2897	    last = length1;
2898	}
2899	if (last < first) {
2900	    Tcl_SetObjResult(interp, objv[1]);
2901	    return TCL_OK;
2902	}
2903
2904	string1 = TclGetStringFromObj(objv[1], &length1);
2905	start = Tcl_UtfAtIndex(string1, first);
2906	end = Tcl_UtfAtIndex(start, last - first + 1);
2907	resultPtr = Tcl_NewStringObj(string1, end - string1);
2908	string2 = TclGetString(resultPtr) + (start - string1);
2909
2910	length2 = Tcl_UtfToLower(string2);
2911	Tcl_SetObjLength(resultPtr, length2 + (start - string1));
2912
2913	Tcl_AppendToObj(resultPtr, end, -1);
2914	Tcl_SetObjResult(interp, resultPtr);
2915    }
2916
2917    return TCL_OK;
2918}
2919
2920/*
2921 *----------------------------------------------------------------------
2922 *
2923 * StringUpperCmd --
2924 *
2925 *	This procedure is invoked to process the "string toupper" Tcl command.
2926 *	See the user documentation for details on what it does. Note that this
2927 *	command only functions correctly on properly formed Tcl UTF strings.
2928 *
2929 * Results:
2930 *	A standard Tcl result.
2931 *
2932 * Side effects:
2933 *	See the user documentation.
2934 *
2935 *----------------------------------------------------------------------
2936 */
2937
2938static int
2939StringUpperCmd(
2940    ClientData dummy,		/* Not used. */
2941    Tcl_Interp *interp,		/* Current interpreter. */
2942    int objc,			/* Number of arguments. */
2943    Tcl_Obj *const objv[])	/* Argument objects. */
2944{
2945    int length1, length2;
2946    char *string1, *string2;
2947
2948    if (objc < 2 || objc > 4) {
2949	Tcl_WrongNumArgs(interp, 1, objv, "string ?first? ?last?");
2950	return TCL_ERROR;
2951    }
2952
2953    string1 = TclGetStringFromObj(objv[1], &length1);
2954
2955    if (objc == 2) {
2956	Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1);
2957
2958	length1 = Tcl_UtfToUpper(TclGetString(resultPtr));
2959	Tcl_SetObjLength(resultPtr, length1);
2960	Tcl_SetObjResult(interp, resultPtr);
2961    } else {
2962	int first, last;
2963	const char *start, *end;
2964	Tcl_Obj *resultPtr;
2965
2966	length1 = Tcl_NumUtfChars(string1, length1) - 1;
2967	if (TclGetIntForIndexM(interp,objv[2],length1, &first) != TCL_OK) {
2968	    return TCL_ERROR;
2969	}
2970	if (first < 0) {
2971	    first = 0;
2972	}
2973	last = first;
2974
2975	if ((objc == 4) && (TclGetIntForIndexM(interp, objv[3], length1,
2976		&last) != TCL_OK)) {
2977	    return TCL_ERROR;
2978	}
2979
2980	if (last >= length1) {
2981	    last = length1;
2982	}
2983	if (last < first) {
2984	    Tcl_SetObjResult(interp, objv[1]);
2985	    return TCL_OK;
2986	}
2987
2988	string1 = TclGetStringFromObj(objv[1], &length1);
2989	start = Tcl_UtfAtIndex(string1, first);
2990	end = Tcl_UtfAtIndex(start, last - first + 1);
2991	resultPtr = Tcl_NewStringObj(string1, end - string1);
2992	string2 = TclGetString(resultPtr) + (start - string1);
2993
2994	length2 = Tcl_UtfToUpper(string2);
2995	Tcl_SetObjLength(resultPtr, length2 + (start - string1));
2996
2997	Tcl_AppendToObj(resultPtr, end, -1);
2998	Tcl_SetObjResult(interp, resultPtr);
2999    }
3000
3001    return TCL_OK;
3002}
3003
3004/*
3005 *----------------------------------------------------------------------
3006 *
3007 * StringTitleCmd --
3008 *
3009 *	This procedure is invoked to process the "string totitle" Tcl command.
3010 *	See the user documentation for details on what it does. Note that this
3011 *	command only functions correctly on properly formed Tcl UTF strings.
3012 *
3013 * Results:
3014 *	A standard Tcl result.
3015 *
3016 * Side effects:
3017 *	See the user documentation.
3018 *
3019 *----------------------------------------------------------------------
3020 */
3021
3022static int
3023StringTitleCmd(
3024    ClientData dummy,		/* Not used. */
3025    Tcl_Interp *interp,		/* Current interpreter. */
3026    int objc,			/* Number of arguments. */
3027    Tcl_Obj *const objv[])	/* Argument objects. */
3028{
3029    int length1, length2;
3030    char *string1, *string2;
3031
3032    if (objc < 2 || objc > 4) {
3033	Tcl_WrongNumArgs(interp, 1, objv, "string ?first? ?last?");
3034	return TCL_ERROR;
3035    }
3036
3037    string1 = TclGetStringFromObj(objv[1], &length1);
3038
3039    if (objc == 2) {
3040	Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1);
3041
3042	length1 = Tcl_UtfToTitle(TclGetString(resultPtr));
3043	Tcl_SetObjLength(resultPtr, length1);
3044	Tcl_SetObjResult(interp, resultPtr);
3045    } else {
3046	int first, last;
3047	const char *start, *end;
3048	Tcl_Obj *resultPtr;
3049
3050	length1 = Tcl_NumUtfChars(string1, length1) - 1;
3051	if (TclGetIntForIndexM(interp,objv[2],length1, &first) != TCL_OK) {
3052	    return TCL_ERROR;
3053	}
3054	if (first < 0) {
3055	    first = 0;
3056	}
3057	last = first;
3058
3059	if ((objc == 4) && (TclGetIntForIndexM(interp, objv[3], length1,
3060		&last) != TCL_OK)) {
3061	    return TCL_ERROR;
3062	}
3063
3064	if (last >= length1) {
3065	    last = length1;
3066	}
3067	if (last < first) {
3068	    Tcl_SetObjResult(interp, objv[1]);
3069	    return TCL_OK;
3070	}
3071
3072	string1 = TclGetStringFromObj(objv[1], &length1);
3073	start = Tcl_UtfAtIndex(string1, first);
3074	end = Tcl_UtfAtIndex(start, last - first + 1);
3075	resultPtr = Tcl_NewStringObj(string1, end - string1);
3076	string2 = TclGetString(resultPtr) + (start - string1);
3077
3078	length2 = Tcl_UtfToTitle(string2);
3079	Tcl_SetObjLength(resultPtr, length2 + (start - string1));
3080
3081	Tcl_AppendToObj(resultPtr, end, -1);
3082	Tcl_SetObjResult(interp, resultPtr);
3083    }
3084
3085    return TCL_OK;
3086}
3087
3088/*
3089 *----------------------------------------------------------------------
3090 *
3091 * StringTrimCmd --
3092 *
3093 *	This procedure is invoked to process the "string trim" Tcl command.
3094 *	See the user documentation for details on what it does. Note that this
3095 *	command only functions correctly on properly formed Tcl UTF strings.
3096 *
3097 * Results:
3098 *	A standard Tcl result.
3099 *
3100 * Side effects:
3101 *	See the user documentation.
3102 *
3103 *----------------------------------------------------------------------
3104 */
3105
3106static int
3107StringTrimCmd(
3108    ClientData dummy,		/* Not used. */
3109    Tcl_Interp *interp,		/* Current interpreter. */
3110    int objc,			/* Number of arguments. */
3111    Tcl_Obj *const objv[])	/* Argument objects. */
3112{
3113    Tcl_UniChar ch, trim;
3114    register const char *p, *end;
3115    const char *check, *checkEnd, *string1, *string2;
3116    int offset, length1, length2;
3117
3118    if (objc == 3) {
3119	string2 = TclGetStringFromObj(objv[2], &length2);
3120    } else if (objc == 2) {
3121	string2 = " \t\n\r";
3122	length2 = strlen(string2);
3123    } else {
3124	Tcl_WrongNumArgs(interp, 1, objv, "string ?chars?");
3125	return TCL_ERROR;
3126    }
3127    string1 = TclGetStringFromObj(objv[1], &length1);
3128    checkEnd = string2 + length2;
3129
3130    /*
3131     * The outer loop iterates over the string. The inner loop iterates over
3132     * the trim characters. The loops terminate as soon as a non-trim
3133     * character is discovered and string1 is left pointing at the first
3134     * non-trim character.
3135     */
3136
3137    end = string1 + length1;
3138    for (p = string1; p < end; p += offset) {
3139	offset = TclUtfToUniChar(p, &ch);
3140
3141	for (check = string2; ; ) {
3142	    if (check >= checkEnd) {
3143		p = end;
3144		break;
3145	    }
3146	    check += TclUtfToUniChar(check, &trim);
3147	    if (ch == trim) {
3148		length1 -= offset;
3149		string1 += offset;
3150		break;
3151	    }
3152	}
3153    }
3154
3155    /*
3156     * The outer loop iterates over the string. The inner loop iterates over
3157     * the trim characters. The loops terminate as soon as a non-trim
3158     * character is discovered and length1 marks the last non-trim character.
3159     */
3160
3161    end = string1;
3162    for (p = string1 + length1; p > end; ) {
3163	p = Tcl_UtfPrev(p, string1);
3164	offset = TclUtfToUniChar(p, &ch);
3165	check = string2;
3166	while (1) {
3167	    if (check >= checkEnd) {
3168		p = end;
3169		break;
3170	    }
3171	    check += TclUtfToUniChar(check, &trim);
3172	    if (ch == trim) {
3173		length1 -= offset;
3174		break;
3175	    }
3176	}
3177    }
3178
3179    Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1));
3180    return TCL_OK;
3181}
3182
3183/*
3184 *----------------------------------------------------------------------
3185 *
3186 * StringTrimLCmd --
3187 *
3188 *	This procedure is invoked to process the "string trimleft" Tcl
3189 *	command. See the user documentation for details on what it does. Note
3190 *	that this command only functions correctly on properly formed Tcl UTF
3191 *	strings.
3192 *
3193 * Results:
3194 *	A standard Tcl result.
3195 *
3196 * Side effects:
3197 *	See the user documentation.
3198 *
3199 *----------------------------------------------------------------------
3200 */
3201
3202static int
3203StringTrimLCmd(
3204    ClientData dummy,		/* Not used. */
3205    Tcl_Interp *interp,		/* Current interpreter. */
3206    int objc,			/* Number of arguments. */
3207    Tcl_Obj *const objv[])	/* Argument objects. */
3208{
3209    Tcl_UniChar ch, trim;
3210    register const char *p, *end;
3211    const char *check, *checkEnd, *string1, *string2;
3212    int offset, length1, length2;
3213
3214    if (objc == 3) {
3215	string2 = TclGetStringFromObj(objv[2], &length2);
3216    } else if (objc == 2) {
3217	string2 = " \t\n\r";
3218	length2 = strlen(string2);
3219    } else {
3220	Tcl_WrongNumArgs(interp, 1, objv, "string ?chars?");
3221	return TCL_ERROR;
3222    }
3223    string1 = TclGetStringFromObj(objv[1], &length1);
3224    checkEnd = string2 + length2;
3225
3226    /*
3227     * The outer loop iterates over the string. The inner loop iterates over
3228     * the trim characters. The loops terminate as soon as a non-trim
3229     * character is discovered and string1 is left pointing at the first
3230     * non-trim character.
3231     */
3232
3233    end = string1 + length1;
3234    for (p = string1; p < end; p += offset) {
3235	offset = TclUtfToUniChar(p, &ch);
3236
3237	for (check = string2; ; ) {
3238	    if (check >= checkEnd) {
3239		p = end;
3240		break;
3241	    }
3242	    check += TclUtfToUniChar(check, &trim);
3243	    if (ch == trim) {
3244		length1 -= offset;
3245		string1 += offset;
3246		break;
3247	    }
3248	}
3249    }
3250
3251    Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1));
3252    return TCL_OK;
3253}
3254
3255/*
3256 *----------------------------------------------------------------------
3257 *
3258 * StringTrimRCmd --
3259 *
3260 *	This procedure is invoked to process the "string trimright" Tcl
3261 *	command. See the user documentation for details on what it does. Note
3262 *	that this command only functions correctly on properly formed Tcl UTF
3263 *	strings.
3264 *
3265 * Results:
3266 *	A standard Tcl result.
3267 *
3268 * Side effects:
3269 *	See the user documentation.
3270 *
3271 *----------------------------------------------------------------------
3272 */
3273
3274static int
3275StringTrimRCmd(
3276    ClientData dummy,		/* Not used. */
3277    Tcl_Interp *interp,		/* Current interpreter. */
3278    int objc,			/* Number of arguments. */
3279    Tcl_Obj *const objv[])	/* Argument objects. */
3280{
3281    Tcl_UniChar ch, trim;
3282    register const char *p, *end;
3283    const char *check, *checkEnd, *string1, *string2;
3284    int offset, length1, length2;
3285
3286    if (objc == 3) {
3287	string2 = TclGetStringFromObj(objv[2], &length2);
3288    } else if (objc == 2) {
3289	string2 = " \t\n\r";
3290	length2 = strlen(string2);
3291    } else {
3292	Tcl_WrongNumArgs(interp, 1, objv, "string ?chars?");
3293	return TCL_ERROR;
3294    }
3295    string1 = TclGetStringFromObj(objv[1], &length1);
3296    checkEnd = string2 + length2;
3297
3298    /*
3299     * The outer loop iterates over the string. The inner loop iterates over
3300     * the trim characters. The loops terminate as soon as a non-trim
3301     * character is discovered and length1 marks the last non-trim character.
3302     */
3303
3304    end = string1;
3305    for (p = string1 + length1; p > end; ) {
3306	p = Tcl_UtfPrev(p, string1);
3307	offset = TclUtfToUniChar(p, &ch);
3308	check = string2;
3309	while (1) {
3310	    if (check >= checkEnd) {
3311		p = end;
3312		break;
3313	    }
3314	    check += TclUtfToUniChar(check, &trim);
3315	    if (ch == trim) {
3316		length1 -= offset;
3317		break;
3318	    }
3319	}
3320    }
3321
3322    Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1));
3323    return TCL_OK;
3324}
3325
3326/*
3327 *----------------------------------------------------------------------
3328 *
3329 * TclInitStringCmd --
3330 *
3331 *	This procedure creates the "string" Tcl command. See the user
3332 *	documentation for details on what it does. Note that this command only
3333 *	functions correctly on properly formed Tcl UTF strings.
3334 *
3335 *	Also note that the primary methods here (equal, compare, match, ...)
3336 *	have bytecode equivalents. You will find the code for those in
3337 *	tclExecute.c. The code here will only be used in the non-bc case (like
3338 *	in an 'eval').
3339 *
3340 * Results:
3341 *	A standard Tcl result.
3342 *
3343 * Side effects:
3344 *	See the user documentation.
3345 *
3346 *----------------------------------------------------------------------
3347 */
3348
3349Tcl_Command
3350TclInitStringCmd(
3351    Tcl_Interp *interp)		/* Current interpreter. */
3352{
3353    static const EnsembleImplMap stringImplMap[] = {
3354	{"bytelength",	StringBytesCmd,	NULL},
3355	{"compare",	StringCmpCmd,	TclCompileStringCmpCmd},
3356	{"equal",	StringEqualCmd,	TclCompileStringEqualCmd},
3357	{"first",	StringFirstCmd,	NULL},
3358	{"index",	StringIndexCmd,	TclCompileStringIndexCmd},
3359	{"is",		StringIsCmd,	NULL},
3360	{"last",	StringLastCmd,	NULL},
3361	{"length",	StringLenCmd,	TclCompileStringLenCmd},
3362	{"map",		StringMapCmd,	NULL},
3363	{"match",	StringMatchCmd,	TclCompileStringMatchCmd},
3364	{"range",	StringRangeCmd,	NULL},
3365	{"repeat",	StringReptCmd,	NULL},
3366	{"replace",	StringRplcCmd,	NULL},
3367	{"reverse",	StringRevCmd,	NULL},
3368	{"tolower",	StringLowerCmd,	NULL},
3369	{"toupper",	StringUpperCmd,	NULL},
3370	{"totitle",	StringTitleCmd,	NULL},
3371	{"trim",	StringTrimCmd,	NULL},
3372	{"trimleft",	StringTrimLCmd,	NULL},
3373	{"trimright",	StringTrimRCmd,	NULL},
3374	{"wordend",	StringEndCmd,	NULL},
3375	{"wordstart",	StringStartCmd,	NULL},
3376	{NULL}
3377    };
3378
3379    return TclMakeEnsemble(interp, "string", stringImplMap);
3380}
3381
3382/*
3383 *----------------------------------------------------------------------
3384 *
3385 * Tcl_SubstObjCmd --
3386 *
3387 *	This procedure is invoked to process the "subst" Tcl command. See the
3388 *	user documentation for details on what it does. This command relies on
3389 *	Tcl_SubstObj() for its implementation.
3390 *
3391 * Results:
3392 *	A standard Tcl result.
3393 *
3394 * Side effects:
3395 *	See the user documentation.
3396 *
3397 *----------------------------------------------------------------------
3398 */
3399
3400int
3401Tcl_SubstObjCmd(
3402    ClientData dummy,		/* Not used. */
3403    Tcl_Interp *interp,		/* Current interpreter. */
3404    int objc,			/* Number of arguments. */
3405    Tcl_Obj *CONST objv[])	/* Argument objects. */
3406{
3407    static CONST char *substOptions[] = {
3408	"-nobackslashes", "-nocommands", "-novariables", NULL
3409    };
3410    enum substOptions {
3411	SUBST_NOBACKSLASHES, SUBST_NOCOMMANDS, SUBST_NOVARS
3412    };
3413    Tcl_Obj *resultPtr;
3414    int flags, i;
3415
3416    /*
3417     * Parse command-line options.
3418     */
3419
3420    flags = TCL_SUBST_ALL;
3421    for (i = 1; i < (objc-1); i++) {
3422	int optionIndex;
3423
3424	if (Tcl_GetIndexFromObj(interp, objv[i], substOptions, "switch", 0,
3425		&optionIndex) != TCL_OK) {
3426	    return TCL_ERROR;
3427	}
3428	switch (optionIndex) {
3429	case SUBST_NOBACKSLASHES:
3430	    flags &= ~TCL_SUBST_BACKSLASHES;
3431	    break;
3432	case SUBST_NOCOMMANDS:
3433	    flags &= ~TCL_SUBST_COMMANDS;
3434	    break;
3435	case SUBST_NOVARS:
3436	    flags &= ~TCL_SUBST_VARIABLES;
3437	    break;
3438	default:
3439	    Tcl_Panic("Tcl_SubstObjCmd: bad option index to SubstOptions");
3440	}
3441    }
3442    if (i != objc-1) {
3443	Tcl_WrongNumArgs(interp, 1, objv,
3444		"?-nobackslashes? ?-nocommands? ?-novariables? string");
3445	return TCL_ERROR;
3446    }
3447
3448    /*
3449     * Perform the substitution.
3450     */
3451
3452    resultPtr = Tcl_SubstObj(interp, objv[i], flags);
3453
3454    if (resultPtr == NULL) {
3455	return TCL_ERROR;
3456    }
3457    Tcl_SetObjResult(interp, resultPtr);
3458    return TCL_OK;
3459}
3460
3461/*
3462 *----------------------------------------------------------------------
3463 *
3464 * Tcl_SwitchObjCmd --
3465 *
3466 *	This object-based procedure is invoked to process the "switch" Tcl
3467 *	command. See the user documentation for details on what it does.
3468 *
3469 * Results:
3470 *	A standard Tcl object result.
3471 *
3472 * Side effects:
3473 *	See the user documentation.
3474 *
3475 *----------------------------------------------------------------------
3476 */
3477
3478int
3479Tcl_SwitchObjCmd(
3480    ClientData dummy,		/* Not used. */
3481    Tcl_Interp *interp,		/* Current interpreter. */
3482    int objc,			/* Number of arguments. */
3483    Tcl_Obj *CONST objv[])	/* Argument objects. */
3484{
3485    int i,j, index, mode, foundmode, result, splitObjs, numMatchesSaved;
3486    int noCase, patternLength;
3487    char *pattern;
3488    Tcl_Obj *stringObj, *indexVarObj, *matchVarObj;
3489    Tcl_Obj *CONST *savedObjv = objv;
3490    Tcl_RegExp regExpr = NULL;
3491    Interp *iPtr = (Interp *) interp;
3492    int pc = 0;
3493    int bidx = 0;		/* Index of body argument. */
3494    Tcl_Obj *blist = NULL;	/* List obj which is the body */
3495    CmdFrame *ctxPtr;		/* Copy of the topmost cmdframe, to allow us
3496				 * to mess with the line information */
3497
3498    /*
3499     * If you add options that make -e and -g not unique prefixes of -exact or
3500     * -glob, you *must* fix TclCompileSwitchCmd's option parser as well.
3501     */
3502
3503    static CONST char *options[] = {
3504	"-exact", "-glob", "-indexvar", "-matchvar", "-nocase", "-regexp",
3505	"--", NULL
3506    };
3507    enum options {
3508	OPT_EXACT, OPT_GLOB, OPT_INDEXV, OPT_MATCHV, OPT_NOCASE, OPT_REGEXP,
3509	OPT_LAST
3510    };
3511    typedef int (*strCmpFn_t)(const char *, const char *);
3512    strCmpFn_t strCmpFn = strcmp;
3513
3514    mode = OPT_EXACT;
3515    foundmode = 0;
3516    indexVarObj = NULL;
3517    matchVarObj = NULL;
3518    numMatchesSaved = 0;
3519    noCase = 0;
3520    for (i = 1; i < objc-2; i++) {
3521	if (TclGetString(objv[i])[0] != '-') {
3522	    break;
3523	}
3524	if (Tcl_GetIndexFromObj(interp, objv[i], options, "option", 0,
3525		&index) != TCL_OK) {
3526	    return TCL_ERROR;
3527	}
3528	switch ((enum options) index) {
3529	    /*
3530	     * General options.
3531	     */
3532
3533	case OPT_LAST:
3534	    i++;
3535	    goto finishedOptions;
3536	case OPT_NOCASE:
3537	    strCmpFn = strcasecmp;
3538	    noCase = 1;
3539	    break;
3540
3541	    /*
3542	     * Handle the different switch mode options.
3543	     */
3544
3545	default:
3546	    if (foundmode) {
3547		/*
3548		 * Mode already set via -exact, -glob, or -regexp.
3549		 */
3550
3551		Tcl_AppendResult(interp, "bad option \"",
3552			TclGetString(objv[i]), "\": ", options[mode],
3553			" option already found", NULL);
3554		return TCL_ERROR;
3555	    } else {
3556		foundmode = 1;
3557		mode = index;
3558		break;
3559	    }
3560
3561	    /*
3562	     * Check for TIP#75 options specifying the variables to write
3563	     * regexp information into.
3564	     */
3565
3566	case OPT_INDEXV:
3567	    i++;
3568	    if (i >= objc-2) {
3569		Tcl_AppendResult(interp, "missing variable name argument to ",
3570			"-indexvar", " option", NULL);
3571		return TCL_ERROR;
3572	    }
3573	    indexVarObj = objv[i];
3574	    numMatchesSaved = -1;
3575	    break;
3576	case OPT_MATCHV:
3577	    i++;
3578	    if (i >= objc-2) {
3579		Tcl_AppendResult(interp, "missing variable name argument to ",
3580			"-matchvar", " option", NULL);
3581		return TCL_ERROR;
3582	    }
3583	    matchVarObj = objv[i];
3584	    numMatchesSaved = -1;
3585	    break;
3586	}
3587    }
3588
3589  finishedOptions:
3590    if (objc - i < 2) {
3591	Tcl_WrongNumArgs(interp, 1, objv,
3592		"?switches? string pattern body ... ?default body?");
3593	return TCL_ERROR;
3594    }
3595    if (indexVarObj != NULL && mode != OPT_REGEXP) {
3596	Tcl_AppendResult(interp,
3597		"-indexvar option requires -regexp option", NULL);
3598	return TCL_ERROR;
3599    }
3600    if (matchVarObj != NULL && mode != OPT_REGEXP) {
3601	Tcl_AppendResult(interp,
3602		"-matchvar option requires -regexp option", NULL);
3603	return TCL_ERROR;
3604    }
3605
3606    stringObj = objv[i];
3607    objc -= i + 1;
3608    objv += i + 1;
3609    bidx = i + 1;		/* First after the match string. */
3610
3611    /*
3612     * If all of the pattern/command pairs are lumped into a single argument,
3613     * split them out again.
3614     *
3615     * TIP #280: Determine the lines the words in the list start at, based on
3616     * the same data for the list word itself. The cmdFramePtr line
3617     * information is manipulated directly.
3618     */
3619
3620    splitObjs = 0;
3621    if (objc == 1) {
3622	Tcl_Obj **listv;
3623	blist = objv[0];
3624
3625	if (TclListObjGetElements(interp, objv[0], &objc, &listv) != TCL_OK){
3626	    return TCL_ERROR;
3627	}
3628
3629	/*
3630	 * Ensure that the list is non-empty.
3631	 */
3632
3633	if (objc < 1) {
3634	    Tcl_WrongNumArgs(interp, 1, savedObjv,
3635		    "?switches? string {pattern body ... ?default body?}");
3636	    return TCL_ERROR;
3637	}
3638	objv = listv;
3639	splitObjs = 1;
3640    }
3641
3642    /*
3643     * Complain if there is an odd number of words in the list of patterns and
3644     * bodies.
3645     */
3646
3647    if (objc % 2) {
3648	Tcl_ResetResult(interp);
3649	Tcl_AppendResult(interp, "extra switch pattern with no body", NULL);
3650
3651	/*
3652	 * Check if this can be due to a badly placed comment in the switch
3653	 * block.
3654	 *
3655	 * The following is an heuristic to detect the infamous "comment in
3656	 * switch" error: just check if a pattern begins with '#'.
3657	 */
3658
3659	if (splitObjs) {
3660	    for (i=0 ; i<objc ; i+=2) {
3661		if (TclGetString(objv[i])[0] == '#') {
3662		    Tcl_AppendResult(interp, ", this may be due to a "
3663			    "comment incorrectly placed outside of a "
3664			    "switch body - see the \"switch\" "
3665			    "documentation", NULL);
3666		    break;
3667		}
3668	    }
3669	}
3670
3671	return TCL_ERROR;
3672    }
3673
3674    /*
3675     * Complain if the last body is a continuation. Note that this check
3676     * assumes that the list is non-empty!
3677     */
3678
3679    if (strcmp(TclGetString(objv[objc-1]), "-") == 0) {
3680	Tcl_ResetResult(interp);
3681	Tcl_AppendResult(interp, "no body specified for pattern \"",
3682		TclGetString(objv[objc-2]), "\"", NULL);
3683	return TCL_ERROR;
3684    }
3685
3686    for (i = 0; i < objc; i += 2) {
3687	/*
3688	 * See if the pattern matches the string.
3689	 */
3690
3691	pattern = TclGetStringFromObj(objv[i], &patternLength);
3692
3693	if ((i == objc - 2) && (*pattern == 'd')
3694		&& (strcmp(pattern, "default") == 0)) {
3695	    Tcl_Obj *emptyObj = NULL;
3696
3697	    /*
3698	     * If either indexVarObj or matchVarObj are non-NULL, we're in
3699	     * REGEXP mode but have reached the default clause anyway. TIP#75
3700	     * specifies that we set the variables to empty lists (== empty
3701	     * objects) in that case.
3702	     */
3703
3704	    if (indexVarObj != NULL) {
3705		TclNewObj(emptyObj);
3706		if (Tcl_ObjSetVar2(interp, indexVarObj, NULL, emptyObj,
3707			TCL_LEAVE_ERR_MSG) == NULL) {
3708		    return TCL_ERROR;
3709		}
3710	    }
3711	    if (matchVarObj != NULL) {
3712		if (emptyObj == NULL) {
3713		    TclNewObj(emptyObj);
3714		}
3715		if (Tcl_ObjSetVar2(interp, matchVarObj, NULL, emptyObj,
3716			TCL_LEAVE_ERR_MSG) == NULL) {
3717		    return TCL_ERROR;
3718		}
3719	    }
3720	    goto matchFound;
3721	} else {
3722	    switch (mode) {
3723	    case OPT_EXACT:
3724		if (strCmpFn(TclGetString(stringObj), pattern) == 0) {
3725		    goto matchFound;
3726		}
3727		break;
3728	    case OPT_GLOB:
3729		if (Tcl_StringCaseMatch(TclGetString(stringObj), pattern,
3730			noCase)) {
3731		    goto matchFound;
3732		}
3733		break;
3734	    case OPT_REGEXP:
3735		regExpr = Tcl_GetRegExpFromObj(interp, objv[i],
3736			TCL_REG_ADVANCED | (noCase ? TCL_REG_NOCASE : 0));
3737		if (regExpr == NULL) {
3738		    return TCL_ERROR;
3739		} else {
3740		    int matched = Tcl_RegExpExecObj(interp, regExpr,
3741			    stringObj, 0, numMatchesSaved, 0);
3742
3743		    if (matched < 0) {
3744			return TCL_ERROR;
3745		    } else if (matched) {
3746			goto matchFoundRegexp;
3747		    }
3748		}
3749		break;
3750	    }
3751	}
3752    }
3753    return TCL_OK;
3754
3755  matchFoundRegexp:
3756    /*
3757     * We are operating in REGEXP mode and we need to store information about
3758     * what we matched in some user-nominated arrays. So build the lists of
3759     * values and indices to write here. [TIP#75]
3760     */
3761
3762    if (numMatchesSaved) {
3763	Tcl_RegExpInfo info;
3764	Tcl_Obj *matchesObj, *indicesObj = NULL;
3765
3766	Tcl_RegExpGetInfo(regExpr, &info);
3767	if (matchVarObj != NULL) {
3768	    TclNewObj(matchesObj);
3769	} else {
3770	    matchesObj = NULL;
3771	}
3772	if (indexVarObj != NULL) {
3773	    TclNewObj(indicesObj);
3774	}
3775
3776	for (j=0 ; j<=info.nsubs ; j++) {
3777	    if (indexVarObj != NULL) {
3778		Tcl_Obj *rangeObjAry[2];
3779
3780		rangeObjAry[0] = Tcl_NewLongObj(info.matches[j].start);
3781		rangeObjAry[1] = Tcl_NewLongObj(info.matches[j].end);
3782
3783		/*
3784		 * Never fails; the object is always clean at this point.
3785		 */
3786
3787		Tcl_ListObjAppendElement(NULL, indicesObj,
3788			Tcl_NewListObj(2, rangeObjAry));
3789	    }
3790
3791	    if (matchVarObj != NULL) {
3792		Tcl_Obj *substringObj;
3793
3794		substringObj = Tcl_GetRange(stringObj,
3795			info.matches[j].start, info.matches[j].end-1);
3796
3797		/*
3798		 * Never fails; the object is always clean at this point.
3799		 */
3800
3801		Tcl_ListObjAppendElement(NULL, matchesObj, substringObj);
3802	    }
3803	}
3804
3805	if (indexVarObj != NULL) {
3806	    if (Tcl_ObjSetVar2(interp, indexVarObj, NULL, indicesObj,
3807		    TCL_LEAVE_ERR_MSG) == NULL) {
3808		/*
3809		 * Careful! Check to see if we have allocated the list of
3810		 * matched strings; if so (but there was an error assigning
3811		 * the indices list) we have a potential memory leak because
3812		 * the match list has not been written to a variable. Except
3813		 * that we'll clean that up right now.
3814		 */
3815
3816		if (matchesObj != NULL) {
3817		    Tcl_DecrRefCount(matchesObj);
3818		}
3819		return TCL_ERROR;
3820	    }
3821	}
3822	if (matchVarObj != NULL) {
3823	    if (Tcl_ObjSetVar2(interp, matchVarObj, NULL, matchesObj,
3824		    TCL_LEAVE_ERR_MSG) == NULL) {
3825		/*
3826		 * Unlike above, if indicesObj is non-NULL at this point, it
3827		 * will have been written to a variable already and will hence
3828		 * not be leaked.
3829		 */
3830
3831		return TCL_ERROR;
3832	    }
3833	}
3834    }
3835
3836    /*
3837     * We've got a match. Find a body to execute, skipping bodies that are
3838     * "-".
3839     */
3840
3841  matchFound:
3842    ctxPtr = (CmdFrame *) TclStackAlloc(interp, sizeof(CmdFrame));
3843    *ctxPtr = *iPtr->cmdFramePtr;
3844
3845    if (splitObjs) {
3846	/*
3847	 * We have to perform the GetSrc and other type dependent handling of
3848	 * the frame here because we are munging with the line numbers,
3849	 * something the other commands like if, etc. are not doing. Them are
3850	 * fine with simply passing the CmdFrame through and having the
3851	 * special handling done in 'info frame', or the bc compiler
3852	 */
3853
3854	if (ctxPtr->type == TCL_LOCATION_BC) {
3855	    /*
3856	     * Type BC => ctxPtr->data.eval.path    is not used.
3857	     *            ctxPtr->data.tebc.codePtr is used instead.
3858	     */
3859
3860	    TclGetSrcInfoForPc(ctxPtr);
3861	    pc = 1;
3862
3863	    /*
3864	     * The line information in the cmdFrame is now a copy we do not
3865	     * own.
3866	     */
3867	}
3868
3869	if (ctxPtr->type == TCL_LOCATION_SOURCE && ctxPtr->line[bidx] >= 0) {
3870	    int bline = ctxPtr->line[bidx];
3871
3872	    ctxPtr->line = (int *) ckalloc(objc * sizeof(int));
3873	    ctxPtr->nline = objc;
3874	    TclListLines(blist, bline, objc, ctxPtr->line, objv);
3875	} else {
3876	    /*
3877	     * This is either a dynamic code word, when all elements are
3878	     * relative to themselves, or something else less expected and
3879	     * where we have no information. The result is the same in both
3880	     * cases; tell the code to come that it doesn't know where it is,
3881	     * which triggers reversion to the old behavior.
3882	     */
3883
3884	    int k;
3885
3886	    ctxPtr->line = (int *) ckalloc(objc * sizeof(int));
3887	    ctxPtr->nline = objc;
3888	    for (k=0; k < objc; k++) {
3889		ctxPtr->line[k] = -1;
3890	    }
3891	}
3892    }
3893
3894    for (j = i + 1; ; j += 2) {
3895	if (j >= objc) {
3896	    /*
3897	     * This shouldn't happen since we've checked that the last body is
3898	     * not a continuation...
3899	     */
3900
3901	    Tcl_Panic("fall-out when searching for body to match pattern");
3902	}
3903	if (strcmp(TclGetString(objv[j]), "-") != 0) {
3904	    break;
3905	}
3906    }
3907
3908    /*
3909     * TIP #280: Make invoking context available to switch branch.
3910     */
3911
3912    result = TclEvalObjEx(interp, objv[j], 0, ctxPtr, splitObjs ? j : bidx+j);
3913    if (splitObjs) {
3914	ckfree((char *) ctxPtr->line);
3915	if (pc && (ctxPtr->type == TCL_LOCATION_SOURCE)) {
3916	    /*
3917	     * Death of SrcInfo reference.
3918	     */
3919
3920	    Tcl_DecrRefCount(ctxPtr->data.eval.path);
3921	}
3922    }
3923
3924    /*
3925     * Generate an error message if necessary.
3926     */
3927
3928    if (result == TCL_ERROR) {
3929	int limit = 50;
3930	int overflow = (patternLength > limit);
3931
3932	Tcl_AppendObjToErrorInfo(interp, Tcl_ObjPrintf(
3933		"\n    (\"%.*s%s\" arm line %d)",
3934		(overflow ? limit : patternLength), pattern,
3935		(overflow ? "..." : ""), interp->errorLine));
3936    }
3937    TclStackFree(interp, ctxPtr);
3938    return result;
3939}
3940
3941/*
3942 *----------------------------------------------------------------------
3943 *
3944 * Tcl_TimeObjCmd --
3945 *
3946 *	This object-based procedure is invoked to process the "time" Tcl
3947 *	command. See the user documentation for details on what it does.
3948 *
3949 * Results:
3950 *	A standard Tcl object result.
3951 *
3952 * Side effects:
3953 *	See the user documentation.
3954 *
3955 *----------------------------------------------------------------------
3956 */
3957
3958int
3959Tcl_TimeObjCmd(
3960    ClientData dummy,		/* Not used. */
3961    Tcl_Interp *interp,		/* Current interpreter. */
3962    int objc,			/* Number of arguments. */
3963    Tcl_Obj *CONST objv[])	/* Argument objects. */
3964{
3965    register Tcl_Obj *objPtr;
3966    Tcl_Obj *objs[4];
3967    register int i, result;
3968    int count;
3969    double totalMicroSec;
3970#ifndef TCL_WIDE_CLICKS
3971    Tcl_Time start, stop;
3972#else
3973    Tcl_WideInt start, stop;
3974#endif
3975
3976    if (objc == 2) {
3977	count = 1;
3978    } else if (objc == 3) {
3979	result = TclGetIntFromObj(interp, objv[2], &count);
3980	if (result != TCL_OK) {
3981	    return result;
3982	}
3983    } else {
3984	Tcl_WrongNumArgs(interp, 1, objv, "command ?count?");
3985	return TCL_ERROR;
3986    }
3987
3988    objPtr = objv[1];
3989    i = count;
3990#ifndef TCL_WIDE_CLICKS
3991    Tcl_GetTime(&start);
3992#else
3993    start = TclpGetWideClicks();
3994#endif
3995    while (i-- > 0) {
3996	result = Tcl_EvalObjEx(interp, objPtr, 0);
3997	if (result != TCL_OK) {
3998	    return result;
3999	}
4000    }
4001#ifndef TCL_WIDE_CLICKS
4002    Tcl_GetTime(&stop);
4003    totalMicroSec = ((double) (stop.sec - start.sec)) * 1.0e6
4004	    + (stop.usec - start.usec);
4005#else
4006    stop = TclpGetWideClicks();
4007    totalMicroSec = ((double) TclpWideClicksToNanoseconds(stop - start))/1.0e3;
4008#endif
4009
4010    if (count <= 1) {
4011	/*
4012	 * Use int obj since we know time is not fractional. [Bug 1202178]
4013	 */
4014
4015	objs[0] = Tcl_NewIntObj((count <= 0) ? 0 : (int) totalMicroSec);
4016    } else {
4017	objs[0] = Tcl_NewDoubleObj(totalMicroSec/count);
4018    }
4019
4020    /*
4021     * Construct the result as a list because many programs have always parsed
4022     * as such (extracting the first element, typically).
4023     */
4024
4025    TclNewLiteralStringObj(objs[1], "microseconds");
4026    TclNewLiteralStringObj(objs[2], "per");
4027    TclNewLiteralStringObj(objs[3], "iteration");
4028    Tcl_SetObjResult(interp, Tcl_NewListObj(4, objs));
4029
4030    return TCL_OK;
4031}
4032
4033/*
4034 *----------------------------------------------------------------------
4035 *
4036 * Tcl_WhileObjCmd --
4037 *
4038 *	This procedure is invoked to process the "while" Tcl command. See the
4039 *	user documentation for details on what it does.
4040 *
4041 *	With the bytecode compiler, this procedure is only called when a
4042 *	command name is computed at runtime, and is "while" or the name to
4043 *	which "while" was renamed: e.g., "set z while; $z {$i<100} {}"
4044 *
4045 * Results:
4046 *	A standard Tcl result.
4047 *
4048 * Side effects:
4049 *	See the user documentation.
4050 *
4051 *----------------------------------------------------------------------
4052 */
4053
4054int
4055Tcl_WhileObjCmd(
4056    ClientData dummy,		/* Not used. */
4057    Tcl_Interp *interp,		/* Current interpreter. */
4058    int objc,			/* Number of arguments. */
4059    Tcl_Obj *CONST objv[])	/* Argument objects. */
4060{
4061    int result, value;
4062    Interp *iPtr = (Interp *) interp;
4063
4064    if (objc != 3) {
4065	Tcl_WrongNumArgs(interp, 1, objv, "test command");
4066	return TCL_ERROR;
4067    }
4068
4069    while (1) {
4070	result = Tcl_ExprBooleanObj(interp, objv[1], &value);
4071	if (result != TCL_OK) {
4072	    return result;
4073	}
4074	if (!value) {
4075	    break;
4076	}
4077
4078	/* TIP #280. */
4079        result = TclEvalObjEx(interp, objv[2], 0, iPtr->cmdFramePtr, 2);
4080	if ((result != TCL_OK) && (result != TCL_CONTINUE)) {
4081	    if (result == TCL_ERROR) {
4082		Tcl_AppendObjToErrorInfo(interp, Tcl_ObjPrintf(
4083			"\n    (\"while\" body line %d)", interp->errorLine));
4084	    }
4085	    break;
4086	}
4087    }
4088    if (result == TCL_BREAK) {
4089	result = TCL_OK;
4090    }
4091    if (result == TCL_OK) {
4092	Tcl_ResetResult(interp);
4093    }
4094    return result;
4095}
4096
4097/*
4098 *----------------------------------------------------------------------
4099 *
4100 * TclListLines --
4101 *
4102 *	???
4103 *
4104 * Results:
4105 *	Filled in array of line numbers?
4106 *
4107 * Side effects:
4108 *	None.
4109 *
4110 *----------------------------------------------------------------------
4111 */
4112
4113void
4114TclListLines(
4115    Tcl_Obj* listObj,          /* Pointer to obj holding a string with list
4116				* structure.  Assumed to be valid. Assumed to
4117				* contain n elements.
4118				*/
4119    int line,			/* Line the list as a whole starts on. */
4120    int n,			/* #elements in lines */
4121    int *lines,			/* Array of line numbers, to fill. */
4122    Tcl_Obj* const* elems)      /* The list elems as Tcl_Obj*, in need of
4123				 * derived continuation data */
4124{
4125    CONST char*  listStr  = Tcl_GetString (listObj);
4126    CONST char*  listHead = listStr;
4127    int i, length = strlen(listStr);
4128    CONST char *element = NULL, *next = NULL;
4129    ContLineLoc* clLocPtr = TclContinuationsGet(listObj);
4130    int* clNext   = (clLocPtr ? &clLocPtr->loc[0] : NULL);
4131
4132    for (i = 0; i < n; i++) {
4133	TclFindElement(NULL, listStr, length, &element, &next, NULL, NULL);
4134
4135	TclAdvanceLines(&line, listStr, element);
4136				/* Leading whitespace */
4137	TclAdvanceContinuations (&line, &clNext, element - listHead);
4138	if (elems && clNext) {
4139	    TclContinuationsEnterDerived (elems[i], element - listHead,
4140					  clNext);
4141	}
4142	lines[i] = line;
4143	length -= (next - listStr);
4144	TclAdvanceLines(&line, element, next);
4145				/* Element */
4146	listStr = next;
4147
4148	if (*element == 0) {
4149	    /* ASSERT i == n */
4150	    break;
4151	}
4152    }
4153}
4154
4155/*
4156 * Local Variables:
4157 * mode: c
4158 * c-basic-offset: 4
4159 * fill-column: 78
4160 * End:
4161 */
4162