1/* 2 * tclCmdMZ.c -- 3 * 4 * This file contains the top-level command routines for most of the Tcl 5 * built-in commands whose names begin with the letters M to Z. It 6 * contains only commands in the generic core (i.e. those that don't 7 * depend much upon UNIX facilities). 8 * 9 * Copyright (c) 1987-1993 The Regents of the University of California. 10 * Copyright (c) 1994-1997 Sun Microsystems, Inc. 11 * Copyright (c) 1998-2000 Scriptics Corporation. 12 * Copyright (c) 2002 ActiveState Corporation. 13 * Copyright (c) 2003 Donal K. Fellows. 14 * 15 * See the file "license.terms" for information on usage and redistribution of 16 * this file, and for a DISCLAIMER OF ALL WARRANTIES. 17 * 18 * RCS: @(#) $Id: tclCmdMZ.c,v 1.163.2.9 2010/08/12 08:55:38 dkf Exp $ 19 */ 20 21#include "tclInt.h" 22#include "tclRegexp.h" 23 24static int UniCharIsAscii(int character); 25static int UniCharIsHexDigit(int character); 26 27/* 28 *---------------------------------------------------------------------- 29 * 30 * Tcl_PwdObjCmd -- 31 * 32 * This procedure is invoked to process the "pwd" Tcl command. See the 33 * user documentation for details on what it does. 34 * 35 * Results: 36 * A standard Tcl result. 37 * 38 * Side effects: 39 * See the user documentation. 40 * 41 *---------------------------------------------------------------------- 42 */ 43 44int 45Tcl_PwdObjCmd( 46 ClientData dummy, /* Not used. */ 47 Tcl_Interp *interp, /* Current interpreter. */ 48 int objc, /* Number of arguments. */ 49 Tcl_Obj *CONST objv[]) /* Argument objects. */ 50{ 51 Tcl_Obj *retVal; 52 53 if (objc != 1) { 54 Tcl_WrongNumArgs(interp, 1, objv, NULL); 55 return TCL_ERROR; 56 } 57 58 retVal = Tcl_FSGetCwd(interp); 59 if (retVal == NULL) { 60 return TCL_ERROR; 61 } 62 Tcl_SetObjResult(interp, retVal); 63 Tcl_DecrRefCount(retVal); 64 return TCL_OK; 65} 66 67/* 68 *---------------------------------------------------------------------- 69 * 70 * Tcl_RegexpObjCmd -- 71 * 72 * This procedure is invoked to process the "regexp" Tcl command. See 73 * the user documentation for details on what it does. 74 * 75 * Results: 76 * A standard Tcl result. 77 * 78 * Side effects: 79 * See the user documentation. 80 * 81 *---------------------------------------------------------------------- 82 */ 83 84int 85Tcl_RegexpObjCmd( 86 ClientData dummy, /* Not used. */ 87 Tcl_Interp *interp, /* Current interpreter. */ 88 int objc, /* Number of arguments. */ 89 Tcl_Obj *CONST objv[]) /* Argument objects. */ 90{ 91 int i, indices, match, about, offset, all, doinline, numMatchesSaved; 92 int cflags, eflags, stringLength, matchLength; 93 Tcl_RegExp regExpr; 94 Tcl_Obj *objPtr, *startIndex = NULL, *resultPtr = NULL; 95 Tcl_RegExpInfo info; 96 static CONST char *options[] = { 97 "-all", "-about", "-indices", "-inline", 98 "-expanded", "-line", "-linestop", "-lineanchor", 99 "-nocase", "-start", "--", NULL 100 }; 101 enum options { 102 REGEXP_ALL, REGEXP_ABOUT, REGEXP_INDICES, REGEXP_INLINE, 103 REGEXP_EXPANDED,REGEXP_LINE, REGEXP_LINESTOP,REGEXP_LINEANCHOR, 104 REGEXP_NOCASE, REGEXP_START, REGEXP_LAST 105 }; 106 107 indices = 0; 108 about = 0; 109 cflags = TCL_REG_ADVANCED; 110 eflags = 0; 111 offset = 0; 112 all = 0; 113 doinline = 0; 114 115 for (i = 1; i < objc; i++) { 116 char *name; 117 int index; 118 119 name = TclGetString(objv[i]); 120 if (name[0] != '-') { 121 break; 122 } 123 if (Tcl_GetIndexFromObj(interp, objv[i], options, "switch", TCL_EXACT, 124 &index) != TCL_OK) { 125 goto optionError; 126 } 127 switch ((enum options) index) { 128 case REGEXP_ALL: 129 all = 1; 130 break; 131 case REGEXP_INDICES: 132 indices = 1; 133 break; 134 case REGEXP_INLINE: 135 doinline = 1; 136 break; 137 case REGEXP_NOCASE: 138 cflags |= TCL_REG_NOCASE; 139 break; 140 case REGEXP_ABOUT: 141 about = 1; 142 break; 143 case REGEXP_EXPANDED: 144 cflags |= TCL_REG_EXPANDED; 145 break; 146 case REGEXP_LINE: 147 cflags |= TCL_REG_NEWLINE; 148 break; 149 case REGEXP_LINESTOP: 150 cflags |= TCL_REG_NLSTOP; 151 break; 152 case REGEXP_LINEANCHOR: 153 cflags |= TCL_REG_NLANCH; 154 break; 155 case REGEXP_START: { 156 int temp; 157 if (++i >= objc) { 158 goto endOfForLoop; 159 } 160 if (TclGetIntForIndexM(interp, objv[i], 0, &temp) != TCL_OK) { 161 goto optionError; 162 } 163 if (startIndex) { 164 Tcl_DecrRefCount(startIndex); 165 } 166 startIndex = objv[i]; 167 Tcl_IncrRefCount(startIndex); 168 break; 169 } 170 case REGEXP_LAST: 171 i++; 172 goto endOfForLoop; 173 } 174 } 175 176 endOfForLoop: 177 if ((objc - i) < (2 - about)) { 178 Tcl_WrongNumArgs(interp, 1, objv, 179 "?switches? exp string ?matchVar? ?subMatchVar subMatchVar ...?"); 180 goto optionError; 181 } 182 objc -= i; 183 objv += i; 184 185 /* 186 * Check if the user requested -inline, but specified match variables; a 187 * no-no. 188 */ 189 190 if (doinline && ((objc - 2) != 0)) { 191 Tcl_AppendResult(interp, "regexp match variables not allowed" 192 " when using -inline", NULL); 193 goto optionError; 194 } 195 196 /* 197 * Handle the odd about case separately. 198 */ 199 200 if (about) { 201 regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags); 202 if ((regExpr == NULL) || (TclRegAbout(interp, regExpr) < 0)) { 203 optionError: 204 if (startIndex) { 205 Tcl_DecrRefCount(startIndex); 206 } 207 return TCL_ERROR; 208 } 209 return TCL_OK; 210 } 211 212 /* 213 * Get the length of the string that we are matching against so we can do 214 * the termination test for -all matches. Do this before getting the 215 * regexp to avoid shimmering problems. 216 */ 217 218 objPtr = objv[1]; 219 stringLength = Tcl_GetCharLength(objPtr); 220 221 if (startIndex) { 222 TclGetIntForIndexM(NULL, startIndex, stringLength, &offset); 223 Tcl_DecrRefCount(startIndex); 224 if (offset < 0) { 225 offset = 0; 226 } 227 } 228 229 regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags); 230 if (regExpr == NULL) { 231 return TCL_ERROR; 232 } 233 234 objc -= 2; 235 objv += 2; 236 237 if (doinline) { 238 /* 239 * Save all the subexpressions, as we will return them as a list 240 */ 241 242 numMatchesSaved = -1; 243 } else { 244 /* 245 * Save only enough subexpressions for matches we want to keep, expect 246 * in the case of -all, where we need to keep at least one to know 247 * where to move the offset. 248 */ 249 250 numMatchesSaved = (objc == 0) ? all : objc; 251 } 252 253 /* 254 * The following loop is to handle multiple matches within the same source 255 * string; each iteration handles one match. If "-all" hasn't been 256 * specified then the loop body only gets executed once. We terminate the 257 * loop when the starting offset is past the end of the string. 258 */ 259 260 while (1) { 261 /* 262 * Pass either 0 or TCL_REG_NOTBOL in the eflags. Passing 263 * TCL_REG_NOTBOL indicates that the character at offset should not be 264 * considered the start of the line. If for example the pattern {^} is 265 * passed and -start is positive, then the pattern will not match the 266 * start of the string unless the previous character is a newline. 267 */ 268 269 if ((offset == 0) || ((offset > 0) && 270 (Tcl_GetUniChar(objPtr, offset-1) == (Tcl_UniChar)'\n'))) { 271 eflags = 0; 272 } else { 273 eflags = TCL_REG_NOTBOL; 274 } 275 276 match = Tcl_RegExpExecObj(interp, regExpr, objPtr, offset, 277 numMatchesSaved, eflags); 278 if (match < 0) { 279 return TCL_ERROR; 280 } 281 282 if (match == 0) { 283 /* 284 * We want to set the value of the intepreter result only when 285 * this is the first time through the loop. 286 */ 287 288 if (all <= 1) { 289 /* 290 * If inlining, the interpreter's object result remains an 291 * empty list, otherwise set it to an integer object w/ value 292 * 0. 293 */ 294 295 if (!doinline) { 296 Tcl_SetObjResult(interp, Tcl_NewIntObj(0)); 297 } 298 return TCL_OK; 299 } 300 break; 301 } 302 303 /* 304 * If additional variable names have been specified, return index 305 * information in those variables. 306 */ 307 308 Tcl_RegExpGetInfo(regExpr, &info); 309 if (doinline) { 310 /* 311 * It's the number of substitutions, plus one for the matchVar at 312 * index 0 313 */ 314 315 objc = info.nsubs + 1; 316 if (all <= 1) { 317 resultPtr = Tcl_NewObj(); 318 } 319 } 320 for (i = 0; i < objc; i++) { 321 Tcl_Obj *newPtr; 322 323 if (indices) { 324 int start, end; 325 Tcl_Obj *objs[2]; 326 327 /* 328 * Only adjust the match area if there was a match for that 329 * area. (Scriptics Bug 4391/SF Bug #219232) 330 */ 331 332 if (i <= info.nsubs && info.matches[i].start >= 0) { 333 start = offset + info.matches[i].start; 334 end = offset + info.matches[i].end; 335 336 /* 337 * Adjust index so it refers to the last character in the 338 * match instead of the first character after the match. 339 */ 340 341 if (end >= offset) { 342 end--; 343 } 344 } else { 345 start = -1; 346 end = -1; 347 } 348 349 objs[0] = Tcl_NewLongObj(start); 350 objs[1] = Tcl_NewLongObj(end); 351 352 newPtr = Tcl_NewListObj(2, objs); 353 } else { 354 if (i <= info.nsubs) { 355 newPtr = Tcl_GetRange(objPtr, 356 offset + info.matches[i].start, 357 offset + info.matches[i].end - 1); 358 } else { 359 newPtr = Tcl_NewObj(); 360 } 361 } 362 if (doinline) { 363 if (Tcl_ListObjAppendElement(interp, resultPtr, newPtr) 364 != TCL_OK) { 365 Tcl_DecrRefCount(newPtr); 366 Tcl_DecrRefCount(resultPtr); 367 return TCL_ERROR; 368 } 369 } else { 370 Tcl_Obj *valuePtr; 371 valuePtr = Tcl_ObjSetVar2(interp, objv[i], NULL, newPtr, 0); 372 if (valuePtr == NULL) { 373 Tcl_AppendResult(interp, "couldn't set variable \"", 374 TclGetString(objv[i]), "\"", NULL); 375 return TCL_ERROR; 376 } 377 } 378 } 379 380 if (all == 0) { 381 break; 382 } 383 384 /* 385 * Adjust the offset to the character just after the last one in the 386 * matchVar and increment all to count how many times we are making a 387 * match. We always increment the offset by at least one to prevent 388 * endless looping (as in the case: regexp -all {a*} a). Otherwise, 389 * when we match the NULL string at the end of the input string, we 390 * will loop indefinately (because the length of the match is 0, so 391 * offset never changes). 392 */ 393 394 matchLength = info.matches[0].end - info.matches[0].start; 395 offset += info.matches[0].end; 396 397 /* 398 * A match of length zero could happen for {^} {$} or {.*} and in 399 * these cases we always want to bump the index up one. 400 */ 401 402 if (matchLength == 0) { 403 offset++; 404 } 405 all++; 406 if (offset >= stringLength) { 407 break; 408 } 409 } 410 411 /* 412 * Set the interpreter's object result to an integer object with value 1 413 * if -all wasn't specified, otherwise it's all-1 (the number of times 414 * through the while - 1). 415 */ 416 417 if (doinline) { 418 Tcl_SetObjResult(interp, resultPtr); 419 } else { 420 Tcl_SetObjResult(interp, Tcl_NewIntObj(all ? all-1 : 1)); 421 } 422 return TCL_OK; 423} 424 425/* 426 *---------------------------------------------------------------------- 427 * 428 * Tcl_RegsubObjCmd -- 429 * 430 * This procedure is invoked to process the "regsub" Tcl command. See the 431 * user documentation for details on what it does. 432 * 433 * Results: 434 * A standard Tcl result. 435 * 436 * Side effects: 437 * See the user documentation. 438 * 439 *---------------------------------------------------------------------- 440 */ 441 442int 443Tcl_RegsubObjCmd( 444 ClientData dummy, /* Not used. */ 445 Tcl_Interp *interp, /* Current interpreter. */ 446 int objc, /* Number of arguments. */ 447 Tcl_Obj *CONST objv[]) /* Argument objects. */ 448{ 449 int idx, result, cflags, all, wlen, wsublen, numMatches, offset; 450 int start, end, subStart, subEnd, match; 451 Tcl_RegExp regExpr; 452 Tcl_RegExpInfo info; 453 Tcl_Obj *resultPtr, *subPtr, *objPtr, *startIndex = NULL; 454 Tcl_UniChar ch, *wsrc, *wfirstChar, *wstring, *wsubspec, *wend; 455 456 static CONST char *options[] = { 457 "-all", "-nocase", "-expanded", 458 "-line", "-linestop", "-lineanchor", "-start", 459 "--", NULL 460 }; 461 enum options { 462 REGSUB_ALL, REGSUB_NOCASE, REGSUB_EXPANDED, 463 REGSUB_LINE, REGSUB_LINESTOP, REGSUB_LINEANCHOR, REGSUB_START, 464 REGSUB_LAST 465 }; 466 467 cflags = TCL_REG_ADVANCED; 468 all = 0; 469 offset = 0; 470 resultPtr = NULL; 471 472 for (idx = 1; idx < objc; idx++) { 473 char *name; 474 int index; 475 476 name = TclGetString(objv[idx]); 477 if (name[0] != '-') { 478 break; 479 } 480 if (Tcl_GetIndexFromObj(interp, objv[idx], options, "switch", 481 TCL_EXACT, &index) != TCL_OK) { 482 goto optionError; 483 } 484 switch ((enum options) index) { 485 case REGSUB_ALL: 486 all = 1; 487 break; 488 case REGSUB_NOCASE: 489 cflags |= TCL_REG_NOCASE; 490 break; 491 case REGSUB_EXPANDED: 492 cflags |= TCL_REG_EXPANDED; 493 break; 494 case REGSUB_LINE: 495 cflags |= TCL_REG_NEWLINE; 496 break; 497 case REGSUB_LINESTOP: 498 cflags |= TCL_REG_NLSTOP; 499 break; 500 case REGSUB_LINEANCHOR: 501 cflags |= TCL_REG_NLANCH; 502 break; 503 case REGSUB_START: { 504 int temp; 505 if (++idx >= objc) { 506 goto endOfForLoop; 507 } 508 if (TclGetIntForIndexM(interp, objv[idx], 0, &temp) != TCL_OK) { 509 goto optionError; 510 } 511 if (startIndex) { 512 Tcl_DecrRefCount(startIndex); 513 } 514 startIndex = objv[idx]; 515 Tcl_IncrRefCount(startIndex); 516 break; 517 } 518 case REGSUB_LAST: 519 idx++; 520 goto endOfForLoop; 521 } 522 } 523 524 endOfForLoop: 525 if (objc-idx < 3 || objc-idx > 4) { 526 Tcl_WrongNumArgs(interp, 1, objv, 527 "?switches? exp string subSpec ?varName?"); 528 optionError: 529 if (startIndex) { 530 Tcl_DecrRefCount(startIndex); 531 } 532 return TCL_ERROR; 533 } 534 535 objc -= idx; 536 objv += idx; 537 538 if (startIndex) { 539 int stringLength = Tcl_GetCharLength(objv[1]); 540 541 TclGetIntForIndexM(NULL, startIndex, stringLength, &offset); 542 Tcl_DecrRefCount(startIndex); 543 if (offset < 0) { 544 offset = 0; 545 } 546 } 547 548 if (all && (offset == 0) 549 && (strpbrk(TclGetString(objv[2]), "&\\") == NULL) 550 && (strpbrk(TclGetString(objv[0]), "*+?{}()[].\\|^$") == NULL)) { 551 /* 552 * This is a simple one pair string map situation. We make use of a 553 * slightly modified version of the one pair STR_MAP code. 554 */ 555 556 int slen, nocase; 557 int (*strCmpFn)(CONST Tcl_UniChar*,CONST Tcl_UniChar*,unsigned long); 558 Tcl_UniChar *p, wsrclc; 559 560 numMatches = 0; 561 nocase = (cflags & TCL_REG_NOCASE); 562 strCmpFn = nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp; 563 564 wsrc = Tcl_GetUnicodeFromObj(objv[0], &slen); 565 wstring = Tcl_GetUnicodeFromObj(objv[1], &wlen); 566 wsubspec = Tcl_GetUnicodeFromObj(objv[2], &wsublen); 567 wend = wstring + wlen - (slen ? slen - 1 : 0); 568 result = TCL_OK; 569 570 if (slen == 0) { 571 /* 572 * regsub behavior for "" matches between each character. 'string 573 * map' skips the "" case. 574 */ 575 576 if (wstring < wend) { 577 resultPtr = Tcl_NewUnicodeObj(wstring, 0); 578 Tcl_IncrRefCount(resultPtr); 579 for (; wstring < wend; wstring++) { 580 Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen); 581 Tcl_AppendUnicodeToObj(resultPtr, wstring, 1); 582 numMatches++; 583 } 584 wlen = 0; 585 } 586 } else { 587 wsrclc = Tcl_UniCharToLower(*wsrc); 588 for (p = wfirstChar = wstring; wstring < wend; wstring++) { 589 if ((*wstring == *wsrc || 590 (nocase && Tcl_UniCharToLower(*wstring)==wsrclc)) && 591 (slen==1 || (strCmpFn(wstring, wsrc, 592 (unsigned long) slen) == 0))) { 593 if (numMatches == 0) { 594 resultPtr = Tcl_NewUnicodeObj(wstring, 0); 595 Tcl_IncrRefCount(resultPtr); 596 } 597 if (p != wstring) { 598 Tcl_AppendUnicodeToObj(resultPtr, p, wstring - p); 599 p = wstring + slen; 600 } else { 601 p += slen; 602 } 603 wstring = p - 1; 604 605 Tcl_AppendUnicodeToObj(resultPtr, wsubspec, wsublen); 606 numMatches++; 607 } 608 } 609 if (numMatches) { 610 wlen = wfirstChar + wlen - p; 611 wstring = p; 612 } 613 } 614 objPtr = NULL; 615 subPtr = NULL; 616 goto regsubDone; 617 } 618 619 regExpr = Tcl_GetRegExpFromObj(interp, objv[0], cflags); 620 if (regExpr == NULL) { 621 return TCL_ERROR; 622 } 623 624 /* 625 * Make sure to avoid problems where the objects are shared. This can 626 * cause RegExpObj <> UnicodeObj shimmering that causes data corruption. 627 * [Bug #461322] 628 */ 629 630 if (objv[1] == objv[0]) { 631 objPtr = Tcl_DuplicateObj(objv[1]); 632 } else { 633 objPtr = objv[1]; 634 } 635 wstring = Tcl_GetUnicodeFromObj(objPtr, &wlen); 636 if (objv[2] == objv[0]) { 637 subPtr = Tcl_DuplicateObj(objv[2]); 638 } else { 639 subPtr = objv[2]; 640 } 641 wsubspec = Tcl_GetUnicodeFromObj(subPtr, &wsublen); 642 643 result = TCL_OK; 644 645 /* 646 * The following loop is to handle multiple matches within the same source 647 * string; each iteration handles one match and its corresponding 648 * substitution. If "-all" hasn't been specified then the loop body only 649 * gets executed once. We must use 'offset <= wlen' in particular for the 650 * case where the regexp pattern can match the empty string - this is 651 * useful when doing, say, 'regsub -- ^ $str ...' when $str might be 652 * empty. 653 */ 654 655 numMatches = 0; 656 for ( ; offset <= wlen; ) { 657 658 /* 659 * The flags argument is set if string is part of a larger string, so 660 * that "^" won't match. 661 */ 662 663 match = Tcl_RegExpExecObj(interp, regExpr, objPtr, offset, 664 10 /* matches */, ((offset > 0 && 665 (wstring[offset-1] != (Tcl_UniChar)'\n')) 666 ? TCL_REG_NOTBOL : 0)); 667 668 if (match < 0) { 669 result = TCL_ERROR; 670 goto done; 671 } 672 if (match == 0) { 673 break; 674 } 675 if (numMatches == 0) { 676 resultPtr = Tcl_NewUnicodeObj(wstring, 0); 677 Tcl_IncrRefCount(resultPtr); 678 if (offset > 0) { 679 /* 680 * Copy the initial portion of the string in if an offset was 681 * specified. 682 */ 683 684 Tcl_AppendUnicodeToObj(resultPtr, wstring, offset); 685 } 686 } 687 numMatches++; 688 689 /* 690 * Copy the portion of the source string before the match to the 691 * result variable. 692 */ 693 694 Tcl_RegExpGetInfo(regExpr, &info); 695 start = info.matches[0].start; 696 end = info.matches[0].end; 697 Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, start); 698 699 /* 700 * Append the subSpec argument to the variable, making appropriate 701 * substitutions. This code is a bit hairy because of the backslash 702 * conventions and because the code saves up ranges of characters in 703 * subSpec to reduce the number of calls to Tcl_SetVar. 704 */ 705 706 wsrc = wfirstChar = wsubspec; 707 wend = wsubspec + wsublen; 708 for (ch = *wsrc; wsrc != wend; wsrc++, ch = *wsrc) { 709 if (ch == '&') { 710 idx = 0; 711 } else if (ch == '\\') { 712 ch = wsrc[1]; 713 if ((ch >= '0') && (ch <= '9')) { 714 idx = ch - '0'; 715 } else if ((ch == '\\') || (ch == '&')) { 716 *wsrc = ch; 717 Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, 718 wsrc - wfirstChar + 1); 719 *wsrc = '\\'; 720 wfirstChar = wsrc + 2; 721 wsrc++; 722 continue; 723 } else { 724 continue; 725 } 726 } else { 727 continue; 728 } 729 730 if (wfirstChar != wsrc) { 731 Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, 732 wsrc - wfirstChar); 733 } 734 735 if (idx <= info.nsubs) { 736 subStart = info.matches[idx].start; 737 subEnd = info.matches[idx].end; 738 if ((subStart >= 0) && (subEnd >= 0)) { 739 Tcl_AppendUnicodeToObj(resultPtr, 740 wstring + offset + subStart, subEnd - subStart); 741 } 742 } 743 744 if (*wsrc == '\\') { 745 wsrc++; 746 } 747 wfirstChar = wsrc + 1; 748 } 749 750 if (wfirstChar != wsrc) { 751 Tcl_AppendUnicodeToObj(resultPtr, wfirstChar, wsrc - wfirstChar); 752 } 753 754 if (end == 0) { 755 /* 756 * Always consume at least one character of the input string in 757 * order to prevent infinite loops. 758 */ 759 760 if (offset < wlen) { 761 Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); 762 } 763 offset++; 764 } else { 765 offset += end; 766 if (start == end) { 767 /* 768 * We matched an empty string, which means we must go forward 769 * one more step so we don't match again at the same spot. 770 */ 771 772 if (offset < wlen) { 773 Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, 1); 774 } 775 offset++; 776 } 777 } 778 if (!all) { 779 break; 780 } 781 } 782 783 /* 784 * Copy the portion of the source string after the last match to the 785 * result variable. 786 */ 787 788 regsubDone: 789 if (numMatches == 0) { 790 /* 791 * On zero matches, just ignore the offset, since it shouldn't matter 792 * to us in this case, and the user may have skewed it. 793 */ 794 795 resultPtr = objv[1]; 796 Tcl_IncrRefCount(resultPtr); 797 } else if (offset < wlen) { 798 Tcl_AppendUnicodeToObj(resultPtr, wstring + offset, wlen - offset); 799 } 800 if (objc == 4) { 801 if (Tcl_ObjSetVar2(interp, objv[3], NULL, resultPtr, 0) == NULL) { 802 Tcl_AppendResult(interp, "couldn't set variable \"", 803 TclGetString(objv[3]), "\"", NULL); 804 result = TCL_ERROR; 805 } else { 806 /* 807 * Set the interpreter's object result to an integer object 808 * holding the number of matches. 809 */ 810 811 Tcl_SetObjResult(interp, Tcl_NewIntObj(numMatches)); 812 } 813 } else { 814 /* 815 * No varname supplied, so just return the modified string. 816 */ 817 818 Tcl_SetObjResult(interp, resultPtr); 819 } 820 821 done: 822 if (objPtr && (objv[1] == objv[0])) { 823 Tcl_DecrRefCount(objPtr); 824 } 825 if (subPtr && (objv[2] == objv[0])) { 826 Tcl_DecrRefCount(subPtr); 827 } 828 if (resultPtr) { 829 Tcl_DecrRefCount(resultPtr); 830 } 831 return result; 832} 833 834/* 835 *---------------------------------------------------------------------- 836 * 837 * Tcl_RenameObjCmd -- 838 * 839 * This procedure is invoked to process the "rename" Tcl command. See the 840 * user documentation for details on what it does. 841 * 842 * Results: 843 * A standard Tcl object result. 844 * 845 * Side effects: 846 * See the user documentation. 847 * 848 *---------------------------------------------------------------------- 849 */ 850 851int 852Tcl_RenameObjCmd( 853 ClientData dummy, /* Arbitrary value passed to the command. */ 854 Tcl_Interp *interp, /* Current interpreter. */ 855 int objc, /* Number of arguments. */ 856 Tcl_Obj *CONST objv[]) /* Argument objects. */ 857{ 858 char *oldName, *newName; 859 860 if (objc != 3) { 861 Tcl_WrongNumArgs(interp, 1, objv, "oldName newName"); 862 return TCL_ERROR; 863 } 864 865 oldName = TclGetString(objv[1]); 866 newName = TclGetString(objv[2]); 867 return TclRenameCommand(interp, oldName, newName); 868} 869 870/* 871 *---------------------------------------------------------------------- 872 * 873 * Tcl_ReturnObjCmd -- 874 * 875 * This object-based procedure is invoked to process the "return" Tcl 876 * command. See the user documentation for details on what it does. 877 * 878 * Results: 879 * A standard Tcl object result. 880 * 881 * Side effects: 882 * See the user documentation. 883 * 884 *---------------------------------------------------------------------- 885 */ 886 887int 888Tcl_ReturnObjCmd( 889 ClientData dummy, /* Not used. */ 890 Tcl_Interp *interp, /* Current interpreter. */ 891 int objc, /* Number of arguments. */ 892 Tcl_Obj *CONST objv[]) /* Argument objects. */ 893{ 894 int code, level; 895 Tcl_Obj *returnOpts; 896 897 /* 898 * General syntax: [return ?-option value ...? ?result?] 899 * An even number of words means an explicit result argument is present. 900 */ 901 902 int explicitResult = (0 == (objc % 2)); 903 int numOptionWords = objc - 1 - explicitResult; 904 905 if (TCL_ERROR == TclMergeReturnOptions(interp, numOptionWords, objv+1, 906 &returnOpts, &code, &level)) { 907 return TCL_ERROR; 908 } 909 910 code = TclProcessReturn(interp, code, level, returnOpts); 911 if (explicitResult) { 912 Tcl_SetObjResult(interp, objv[objc-1]); 913 } 914 return code; 915} 916 917/* 918 *---------------------------------------------------------------------- 919 * 920 * Tcl_SourceObjCmd -- 921 * 922 * This procedure is invoked to process the "source" Tcl command. See the 923 * user documentation for details on what it does. 924 * 925 * Results: 926 * A standard Tcl object result. 927 * 928 * Side effects: 929 * See the user documentation. 930 * 931 *---------------------------------------------------------------------- 932 */ 933 934int 935Tcl_SourceObjCmd( 936 ClientData dummy, /* Not used. */ 937 Tcl_Interp *interp, /* Current interpreter. */ 938 int objc, /* Number of arguments. */ 939 Tcl_Obj *CONST objv[]) /* Argument objects. */ 940{ 941 CONST char *encodingName = NULL; 942 Tcl_Obj *fileName; 943 944 if (objc != 2 && objc !=4) { 945 Tcl_WrongNumArgs(interp, 1, objv, "?-encoding name? fileName"); 946 return TCL_ERROR; 947 } 948 949 fileName = objv[objc-1]; 950 951 if (objc == 4) { 952 static CONST char *options[] = { 953 "-encoding", NULL 954 }; 955 int index; 956 957 if (TCL_ERROR == Tcl_GetIndexFromObj(interp, objv[1], options, 958 "option", TCL_EXACT, &index)) { 959 return TCL_ERROR; 960 } 961 encodingName = TclGetString(objv[2]); 962 } 963 964 return Tcl_FSEvalFileEx(interp, fileName, encodingName); 965} 966 967/* 968 *---------------------------------------------------------------------- 969 * 970 * Tcl_SplitObjCmd -- 971 * 972 * This procedure is invoked to process the "split" Tcl command. See the 973 * user documentation for details on what it does. 974 * 975 * Results: 976 * A standard Tcl result. 977 * 978 * Side effects: 979 * See the user documentation. 980 * 981 *---------------------------------------------------------------------- 982 */ 983 984int 985Tcl_SplitObjCmd( 986 ClientData dummy, /* Not used. */ 987 Tcl_Interp *interp, /* Current interpreter. */ 988 int objc, /* Number of arguments. */ 989 Tcl_Obj *CONST objv[]) /* Argument objects. */ 990{ 991 Tcl_UniChar ch; 992 int len; 993 char *splitChars, *stringPtr, *end; 994 int splitCharLen, stringLen; 995 Tcl_Obj *listPtr, *objPtr; 996 997 if (objc == 2) { 998 splitChars = " \n\t\r"; 999 splitCharLen = 4; 1000 } else if (objc == 3) { 1001 splitChars = TclGetStringFromObj(objv[2], &splitCharLen); 1002 } else { 1003 Tcl_WrongNumArgs(interp, 1, objv, "string ?splitChars?"); 1004 return TCL_ERROR; 1005 } 1006 1007 stringPtr = TclGetStringFromObj(objv[1], &stringLen); 1008 end = stringPtr + stringLen; 1009 listPtr = Tcl_NewObj(); 1010 1011 if (stringLen == 0) { 1012 /* 1013 * Do nothing. 1014 */ 1015 } else if (splitCharLen == 0) { 1016 Tcl_HashTable charReuseTable; 1017 Tcl_HashEntry *hPtr; 1018 int isNew; 1019 1020 /* 1021 * Handle the special case of splitting on every character. 1022 * 1023 * Uses a hash table to ensure that each kind of character has only 1024 * one Tcl_Obj instance (multiply-referenced) in the final list. This 1025 * is a *major* win when splitting on a long string (especially in the 1026 * megabyte range!) - DKF 1027 */ 1028 1029 Tcl_InitHashTable(&charReuseTable, TCL_ONE_WORD_KEYS); 1030 1031 for ( ; stringPtr < end; stringPtr += len) { 1032 len = TclUtfToUniChar(stringPtr, &ch); 1033 1034 /* 1035 * Assume Tcl_UniChar is an integral type... 1036 */ 1037 1038 hPtr = Tcl_CreateHashEntry(&charReuseTable, (char*)0+ch, &isNew); 1039 if (isNew) { 1040 TclNewStringObj(objPtr, stringPtr, len); 1041 1042 /* 1043 * Don't need to fiddle with refcount... 1044 */ 1045 1046 Tcl_SetHashValue(hPtr, (ClientData) objPtr); 1047 } else { 1048 objPtr = (Tcl_Obj *) Tcl_GetHashValue(hPtr); 1049 } 1050 Tcl_ListObjAppendElement(NULL, listPtr, objPtr); 1051 } 1052 Tcl_DeleteHashTable(&charReuseTable); 1053 1054 } else if (splitCharLen == 1) { 1055 char *p; 1056 1057 /* 1058 * Handle the special case of splitting on a single character. This is 1059 * only true for the one-char ASCII case, as one unicode char is > 1 1060 * byte in length. 1061 */ 1062 1063 while (*stringPtr && (p=strchr(stringPtr,(int)*splitChars)) != NULL) { 1064 objPtr = Tcl_NewStringObj(stringPtr, p - stringPtr); 1065 Tcl_ListObjAppendElement(NULL, listPtr, objPtr); 1066 stringPtr = p + 1; 1067 } 1068 TclNewStringObj(objPtr, stringPtr, end - stringPtr); 1069 Tcl_ListObjAppendElement(NULL, listPtr, objPtr); 1070 } else { 1071 char *element, *p, *splitEnd; 1072 int splitLen; 1073 Tcl_UniChar splitChar; 1074 1075 /* 1076 * Normal case: split on any of a given set of characters. Discard 1077 * instances of the split characters. 1078 */ 1079 1080 splitEnd = splitChars + splitCharLen; 1081 1082 for (element = stringPtr; stringPtr < end; stringPtr += len) { 1083 len = TclUtfToUniChar(stringPtr, &ch); 1084 for (p = splitChars; p < splitEnd; p += splitLen) { 1085 splitLen = TclUtfToUniChar(p, &splitChar); 1086 if (ch == splitChar) { 1087 TclNewStringObj(objPtr, element, stringPtr - element); 1088 Tcl_ListObjAppendElement(NULL, listPtr, objPtr); 1089 element = stringPtr + len; 1090 break; 1091 } 1092 } 1093 } 1094 1095 TclNewStringObj(objPtr, element, stringPtr - element); 1096 Tcl_ListObjAppendElement(NULL, listPtr, objPtr); 1097 } 1098 Tcl_SetObjResult(interp, listPtr); 1099 return TCL_OK; 1100} 1101 1102/* 1103 *---------------------------------------------------------------------- 1104 * 1105 * StringFirstCmd -- 1106 * 1107 * This procedure is invoked to process the "string first" Tcl command. 1108 * See the user documentation for details on what it does. 1109 * 1110 * Results: 1111 * A standard Tcl result. 1112 * 1113 * Side effects: 1114 * See the user documentation. 1115 * 1116 *---------------------------------------------------------------------- 1117 */ 1118 1119static int 1120StringFirstCmd( 1121 ClientData dummy, /* Not used. */ 1122 Tcl_Interp *interp, /* Current interpreter. */ 1123 int objc, /* Number of arguments. */ 1124 Tcl_Obj *const objv[]) /* Argument objects. */ 1125{ 1126 Tcl_UniChar *ustring1, *ustring2; 1127 int match, start, length1, length2; 1128 1129 if (objc < 3 || objc > 4) { 1130 Tcl_WrongNumArgs(interp, 1, objv, 1131 "needleString haystackString ?startIndex?"); 1132 return TCL_ERROR; 1133 } 1134 1135 /* 1136 * We are searching string2 for the sequence string1. 1137 */ 1138 1139 match = -1; 1140 start = 0; 1141 length2 = -1; 1142 1143 ustring1 = Tcl_GetUnicodeFromObj(objv[1], &length1); 1144 ustring2 = Tcl_GetUnicodeFromObj(objv[2], &length2); 1145 1146 if (objc == 4) { 1147 /* 1148 * If a startIndex is specified, we will need to fast forward to that 1149 * point in the string before we think about a match. 1150 */ 1151 1152 if (TclGetIntForIndexM(interp, objv[3], length2-1, &start) != TCL_OK){ 1153 return TCL_ERROR; 1154 } 1155 1156 /* 1157 * Reread to prevent shimmering problems. 1158 */ 1159 1160 ustring1 = Tcl_GetUnicodeFromObj(objv[1], &length1); 1161 ustring2 = Tcl_GetUnicodeFromObj(objv[2], &length2); 1162 1163 if (start >= length2) { 1164 goto str_first_done; 1165 } else if (start > 0) { 1166 ustring2 += start; 1167 length2 -= start; 1168 } else if (start < 0) { 1169 /* 1170 * Invalid start index mapped to string start; Bug #423581 1171 */ 1172 1173 start = 0; 1174 } 1175 } 1176 1177 /* 1178 * If the length of the needle is more than the length of the haystack, it 1179 * cannot be contained in there so we can avoid searching. [Bug 2960021] 1180 */ 1181 1182 if (length1 > 0 && length1 <= length2) { 1183 register Tcl_UniChar *p, *end; 1184 1185 end = ustring2 + length2 - length1 + 1; 1186 for (p = ustring2; p < end; p++) { 1187 /* 1188 * Scan forward to find the first character. 1189 */ 1190 1191 if ((*p == *ustring1) && (TclUniCharNcmp(ustring1, p, 1192 (unsigned long) length1) == 0)) { 1193 match = p - ustring2; 1194 break; 1195 } 1196 } 1197 } 1198 1199 /* 1200 * Compute the character index of the matching string by counting the 1201 * number of characters before the match. 1202 */ 1203 1204 if ((match != -1) && (objc == 4)) { 1205 match += start; 1206 } 1207 1208 str_first_done: 1209 Tcl_SetObjResult(interp, Tcl_NewIntObj(match)); 1210 return TCL_OK; 1211} 1212 1213/* 1214 *---------------------------------------------------------------------- 1215 * 1216 * StringLastCmd -- 1217 * 1218 * This procedure is invoked to process the "string last" Tcl command. 1219 * See the user documentation for details on what it does. 1220 * 1221 * Results: 1222 * A standard Tcl result. 1223 * 1224 * Side effects: 1225 * See the user documentation. 1226 * 1227 *---------------------------------------------------------------------- 1228 */ 1229 1230static int 1231StringLastCmd( 1232 ClientData dummy, /* Not used. */ 1233 Tcl_Interp *interp, /* Current interpreter. */ 1234 int objc, /* Number of arguments. */ 1235 Tcl_Obj *const objv[]) /* Argument objects. */ 1236{ 1237 Tcl_UniChar *ustring1, *ustring2, *p; 1238 int match, start, length1, length2; 1239 1240 if (objc < 3 || objc > 4) { 1241 Tcl_WrongNumArgs(interp, 1, objv, 1242 "needleString haystackString ?startIndex?"); 1243 return TCL_ERROR; 1244 } 1245 1246 /* 1247 * We are searching string2 for the sequence string1. 1248 */ 1249 1250 match = -1; 1251 start = 0; 1252 length2 = -1; 1253 1254 ustring1 = Tcl_GetUnicodeFromObj(objv[1], &length1); 1255 ustring2 = Tcl_GetUnicodeFromObj(objv[2], &length2); 1256 1257 if (objc == 4) { 1258 /* 1259 * If a startIndex is specified, we will need to restrict the string 1260 * range to that char index in the string 1261 */ 1262 1263 if (TclGetIntForIndexM(interp, objv[3], length2-1, &start) != TCL_OK){ 1264 return TCL_ERROR; 1265 } 1266 1267 /* 1268 * Reread to prevent shimmering problems. 1269 */ 1270 1271 ustring1 = Tcl_GetUnicodeFromObj(objv[1], &length1); 1272 ustring2 = Tcl_GetUnicodeFromObj(objv[2], &length2); 1273 1274 if (start < 0) { 1275 goto str_last_done; 1276 } else if (start < length2) { 1277 p = ustring2 + start + 1 - length1; 1278 } else { 1279 p = ustring2 + length2 - length1; 1280 } 1281 } else { 1282 p = ustring2 + length2 - length1; 1283 } 1284 1285 /* 1286 * If the length of the needle is more than the length of the haystack, it 1287 * cannot be contained in there so we can avoid searching. [Bug 2960021] 1288 */ 1289 1290 if (length1 > 0 && length1 <= length2) { 1291 for (; p >= ustring2; p--) { 1292 /* 1293 * Scan backwards to find the first character. 1294 */ 1295 1296 if ((*p == *ustring1) && !memcmp(ustring1, p, 1297 sizeof(Tcl_UniChar) * (size_t)length1)) { 1298 match = p - ustring2; 1299 break; 1300 } 1301 } 1302 } 1303 1304 str_last_done: 1305 Tcl_SetObjResult(interp, Tcl_NewIntObj(match)); 1306 return TCL_OK; 1307} 1308 1309/* 1310 *---------------------------------------------------------------------- 1311 * 1312 * StringIndexCmd -- 1313 * 1314 * This procedure is invoked to process the "string index" Tcl command. 1315 * See the user documentation for details on what it does. Note that this 1316 * command only functions correctly on properly formed Tcl UTF strings. 1317 * 1318 * Results: 1319 * A standard Tcl result. 1320 * 1321 * Side effects: 1322 * See the user documentation. 1323 * 1324 *---------------------------------------------------------------------- 1325 */ 1326 1327static int 1328StringIndexCmd( 1329 ClientData dummy, /* Not used. */ 1330 Tcl_Interp *interp, /* Current interpreter. */ 1331 int objc, /* Number of arguments. */ 1332 Tcl_Obj *const objv[]) /* Argument objects. */ 1333{ 1334 int length, index; 1335 1336 if (objc != 3) { 1337 Tcl_WrongNumArgs(interp, 1, objv, "string charIndex"); 1338 return TCL_ERROR; 1339 } 1340 1341 /* 1342 * If we have a ByteArray object, avoid indexing in the Utf string since 1343 * the byte array contains one byte per character. Otherwise, use the 1344 * Unicode string rep to get the index'th char. 1345 */ 1346 1347 if (objv[1]->typePtr == &tclByteArrayType) { 1348 const unsigned char *string = 1349 Tcl_GetByteArrayFromObj(objv[1], &length); 1350 1351 if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK){ 1352 return TCL_ERROR; 1353 } 1354 string = Tcl_GetByteArrayFromObj(objv[1], &length); 1355 if ((index >= 0) && (index < length)) { 1356 Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(string + index, 1)); 1357 } 1358 } else { 1359 /* 1360 * Get Unicode char length to calulate what 'end' means. 1361 */ 1362 1363 length = Tcl_GetCharLength(objv[1]); 1364 1365 if (TclGetIntForIndexM(interp, objv[2], length-1, &index) != TCL_OK){ 1366 return TCL_ERROR; 1367 } 1368 if ((index >= 0) && (index < length)) { 1369 char buf[TCL_UTF_MAX]; 1370 Tcl_UniChar ch; 1371 1372 ch = Tcl_GetUniChar(objv[1], index); 1373 length = Tcl_UniCharToUtf(ch, buf); 1374 Tcl_SetObjResult(interp, Tcl_NewStringObj(buf, length)); 1375 } 1376 } 1377 return TCL_OK; 1378} 1379 1380/* 1381 *---------------------------------------------------------------------- 1382 * 1383 * StringIsCmd -- 1384 * 1385 * This procedure is invoked to process the "string is" Tcl command. See 1386 * the user documentation for details on what it does. Note that this 1387 * command only functions correctly on properly formed Tcl UTF strings. 1388 * 1389 * Results: 1390 * A standard Tcl result. 1391 * 1392 * Side effects: 1393 * See the user documentation. 1394 * 1395 *---------------------------------------------------------------------- 1396 */ 1397 1398static int 1399StringIsCmd( 1400 ClientData dummy, /* Not used. */ 1401 Tcl_Interp *interp, /* Current interpreter. */ 1402 int objc, /* Number of arguments. */ 1403 Tcl_Obj *const objv[]) /* Argument objects. */ 1404{ 1405 const char *string1, *end, *stop; 1406 Tcl_UniChar ch; 1407 int (*chcomp)(int) = NULL; /* The UniChar comparison function. */ 1408 int i, failat = 0, result = 1, strict = 0, index, length1, length2; 1409 Tcl_Obj *objPtr, *failVarObj = NULL; 1410 Tcl_WideInt w; 1411 1412 static const char *isClasses[] = { 1413 "alnum", "alpha", "ascii", "control", 1414 "boolean", "digit", "double", "false", 1415 "graph", "integer", "list", "lower", 1416 "print", "punct", "space", "true", 1417 "upper", "wideinteger", "wordchar", "xdigit", 1418 NULL 1419 }; 1420 enum isClasses { 1421 STR_IS_ALNUM, STR_IS_ALPHA, STR_IS_ASCII, STR_IS_CONTROL, 1422 STR_IS_BOOL, STR_IS_DIGIT, STR_IS_DOUBLE, STR_IS_FALSE, 1423 STR_IS_GRAPH, STR_IS_INT, STR_IS_LIST, STR_IS_LOWER, 1424 STR_IS_PRINT, STR_IS_PUNCT, STR_IS_SPACE, STR_IS_TRUE, 1425 STR_IS_UPPER, STR_IS_WIDE, STR_IS_WORD, STR_IS_XDIGIT 1426 }; 1427 static const char *isOptions[] = { 1428 "-strict", "-failindex", NULL 1429 }; 1430 enum isOptions { 1431 OPT_STRICT, OPT_FAILIDX 1432 }; 1433 1434 if (objc < 3 || objc > 6) { 1435 Tcl_WrongNumArgs(interp, 1, objv, 1436 "class ?-strict? ?-failindex var? str"); 1437 return TCL_ERROR; 1438 } 1439 if (Tcl_GetIndexFromObj(interp, objv[1], isClasses, "class", 0, 1440 &index) != TCL_OK) { 1441 return TCL_ERROR; 1442 } 1443 1444 if (objc != 3) { 1445 for (i = 2; i < objc-1; i++) { 1446 int idx2; 1447 1448 if (Tcl_GetIndexFromObj(interp, objv[i], isOptions, "option", 0, 1449 &idx2) != TCL_OK) { 1450 return TCL_ERROR; 1451 } 1452 switch ((enum isOptions) idx2) { 1453 case OPT_STRICT: 1454 strict = 1; 1455 break; 1456 case OPT_FAILIDX: 1457 if (i+1 >= objc-1) { 1458 Tcl_WrongNumArgs(interp, 2, objv, 1459 "?-strict? ?-failindex var? str"); 1460 return TCL_ERROR; 1461 } 1462 failVarObj = objv[++i]; 1463 break; 1464 } 1465 } 1466 } 1467 1468 /* 1469 * We get the objPtr so that we can short-cut for some classes by checking 1470 * the object type (int and double), but we need the string otherwise, 1471 * because we don't want any conversion of type occuring (as, for example, 1472 * Tcl_Get*FromObj would do). 1473 */ 1474 1475 objPtr = objv[objc-1]; 1476 1477 /* 1478 * When entering here, result == 1 and failat == 0. 1479 */ 1480 1481 switch ((enum isClasses) index) { 1482 case STR_IS_ALNUM: 1483 chcomp = Tcl_UniCharIsAlnum; 1484 break; 1485 case STR_IS_ALPHA: 1486 chcomp = Tcl_UniCharIsAlpha; 1487 break; 1488 case STR_IS_ASCII: 1489 chcomp = UniCharIsAscii; 1490 break; 1491 case STR_IS_BOOL: 1492 case STR_IS_TRUE: 1493 case STR_IS_FALSE: 1494 if (TCL_OK != Tcl_ConvertToType(NULL, objPtr, &tclBooleanType)) { 1495 if (strict) { 1496 result = 0; 1497 } else { 1498 string1 = TclGetStringFromObj(objPtr, &length1); 1499 result = length1 == 0; 1500 } 1501 } else if (((index == STR_IS_TRUE) && 1502 objPtr->internalRep.longValue == 0) 1503 || ((index == STR_IS_FALSE) && 1504 objPtr->internalRep.longValue != 0)) { 1505 result = 0; 1506 } 1507 break; 1508 case STR_IS_CONTROL: 1509 chcomp = Tcl_UniCharIsControl; 1510 break; 1511 case STR_IS_DIGIT: 1512 chcomp = Tcl_UniCharIsDigit; 1513 break; 1514 case STR_IS_DOUBLE: { 1515 /* TODO */ 1516 if ((objPtr->typePtr == &tclDoubleType) || 1517 (objPtr->typePtr == &tclIntType) || 1518#ifndef NO_WIDE_TYPE 1519 (objPtr->typePtr == &tclWideIntType) || 1520#endif 1521 (objPtr->typePtr == &tclBignumType)) { 1522 break; 1523 } 1524 string1 = TclGetStringFromObj(objPtr, &length1); 1525 if (length1 == 0) { 1526 if (strict) { 1527 result = 0; 1528 } 1529 goto str_is_done; 1530 } 1531 end = string1 + length1; 1532 if (TclParseNumber(NULL, objPtr, NULL, NULL, -1, 1533 (const char **) &stop, 0) != TCL_OK) { 1534 result = 0; 1535 failat = 0; 1536 } else { 1537 failat = stop - string1; 1538 if (stop < end) { 1539 result = 0; 1540 TclFreeIntRep(objPtr); 1541 objPtr->typePtr = NULL; 1542 } 1543 } 1544 break; 1545 } 1546 case STR_IS_GRAPH: 1547 chcomp = Tcl_UniCharIsGraph; 1548 break; 1549 case STR_IS_INT: 1550 if (TCL_OK == TclGetIntFromObj(NULL, objPtr, &i)) { 1551 break; 1552 } 1553 goto failedIntParse; 1554 case STR_IS_WIDE: 1555 if (TCL_OK == Tcl_GetWideIntFromObj(NULL, objPtr, &w)) { 1556 break; 1557 } 1558 1559 failedIntParse: 1560 string1 = TclGetStringFromObj(objPtr, &length1); 1561 if (length1 == 0) { 1562 if (strict) { 1563 result = 0; 1564 } 1565 goto str_is_done; 1566 } 1567 result = 0; 1568 if (failVarObj == NULL) { 1569 /* 1570 * Don't bother computing the failure point if we're not going to 1571 * return it. 1572 */ 1573 1574 break; 1575 } 1576 end = string1 + length1; 1577 if (TclParseNumber(NULL, objPtr, NULL, NULL, -1, 1578 (const char **) &stop, TCL_PARSE_INTEGER_ONLY) == TCL_OK) { 1579 if (stop == end) { 1580 /* 1581 * Entire string parses as an integer, but rejected by 1582 * Tcl_Get(Wide)IntFromObj() so we must have overflowed the 1583 * target type, and our convention is to return failure at 1584 * index -1 in that situation. 1585 */ 1586 1587 failat = -1; 1588 } else { 1589 /* 1590 * Some prefix parsed as an integer, but not the whole string, 1591 * so return failure index as the point where parsing stopped. 1592 * Clear out the internal rep, since keeping it would leave 1593 * *objPtr in an inconsistent state. 1594 */ 1595 1596 failat = stop - string1; 1597 TclFreeIntRep(objPtr); 1598 objPtr->typePtr = NULL; 1599 } 1600 } else { 1601 /* 1602 * No prefix is a valid integer. Fail at beginning. 1603 */ 1604 1605 failat = 0; 1606 } 1607 break; 1608 case STR_IS_LIST: 1609 /* 1610 * We ignore the strictness here, since empty strings are always 1611 * well-formed lists. 1612 */ 1613 1614 if (TCL_OK == TclListObjLength(NULL, objPtr, &length2)) { 1615 break; 1616 } 1617 1618 if (failVarObj != NULL) { 1619 /* 1620 * Need to figure out where the list parsing failed, which is 1621 * fairly expensive. This is adapted from the core of 1622 * SetListFromAny(). 1623 */ 1624 1625 const char *elemStart, *nextElem; 1626 int lenRemain, elemSize, hasBrace; 1627 register const char *p; 1628 1629 string1 = TclGetStringFromObj(objPtr, &length1); 1630 end = string1 + length1; 1631 failat = -1; 1632 for (p=string1, lenRemain=length1; lenRemain > 0; 1633 p=nextElem, lenRemain=end-nextElem) { 1634 if (TCL_ERROR == TclFindElement(NULL, p, lenRemain, 1635 &elemStart, &nextElem, &elemSize, &hasBrace)) { 1636 Tcl_Obj *tmpStr; 1637 1638 /* 1639 * This is the simplest way of getting the number of 1640 * characters parsed. Note that this is not the same as 1641 * the number of bytes when parsing strings with non-ASCII 1642 * characters in them. 1643 * 1644 * Skip leading spaces first. This is only really an issue 1645 * if it is the first "element" that has the failure. 1646 */ 1647 1648 while (isspace(UCHAR(*p))) { /* INTL: ? */ 1649 p++; 1650 } 1651 TclNewStringObj(tmpStr, string1, p-string1); 1652 failat = Tcl_GetCharLength(tmpStr); 1653 TclDecrRefCount(tmpStr); 1654 break; 1655 } 1656 } 1657 } 1658 result = 0; 1659 break; 1660 case STR_IS_LOWER: 1661 chcomp = Tcl_UniCharIsLower; 1662 break; 1663 case STR_IS_PRINT: 1664 chcomp = Tcl_UniCharIsPrint; 1665 break; 1666 case STR_IS_PUNCT: 1667 chcomp = Tcl_UniCharIsPunct; 1668 break; 1669 case STR_IS_SPACE: 1670 chcomp = Tcl_UniCharIsSpace; 1671 break; 1672 case STR_IS_UPPER: 1673 chcomp = Tcl_UniCharIsUpper; 1674 break; 1675 case STR_IS_WORD: 1676 chcomp = Tcl_UniCharIsWordChar; 1677 break; 1678 case STR_IS_XDIGIT: 1679 chcomp = UniCharIsHexDigit; 1680 break; 1681 } 1682 1683 if (chcomp != NULL) { 1684 string1 = TclGetStringFromObj(objPtr, &length1); 1685 if (length1 == 0) { 1686 if (strict) { 1687 result = 0; 1688 } 1689 goto str_is_done; 1690 } 1691 end = string1 + length1; 1692 for (; string1 < end; string1 += length2, failat++) { 1693 length2 = TclUtfToUniChar(string1, &ch); 1694 if (!chcomp(ch)) { 1695 result = 0; 1696 break; 1697 } 1698 } 1699 } 1700 1701 /* 1702 * Only set the failVarObj when we will return 0 and we have indicated a 1703 * valid fail index (>= 0). 1704 */ 1705 1706 str_is_done: 1707 if ((result == 0) && (failVarObj != NULL) && 1708 Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(failat), 1709 TCL_LEAVE_ERR_MSG) == NULL) { 1710 return TCL_ERROR; 1711 } 1712 Tcl_SetObjResult(interp, Tcl_NewBooleanObj(result)); 1713 return TCL_OK; 1714} 1715 1716static int 1717UniCharIsAscii( 1718 int character) 1719{ 1720 return (character >= 0) && (character < 0x80); 1721} 1722 1723static int 1724UniCharIsHexDigit( 1725 int character) 1726{ 1727 return (character >= 0) && (character < 0x80) && isxdigit(character); 1728} 1729 1730/* 1731 *---------------------------------------------------------------------- 1732 * 1733 * StringMapCmd -- 1734 * 1735 * This procedure is invoked to process the "string map" Tcl command. See 1736 * the user documentation for details on what it does. Note that this 1737 * command only functions correctly on properly formed Tcl UTF strings. 1738 * 1739 * Results: 1740 * A standard Tcl result. 1741 * 1742 * Side effects: 1743 * See the user documentation. 1744 * 1745 *---------------------------------------------------------------------- 1746 */ 1747 1748static int 1749StringMapCmd( 1750 ClientData dummy, /* Not used. */ 1751 Tcl_Interp *interp, /* Current interpreter. */ 1752 int objc, /* Number of arguments. */ 1753 Tcl_Obj *const objv[]) /* Argument objects. */ 1754{ 1755 int length1, length2, mapElemc, index; 1756 int nocase = 0, mapWithDict = 0, copySource = 0; 1757 Tcl_Obj **mapElemv, *sourceObj, *resultPtr; 1758 Tcl_UniChar *ustring1, *ustring2, *p, *end; 1759 int (*strCmpFn)(const Tcl_UniChar*, const Tcl_UniChar*, unsigned long); 1760 1761 if (objc < 3 || objc > 4) { 1762 Tcl_WrongNumArgs(interp, 1, objv, "?-nocase? charMap string"); 1763 return TCL_ERROR; 1764 } 1765 1766 if (objc == 4) { 1767 const char *string = TclGetStringFromObj(objv[1], &length2); 1768 1769 if ((length2 > 1) && 1770 strncmp(string, "-nocase", (size_t) length2) == 0) { 1771 nocase = 1; 1772 } else { 1773 Tcl_AppendResult(interp, "bad option \"", string, 1774 "\": must be -nocase", NULL); 1775 return TCL_ERROR; 1776 } 1777 } 1778 1779 /* 1780 * This test is tricky, but has to be that way or you get other strange 1781 * inconsistencies (see test string-10.20 for illustration why!) 1782 */ 1783 1784 if (objv[objc-2]->typePtr == &tclDictType && objv[objc-2]->bytes == NULL){ 1785 int i, done; 1786 Tcl_DictSearch search; 1787 1788 /* 1789 * We know the type exactly, so all dict operations will succeed for 1790 * sure. This shortens this code quite a bit. 1791 */ 1792 1793 Tcl_DictObjSize(interp, objv[objc-2], &mapElemc); 1794 if (mapElemc == 0) { 1795 /* 1796 * Empty charMap, just return whatever string was given. 1797 */ 1798 1799 Tcl_SetObjResult(interp, objv[objc-1]); 1800 return TCL_OK; 1801 } 1802 1803 mapElemc *= 2; 1804 mapWithDict = 1; 1805 1806 /* 1807 * Copy the dictionary out into an array; that's the easiest way to 1808 * adapt this code... 1809 */ 1810 1811 mapElemv = (Tcl_Obj **) 1812 TclStackAlloc(interp, sizeof(Tcl_Obj *) * mapElemc); 1813 Tcl_DictObjFirst(interp, objv[objc-2], &search, mapElemv+0, 1814 mapElemv+1, &done); 1815 for (i=2 ; i<mapElemc ; i+=2) { 1816 Tcl_DictObjNext(&search, mapElemv+i, mapElemv+i+1, &done); 1817 } 1818 Tcl_DictObjDone(&search); 1819 } else { 1820 if (TclListObjGetElements(interp, objv[objc-2], &mapElemc, 1821 &mapElemv) != TCL_OK) { 1822 return TCL_ERROR; 1823 } 1824 if (mapElemc == 0) { 1825 /* 1826 * empty charMap, just return whatever string was given. 1827 */ 1828 1829 Tcl_SetObjResult(interp, objv[objc-1]); 1830 return TCL_OK; 1831 } else if (mapElemc & 1) { 1832 /* 1833 * The charMap must be an even number of key/value items. 1834 */ 1835 1836 Tcl_SetObjResult(interp, 1837 Tcl_NewStringObj("char map list unbalanced", -1)); 1838 return TCL_ERROR; 1839 } 1840 } 1841 1842 /* 1843 * Take a copy of the source string object if it is the same as the map 1844 * string to cut out nasty sharing crashes. [Bug 1018562] 1845 */ 1846 1847 if (objv[objc-2] == objv[objc-1]) { 1848 sourceObj = Tcl_DuplicateObj(objv[objc-1]); 1849 copySource = 1; 1850 } else { 1851 sourceObj = objv[objc-1]; 1852 } 1853 ustring1 = Tcl_GetUnicodeFromObj(sourceObj, &length1); 1854 if (length1 == 0) { 1855 /* 1856 * Empty input string, just stop now. 1857 */ 1858 1859 goto done; 1860 } 1861 end = ustring1 + length1; 1862 1863 strCmpFn = (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp); 1864 1865 /* 1866 * Force result to be Unicode 1867 */ 1868 1869 resultPtr = Tcl_NewUnicodeObj(ustring1, 0); 1870 1871 if (mapElemc == 2) { 1872 /* 1873 * Special case for one map pair which avoids the extra for loop and 1874 * extra calls to get Unicode data. The algorithm is otherwise 1875 * identical to the multi-pair case. This will be >30% faster on 1876 * larger strings. 1877 */ 1878 1879 int mapLen; 1880 Tcl_UniChar *mapString, u2lc; 1881 1882 ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2); 1883 p = ustring1; 1884 if ((length2 > length1) || (length2 == 0)) { 1885 /* 1886 * Match string is either longer than input or empty. 1887 */ 1888 1889 ustring1 = end; 1890 } else { 1891 mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen); 1892 u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0); 1893 for (; ustring1 < end; ustring1++) { 1894 if (((*ustring1 == *ustring2) || 1895 (nocase&&Tcl_UniCharToLower(*ustring1)==u2lc)) && 1896 (length2==1 || strCmpFn(ustring1, ustring2, 1897 (unsigned long) length2) == 0)) { 1898 if (p != ustring1) { 1899 Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p); 1900 p = ustring1 + length2; 1901 } else { 1902 p += length2; 1903 } 1904 ustring1 = p - 1; 1905 1906 Tcl_AppendUnicodeToObj(resultPtr, mapString, mapLen); 1907 } 1908 } 1909 } 1910 } else { 1911 Tcl_UniChar **mapStrings, *u2lc = NULL; 1912 int *mapLens; 1913 1914 /* 1915 * Precompute pointers to the unicode string and length. This saves us 1916 * repeated function calls later, significantly speeding up the 1917 * algorithm. We only need the lowercase first char in the nocase 1918 * case. 1919 */ 1920 1921 mapStrings = (Tcl_UniChar **) TclStackAlloc(interp, 1922 mapElemc * 2 * sizeof(Tcl_UniChar *)); 1923 mapLens = (int *) TclStackAlloc(interp, mapElemc * 2 * sizeof(int)); 1924 if (nocase) { 1925 u2lc = (Tcl_UniChar *) TclStackAlloc(interp, 1926 mapElemc * sizeof(Tcl_UniChar)); 1927 } 1928 for (index = 0; index < mapElemc; index++) { 1929 mapStrings[index] = Tcl_GetUnicodeFromObj(mapElemv[index], 1930 mapLens+index); 1931 if (nocase && ((index % 2) == 0)) { 1932 u2lc[index/2] = Tcl_UniCharToLower(*mapStrings[index]); 1933 } 1934 } 1935 for (p = ustring1; ustring1 < end; ustring1++) { 1936 for (index = 0; index < mapElemc; index += 2) { 1937 /* 1938 * Get the key string to match on. 1939 */ 1940 1941 ustring2 = mapStrings[index]; 1942 length2 = mapLens[index]; 1943 if ((length2 > 0) && ((*ustring1 == *ustring2) || (nocase && 1944 (Tcl_UniCharToLower(*ustring1) == u2lc[index/2]))) && 1945 /* Restrict max compare length. */ 1946 (end-ustring1 >= length2) && ((length2 == 1) || 1947 !strCmpFn(ustring2, ustring1, (unsigned) length2))) { 1948 if (p != ustring1) { 1949 /* 1950 * Put the skipped chars onto the result first. 1951 */ 1952 1953 Tcl_AppendUnicodeToObj(resultPtr, p, ustring1-p); 1954 p = ustring1 + length2; 1955 } else { 1956 p += length2; 1957 } 1958 1959 /* 1960 * Adjust len to be full length of matched string. 1961 */ 1962 1963 ustring1 = p - 1; 1964 1965 /* 1966 * Append the map value to the unicode string. 1967 */ 1968 1969 Tcl_AppendUnicodeToObj(resultPtr, 1970 mapStrings[index+1], mapLens[index+1]); 1971 break; 1972 } 1973 } 1974 } 1975 if (nocase) { 1976 TclStackFree(interp, u2lc); 1977 } 1978 TclStackFree(interp, mapLens); 1979 TclStackFree(interp, mapStrings); 1980 } 1981 if (p != ustring1) { 1982 /* 1983 * Put the rest of the unmapped chars onto result. 1984 */ 1985 1986 Tcl_AppendUnicodeToObj(resultPtr, p, ustring1 - p); 1987 } 1988 Tcl_SetObjResult(interp, resultPtr); 1989 done: 1990 if (mapWithDict) { 1991 TclStackFree(interp, mapElemv); 1992 } 1993 if (copySource) { 1994 Tcl_DecrRefCount(sourceObj); 1995 } 1996 return TCL_OK; 1997} 1998 1999/* 2000 *---------------------------------------------------------------------- 2001 * 2002 * StringMatchCmd -- 2003 * 2004 * This procedure is invoked to process the "string match" Tcl command. 2005 * See the user documentation for details on what it does. Note that this 2006 * command only functions correctly on properly formed Tcl UTF strings. 2007 * 2008 * Results: 2009 * A standard Tcl result. 2010 * 2011 * Side effects: 2012 * See the user documentation. 2013 * 2014 *---------------------------------------------------------------------- 2015 */ 2016 2017static int 2018StringMatchCmd( 2019 ClientData dummy, /* Not used. */ 2020 Tcl_Interp *interp, /* Current interpreter. */ 2021 int objc, /* Number of arguments. */ 2022 Tcl_Obj *const objv[]) /* Argument objects. */ 2023{ 2024 int nocase = 0; 2025 2026 if (objc < 3 || objc > 4) { 2027 Tcl_WrongNumArgs(interp, 1, objv, "?-nocase? pattern string"); 2028 return TCL_ERROR; 2029 } 2030 2031 if (objc == 4) { 2032 int length; 2033 const char *string = TclGetStringFromObj(objv[1], &length); 2034 2035 if ((length > 1) && 2036 strncmp(string, "-nocase", (size_t) length) == 0) { 2037 nocase = TCL_MATCH_NOCASE; 2038 } else { 2039 Tcl_AppendResult(interp, "bad option \"", string, 2040 "\": must be -nocase", NULL); 2041 return TCL_ERROR; 2042 } 2043 } 2044 Tcl_SetObjResult(interp, Tcl_NewBooleanObj( 2045 TclStringMatchObj(objv[objc-1], objv[objc-2], nocase))); 2046 return TCL_OK; 2047} 2048 2049/* 2050 *---------------------------------------------------------------------- 2051 * 2052 * StringRangeCmd -- 2053 * 2054 * This procedure is invoked to process the "string range" Tcl command. 2055 * See the user documentation for details on what it does. Note that this 2056 * command only functions correctly on properly formed Tcl UTF strings. 2057 * 2058 * Results: 2059 * A standard Tcl result. 2060 * 2061 * Side effects: 2062 * See the user documentation. 2063 * 2064 *---------------------------------------------------------------------- 2065 */ 2066 2067static int 2068StringRangeCmd( 2069 ClientData dummy, /* Not used. */ 2070 Tcl_Interp *interp, /* Current interpreter. */ 2071 int objc, /* Number of arguments. */ 2072 Tcl_Obj *const objv[]) /* Argument objects. */ 2073{ 2074 const unsigned char *string; 2075 int length, first, last; 2076 2077 if (objc != 4) { 2078 Tcl_WrongNumArgs(interp, 1, objv, "string first last"); 2079 return TCL_ERROR; 2080 } 2081 2082 /* 2083 * If we have a ByteArray object, avoid indexing in the Utf string since 2084 * the byte array contains one byte per character. Otherwise, use the 2085 * Unicode string rep to get the range. 2086 */ 2087 2088 if (objv[1]->typePtr == &tclByteArrayType) { 2089 string = Tcl_GetByteArrayFromObj(objv[1], &length); 2090 length--; 2091 } else { 2092 /* 2093 * Get the length in actual characters. 2094 */ 2095 2096 string = NULL; 2097 length = Tcl_GetCharLength(objv[1]) - 1; 2098 } 2099 2100 if (TclGetIntForIndexM(interp, objv[2], length, &first) != TCL_OK || 2101 TclGetIntForIndexM(interp, objv[3], length, &last) != TCL_OK) { 2102 return TCL_ERROR; 2103 } 2104 2105 if (first < 0) { 2106 first = 0; 2107 } 2108 if (last >= length) { 2109 last = length; 2110 } 2111 if (last >= first) { 2112 if (string != NULL) { 2113 /* 2114 * Reread the string to prevent shimmering nasties. 2115 */ 2116 2117 string = Tcl_GetByteArrayFromObj(objv[1], &length); 2118 Tcl_SetObjResult(interp, 2119 Tcl_NewByteArrayObj(string+first, last - first + 1)); 2120 } else { 2121 Tcl_SetObjResult(interp, Tcl_GetRange(objv[1], first, last)); 2122 } 2123 } 2124 return TCL_OK; 2125} 2126 2127/* 2128 *---------------------------------------------------------------------- 2129 * 2130 * StringReptCmd -- 2131 * 2132 * This procedure is invoked to process the "string repeat" Tcl command. 2133 * See the user documentation for details on what it does. Note that this 2134 * command only functions correctly on properly formed Tcl UTF strings. 2135 * 2136 * Results: 2137 * A standard Tcl result. 2138 * 2139 * Side effects: 2140 * See the user documentation. 2141 * 2142 *---------------------------------------------------------------------- 2143 */ 2144 2145static int 2146StringReptCmd( 2147 ClientData dummy, /* Not used. */ 2148 Tcl_Interp *interp, /* Current interpreter. */ 2149 int objc, /* Number of arguments. */ 2150 Tcl_Obj *const objv[]) /* Argument objects. */ 2151{ 2152 const char *string1; 2153 char *string2; 2154 int count, index, length1, length2; 2155 Tcl_Obj *resultPtr; 2156 2157 if (objc != 3) { 2158 Tcl_WrongNumArgs(interp, 1, objv, "string count"); 2159 return TCL_ERROR; 2160 } 2161 2162 if (TclGetIntFromObj(interp, objv[2], &count) != TCL_OK) { 2163 return TCL_ERROR; 2164 } 2165 2166 /* 2167 * Check for cases that allow us to skip copying stuff. 2168 */ 2169 2170 if (count == 1) { 2171 Tcl_SetObjResult(interp, objv[1]); 2172 goto done; 2173 } else if (count < 1) { 2174 goto done; 2175 } 2176 string1 = TclGetStringFromObj(objv[1], &length1); 2177 if (length1 <= 0) { 2178 goto done; 2179 } 2180 2181 /* 2182 * Only build up a string that has data. Instead of building it up with 2183 * repeated appends, we just allocate the necessary space once and copy 2184 * the string value in. 2185 * 2186 * We have to worry about overflow [Bugs 714106, 2561746]. 2187 * At this point we know 1 <= length1 <= INT_MAX and 2 <= count <= INT_MAX. 2188 * We need to keep 2 <= length2 <= INT_MAX. 2189 */ 2190 2191 if (count > (INT_MAX / length1)) { 2192 Tcl_SetObjResult(interp, Tcl_ObjPrintf( 2193 "result exceeds max size for a Tcl value (%d bytes)", INT_MAX)); 2194 return TCL_ERROR; 2195 } 2196 length2 = length1 * count; 2197 2198 /* 2199 * Include space for the NUL. 2200 */ 2201 2202 string2 = attemptckalloc((unsigned) length2 + 1); 2203 if (string2 == NULL) { 2204 /* 2205 * Alloc failed. Note that in this case we try to do an error message 2206 * since this is a case that's most likely when the alloc is large and 2207 * that's easy to do with this API. Note that if we fail allocating a 2208 * short string, this will likely keel over too (and fatally). 2209 */ 2210 2211 Tcl_SetObjResult(interp, Tcl_ObjPrintf( 2212 "string size overflow, out of memory allocating %u bytes", 2213 length2 + 1)); 2214 return TCL_ERROR; 2215 } 2216 for (index = 0; index < count; index++) { 2217 memcpy(string2 + (length1 * index), string1, (size_t) length1); 2218 } 2219 string2[length2] = '\0'; 2220 2221 /* 2222 * We have to directly assign this instead of using Tcl_SetStringObj (and 2223 * indirectly TclInitStringRep) because that makes another copy of the 2224 * data. 2225 */ 2226 2227 TclNewObj(resultPtr); 2228 resultPtr->bytes = string2; 2229 resultPtr->length = length2; 2230 Tcl_SetObjResult(interp, resultPtr); 2231 2232 done: 2233 return TCL_OK; 2234} 2235 2236/* 2237 *---------------------------------------------------------------------- 2238 * 2239 * StringRplcCmd -- 2240 * 2241 * This procedure is invoked to process the "string replace" Tcl command. 2242 * See the user documentation for details on what it does. Note that this 2243 * command only functions correctly on properly formed Tcl UTF strings. 2244 * 2245 * Results: 2246 * A standard Tcl result. 2247 * 2248 * Side effects: 2249 * See the user documentation. 2250 * 2251 *---------------------------------------------------------------------- 2252 */ 2253 2254static int 2255StringRplcCmd( 2256 ClientData dummy, /* Not used. */ 2257 Tcl_Interp *interp, /* Current interpreter. */ 2258 int objc, /* Number of arguments. */ 2259 Tcl_Obj *const objv[]) /* Argument objects. */ 2260{ 2261 Tcl_UniChar *ustring; 2262 int first, last, length; 2263 2264 if (objc < 4 || objc > 5) { 2265 Tcl_WrongNumArgs(interp, 1, objv, "string first last ?string?"); 2266 return TCL_ERROR; 2267 } 2268 2269 ustring = Tcl_GetUnicodeFromObj(objv[1], &length); 2270 length--; 2271 2272 if (TclGetIntForIndexM(interp, objv[2], length, &first) != TCL_OK || 2273 TclGetIntForIndexM(interp, objv[3], length, &last) != TCL_OK){ 2274 return TCL_ERROR; 2275 } 2276 2277 if ((last < first) || (last < 0) || (first > length)) { 2278 Tcl_SetObjResult(interp, objv[1]); 2279 } else { 2280 Tcl_Obj *resultPtr; 2281 2282 ustring = Tcl_GetUnicodeFromObj(objv[1], &length); 2283 length--; 2284 2285 if (first < 0) { 2286 first = 0; 2287 } 2288 2289 resultPtr = Tcl_NewUnicodeObj(ustring, first); 2290 if (objc == 5) { 2291 Tcl_AppendObjToObj(resultPtr, objv[4]); 2292 } 2293 if (last < length) { 2294 Tcl_AppendUnicodeToObj(resultPtr, ustring + last + 1, 2295 length - last); 2296 } 2297 Tcl_SetObjResult(interp, resultPtr); 2298 } 2299 return TCL_OK; 2300} 2301 2302/* 2303 *---------------------------------------------------------------------- 2304 * 2305 * StringRevCmd -- 2306 * 2307 * This procedure is invoked to process the "string reverse" Tcl command. 2308 * See the user documentation for details on what it does. Note that this 2309 * command only functions correctly on properly formed Tcl UTF strings. 2310 * 2311 * Results: 2312 * A standard Tcl result. 2313 * 2314 * Side effects: 2315 * See the user documentation. 2316 * 2317 *---------------------------------------------------------------------- 2318 */ 2319 2320static int 2321StringRevCmd( 2322 ClientData dummy, /* Not used. */ 2323 Tcl_Interp *interp, /* Current interpreter. */ 2324 int objc, /* Number of arguments. */ 2325 Tcl_Obj *const objv[]) /* Argument objects. */ 2326{ 2327 if (objc != 2) { 2328 Tcl_WrongNumArgs(interp, 1, objv, "string"); 2329 return TCL_ERROR; 2330 } 2331 2332 Tcl_SetObjResult(interp, TclStringObjReverse(objv[1])); 2333 return TCL_OK; 2334} 2335 2336/* 2337 *---------------------------------------------------------------------- 2338 * 2339 * StringStartCmd -- 2340 * 2341 * This procedure is invoked to process the "string wordstart" Tcl 2342 * command. See the user documentation for details on what it does. Note 2343 * that this command only functions correctly on properly formed Tcl UTF 2344 * strings. 2345 * 2346 * Results: 2347 * A standard Tcl result. 2348 * 2349 * Side effects: 2350 * See the user documentation. 2351 * 2352 *---------------------------------------------------------------------- 2353 */ 2354 2355static int 2356StringStartCmd( 2357 ClientData dummy, /* Not used. */ 2358 Tcl_Interp *interp, /* Current interpreter. */ 2359 int objc, /* Number of arguments. */ 2360 Tcl_Obj *const objv[]) /* Argument objects. */ 2361{ 2362 Tcl_UniChar ch; 2363 const char *p, *string; 2364 int cur, index, length, numChars; 2365 2366 if (objc != 3) { 2367 Tcl_WrongNumArgs(interp, 1, objv, "string index"); 2368 return TCL_ERROR; 2369 } 2370 2371 string = TclGetStringFromObj(objv[1], &length); 2372 numChars = Tcl_NumUtfChars(string, length); 2373 if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) { 2374 return TCL_ERROR; 2375 } 2376 string = TclGetStringFromObj(objv[1], &length); 2377 if (index >= numChars) { 2378 index = numChars - 1; 2379 } 2380 cur = 0; 2381 if (index > 0) { 2382 p = Tcl_UtfAtIndex(string, index); 2383 for (cur = index; cur >= 0; cur--) { 2384 TclUtfToUniChar(p, &ch); 2385 if (!Tcl_UniCharIsWordChar(ch)) { 2386 break; 2387 } 2388 p = Tcl_UtfPrev(p, string); 2389 } 2390 if (cur != index) { 2391 cur += 1; 2392 } 2393 } 2394 Tcl_SetObjResult(interp, Tcl_NewIntObj(cur)); 2395 return TCL_OK; 2396} 2397 2398/* 2399 *---------------------------------------------------------------------- 2400 * 2401 * StringEndCmd -- 2402 * 2403 * This procedure is invoked to process the "string wordend" Tcl command. 2404 * See the user documentation for details on what it does. Note that this 2405 * command only functions correctly on properly formed Tcl UTF strings. 2406 * 2407 * Results: 2408 * A standard Tcl result. 2409 * 2410 * Side effects: 2411 * See the user documentation. 2412 * 2413 *---------------------------------------------------------------------- 2414 */ 2415 2416static int 2417StringEndCmd( 2418 ClientData dummy, /* Not used. */ 2419 Tcl_Interp *interp, /* Current interpreter. */ 2420 int objc, /* Number of arguments. */ 2421 Tcl_Obj *const objv[]) /* Argument objects. */ 2422{ 2423 Tcl_UniChar ch; 2424 const char *p, *end, *string; 2425 int cur, index, length, numChars; 2426 2427 if (objc != 3) { 2428 Tcl_WrongNumArgs(interp, 1, objv, "string index"); 2429 return TCL_ERROR; 2430 } 2431 2432 string = TclGetStringFromObj(objv[1], &length); 2433 numChars = Tcl_NumUtfChars(string, length); 2434 if (TclGetIntForIndexM(interp, objv[2], numChars-1, &index) != TCL_OK) { 2435 return TCL_ERROR; 2436 } 2437 string = TclGetStringFromObj(objv[1], &length); 2438 if (index < 0) { 2439 index = 0; 2440 } 2441 if (index < numChars) { 2442 p = Tcl_UtfAtIndex(string, index); 2443 end = string+length; 2444 for (cur = index; p < end; cur++) { 2445 p += TclUtfToUniChar(p, &ch); 2446 if (!Tcl_UniCharIsWordChar(ch)) { 2447 break; 2448 } 2449 } 2450 if (cur == index) { 2451 cur++; 2452 } 2453 } else { 2454 cur = numChars; 2455 } 2456 Tcl_SetObjResult(interp, Tcl_NewIntObj(cur)); 2457 return TCL_OK; 2458} 2459 2460/* 2461 *---------------------------------------------------------------------- 2462 * 2463 * StringEqualCmd -- 2464 * 2465 * This procedure is invoked to process the "string equal" Tcl command. 2466 * See the user documentation for details on what it does. Note that this 2467 * command only functions correctly on properly formed Tcl UTF strings. 2468 * 2469 * Results: 2470 * A standard Tcl result. 2471 * 2472 * Side effects: 2473 * See the user documentation. 2474 * 2475 *---------------------------------------------------------------------- 2476 */ 2477 2478static int 2479StringEqualCmd( 2480 ClientData dummy, /* Not used. */ 2481 Tcl_Interp *interp, /* Current interpreter. */ 2482 int objc, /* Number of arguments. */ 2483 Tcl_Obj *const objv[]) /* Argument objects. */ 2484{ 2485 /* 2486 * Remember to keep code here in some sync with the byte-compiled versions 2487 * in tclExecute.c (INST_STR_EQ, INST_STR_NEQ and INST_STR_CMP as well as 2488 * the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...). 2489 */ 2490 2491 char *string1, *string2; 2492 int length1, length2, i, match, length, nocase = 0, reqlength = -1; 2493 typedef int (*strCmpFn_t)(const char *, const char *, unsigned int); 2494 strCmpFn_t strCmpFn; 2495 2496 if (objc < 3 || objc > 6) { 2497 str_cmp_args: 2498 Tcl_WrongNumArgs(interp, 1, objv, 2499 "?-nocase? ?-length int? string1 string2"); 2500 return TCL_ERROR; 2501 } 2502 2503 for (i = 1; i < objc-2; i++) { 2504 string2 = TclGetStringFromObj(objv[i], &length2); 2505 if ((length2 > 1) && !strncmp(string2, "-nocase", (size_t)length2)) { 2506 nocase = 1; 2507 } else if ((length2 > 1) 2508 && !strncmp(string2, "-length", (size_t)length2)) { 2509 if (i+1 >= objc-2) { 2510 goto str_cmp_args; 2511 } 2512 ++i; 2513 if (TclGetIntFromObj(interp, objv[i], &reqlength) != TCL_OK) { 2514 return TCL_ERROR; 2515 } 2516 } else { 2517 Tcl_AppendResult(interp, "bad option \"", string2, 2518 "\": must be -nocase or -length", NULL); 2519 return TCL_ERROR; 2520 } 2521 } 2522 2523 /* 2524 * From now on, we only access the two objects at the end of the argument 2525 * array. 2526 */ 2527 2528 objv += objc-2; 2529 2530 if ((reqlength == 0) || (objv[0] == objv[1])) { 2531 /* 2532 * Always match at 0 chars of if it is the same obj. 2533 */ 2534 2535 Tcl_SetObjResult(interp, Tcl_NewBooleanObj(1)); 2536 return TCL_OK; 2537 } 2538 2539 if (!nocase && objv[0]->typePtr == &tclByteArrayType && 2540 objv[1]->typePtr == &tclByteArrayType) { 2541 /* 2542 * Use binary versions of comparisons since that won't cause undue 2543 * type conversions and it is much faster. Only do this if we're 2544 * case-sensitive (which is all that really makes sense with byte 2545 * arrays anyway, and we have no memcasecmp() for some reason... :^) 2546 */ 2547 2548 string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1); 2549 string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2); 2550 strCmpFn = (strCmpFn_t) memcmp; 2551 } else if ((objv[0]->typePtr == &tclStringType) 2552 && (objv[1]->typePtr == &tclStringType)) { 2553 /* 2554 * Do a unicode-specific comparison if both of the args are of String 2555 * type. In benchmark testing this proved the most efficient check 2556 * between the unicode and string comparison operations. 2557 */ 2558 2559 string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1); 2560 string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2); 2561 strCmpFn = (strCmpFn_t) 2562 (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp); 2563 } else { 2564 /* 2565 * As a catch-all we will work with UTF-8. We cannot use memcmp() as 2566 * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's 2567 * utf rep). We can use the more efficient TclpUtfNcmp2 if we are 2568 * case-sensitive and no specific length was requested. 2569 */ 2570 2571 string1 = (char *) TclGetStringFromObj(objv[0], &length1); 2572 string2 = (char *) TclGetStringFromObj(objv[1], &length2); 2573 if ((reqlength < 0) && !nocase) { 2574 strCmpFn = (strCmpFn_t) TclpUtfNcmp2; 2575 } else { 2576 length1 = Tcl_NumUtfChars(string1, length1); 2577 length2 = Tcl_NumUtfChars(string2, length2); 2578 strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); 2579 } 2580 } 2581 2582 if ((reqlength < 0) && (length1 != length2)) { 2583 match = 1; /* This will be reversed below. */ 2584 } else { 2585 length = (length1 < length2) ? length1 : length2; 2586 if (reqlength > 0 && reqlength < length) { 2587 length = reqlength; 2588 } else if (reqlength < 0) { 2589 /* 2590 * The requested length is negative, so we ignore it by setting it 2591 * to length + 1 so we correct the match var. 2592 */ 2593 2594 reqlength = length + 1; 2595 } 2596 2597 match = strCmpFn(string1, string2, (unsigned) length); 2598 if ((match == 0) && (reqlength > length)) { 2599 match = length1 - length2; 2600 } 2601 } 2602 2603 Tcl_SetObjResult(interp, Tcl_NewBooleanObj(match ? 0 : 1)); 2604 return TCL_OK; 2605} 2606 2607/* 2608 *---------------------------------------------------------------------- 2609 * 2610 * StringCmpCmd -- 2611 * 2612 * This procedure is invoked to process the "string compare" Tcl command. 2613 * See the user documentation for details on what it does. Note that this 2614 * command only functions correctly on properly formed Tcl UTF strings. 2615 * 2616 * Results: 2617 * A standard Tcl result. 2618 * 2619 * Side effects: 2620 * See the user documentation. 2621 * 2622 *---------------------------------------------------------------------- 2623 */ 2624 2625static int 2626StringCmpCmd( 2627 ClientData dummy, /* Not used. */ 2628 Tcl_Interp *interp, /* Current interpreter. */ 2629 int objc, /* Number of arguments. */ 2630 Tcl_Obj *const objv[]) /* Argument objects. */ 2631{ 2632 /* 2633 * Remember to keep code here in some sync with the byte-compiled versions 2634 * in tclExecute.c (INST_STR_EQ, INST_STR_NEQ and INST_STR_CMP as well as 2635 * the expr string comparison in INST_EQ/INST_NEQ/INST_LT/...). 2636 */ 2637 2638 char *string1, *string2; 2639 int length1, length2, i, match, length, nocase = 0, reqlength = -1; 2640 typedef int (*strCmpFn_t)(const char *, const char *, unsigned int); 2641 strCmpFn_t strCmpFn; 2642 2643 if (objc < 3 || objc > 6) { 2644 str_cmp_args: 2645 Tcl_WrongNumArgs(interp, 1, objv, 2646 "?-nocase? ?-length int? string1 string2"); 2647 return TCL_ERROR; 2648 } 2649 2650 for (i = 1; i < objc-2; i++) { 2651 string2 = TclGetStringFromObj(objv[i], &length2); 2652 if ((length2 > 1) && !strncmp(string2, "-nocase", (size_t)length2)) { 2653 nocase = 1; 2654 } else if ((length2 > 1) 2655 && !strncmp(string2, "-length", (size_t)length2)) { 2656 if (i+1 >= objc-2) { 2657 goto str_cmp_args; 2658 } 2659 ++i; 2660 if (TclGetIntFromObj(interp, objv[i], &reqlength) != TCL_OK) { 2661 return TCL_ERROR; 2662 } 2663 } else { 2664 Tcl_AppendResult(interp, "bad option \"", string2, 2665 "\": must be -nocase or -length", NULL); 2666 return TCL_ERROR; 2667 } 2668 } 2669 2670 /* 2671 * From now on, we only access the two objects at the end of the argument 2672 * array. 2673 */ 2674 2675 objv += objc-2; 2676 2677 if ((reqlength == 0) || (objv[0] == objv[1])) { 2678 /* 2679 * Always match at 0 chars of if it is the same obj. 2680 */ 2681 2682 Tcl_SetObjResult(interp, Tcl_NewBooleanObj(0)); 2683 return TCL_OK; 2684 } 2685 2686 if (!nocase && objv[0]->typePtr == &tclByteArrayType && 2687 objv[1]->typePtr == &tclByteArrayType) { 2688 /* 2689 * Use binary versions of comparisons since that won't cause undue 2690 * type conversions and it is much faster. Only do this if we're 2691 * case-sensitive (which is all that really makes sense with byte 2692 * arrays anyway, and we have no memcasecmp() for some reason... :^) 2693 */ 2694 2695 string1 = (char *) Tcl_GetByteArrayFromObj(objv[0], &length1); 2696 string2 = (char *) Tcl_GetByteArrayFromObj(objv[1], &length2); 2697 strCmpFn = (strCmpFn_t) memcmp; 2698 } else if ((objv[0]->typePtr == &tclStringType) 2699 && (objv[1]->typePtr == &tclStringType)) { 2700 /* 2701 * Do a unicode-specific comparison if both of the args are of String 2702 * type. In benchmark testing this proved the most efficient check 2703 * between the unicode and string comparison operations. 2704 */ 2705 2706 string1 = (char *) Tcl_GetUnicodeFromObj(objv[0], &length1); 2707 string2 = (char *) Tcl_GetUnicodeFromObj(objv[1], &length2); 2708 strCmpFn = (strCmpFn_t) 2709 (nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp); 2710 } else { 2711 /* 2712 * As a catch-all we will work with UTF-8. We cannot use memcmp() as 2713 * that is unsafe with any string containing NUL (\xC0\x80 in Tcl's 2714 * utf rep). We can use the more efficient TclpUtfNcmp2 if we are 2715 * case-sensitive and no specific length was requested. 2716 */ 2717 2718 string1 = (char *) TclGetStringFromObj(objv[0], &length1); 2719 string2 = (char *) TclGetStringFromObj(objv[1], &length2); 2720 if ((reqlength < 0) && !nocase) { 2721 strCmpFn = (strCmpFn_t) TclpUtfNcmp2; 2722 } else { 2723 length1 = Tcl_NumUtfChars(string1, length1); 2724 length2 = Tcl_NumUtfChars(string2, length2); 2725 strCmpFn = (strCmpFn_t) (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); 2726 } 2727 } 2728 2729 length = (length1 < length2) ? length1 : length2; 2730 if (reqlength > 0 && reqlength < length) { 2731 length = reqlength; 2732 } else if (reqlength < 0) { 2733 /* 2734 * The requested length is negative, so we ignore it by setting it to 2735 * length + 1 so we correct the match var. 2736 */ 2737 2738 reqlength = length + 1; 2739 } 2740 2741 match = strCmpFn(string1, string2, (unsigned) length); 2742 if ((match == 0) && (reqlength > length)) { 2743 match = length1 - length2; 2744 } 2745 2746 Tcl_SetObjResult(interp, 2747 Tcl_NewIntObj((match > 0) ? 1 : (match < 0) ? -1 : 0)); 2748 return TCL_OK; 2749} 2750 2751/* 2752 *---------------------------------------------------------------------- 2753 * 2754 * StringBytesCmd -- 2755 * 2756 * This procedure is invoked to process the "string bytelength" Tcl 2757 * command. See the user documentation for details on what it does. Note 2758 * that this command only functions correctly on properly formed Tcl UTF 2759 * strings. 2760 * 2761 * Results: 2762 * A standard Tcl result. 2763 * 2764 * Side effects: 2765 * See the user documentation. 2766 * 2767 *---------------------------------------------------------------------- 2768 */ 2769 2770static int 2771StringBytesCmd( 2772 ClientData dummy, /* Not used. */ 2773 Tcl_Interp *interp, /* Current interpreter. */ 2774 int objc, /* Number of arguments. */ 2775 Tcl_Obj *const objv[]) /* Argument objects. */ 2776{ 2777 int length; 2778 2779 if (objc != 2) { 2780 Tcl_WrongNumArgs(interp, 1, objv, "string"); 2781 return TCL_ERROR; 2782 } 2783 2784 (void) TclGetStringFromObj(objv[1], &length); 2785 Tcl_SetObjResult(interp, Tcl_NewIntObj(length)); 2786 return TCL_OK; 2787} 2788 2789/* 2790 *---------------------------------------------------------------------- 2791 * 2792 * StringLenCmd -- 2793 * 2794 * This procedure is invoked to process the "string length" Tcl command. 2795 * See the user documentation for details on what it does. Note that this 2796 * command only functions correctly on properly formed Tcl UTF strings. 2797 * 2798 * Results: 2799 * A standard Tcl result. 2800 * 2801 * Side effects: 2802 * See the user documentation. 2803 * 2804 *---------------------------------------------------------------------- 2805 */ 2806 2807static int 2808StringLenCmd( 2809 ClientData dummy, /* Not used. */ 2810 Tcl_Interp *interp, /* Current interpreter. */ 2811 int objc, /* Number of arguments. */ 2812 Tcl_Obj *const objv[]) /* Argument objects. */ 2813{ 2814 int length; 2815 2816 if (objc != 2) { 2817 Tcl_WrongNumArgs(interp, 1, objv, "string"); 2818 return TCL_ERROR; 2819 } 2820 2821 /* 2822 * If we have a ByteArray object, avoid recomputing the string since the 2823 * byte array contains one byte per character. Otherwise, use the Unicode 2824 * string rep to calculate the length. 2825 */ 2826 2827 if (objv[1]->typePtr == &tclByteArrayType) { 2828 (void) Tcl_GetByteArrayFromObj(objv[1], &length); 2829 } else { 2830 length = Tcl_GetCharLength(objv[1]); 2831 } 2832 Tcl_SetObjResult(interp, Tcl_NewIntObj(length)); 2833 return TCL_OK; 2834} 2835 2836/* 2837 *---------------------------------------------------------------------- 2838 * 2839 * StringLowerCmd -- 2840 * 2841 * This procedure is invoked to process the "string tolower" Tcl command. 2842 * See the user documentation for details on what it does. Note that this 2843 * command only functions correctly on properly formed Tcl UTF strings. 2844 * 2845 * Results: 2846 * A standard Tcl result. 2847 * 2848 * Side effects: 2849 * See the user documentation. 2850 * 2851 *---------------------------------------------------------------------- 2852 */ 2853 2854static int 2855StringLowerCmd( 2856 ClientData dummy, /* Not used. */ 2857 Tcl_Interp *interp, /* Current interpreter. */ 2858 int objc, /* Number of arguments. */ 2859 Tcl_Obj *const objv[]) /* Argument objects. */ 2860{ 2861 int length1, length2; 2862 char *string1, *string2; 2863 2864 if (objc < 2 || objc > 4) { 2865 Tcl_WrongNumArgs(interp, 1, objv, "string ?first? ?last?"); 2866 return TCL_ERROR; 2867 } 2868 2869 string1 = TclGetStringFromObj(objv[1], &length1); 2870 2871 if (objc == 2) { 2872 Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1); 2873 2874 length1 = Tcl_UtfToLower(TclGetString(resultPtr)); 2875 Tcl_SetObjLength(resultPtr, length1); 2876 Tcl_SetObjResult(interp, resultPtr); 2877 } else { 2878 int first, last; 2879 const char *start, *end; 2880 Tcl_Obj *resultPtr; 2881 2882 length1 = Tcl_NumUtfChars(string1, length1) - 1; 2883 if (TclGetIntForIndexM(interp,objv[2],length1, &first) != TCL_OK) { 2884 return TCL_ERROR; 2885 } 2886 if (first < 0) { 2887 first = 0; 2888 } 2889 last = first; 2890 2891 if ((objc == 4) && (TclGetIntForIndexM(interp, objv[3], length1, 2892 &last) != TCL_OK)) { 2893 return TCL_ERROR; 2894 } 2895 2896 if (last >= length1) { 2897 last = length1; 2898 } 2899 if (last < first) { 2900 Tcl_SetObjResult(interp, objv[1]); 2901 return TCL_OK; 2902 } 2903 2904 string1 = TclGetStringFromObj(objv[1], &length1); 2905 start = Tcl_UtfAtIndex(string1, first); 2906 end = Tcl_UtfAtIndex(start, last - first + 1); 2907 resultPtr = Tcl_NewStringObj(string1, end - string1); 2908 string2 = TclGetString(resultPtr) + (start - string1); 2909 2910 length2 = Tcl_UtfToLower(string2); 2911 Tcl_SetObjLength(resultPtr, length2 + (start - string1)); 2912 2913 Tcl_AppendToObj(resultPtr, end, -1); 2914 Tcl_SetObjResult(interp, resultPtr); 2915 } 2916 2917 return TCL_OK; 2918} 2919 2920/* 2921 *---------------------------------------------------------------------- 2922 * 2923 * StringUpperCmd -- 2924 * 2925 * This procedure is invoked to process the "string toupper" Tcl command. 2926 * See the user documentation for details on what it does. Note that this 2927 * command only functions correctly on properly formed Tcl UTF strings. 2928 * 2929 * Results: 2930 * A standard Tcl result. 2931 * 2932 * Side effects: 2933 * See the user documentation. 2934 * 2935 *---------------------------------------------------------------------- 2936 */ 2937 2938static int 2939StringUpperCmd( 2940 ClientData dummy, /* Not used. */ 2941 Tcl_Interp *interp, /* Current interpreter. */ 2942 int objc, /* Number of arguments. */ 2943 Tcl_Obj *const objv[]) /* Argument objects. */ 2944{ 2945 int length1, length2; 2946 char *string1, *string2; 2947 2948 if (objc < 2 || objc > 4) { 2949 Tcl_WrongNumArgs(interp, 1, objv, "string ?first? ?last?"); 2950 return TCL_ERROR; 2951 } 2952 2953 string1 = TclGetStringFromObj(objv[1], &length1); 2954 2955 if (objc == 2) { 2956 Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1); 2957 2958 length1 = Tcl_UtfToUpper(TclGetString(resultPtr)); 2959 Tcl_SetObjLength(resultPtr, length1); 2960 Tcl_SetObjResult(interp, resultPtr); 2961 } else { 2962 int first, last; 2963 const char *start, *end; 2964 Tcl_Obj *resultPtr; 2965 2966 length1 = Tcl_NumUtfChars(string1, length1) - 1; 2967 if (TclGetIntForIndexM(interp,objv[2],length1, &first) != TCL_OK) { 2968 return TCL_ERROR; 2969 } 2970 if (first < 0) { 2971 first = 0; 2972 } 2973 last = first; 2974 2975 if ((objc == 4) && (TclGetIntForIndexM(interp, objv[3], length1, 2976 &last) != TCL_OK)) { 2977 return TCL_ERROR; 2978 } 2979 2980 if (last >= length1) { 2981 last = length1; 2982 } 2983 if (last < first) { 2984 Tcl_SetObjResult(interp, objv[1]); 2985 return TCL_OK; 2986 } 2987 2988 string1 = TclGetStringFromObj(objv[1], &length1); 2989 start = Tcl_UtfAtIndex(string1, first); 2990 end = Tcl_UtfAtIndex(start, last - first + 1); 2991 resultPtr = Tcl_NewStringObj(string1, end - string1); 2992 string2 = TclGetString(resultPtr) + (start - string1); 2993 2994 length2 = Tcl_UtfToUpper(string2); 2995 Tcl_SetObjLength(resultPtr, length2 + (start - string1)); 2996 2997 Tcl_AppendToObj(resultPtr, end, -1); 2998 Tcl_SetObjResult(interp, resultPtr); 2999 } 3000 3001 return TCL_OK; 3002} 3003 3004/* 3005 *---------------------------------------------------------------------- 3006 * 3007 * StringTitleCmd -- 3008 * 3009 * This procedure is invoked to process the "string totitle" Tcl command. 3010 * See the user documentation for details on what it does. Note that this 3011 * command only functions correctly on properly formed Tcl UTF strings. 3012 * 3013 * Results: 3014 * A standard Tcl result. 3015 * 3016 * Side effects: 3017 * See the user documentation. 3018 * 3019 *---------------------------------------------------------------------- 3020 */ 3021 3022static int 3023StringTitleCmd( 3024 ClientData dummy, /* Not used. */ 3025 Tcl_Interp *interp, /* Current interpreter. */ 3026 int objc, /* Number of arguments. */ 3027 Tcl_Obj *const objv[]) /* Argument objects. */ 3028{ 3029 int length1, length2; 3030 char *string1, *string2; 3031 3032 if (objc < 2 || objc > 4) { 3033 Tcl_WrongNumArgs(interp, 1, objv, "string ?first? ?last?"); 3034 return TCL_ERROR; 3035 } 3036 3037 string1 = TclGetStringFromObj(objv[1], &length1); 3038 3039 if (objc == 2) { 3040 Tcl_Obj *resultPtr = Tcl_NewStringObj(string1, length1); 3041 3042 length1 = Tcl_UtfToTitle(TclGetString(resultPtr)); 3043 Tcl_SetObjLength(resultPtr, length1); 3044 Tcl_SetObjResult(interp, resultPtr); 3045 } else { 3046 int first, last; 3047 const char *start, *end; 3048 Tcl_Obj *resultPtr; 3049 3050 length1 = Tcl_NumUtfChars(string1, length1) - 1; 3051 if (TclGetIntForIndexM(interp,objv[2],length1, &first) != TCL_OK) { 3052 return TCL_ERROR; 3053 } 3054 if (first < 0) { 3055 first = 0; 3056 } 3057 last = first; 3058 3059 if ((objc == 4) && (TclGetIntForIndexM(interp, objv[3], length1, 3060 &last) != TCL_OK)) { 3061 return TCL_ERROR; 3062 } 3063 3064 if (last >= length1) { 3065 last = length1; 3066 } 3067 if (last < first) { 3068 Tcl_SetObjResult(interp, objv[1]); 3069 return TCL_OK; 3070 } 3071 3072 string1 = TclGetStringFromObj(objv[1], &length1); 3073 start = Tcl_UtfAtIndex(string1, first); 3074 end = Tcl_UtfAtIndex(start, last - first + 1); 3075 resultPtr = Tcl_NewStringObj(string1, end - string1); 3076 string2 = TclGetString(resultPtr) + (start - string1); 3077 3078 length2 = Tcl_UtfToTitle(string2); 3079 Tcl_SetObjLength(resultPtr, length2 + (start - string1)); 3080 3081 Tcl_AppendToObj(resultPtr, end, -1); 3082 Tcl_SetObjResult(interp, resultPtr); 3083 } 3084 3085 return TCL_OK; 3086} 3087 3088/* 3089 *---------------------------------------------------------------------- 3090 * 3091 * StringTrimCmd -- 3092 * 3093 * This procedure is invoked to process the "string trim" Tcl command. 3094 * See the user documentation for details on what it does. Note that this 3095 * command only functions correctly on properly formed Tcl UTF strings. 3096 * 3097 * Results: 3098 * A standard Tcl result. 3099 * 3100 * Side effects: 3101 * See the user documentation. 3102 * 3103 *---------------------------------------------------------------------- 3104 */ 3105 3106static int 3107StringTrimCmd( 3108 ClientData dummy, /* Not used. */ 3109 Tcl_Interp *interp, /* Current interpreter. */ 3110 int objc, /* Number of arguments. */ 3111 Tcl_Obj *const objv[]) /* Argument objects. */ 3112{ 3113 Tcl_UniChar ch, trim; 3114 register const char *p, *end; 3115 const char *check, *checkEnd, *string1, *string2; 3116 int offset, length1, length2; 3117 3118 if (objc == 3) { 3119 string2 = TclGetStringFromObj(objv[2], &length2); 3120 } else if (objc == 2) { 3121 string2 = " \t\n\r"; 3122 length2 = strlen(string2); 3123 } else { 3124 Tcl_WrongNumArgs(interp, 1, objv, "string ?chars?"); 3125 return TCL_ERROR; 3126 } 3127 string1 = TclGetStringFromObj(objv[1], &length1); 3128 checkEnd = string2 + length2; 3129 3130 /* 3131 * The outer loop iterates over the string. The inner loop iterates over 3132 * the trim characters. The loops terminate as soon as a non-trim 3133 * character is discovered and string1 is left pointing at the first 3134 * non-trim character. 3135 */ 3136 3137 end = string1 + length1; 3138 for (p = string1; p < end; p += offset) { 3139 offset = TclUtfToUniChar(p, &ch); 3140 3141 for (check = string2; ; ) { 3142 if (check >= checkEnd) { 3143 p = end; 3144 break; 3145 } 3146 check += TclUtfToUniChar(check, &trim); 3147 if (ch == trim) { 3148 length1 -= offset; 3149 string1 += offset; 3150 break; 3151 } 3152 } 3153 } 3154 3155 /* 3156 * The outer loop iterates over the string. The inner loop iterates over 3157 * the trim characters. The loops terminate as soon as a non-trim 3158 * character is discovered and length1 marks the last non-trim character. 3159 */ 3160 3161 end = string1; 3162 for (p = string1 + length1; p > end; ) { 3163 p = Tcl_UtfPrev(p, string1); 3164 offset = TclUtfToUniChar(p, &ch); 3165 check = string2; 3166 while (1) { 3167 if (check >= checkEnd) { 3168 p = end; 3169 break; 3170 } 3171 check += TclUtfToUniChar(check, &trim); 3172 if (ch == trim) { 3173 length1 -= offset; 3174 break; 3175 } 3176 } 3177 } 3178 3179 Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1)); 3180 return TCL_OK; 3181} 3182 3183/* 3184 *---------------------------------------------------------------------- 3185 * 3186 * StringTrimLCmd -- 3187 * 3188 * This procedure is invoked to process the "string trimleft" Tcl 3189 * command. See the user documentation for details on what it does. Note 3190 * that this command only functions correctly on properly formed Tcl UTF 3191 * strings. 3192 * 3193 * Results: 3194 * A standard Tcl result. 3195 * 3196 * Side effects: 3197 * See the user documentation. 3198 * 3199 *---------------------------------------------------------------------- 3200 */ 3201 3202static int 3203StringTrimLCmd( 3204 ClientData dummy, /* Not used. */ 3205 Tcl_Interp *interp, /* Current interpreter. */ 3206 int objc, /* Number of arguments. */ 3207 Tcl_Obj *const objv[]) /* Argument objects. */ 3208{ 3209 Tcl_UniChar ch, trim; 3210 register const char *p, *end; 3211 const char *check, *checkEnd, *string1, *string2; 3212 int offset, length1, length2; 3213 3214 if (objc == 3) { 3215 string2 = TclGetStringFromObj(objv[2], &length2); 3216 } else if (objc == 2) { 3217 string2 = " \t\n\r"; 3218 length2 = strlen(string2); 3219 } else { 3220 Tcl_WrongNumArgs(interp, 1, objv, "string ?chars?"); 3221 return TCL_ERROR; 3222 } 3223 string1 = TclGetStringFromObj(objv[1], &length1); 3224 checkEnd = string2 + length2; 3225 3226 /* 3227 * The outer loop iterates over the string. The inner loop iterates over 3228 * the trim characters. The loops terminate as soon as a non-trim 3229 * character is discovered and string1 is left pointing at the first 3230 * non-trim character. 3231 */ 3232 3233 end = string1 + length1; 3234 for (p = string1; p < end; p += offset) { 3235 offset = TclUtfToUniChar(p, &ch); 3236 3237 for (check = string2; ; ) { 3238 if (check >= checkEnd) { 3239 p = end; 3240 break; 3241 } 3242 check += TclUtfToUniChar(check, &trim); 3243 if (ch == trim) { 3244 length1 -= offset; 3245 string1 += offset; 3246 break; 3247 } 3248 } 3249 } 3250 3251 Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1)); 3252 return TCL_OK; 3253} 3254 3255/* 3256 *---------------------------------------------------------------------- 3257 * 3258 * StringTrimRCmd -- 3259 * 3260 * This procedure is invoked to process the "string trimright" Tcl 3261 * command. See the user documentation for details on what it does. Note 3262 * that this command only functions correctly on properly formed Tcl UTF 3263 * strings. 3264 * 3265 * Results: 3266 * A standard Tcl result. 3267 * 3268 * Side effects: 3269 * See the user documentation. 3270 * 3271 *---------------------------------------------------------------------- 3272 */ 3273 3274static int 3275StringTrimRCmd( 3276 ClientData dummy, /* Not used. */ 3277 Tcl_Interp *interp, /* Current interpreter. */ 3278 int objc, /* Number of arguments. */ 3279 Tcl_Obj *const objv[]) /* Argument objects. */ 3280{ 3281 Tcl_UniChar ch, trim; 3282 register const char *p, *end; 3283 const char *check, *checkEnd, *string1, *string2; 3284 int offset, length1, length2; 3285 3286 if (objc == 3) { 3287 string2 = TclGetStringFromObj(objv[2], &length2); 3288 } else if (objc == 2) { 3289 string2 = " \t\n\r"; 3290 length2 = strlen(string2); 3291 } else { 3292 Tcl_WrongNumArgs(interp, 1, objv, "string ?chars?"); 3293 return TCL_ERROR; 3294 } 3295 string1 = TclGetStringFromObj(objv[1], &length1); 3296 checkEnd = string2 + length2; 3297 3298 /* 3299 * The outer loop iterates over the string. The inner loop iterates over 3300 * the trim characters. The loops terminate as soon as a non-trim 3301 * character is discovered and length1 marks the last non-trim character. 3302 */ 3303 3304 end = string1; 3305 for (p = string1 + length1; p > end; ) { 3306 p = Tcl_UtfPrev(p, string1); 3307 offset = TclUtfToUniChar(p, &ch); 3308 check = string2; 3309 while (1) { 3310 if (check >= checkEnd) { 3311 p = end; 3312 break; 3313 } 3314 check += TclUtfToUniChar(check, &trim); 3315 if (ch == trim) { 3316 length1 -= offset; 3317 break; 3318 } 3319 } 3320 } 3321 3322 Tcl_SetObjResult(interp, Tcl_NewStringObj(string1, length1)); 3323 return TCL_OK; 3324} 3325 3326/* 3327 *---------------------------------------------------------------------- 3328 * 3329 * TclInitStringCmd -- 3330 * 3331 * This procedure creates the "string" Tcl command. See the user 3332 * documentation for details on what it does. Note that this command only 3333 * functions correctly on properly formed Tcl UTF strings. 3334 * 3335 * Also note that the primary methods here (equal, compare, match, ...) 3336 * have bytecode equivalents. You will find the code for those in 3337 * tclExecute.c. The code here will only be used in the non-bc case (like 3338 * in an 'eval'). 3339 * 3340 * Results: 3341 * A standard Tcl result. 3342 * 3343 * Side effects: 3344 * See the user documentation. 3345 * 3346 *---------------------------------------------------------------------- 3347 */ 3348 3349Tcl_Command 3350TclInitStringCmd( 3351 Tcl_Interp *interp) /* Current interpreter. */ 3352{ 3353 static const EnsembleImplMap stringImplMap[] = { 3354 {"bytelength", StringBytesCmd, NULL}, 3355 {"compare", StringCmpCmd, TclCompileStringCmpCmd}, 3356 {"equal", StringEqualCmd, TclCompileStringEqualCmd}, 3357 {"first", StringFirstCmd, NULL}, 3358 {"index", StringIndexCmd, TclCompileStringIndexCmd}, 3359 {"is", StringIsCmd, NULL}, 3360 {"last", StringLastCmd, NULL}, 3361 {"length", StringLenCmd, TclCompileStringLenCmd}, 3362 {"map", StringMapCmd, NULL}, 3363 {"match", StringMatchCmd, TclCompileStringMatchCmd}, 3364 {"range", StringRangeCmd, NULL}, 3365 {"repeat", StringReptCmd, NULL}, 3366 {"replace", StringRplcCmd, NULL}, 3367 {"reverse", StringRevCmd, NULL}, 3368 {"tolower", StringLowerCmd, NULL}, 3369 {"toupper", StringUpperCmd, NULL}, 3370 {"totitle", StringTitleCmd, NULL}, 3371 {"trim", StringTrimCmd, NULL}, 3372 {"trimleft", StringTrimLCmd, NULL}, 3373 {"trimright", StringTrimRCmd, NULL}, 3374 {"wordend", StringEndCmd, NULL}, 3375 {"wordstart", StringStartCmd, NULL}, 3376 {NULL} 3377 }; 3378 3379 return TclMakeEnsemble(interp, "string", stringImplMap); 3380} 3381 3382/* 3383 *---------------------------------------------------------------------- 3384 * 3385 * Tcl_SubstObjCmd -- 3386 * 3387 * This procedure is invoked to process the "subst" Tcl command. See the 3388 * user documentation for details on what it does. This command relies on 3389 * Tcl_SubstObj() for its implementation. 3390 * 3391 * Results: 3392 * A standard Tcl result. 3393 * 3394 * Side effects: 3395 * See the user documentation. 3396 * 3397 *---------------------------------------------------------------------- 3398 */ 3399 3400int 3401Tcl_SubstObjCmd( 3402 ClientData dummy, /* Not used. */ 3403 Tcl_Interp *interp, /* Current interpreter. */ 3404 int objc, /* Number of arguments. */ 3405 Tcl_Obj *CONST objv[]) /* Argument objects. */ 3406{ 3407 static CONST char *substOptions[] = { 3408 "-nobackslashes", "-nocommands", "-novariables", NULL 3409 }; 3410 enum substOptions { 3411 SUBST_NOBACKSLASHES, SUBST_NOCOMMANDS, SUBST_NOVARS 3412 }; 3413 Tcl_Obj *resultPtr; 3414 int flags, i; 3415 3416 /* 3417 * Parse command-line options. 3418 */ 3419 3420 flags = TCL_SUBST_ALL; 3421 for (i = 1; i < (objc-1); i++) { 3422 int optionIndex; 3423 3424 if (Tcl_GetIndexFromObj(interp, objv[i], substOptions, "switch", 0, 3425 &optionIndex) != TCL_OK) { 3426 return TCL_ERROR; 3427 } 3428 switch (optionIndex) { 3429 case SUBST_NOBACKSLASHES: 3430 flags &= ~TCL_SUBST_BACKSLASHES; 3431 break; 3432 case SUBST_NOCOMMANDS: 3433 flags &= ~TCL_SUBST_COMMANDS; 3434 break; 3435 case SUBST_NOVARS: 3436 flags &= ~TCL_SUBST_VARIABLES; 3437 break; 3438 default: 3439 Tcl_Panic("Tcl_SubstObjCmd: bad option index to SubstOptions"); 3440 } 3441 } 3442 if (i != objc-1) { 3443 Tcl_WrongNumArgs(interp, 1, objv, 3444 "?-nobackslashes? ?-nocommands? ?-novariables? string"); 3445 return TCL_ERROR; 3446 } 3447 3448 /* 3449 * Perform the substitution. 3450 */ 3451 3452 resultPtr = Tcl_SubstObj(interp, objv[i], flags); 3453 3454 if (resultPtr == NULL) { 3455 return TCL_ERROR; 3456 } 3457 Tcl_SetObjResult(interp, resultPtr); 3458 return TCL_OK; 3459} 3460 3461/* 3462 *---------------------------------------------------------------------- 3463 * 3464 * Tcl_SwitchObjCmd -- 3465 * 3466 * This object-based procedure is invoked to process the "switch" Tcl 3467 * command. See the user documentation for details on what it does. 3468 * 3469 * Results: 3470 * A standard Tcl object result. 3471 * 3472 * Side effects: 3473 * See the user documentation. 3474 * 3475 *---------------------------------------------------------------------- 3476 */ 3477 3478int 3479Tcl_SwitchObjCmd( 3480 ClientData dummy, /* Not used. */ 3481 Tcl_Interp *interp, /* Current interpreter. */ 3482 int objc, /* Number of arguments. */ 3483 Tcl_Obj *CONST objv[]) /* Argument objects. */ 3484{ 3485 int i,j, index, mode, foundmode, result, splitObjs, numMatchesSaved; 3486 int noCase, patternLength; 3487 char *pattern; 3488 Tcl_Obj *stringObj, *indexVarObj, *matchVarObj; 3489 Tcl_Obj *CONST *savedObjv = objv; 3490 Tcl_RegExp regExpr = NULL; 3491 Interp *iPtr = (Interp *) interp; 3492 int pc = 0; 3493 int bidx = 0; /* Index of body argument. */ 3494 Tcl_Obj *blist = NULL; /* List obj which is the body */ 3495 CmdFrame *ctxPtr; /* Copy of the topmost cmdframe, to allow us 3496 * to mess with the line information */ 3497 3498 /* 3499 * If you add options that make -e and -g not unique prefixes of -exact or 3500 * -glob, you *must* fix TclCompileSwitchCmd's option parser as well. 3501 */ 3502 3503 static CONST char *options[] = { 3504 "-exact", "-glob", "-indexvar", "-matchvar", "-nocase", "-regexp", 3505 "--", NULL 3506 }; 3507 enum options { 3508 OPT_EXACT, OPT_GLOB, OPT_INDEXV, OPT_MATCHV, OPT_NOCASE, OPT_REGEXP, 3509 OPT_LAST 3510 }; 3511 typedef int (*strCmpFn_t)(const char *, const char *); 3512 strCmpFn_t strCmpFn = strcmp; 3513 3514 mode = OPT_EXACT; 3515 foundmode = 0; 3516 indexVarObj = NULL; 3517 matchVarObj = NULL; 3518 numMatchesSaved = 0; 3519 noCase = 0; 3520 for (i = 1; i < objc-2; i++) { 3521 if (TclGetString(objv[i])[0] != '-') { 3522 break; 3523 } 3524 if (Tcl_GetIndexFromObj(interp, objv[i], options, "option", 0, 3525 &index) != TCL_OK) { 3526 return TCL_ERROR; 3527 } 3528 switch ((enum options) index) { 3529 /* 3530 * General options. 3531 */ 3532 3533 case OPT_LAST: 3534 i++; 3535 goto finishedOptions; 3536 case OPT_NOCASE: 3537 strCmpFn = strcasecmp; 3538 noCase = 1; 3539 break; 3540 3541 /* 3542 * Handle the different switch mode options. 3543 */ 3544 3545 default: 3546 if (foundmode) { 3547 /* 3548 * Mode already set via -exact, -glob, or -regexp. 3549 */ 3550 3551 Tcl_AppendResult(interp, "bad option \"", 3552 TclGetString(objv[i]), "\": ", options[mode], 3553 " option already found", NULL); 3554 return TCL_ERROR; 3555 } else { 3556 foundmode = 1; 3557 mode = index; 3558 break; 3559 } 3560 3561 /* 3562 * Check for TIP#75 options specifying the variables to write 3563 * regexp information into. 3564 */ 3565 3566 case OPT_INDEXV: 3567 i++; 3568 if (i >= objc-2) { 3569 Tcl_AppendResult(interp, "missing variable name argument to ", 3570 "-indexvar", " option", NULL); 3571 return TCL_ERROR; 3572 } 3573 indexVarObj = objv[i]; 3574 numMatchesSaved = -1; 3575 break; 3576 case OPT_MATCHV: 3577 i++; 3578 if (i >= objc-2) { 3579 Tcl_AppendResult(interp, "missing variable name argument to ", 3580 "-matchvar", " option", NULL); 3581 return TCL_ERROR; 3582 } 3583 matchVarObj = objv[i]; 3584 numMatchesSaved = -1; 3585 break; 3586 } 3587 } 3588 3589 finishedOptions: 3590 if (objc - i < 2) { 3591 Tcl_WrongNumArgs(interp, 1, objv, 3592 "?switches? string pattern body ... ?default body?"); 3593 return TCL_ERROR; 3594 } 3595 if (indexVarObj != NULL && mode != OPT_REGEXP) { 3596 Tcl_AppendResult(interp, 3597 "-indexvar option requires -regexp option", NULL); 3598 return TCL_ERROR; 3599 } 3600 if (matchVarObj != NULL && mode != OPT_REGEXP) { 3601 Tcl_AppendResult(interp, 3602 "-matchvar option requires -regexp option", NULL); 3603 return TCL_ERROR; 3604 } 3605 3606 stringObj = objv[i]; 3607 objc -= i + 1; 3608 objv += i + 1; 3609 bidx = i + 1; /* First after the match string. */ 3610 3611 /* 3612 * If all of the pattern/command pairs are lumped into a single argument, 3613 * split them out again. 3614 * 3615 * TIP #280: Determine the lines the words in the list start at, based on 3616 * the same data for the list word itself. The cmdFramePtr line 3617 * information is manipulated directly. 3618 */ 3619 3620 splitObjs = 0; 3621 if (objc == 1) { 3622 Tcl_Obj **listv; 3623 blist = objv[0]; 3624 3625 if (TclListObjGetElements(interp, objv[0], &objc, &listv) != TCL_OK){ 3626 return TCL_ERROR; 3627 } 3628 3629 /* 3630 * Ensure that the list is non-empty. 3631 */ 3632 3633 if (objc < 1) { 3634 Tcl_WrongNumArgs(interp, 1, savedObjv, 3635 "?switches? string {pattern body ... ?default body?}"); 3636 return TCL_ERROR; 3637 } 3638 objv = listv; 3639 splitObjs = 1; 3640 } 3641 3642 /* 3643 * Complain if there is an odd number of words in the list of patterns and 3644 * bodies. 3645 */ 3646 3647 if (objc % 2) { 3648 Tcl_ResetResult(interp); 3649 Tcl_AppendResult(interp, "extra switch pattern with no body", NULL); 3650 3651 /* 3652 * Check if this can be due to a badly placed comment in the switch 3653 * block. 3654 * 3655 * The following is an heuristic to detect the infamous "comment in 3656 * switch" error: just check if a pattern begins with '#'. 3657 */ 3658 3659 if (splitObjs) { 3660 for (i=0 ; i<objc ; i+=2) { 3661 if (TclGetString(objv[i])[0] == '#') { 3662 Tcl_AppendResult(interp, ", this may be due to a " 3663 "comment incorrectly placed outside of a " 3664 "switch body - see the \"switch\" " 3665 "documentation", NULL); 3666 break; 3667 } 3668 } 3669 } 3670 3671 return TCL_ERROR; 3672 } 3673 3674 /* 3675 * Complain if the last body is a continuation. Note that this check 3676 * assumes that the list is non-empty! 3677 */ 3678 3679 if (strcmp(TclGetString(objv[objc-1]), "-") == 0) { 3680 Tcl_ResetResult(interp); 3681 Tcl_AppendResult(interp, "no body specified for pattern \"", 3682 TclGetString(objv[objc-2]), "\"", NULL); 3683 return TCL_ERROR; 3684 } 3685 3686 for (i = 0; i < objc; i += 2) { 3687 /* 3688 * See if the pattern matches the string. 3689 */ 3690 3691 pattern = TclGetStringFromObj(objv[i], &patternLength); 3692 3693 if ((i == objc - 2) && (*pattern == 'd') 3694 && (strcmp(pattern, "default") == 0)) { 3695 Tcl_Obj *emptyObj = NULL; 3696 3697 /* 3698 * If either indexVarObj or matchVarObj are non-NULL, we're in 3699 * REGEXP mode but have reached the default clause anyway. TIP#75 3700 * specifies that we set the variables to empty lists (== empty 3701 * objects) in that case. 3702 */ 3703 3704 if (indexVarObj != NULL) { 3705 TclNewObj(emptyObj); 3706 if (Tcl_ObjSetVar2(interp, indexVarObj, NULL, emptyObj, 3707 TCL_LEAVE_ERR_MSG) == NULL) { 3708 return TCL_ERROR; 3709 } 3710 } 3711 if (matchVarObj != NULL) { 3712 if (emptyObj == NULL) { 3713 TclNewObj(emptyObj); 3714 } 3715 if (Tcl_ObjSetVar2(interp, matchVarObj, NULL, emptyObj, 3716 TCL_LEAVE_ERR_MSG) == NULL) { 3717 return TCL_ERROR; 3718 } 3719 } 3720 goto matchFound; 3721 } else { 3722 switch (mode) { 3723 case OPT_EXACT: 3724 if (strCmpFn(TclGetString(stringObj), pattern) == 0) { 3725 goto matchFound; 3726 } 3727 break; 3728 case OPT_GLOB: 3729 if (Tcl_StringCaseMatch(TclGetString(stringObj), pattern, 3730 noCase)) { 3731 goto matchFound; 3732 } 3733 break; 3734 case OPT_REGEXP: 3735 regExpr = Tcl_GetRegExpFromObj(interp, objv[i], 3736 TCL_REG_ADVANCED | (noCase ? TCL_REG_NOCASE : 0)); 3737 if (regExpr == NULL) { 3738 return TCL_ERROR; 3739 } else { 3740 int matched = Tcl_RegExpExecObj(interp, regExpr, 3741 stringObj, 0, numMatchesSaved, 0); 3742 3743 if (matched < 0) { 3744 return TCL_ERROR; 3745 } else if (matched) { 3746 goto matchFoundRegexp; 3747 } 3748 } 3749 break; 3750 } 3751 } 3752 } 3753 return TCL_OK; 3754 3755 matchFoundRegexp: 3756 /* 3757 * We are operating in REGEXP mode and we need to store information about 3758 * what we matched in some user-nominated arrays. So build the lists of 3759 * values and indices to write here. [TIP#75] 3760 */ 3761 3762 if (numMatchesSaved) { 3763 Tcl_RegExpInfo info; 3764 Tcl_Obj *matchesObj, *indicesObj = NULL; 3765 3766 Tcl_RegExpGetInfo(regExpr, &info); 3767 if (matchVarObj != NULL) { 3768 TclNewObj(matchesObj); 3769 } else { 3770 matchesObj = NULL; 3771 } 3772 if (indexVarObj != NULL) { 3773 TclNewObj(indicesObj); 3774 } 3775 3776 for (j=0 ; j<=info.nsubs ; j++) { 3777 if (indexVarObj != NULL) { 3778 Tcl_Obj *rangeObjAry[2]; 3779 3780 rangeObjAry[0] = Tcl_NewLongObj(info.matches[j].start); 3781 rangeObjAry[1] = Tcl_NewLongObj(info.matches[j].end); 3782 3783 /* 3784 * Never fails; the object is always clean at this point. 3785 */ 3786 3787 Tcl_ListObjAppendElement(NULL, indicesObj, 3788 Tcl_NewListObj(2, rangeObjAry)); 3789 } 3790 3791 if (matchVarObj != NULL) { 3792 Tcl_Obj *substringObj; 3793 3794 substringObj = Tcl_GetRange(stringObj, 3795 info.matches[j].start, info.matches[j].end-1); 3796 3797 /* 3798 * Never fails; the object is always clean at this point. 3799 */ 3800 3801 Tcl_ListObjAppendElement(NULL, matchesObj, substringObj); 3802 } 3803 } 3804 3805 if (indexVarObj != NULL) { 3806 if (Tcl_ObjSetVar2(interp, indexVarObj, NULL, indicesObj, 3807 TCL_LEAVE_ERR_MSG) == NULL) { 3808 /* 3809 * Careful! Check to see if we have allocated the list of 3810 * matched strings; if so (but there was an error assigning 3811 * the indices list) we have a potential memory leak because 3812 * the match list has not been written to a variable. Except 3813 * that we'll clean that up right now. 3814 */ 3815 3816 if (matchesObj != NULL) { 3817 Tcl_DecrRefCount(matchesObj); 3818 } 3819 return TCL_ERROR; 3820 } 3821 } 3822 if (matchVarObj != NULL) { 3823 if (Tcl_ObjSetVar2(interp, matchVarObj, NULL, matchesObj, 3824 TCL_LEAVE_ERR_MSG) == NULL) { 3825 /* 3826 * Unlike above, if indicesObj is non-NULL at this point, it 3827 * will have been written to a variable already and will hence 3828 * not be leaked. 3829 */ 3830 3831 return TCL_ERROR; 3832 } 3833 } 3834 } 3835 3836 /* 3837 * We've got a match. Find a body to execute, skipping bodies that are 3838 * "-". 3839 */ 3840 3841 matchFound: 3842 ctxPtr = (CmdFrame *) TclStackAlloc(interp, sizeof(CmdFrame)); 3843 *ctxPtr = *iPtr->cmdFramePtr; 3844 3845 if (splitObjs) { 3846 /* 3847 * We have to perform the GetSrc and other type dependent handling of 3848 * the frame here because we are munging with the line numbers, 3849 * something the other commands like if, etc. are not doing. Them are 3850 * fine with simply passing the CmdFrame through and having the 3851 * special handling done in 'info frame', or the bc compiler 3852 */ 3853 3854 if (ctxPtr->type == TCL_LOCATION_BC) { 3855 /* 3856 * Type BC => ctxPtr->data.eval.path is not used. 3857 * ctxPtr->data.tebc.codePtr is used instead. 3858 */ 3859 3860 TclGetSrcInfoForPc(ctxPtr); 3861 pc = 1; 3862 3863 /* 3864 * The line information in the cmdFrame is now a copy we do not 3865 * own. 3866 */ 3867 } 3868 3869 if (ctxPtr->type == TCL_LOCATION_SOURCE && ctxPtr->line[bidx] >= 0) { 3870 int bline = ctxPtr->line[bidx]; 3871 3872 ctxPtr->line = (int *) ckalloc(objc * sizeof(int)); 3873 ctxPtr->nline = objc; 3874 TclListLines(blist, bline, objc, ctxPtr->line, objv); 3875 } else { 3876 /* 3877 * This is either a dynamic code word, when all elements are 3878 * relative to themselves, or something else less expected and 3879 * where we have no information. The result is the same in both 3880 * cases; tell the code to come that it doesn't know where it is, 3881 * which triggers reversion to the old behavior. 3882 */ 3883 3884 int k; 3885 3886 ctxPtr->line = (int *) ckalloc(objc * sizeof(int)); 3887 ctxPtr->nline = objc; 3888 for (k=0; k < objc; k++) { 3889 ctxPtr->line[k] = -1; 3890 } 3891 } 3892 } 3893 3894 for (j = i + 1; ; j += 2) { 3895 if (j >= objc) { 3896 /* 3897 * This shouldn't happen since we've checked that the last body is 3898 * not a continuation... 3899 */ 3900 3901 Tcl_Panic("fall-out when searching for body to match pattern"); 3902 } 3903 if (strcmp(TclGetString(objv[j]), "-") != 0) { 3904 break; 3905 } 3906 } 3907 3908 /* 3909 * TIP #280: Make invoking context available to switch branch. 3910 */ 3911 3912 result = TclEvalObjEx(interp, objv[j], 0, ctxPtr, splitObjs ? j : bidx+j); 3913 if (splitObjs) { 3914 ckfree((char *) ctxPtr->line); 3915 if (pc && (ctxPtr->type == TCL_LOCATION_SOURCE)) { 3916 /* 3917 * Death of SrcInfo reference. 3918 */ 3919 3920 Tcl_DecrRefCount(ctxPtr->data.eval.path); 3921 } 3922 } 3923 3924 /* 3925 * Generate an error message if necessary. 3926 */ 3927 3928 if (result == TCL_ERROR) { 3929 int limit = 50; 3930 int overflow = (patternLength > limit); 3931 3932 Tcl_AppendObjToErrorInfo(interp, Tcl_ObjPrintf( 3933 "\n (\"%.*s%s\" arm line %d)", 3934 (overflow ? limit : patternLength), pattern, 3935 (overflow ? "..." : ""), interp->errorLine)); 3936 } 3937 TclStackFree(interp, ctxPtr); 3938 return result; 3939} 3940 3941/* 3942 *---------------------------------------------------------------------- 3943 * 3944 * Tcl_TimeObjCmd -- 3945 * 3946 * This object-based procedure is invoked to process the "time" Tcl 3947 * command. See the user documentation for details on what it does. 3948 * 3949 * Results: 3950 * A standard Tcl object result. 3951 * 3952 * Side effects: 3953 * See the user documentation. 3954 * 3955 *---------------------------------------------------------------------- 3956 */ 3957 3958int 3959Tcl_TimeObjCmd( 3960 ClientData dummy, /* Not used. */ 3961 Tcl_Interp *interp, /* Current interpreter. */ 3962 int objc, /* Number of arguments. */ 3963 Tcl_Obj *CONST objv[]) /* Argument objects. */ 3964{ 3965 register Tcl_Obj *objPtr; 3966 Tcl_Obj *objs[4]; 3967 register int i, result; 3968 int count; 3969 double totalMicroSec; 3970#ifndef TCL_WIDE_CLICKS 3971 Tcl_Time start, stop; 3972#else 3973 Tcl_WideInt start, stop; 3974#endif 3975 3976 if (objc == 2) { 3977 count = 1; 3978 } else if (objc == 3) { 3979 result = TclGetIntFromObj(interp, objv[2], &count); 3980 if (result != TCL_OK) { 3981 return result; 3982 } 3983 } else { 3984 Tcl_WrongNumArgs(interp, 1, objv, "command ?count?"); 3985 return TCL_ERROR; 3986 } 3987 3988 objPtr = objv[1]; 3989 i = count; 3990#ifndef TCL_WIDE_CLICKS 3991 Tcl_GetTime(&start); 3992#else 3993 start = TclpGetWideClicks(); 3994#endif 3995 while (i-- > 0) { 3996 result = Tcl_EvalObjEx(interp, objPtr, 0); 3997 if (result != TCL_OK) { 3998 return result; 3999 } 4000 } 4001#ifndef TCL_WIDE_CLICKS 4002 Tcl_GetTime(&stop); 4003 totalMicroSec = ((double) (stop.sec - start.sec)) * 1.0e6 4004 + (stop.usec - start.usec); 4005#else 4006 stop = TclpGetWideClicks(); 4007 totalMicroSec = ((double) TclpWideClicksToNanoseconds(stop - start))/1.0e3; 4008#endif 4009 4010 if (count <= 1) { 4011 /* 4012 * Use int obj since we know time is not fractional. [Bug 1202178] 4013 */ 4014 4015 objs[0] = Tcl_NewIntObj((count <= 0) ? 0 : (int) totalMicroSec); 4016 } else { 4017 objs[0] = Tcl_NewDoubleObj(totalMicroSec/count); 4018 } 4019 4020 /* 4021 * Construct the result as a list because many programs have always parsed 4022 * as such (extracting the first element, typically). 4023 */ 4024 4025 TclNewLiteralStringObj(objs[1], "microseconds"); 4026 TclNewLiteralStringObj(objs[2], "per"); 4027 TclNewLiteralStringObj(objs[3], "iteration"); 4028 Tcl_SetObjResult(interp, Tcl_NewListObj(4, objs)); 4029 4030 return TCL_OK; 4031} 4032 4033/* 4034 *---------------------------------------------------------------------- 4035 * 4036 * Tcl_WhileObjCmd -- 4037 * 4038 * This procedure is invoked to process the "while" Tcl command. See the 4039 * user documentation for details on what it does. 4040 * 4041 * With the bytecode compiler, this procedure is only called when a 4042 * command name is computed at runtime, and is "while" or the name to 4043 * which "while" was renamed: e.g., "set z while; $z {$i<100} {}" 4044 * 4045 * Results: 4046 * A standard Tcl result. 4047 * 4048 * Side effects: 4049 * See the user documentation. 4050 * 4051 *---------------------------------------------------------------------- 4052 */ 4053 4054int 4055Tcl_WhileObjCmd( 4056 ClientData dummy, /* Not used. */ 4057 Tcl_Interp *interp, /* Current interpreter. */ 4058 int objc, /* Number of arguments. */ 4059 Tcl_Obj *CONST objv[]) /* Argument objects. */ 4060{ 4061 int result, value; 4062 Interp *iPtr = (Interp *) interp; 4063 4064 if (objc != 3) { 4065 Tcl_WrongNumArgs(interp, 1, objv, "test command"); 4066 return TCL_ERROR; 4067 } 4068 4069 while (1) { 4070 result = Tcl_ExprBooleanObj(interp, objv[1], &value); 4071 if (result != TCL_OK) { 4072 return result; 4073 } 4074 if (!value) { 4075 break; 4076 } 4077 4078 /* TIP #280. */ 4079 result = TclEvalObjEx(interp, objv[2], 0, iPtr->cmdFramePtr, 2); 4080 if ((result != TCL_OK) && (result != TCL_CONTINUE)) { 4081 if (result == TCL_ERROR) { 4082 Tcl_AppendObjToErrorInfo(interp, Tcl_ObjPrintf( 4083 "\n (\"while\" body line %d)", interp->errorLine)); 4084 } 4085 break; 4086 } 4087 } 4088 if (result == TCL_BREAK) { 4089 result = TCL_OK; 4090 } 4091 if (result == TCL_OK) { 4092 Tcl_ResetResult(interp); 4093 } 4094 return result; 4095} 4096 4097/* 4098 *---------------------------------------------------------------------- 4099 * 4100 * TclListLines -- 4101 * 4102 * ??? 4103 * 4104 * Results: 4105 * Filled in array of line numbers? 4106 * 4107 * Side effects: 4108 * None. 4109 * 4110 *---------------------------------------------------------------------- 4111 */ 4112 4113void 4114TclListLines( 4115 Tcl_Obj* listObj, /* Pointer to obj holding a string with list 4116 * structure. Assumed to be valid. Assumed to 4117 * contain n elements. 4118 */ 4119 int line, /* Line the list as a whole starts on. */ 4120 int n, /* #elements in lines */ 4121 int *lines, /* Array of line numbers, to fill. */ 4122 Tcl_Obj* const* elems) /* The list elems as Tcl_Obj*, in need of 4123 * derived continuation data */ 4124{ 4125 CONST char* listStr = Tcl_GetString (listObj); 4126 CONST char* listHead = listStr; 4127 int i, length = strlen(listStr); 4128 CONST char *element = NULL, *next = NULL; 4129 ContLineLoc* clLocPtr = TclContinuationsGet(listObj); 4130 int* clNext = (clLocPtr ? &clLocPtr->loc[0] : NULL); 4131 4132 for (i = 0; i < n; i++) { 4133 TclFindElement(NULL, listStr, length, &element, &next, NULL, NULL); 4134 4135 TclAdvanceLines(&line, listStr, element); 4136 /* Leading whitespace */ 4137 TclAdvanceContinuations (&line, &clNext, element - listHead); 4138 if (elems && clNext) { 4139 TclContinuationsEnterDerived (elems[i], element - listHead, 4140 clNext); 4141 } 4142 lines[i] = line; 4143 length -= (next - listStr); 4144 TclAdvanceLines(&line, element, next); 4145 /* Element */ 4146 listStr = next; 4147 4148 if (*element == 0) { 4149 /* ASSERT i == n */ 4150 break; 4151 } 4152 } 4153} 4154 4155/* 4156 * Local Variables: 4157 * mode: c 4158 * c-basic-offset: 4 4159 * fill-column: 78 4160 * End: 4161 */ 4162