1/* 2 * tclScan.c -- 3 * 4 * This file contains the implementation of the "scan" command. 5 * 6 * Copyright (c) 1998 by Scriptics Corporation. 7 * 8 * See the file "license.terms" for information on usage and redistribution 9 * of this file, and for a DISCLAIMER OF ALL WARRANTIES. 10 * 11 * RCS: @(#) $Id: tclScan.c,v 1.12.2.2 2005/10/23 22:01:30 msofer Exp $ 12 */ 13 14#include "tclInt.h" 15/* 16 * For strtoll() and strtoull() declarations on some platforms... 17 */ 18#include "tclPort.h" 19 20/* 21 * Flag values used by Tcl_ScanObjCmd. 22 */ 23 24#define SCAN_NOSKIP 0x1 /* Don't skip blanks. */ 25#define SCAN_SUPPRESS 0x2 /* Suppress assignment. */ 26#define SCAN_UNSIGNED 0x4 /* Read an unsigned value. */ 27#define SCAN_WIDTH 0x8 /* A width value was supplied. */ 28 29#define SCAN_SIGNOK 0x10 /* A +/- character is allowed. */ 30#define SCAN_NODIGITS 0x20 /* No digits have been scanned. */ 31#define SCAN_NOZERO 0x40 /* No zero digits have been scanned. */ 32#define SCAN_XOK 0x80 /* An 'x' is allowed. */ 33#define SCAN_PTOK 0x100 /* Decimal point is allowed. */ 34#define SCAN_EXPOK 0x200 /* An exponent is allowed. */ 35 36#define SCAN_LONGER 0x400 /* Asked for a wide value. */ 37 38/* 39 * The following structure contains the information associated with 40 * a character set. 41 */ 42 43typedef struct CharSet { 44 int exclude; /* 1 if this is an exclusion set. */ 45 int nchars; 46 Tcl_UniChar *chars; 47 int nranges; 48 struct Range { 49 Tcl_UniChar start; 50 Tcl_UniChar end; 51 } *ranges; 52} CharSet; 53 54/* 55 * Declarations for functions used only in this file. 56 */ 57 58static char * BuildCharSet _ANSI_ARGS_((CharSet *cset, char *format)); 59static int CharInSet _ANSI_ARGS_((CharSet *cset, int ch)); 60static void ReleaseCharSet _ANSI_ARGS_((CharSet *cset)); 61static int ValidateFormat _ANSI_ARGS_((Tcl_Interp *interp, char *format, 62 int numVars, int *totalVars)); 63 64/* 65 *---------------------------------------------------------------------- 66 * 67 * BuildCharSet -- 68 * 69 * This function examines a character set format specification 70 * and builds a CharSet containing the individual characters and 71 * character ranges specified. 72 * 73 * Results: 74 * Returns the next format position. 75 * 76 * Side effects: 77 * Initializes the charset. 78 * 79 *---------------------------------------------------------------------- 80 */ 81 82static char * 83BuildCharSet(cset, format) 84 CharSet *cset; 85 char *format; /* Points to first char of set. */ 86{ 87 Tcl_UniChar ch, start; 88 int offset, nranges; 89 char *end; 90 91 memset(cset, 0, sizeof(CharSet)); 92 93 offset = Tcl_UtfToUniChar(format, &ch); 94 if (ch == '^') { 95 cset->exclude = 1; 96 format += offset; 97 offset = Tcl_UtfToUniChar(format, &ch); 98 } 99 end = format + offset; 100 101 /* 102 * Find the close bracket so we can overallocate the set. 103 */ 104 105 if (ch == ']') { 106 end += Tcl_UtfToUniChar(end, &ch); 107 } 108 nranges = 0; 109 while (ch != ']') { 110 if (ch == '-') { 111 nranges++; 112 } 113 end += Tcl_UtfToUniChar(end, &ch); 114 } 115 116 cset->chars = (Tcl_UniChar *) ckalloc(sizeof(Tcl_UniChar) 117 * (end - format - 1)); 118 if (nranges > 0) { 119 cset->ranges = (struct Range *) ckalloc(sizeof(struct Range)*nranges); 120 } else { 121 cset->ranges = NULL; 122 } 123 124 /* 125 * Now build the character set. 126 */ 127 128 cset->nchars = cset->nranges = 0; 129 format += Tcl_UtfToUniChar(format, &ch); 130 start = ch; 131 if (ch == ']' || ch == '-') { 132 cset->chars[cset->nchars++] = ch; 133 format += Tcl_UtfToUniChar(format, &ch); 134 } 135 while (ch != ']') { 136 if (*format == '-') { 137 /* 138 * This may be the first character of a range, so don't add 139 * it yet. 140 */ 141 142 start = ch; 143 } else if (ch == '-') { 144 /* 145 * Check to see if this is the last character in the set, in which 146 * case it is not a range and we should add the previous character 147 * as well as the dash. 148 */ 149 150 if (*format == ']') { 151 cset->chars[cset->nchars++] = start; 152 cset->chars[cset->nchars++] = ch; 153 } else { 154 format += Tcl_UtfToUniChar(format, &ch); 155 156 /* 157 * Check to see if the range is in reverse order. 158 */ 159 160 if (start < ch) { 161 cset->ranges[cset->nranges].start = start; 162 cset->ranges[cset->nranges].end = ch; 163 } else { 164 cset->ranges[cset->nranges].start = ch; 165 cset->ranges[cset->nranges].end = start; 166 } 167 cset->nranges++; 168 } 169 } else { 170 cset->chars[cset->nchars++] = ch; 171 } 172 format += Tcl_UtfToUniChar(format, &ch); 173 } 174 return format; 175} 176 177/* 178 *---------------------------------------------------------------------- 179 * 180 * CharInSet -- 181 * 182 * Check to see if a character matches the given set. 183 * 184 * Results: 185 * Returns non-zero if the character matches the given set. 186 * 187 * Side effects: 188 * None. 189 * 190 *---------------------------------------------------------------------- 191 */ 192 193static int 194CharInSet(cset, c) 195 CharSet *cset; 196 int c; /* Character to test, passed as int because 197 * of non-ANSI prototypes. */ 198{ 199 Tcl_UniChar ch = (Tcl_UniChar) c; 200 int i, match = 0; 201 for (i = 0; i < cset->nchars; i++) { 202 if (cset->chars[i] == ch) { 203 match = 1; 204 break; 205 } 206 } 207 if (!match) { 208 for (i = 0; i < cset->nranges; i++) { 209 if ((cset->ranges[i].start <= ch) 210 && (ch <= cset->ranges[i].end)) { 211 match = 1; 212 break; 213 } 214 } 215 } 216 return (cset->exclude ? !match : match); 217} 218 219/* 220 *---------------------------------------------------------------------- 221 * 222 * ReleaseCharSet -- 223 * 224 * Free the storage associated with a character set. 225 * 226 * Results: 227 * None. 228 * 229 * Side effects: 230 * None. 231 * 232 *---------------------------------------------------------------------- 233 */ 234 235static void 236ReleaseCharSet(cset) 237 CharSet *cset; 238{ 239 ckfree((char *)cset->chars); 240 if (cset->ranges) { 241 ckfree((char *)cset->ranges); 242 } 243} 244 245/* 246 *---------------------------------------------------------------------- 247 * 248 * ValidateFormat -- 249 * 250 * Parse the format string and verify that it is properly formed 251 * and that there are exactly enough variables on the command line. 252 * 253 * Results: 254 * A standard Tcl result. 255 * 256 * Side effects: 257 * May place an error in the interpreter result. 258 * 259 *---------------------------------------------------------------------- 260 */ 261 262static int 263ValidateFormat(interp, format, numVars, totalSubs) 264 Tcl_Interp *interp; /* Current interpreter. */ 265 char *format; /* The format string. */ 266 int numVars; /* The number of variables passed to the 267 * scan command. */ 268 int *totalSubs; /* The number of variables that will be 269 * required. */ 270{ 271#define STATIC_LIST_SIZE 16 272 int gotXpg, gotSequential, value, i, flags; 273 char *end; 274 Tcl_UniChar ch; 275 int staticAssign[STATIC_LIST_SIZE]; 276 int *nassign = staticAssign; 277 int objIndex, xpgSize, nspace = STATIC_LIST_SIZE; 278 char buf[TCL_UTF_MAX+1]; 279 280 /* 281 * Initialize an array that records the number of times a variable 282 * is assigned to by the format string. We use this to detect if 283 * a variable is multiply assigned or left unassigned. 284 */ 285 286 if (numVars > nspace) { 287 nassign = (int*)ckalloc(sizeof(int) * numVars); 288 nspace = numVars; 289 } 290 for (i = 0; i < nspace; i++) { 291 nassign[i] = 0; 292 } 293 294 xpgSize = objIndex = gotXpg = gotSequential = 0; 295 296 while (*format != '\0') { 297 format += Tcl_UtfToUniChar(format, &ch); 298 299 flags = 0; 300 301 if (ch != '%') { 302 continue; 303 } 304 format += Tcl_UtfToUniChar(format, &ch); 305 if (ch == '%') { 306 continue; 307 } 308 if (ch == '*') { 309 flags |= SCAN_SUPPRESS; 310 format += Tcl_UtfToUniChar(format, &ch); 311 goto xpgCheckDone; 312 } 313 314 if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */ 315 /* 316 * Check for an XPG3-style %n$ specification. Note: there 317 * must not be a mixture of XPG3 specs and non-XPG3 specs 318 * in the same format string. 319 */ 320 321 value = strtoul(format-1, &end, 10); /* INTL: "C" locale. */ 322 if (*end != '$') { 323 goto notXpg; 324 } 325 format = end+1; 326 format += Tcl_UtfToUniChar(format, &ch); 327 gotXpg = 1; 328 if (gotSequential) { 329 goto mixedXPG; 330 } 331 objIndex = value - 1; 332 if ((objIndex < 0) || (numVars && (objIndex >= numVars))) { 333 goto badIndex; 334 } else if (numVars == 0) { 335 /* 336 * In the case where no vars are specified, the user can 337 * specify %9999$ legally, so we have to consider special 338 * rules for growing the assign array. 'value' is 339 * guaranteed to be > 0. 340 */ 341 xpgSize = (xpgSize > value) ? xpgSize : value; 342 } 343 goto xpgCheckDone; 344 } 345 346 notXpg: 347 gotSequential = 1; 348 if (gotXpg) { 349 mixedXPG: 350 Tcl_SetResult(interp, 351 "cannot mix \"%\" and \"%n$\" conversion specifiers", 352 TCL_STATIC); 353 goto error; 354 } 355 356 xpgCheckDone: 357 /* 358 * Parse any width specifier. 359 */ 360 361 if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */ 362 value = strtoul(format-1, &format, 10); /* INTL: "C" locale. */ 363 flags |= SCAN_WIDTH; 364 format += Tcl_UtfToUniChar(format, &ch); 365 } 366 367 /* 368 * Handle any size specifier. 369 */ 370 371 switch (ch) { 372 case 'l': 373 case 'L': 374 flags |= SCAN_LONGER; 375 case 'h': 376 format += Tcl_UtfToUniChar(format, &ch); 377 } 378 379 if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) { 380 goto badIndex; 381 } 382 383 /* 384 * Handle the various field types. 385 */ 386 387 switch (ch) { 388 case 'c': 389 if (flags & SCAN_WIDTH) { 390 Tcl_SetResult(interp, 391 "field width may not be specified in %c conversion", 392 TCL_STATIC); 393 goto error; 394 } 395 /* 396 * Fall through! 397 */ 398 case 'n': 399 case 's': 400 if (flags & SCAN_LONGER) { 401 invalidLonger: 402 buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; 403 Tcl_AppendStringsToObj(Tcl_GetObjResult(interp), 404 "'l' modifier may not be specified in %", buf, 405 " conversion", NULL); 406 goto error; 407 } 408 /* 409 * Fall through! 410 */ 411 case 'd': 412 case 'e': 413 case 'f': 414 case 'g': 415 case 'i': 416 case 'o': 417 case 'u': 418 case 'x': 419 break; 420 /* 421 * Bracket terms need special checking 422 */ 423 case '[': 424 if (flags & SCAN_LONGER) { 425 goto invalidLonger; 426 } 427 if (*format == '\0') { 428 goto badSet; 429 } 430 format += Tcl_UtfToUniChar(format, &ch); 431 if (ch == '^') { 432 if (*format == '\0') { 433 goto badSet; 434 } 435 format += Tcl_UtfToUniChar(format, &ch); 436 } 437 if (ch == ']') { 438 if (*format == '\0') { 439 goto badSet; 440 } 441 format += Tcl_UtfToUniChar(format, &ch); 442 } 443 while (ch != ']') { 444 if (*format == '\0') { 445 goto badSet; 446 } 447 format += Tcl_UtfToUniChar(format, &ch); 448 } 449 break; 450 badSet: 451 Tcl_SetResult(interp, "unmatched [ in format string", 452 TCL_STATIC); 453 goto error; 454 default: 455 { 456 char buf[TCL_UTF_MAX+1]; 457 458 buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; 459 Tcl_AppendStringsToObj(Tcl_GetObjResult(interp), 460 "bad scan conversion character \"", buf, "\"", NULL); 461 goto error; 462 } 463 } 464 if (!(flags & SCAN_SUPPRESS)) { 465 if (objIndex >= nspace) { 466 /* 467 * Expand the nassign buffer. If we are using XPG specifiers, 468 * make sure that we grow to a large enough size. xpgSize is 469 * guaranteed to be at least one larger than objIndex. 470 */ 471 value = nspace; 472 if (xpgSize) { 473 nspace = xpgSize; 474 } else { 475 nspace += STATIC_LIST_SIZE; 476 } 477 if (nassign == staticAssign) { 478 nassign = (void *)ckalloc(nspace * sizeof(int)); 479 for (i = 0; i < STATIC_LIST_SIZE; ++i) { 480 nassign[i] = staticAssign[i]; 481 } 482 } else { 483 nassign = (void *)ckrealloc((void *)nassign, 484 nspace * sizeof(int)); 485 } 486 for (i = value; i < nspace; i++) { 487 nassign[i] = 0; 488 } 489 } 490 nassign[objIndex]++; 491 objIndex++; 492 } 493 } 494 495 /* 496 * Verify that all of the variable were assigned exactly once. 497 */ 498 499 if (numVars == 0) { 500 if (xpgSize) { 501 numVars = xpgSize; 502 } else { 503 numVars = objIndex; 504 } 505 } 506 if (totalSubs) { 507 *totalSubs = numVars; 508 } 509 for (i = 0; i < numVars; i++) { 510 if (nassign[i] > 1) { 511 Tcl_SetResult(interp, "variable is assigned by multiple \"%n$\" conversion specifiers", TCL_STATIC); 512 goto error; 513 } else if (!xpgSize && (nassign[i] == 0)) { 514 /* 515 * If the space is empty, and xpgSize is 0 (means XPG wasn't 516 * used, and/or numVars != 0), then too many vars were given 517 */ 518 Tcl_SetResult(interp, "variable is not assigned by any conversion specifiers", TCL_STATIC); 519 goto error; 520 } 521 } 522 523 if (nassign != staticAssign) { 524 ckfree((char *)nassign); 525 } 526 return TCL_OK; 527 528 badIndex: 529 if (gotXpg) { 530 Tcl_SetResult(interp, "\"%n$\" argument index out of range", 531 TCL_STATIC); 532 } else { 533 Tcl_SetResult(interp, 534 "different numbers of variable names and field specifiers", 535 TCL_STATIC); 536 } 537 538 error: 539 if (nassign != staticAssign) { 540 ckfree((char *)nassign); 541 } 542 return TCL_ERROR; 543#undef STATIC_LIST_SIZE 544} 545 546/* 547 *---------------------------------------------------------------------- 548 * 549 * Tcl_ScanObjCmd -- 550 * 551 * This procedure is invoked to process the "scan" Tcl command. 552 * See the user documentation for details on what it does. 553 * 554 * Results: 555 * A standard Tcl result. 556 * 557 * Side effects: 558 * See the user documentation. 559 * 560 *---------------------------------------------------------------------- 561 */ 562 563 /* ARGSUSED */ 564int 565Tcl_ScanObjCmd(dummy, interp, objc, objv) 566 ClientData dummy; /* Not used. */ 567 Tcl_Interp *interp; /* Current interpreter. */ 568 int objc; /* Number of arguments. */ 569 Tcl_Obj *CONST objv[]; /* Argument objects. */ 570{ 571 char *format; 572 int numVars, nconversions, totalVars = -1; 573 int objIndex, offset, i, result, code; 574 long value; 575 char *string, *end, *baseString; 576 char op = 0; 577 int base = 0; 578 int underflow = 0; 579 size_t width; 580 long (*fn)() = NULL; 581#ifndef TCL_WIDE_INT_IS_LONG 582 Tcl_WideInt (*lfn)() = NULL; 583 Tcl_WideInt wideValue; 584#endif 585 Tcl_UniChar ch, sch; 586 Tcl_Obj **objs = NULL, *objPtr = NULL; 587 int flags; 588 char buf[513]; /* Temporary buffer to hold scanned 589 * number strings before they are 590 * passed to strtoul. */ 591 592 if (objc < 3) { 593 Tcl_WrongNumArgs(interp, 1, objv, 594 "string format ?varName varName ...?"); 595 return TCL_ERROR; 596 } 597 598 format = Tcl_GetStringFromObj(objv[2], NULL); 599 numVars = objc-3; 600 601 /* 602 * Check for errors in the format string. 603 */ 604 605 if (ValidateFormat(interp, format, numVars, &totalVars) == TCL_ERROR) { 606 return TCL_ERROR; 607 } 608 609 /* 610 * Allocate space for the result objects. 611 */ 612 613 if (totalVars > 0) { 614 objs = (Tcl_Obj **) ckalloc(sizeof(Tcl_Obj*) * totalVars); 615 for (i = 0; i < totalVars; i++) { 616 objs[i] = NULL; 617 } 618 } 619 620 string = Tcl_GetStringFromObj(objv[1], NULL); 621 baseString = string; 622 623 /* 624 * Iterate over the format string filling in the result objects until 625 * we reach the end of input, the end of the format string, or there 626 * is a mismatch. 627 */ 628 629 objIndex = 0; 630 nconversions = 0; 631 while (*format != '\0') { 632 format += Tcl_UtfToUniChar(format, &ch); 633 634 flags = 0; 635 636 /* 637 * If we see whitespace in the format, skip whitespace in the string. 638 */ 639 640 if (Tcl_UniCharIsSpace(ch)) { 641 offset = Tcl_UtfToUniChar(string, &sch); 642 while (Tcl_UniCharIsSpace(sch)) { 643 if (*string == '\0') { 644 goto done; 645 } 646 string += offset; 647 offset = Tcl_UtfToUniChar(string, &sch); 648 } 649 continue; 650 } 651 652 if (ch != '%') { 653 literal: 654 if (*string == '\0') { 655 underflow = 1; 656 goto done; 657 } 658 string += Tcl_UtfToUniChar(string, &sch); 659 if (ch != sch) { 660 goto done; 661 } 662 continue; 663 } 664 665 format += Tcl_UtfToUniChar(format, &ch); 666 if (ch == '%') { 667 goto literal; 668 } 669 670 /* 671 * Check for assignment suppression ('*') or an XPG3-style 672 * assignment ('%n$'). 673 */ 674 675 if (ch == '*') { 676 flags |= SCAN_SUPPRESS; 677 format += Tcl_UtfToUniChar(format, &ch); 678 } else if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */ 679 value = strtoul(format-1, &end, 10); /* INTL: "C" locale. */ 680 if (*end == '$') { 681 format = end+1; 682 format += Tcl_UtfToUniChar(format, &ch); 683 objIndex = (int) value - 1; 684 } 685 } 686 687 /* 688 * Parse any width specifier. 689 */ 690 691 if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */ 692 width = strtoul(format-1, &format, 10); /* INTL: "C" locale. */ 693 format += Tcl_UtfToUniChar(format, &ch); 694 } else { 695 width = 0; 696 } 697 698 /* 699 * Handle any size specifier. 700 */ 701 702 switch (ch) { 703 case 'l': 704 case 'L': 705 flags |= SCAN_LONGER; 706 /* 707 * Fall through so we skip to the next character. 708 */ 709 case 'h': 710 format += Tcl_UtfToUniChar(format, &ch); 711 } 712 713 /* 714 * Handle the various field types. 715 */ 716 717 switch (ch) { 718 case 'n': 719 if (!(flags & SCAN_SUPPRESS)) { 720 objPtr = Tcl_NewIntObj(string - baseString); 721 Tcl_IncrRefCount(objPtr); 722 objs[objIndex++] = objPtr; 723 } 724 nconversions++; 725 continue; 726 727 case 'd': 728 op = 'i'; 729 base = 10; 730 fn = (long (*)())strtol; 731#ifndef TCL_WIDE_INT_IS_LONG 732 lfn = (Tcl_WideInt (*)())strtoll; 733#endif 734 break; 735 case 'i': 736 op = 'i'; 737 base = 0; 738 fn = (long (*)())strtol; 739#ifndef TCL_WIDE_INT_IS_LONG 740 lfn = (Tcl_WideInt (*)())strtoll; 741#endif 742 break; 743 case 'o': 744 op = 'i'; 745 base = 8; 746 fn = (long (*)())strtoul; 747#ifndef TCL_WIDE_INT_IS_LONG 748 lfn = (Tcl_WideInt (*)())strtoull; 749#endif 750 break; 751 case 'x': 752 op = 'i'; 753 base = 16; 754 fn = (long (*)())strtoul; 755#ifndef TCL_WIDE_INT_IS_LONG 756 lfn = (Tcl_WideInt (*)())strtoull; 757#endif 758 break; 759 case 'u': 760 op = 'i'; 761 base = 10; 762 flags |= SCAN_UNSIGNED; 763 fn = (long (*)())strtoul; 764#ifndef TCL_WIDE_INT_IS_LONG 765 lfn = (Tcl_WideInt (*)())strtoull; 766#endif 767 break; 768 769 case 'f': 770 case 'e': 771 case 'g': 772 op = 'f'; 773 break; 774 775 case 's': 776 op = 's'; 777 break; 778 779 case 'c': 780 op = 'c'; 781 flags |= SCAN_NOSKIP; 782 break; 783 case '[': 784 op = '['; 785 flags |= SCAN_NOSKIP; 786 break; 787 } 788 789 /* 790 * At this point, we will need additional characters from the 791 * string to proceed. 792 */ 793 794 if (*string == '\0') { 795 underflow = 1; 796 goto done; 797 } 798 799 /* 800 * Skip any leading whitespace at the beginning of a field unless 801 * the format suppresses this behavior. 802 */ 803 804 if (!(flags & SCAN_NOSKIP)) { 805 while (*string != '\0') { 806 offset = Tcl_UtfToUniChar(string, &sch); 807 if (!Tcl_UniCharIsSpace(sch)) { 808 break; 809 } 810 string += offset; 811 } 812 if (*string == '\0') { 813 underflow = 1; 814 goto done; 815 } 816 } 817 818 /* 819 * Perform the requested scanning operation. 820 */ 821 822 switch (op) { 823 case 's': 824 /* 825 * Scan a string up to width characters or whitespace. 826 */ 827 828 if (width == 0) { 829 width = (size_t) ~0; 830 } 831 end = string; 832 while (*end != '\0') { 833 offset = Tcl_UtfToUniChar(end, &sch); 834 if (Tcl_UniCharIsSpace(sch)) { 835 break; 836 } 837 end += offset; 838 if (--width == 0) { 839 break; 840 } 841 } 842 if (!(flags & SCAN_SUPPRESS)) { 843 objPtr = Tcl_NewStringObj(string, end-string); 844 Tcl_IncrRefCount(objPtr); 845 objs[objIndex++] = objPtr; 846 } 847 string = end; 848 break; 849 850 case '[': { 851 CharSet cset; 852 853 if (width == 0) { 854 width = (size_t) ~0; 855 } 856 end = string; 857 858 format = BuildCharSet(&cset, format); 859 while (*end != '\0') { 860 offset = Tcl_UtfToUniChar(end, &sch); 861 if (!CharInSet(&cset, (int)sch)) { 862 break; 863 } 864 end += offset; 865 if (--width == 0) { 866 break; 867 } 868 } 869 ReleaseCharSet(&cset); 870 871 if (string == end) { 872 /* 873 * Nothing matched the range, stop processing 874 */ 875 goto done; 876 } 877 if (!(flags & SCAN_SUPPRESS)) { 878 objPtr = Tcl_NewStringObj(string, end-string); 879 Tcl_IncrRefCount(objPtr); 880 objs[objIndex++] = objPtr; 881 } 882 string = end; 883 884 break; 885 } 886 case 'c': 887 /* 888 * Scan a single Unicode character. 889 */ 890 891 string += Tcl_UtfToUniChar(string, &sch); 892 if (!(flags & SCAN_SUPPRESS)) { 893 objPtr = Tcl_NewIntObj((int)sch); 894 Tcl_IncrRefCount(objPtr); 895 objs[objIndex++] = objPtr; 896 } 897 break; 898 899 case 'i': 900 /* 901 * Scan an unsigned or signed integer. 902 */ 903 904 if ((width == 0) || (width > sizeof(buf) - 1)) { 905 width = sizeof(buf) - 1; 906 } 907 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO; 908 for (end = buf; width > 0; width--) { 909 switch (*string) { 910 /* 911 * The 0 digit has special meaning at the beginning of 912 * a number. If we are unsure of the base, it 913 * indicates that we are in base 8 or base 16 (if it is 914 * followed by an 'x'). 915 * 916 * 8.1 - 8.3.4 incorrectly handled 0x... base-16 917 * cases for %x by not reading the 0x as the 918 * auto-prelude for base-16. [Bug #495213] 919 */ 920 case '0': 921 if (base == 0) { 922 base = 8; 923 flags |= SCAN_XOK; 924 } 925 if (base == 16) { 926 flags |= SCAN_XOK; 927 } 928 if (flags & SCAN_NOZERO) { 929 flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS 930 | SCAN_NOZERO); 931 } else { 932 flags &= ~(SCAN_SIGNOK | SCAN_XOK 933 | SCAN_NODIGITS); 934 } 935 goto addToInt; 936 937 case '1': case '2': case '3': case '4': 938 case '5': case '6': case '7': 939 if (base == 0) { 940 base = 10; 941 } 942 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS); 943 goto addToInt; 944 945 case '8': case '9': 946 if (base == 0) { 947 base = 10; 948 } 949 if (base <= 8) { 950 break; 951 } 952 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS); 953 goto addToInt; 954 955 case 'A': case 'B': case 'C': 956 case 'D': case 'E': case 'F': 957 case 'a': case 'b': case 'c': 958 case 'd': case 'e': case 'f': 959 if (base <= 10) { 960 break; 961 } 962 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS); 963 goto addToInt; 964 965 case '+': case '-': 966 if (flags & SCAN_SIGNOK) { 967 flags &= ~SCAN_SIGNOK; 968 goto addToInt; 969 } 970 break; 971 972 case 'x': case 'X': 973 if ((flags & SCAN_XOK) && (end == buf+1)) { 974 base = 16; 975 flags &= ~SCAN_XOK; 976 goto addToInt; 977 } 978 break; 979 } 980 981 /* 982 * We got an illegal character so we are done accumulating. 983 */ 984 985 break; 986 987 addToInt: 988 /* 989 * Add the character to the temporary buffer. 990 */ 991 992 *end++ = *string++; 993 if (*string == '\0') { 994 break; 995 } 996 } 997 998 /* 999 * Check to see if we need to back up because we only got a 1000 * sign or a trailing x after a 0. 1001 */ 1002 1003 if (flags & SCAN_NODIGITS) { 1004 if (*string == '\0') { 1005 underflow = 1; 1006 } 1007 goto done; 1008 } else if (end[-1] == 'x' || end[-1] == 'X') { 1009 end--; 1010 string--; 1011 } 1012 1013 1014 /* 1015 * Scan the value from the temporary buffer. If we are 1016 * returning a large unsigned value, we have to convert it back 1017 * to a string since Tcl only supports signed values. 1018 */ 1019 1020 if (!(flags & SCAN_SUPPRESS)) { 1021 *end = '\0'; 1022#ifndef TCL_WIDE_INT_IS_LONG 1023 if (flags & SCAN_LONGER) { 1024 wideValue = (Tcl_WideInt) (*lfn)(buf, NULL, base); 1025 if ((flags & SCAN_UNSIGNED) && (wideValue < 0)) { 1026 /* INTL: ISO digit */ 1027 sprintf(buf, "%" TCL_LL_MODIFIER "u", 1028 (Tcl_WideUInt)wideValue); 1029 objPtr = Tcl_NewStringObj(buf, -1); 1030 } else { 1031 objPtr = Tcl_NewWideIntObj(wideValue); 1032 } 1033 } else { 1034#endif /* !TCL_WIDE_INT_IS_LONG */ 1035 value = (long) (*fn)(buf, NULL, base); 1036 if ((flags & SCAN_UNSIGNED) && (value < 0)) { 1037 sprintf(buf, "%lu", value); /* INTL: ISO digit */ 1038 objPtr = Tcl_NewStringObj(buf, -1); 1039 } else if ((flags & SCAN_LONGER) 1040 || (unsigned long) value > UINT_MAX) { 1041 objPtr = Tcl_NewLongObj(value); 1042 } else { 1043 objPtr = Tcl_NewIntObj(value); 1044 } 1045#ifndef TCL_WIDE_INT_IS_LONG 1046 } 1047#endif 1048 Tcl_IncrRefCount(objPtr); 1049 objs[objIndex++] = objPtr; 1050 } 1051 1052 break; 1053 1054 case 'f': 1055 /* 1056 * Scan a floating point number 1057 */ 1058 1059 if ((width == 0) || (width > sizeof(buf) - 1)) { 1060 width = sizeof(buf) - 1; 1061 } 1062 flags &= ~SCAN_LONGER; 1063 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK; 1064 for (end = buf; width > 0; width--) { 1065 switch (*string) { 1066 case '0': case '1': case '2': case '3': 1067 case '4': case '5': case '6': case '7': 1068 case '8': case '9': 1069 flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS); 1070 goto addToFloat; 1071 case '+': case '-': 1072 if (flags & SCAN_SIGNOK) { 1073 flags &= ~SCAN_SIGNOK; 1074 goto addToFloat; 1075 } 1076 break; 1077 case '.': 1078 if (flags & SCAN_PTOK) { 1079 flags &= ~(SCAN_SIGNOK | SCAN_PTOK); 1080 goto addToFloat; 1081 } 1082 break; 1083 case 'e': case 'E': 1084 /* 1085 * An exponent is not allowed until there has 1086 * been at least one digit. 1087 */ 1088 1089 if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) 1090 == SCAN_EXPOK) { 1091 flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK)) 1092 | SCAN_SIGNOK | SCAN_NODIGITS; 1093 goto addToFloat; 1094 } 1095 break; 1096 } 1097 1098 /* 1099 * We got an illegal character so we are done accumulating. 1100 */ 1101 1102 break; 1103 1104 addToFloat: 1105 /* 1106 * Add the character to the temporary buffer. 1107 */ 1108 1109 *end++ = *string++; 1110 if (*string == '\0') { 1111 break; 1112 } 1113 } 1114 1115 /* 1116 * Check to see if we need to back up because we saw a 1117 * trailing 'e' or sign. 1118 */ 1119 1120 if (flags & SCAN_NODIGITS) { 1121 if (flags & SCAN_EXPOK) { 1122 /* 1123 * There were no digits at all so scanning has 1124 * failed and we are done. 1125 */ 1126 if (*string == '\0') { 1127 underflow = 1; 1128 } 1129 goto done; 1130 } 1131 1132 /* 1133 * We got a bad exponent ('e' and maybe a sign). 1134 */ 1135 1136 end--; 1137 string--; 1138 if (*end != 'e' && *end != 'E') { 1139 end--; 1140 string--; 1141 } 1142 } 1143 1144 /* 1145 * Scan the value from the temporary buffer. 1146 */ 1147 1148 if (!(flags & SCAN_SUPPRESS)) { 1149 double dvalue; 1150 *end = '\0'; 1151 dvalue = strtod(buf, NULL); 1152 objPtr = Tcl_NewDoubleObj(dvalue); 1153 Tcl_IncrRefCount(objPtr); 1154 objs[objIndex++] = objPtr; 1155 } 1156 break; 1157 } 1158 nconversions++; 1159 } 1160 1161 done: 1162 result = 0; 1163 code = TCL_OK; 1164 1165 if (numVars) { 1166 /* 1167 * In this case, variables were specified (classic scan) 1168 */ 1169 for (i = 0; i < totalVars; i++) { 1170 if (objs[i] != NULL) { 1171 Tcl_Obj *tmpPtr; 1172 1173 result++; 1174 tmpPtr = Tcl_ObjSetVar2(interp, objv[i+3], NULL, objs[i], 0); 1175 Tcl_DecrRefCount(objs[i]); 1176 if (tmpPtr == NULL) { 1177 Tcl_AppendStringsToObj(Tcl_GetObjResult(interp), 1178 "couldn't set variable \"", 1179 Tcl_GetString(objv[i+3]), "\"", (char *) NULL); 1180 code = TCL_ERROR; 1181 } 1182 } 1183 } 1184 } else { 1185 /* 1186 * Here no vars were specified, we want a list returned (inline scan) 1187 */ 1188 objPtr = Tcl_NewObj(); 1189 for (i = 0; i < totalVars; i++) { 1190 if (objs[i] != NULL) { 1191 Tcl_ListObjAppendElement(NULL, objPtr, objs[i]); 1192 Tcl_DecrRefCount(objs[i]); 1193 } else { 1194 /* 1195 * More %-specifiers than matching chars, so we 1196 * just spit out empty strings for these 1197 */ 1198 Tcl_ListObjAppendElement(NULL, objPtr, Tcl_NewObj()); 1199 } 1200 } 1201 } 1202 if (objs != NULL) { 1203 ckfree((char*) objs); 1204 } 1205 if (code == TCL_OK) { 1206 if (underflow && (nconversions == 0)) { 1207 if (numVars) { 1208 objPtr = Tcl_NewIntObj(-1); 1209 } else { 1210 if (objPtr) { 1211 Tcl_SetListObj(objPtr, 0, NULL); 1212 } else { 1213 objPtr = Tcl_NewObj(); 1214 } 1215 } 1216 } else if (numVars) { 1217 objPtr = Tcl_NewIntObj(result); 1218 } 1219 Tcl_SetObjResult(interp, objPtr); 1220 } 1221 return code; 1222} 1223