1/*
2 * tclScan.c --
3 *
4 *	This file contains the implementation of the "scan" command.
5 *
6 * Copyright (c) 1998 by Scriptics Corporation.
7 *
8 * See the file "license.terms" for information on usage and redistribution
9 * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
10 *
11 * RCS: @(#) $Id: tclScan.c,v 1.12.2.2 2005/10/23 22:01:30 msofer Exp $
12 */
13
14#include "tclInt.h"
15/*
16 * For strtoll() and strtoull() declarations on some platforms...
17 */
18#include "tclPort.h"
19
20/*
21 * Flag values used by Tcl_ScanObjCmd.
22 */
23
24#define SCAN_NOSKIP	0x1		  /* Don't skip blanks. */
25#define SCAN_SUPPRESS	0x2		  /* Suppress assignment. */
26#define SCAN_UNSIGNED	0x4		  /* Read an unsigned value. */
27#define SCAN_WIDTH	0x8		  /* A width value was supplied. */
28
29#define SCAN_SIGNOK	0x10		  /* A +/- character is allowed. */
30#define SCAN_NODIGITS	0x20		  /* No digits have been scanned. */
31#define SCAN_NOZERO	0x40		  /* No zero digits have been scanned. */
32#define SCAN_XOK	0x80		  /* An 'x' is allowed. */
33#define SCAN_PTOK	0x100		  /* Decimal point is allowed. */
34#define SCAN_EXPOK	0x200		  /* An exponent is allowed. */
35
36#define SCAN_LONGER	0x400		  /* Asked for a wide value. */
37
38/*
39 * The following structure contains the information associated with
40 * a character set.
41 */
42
43typedef struct CharSet {
44    int exclude;		/* 1 if this is an exclusion set. */
45    int nchars;
46    Tcl_UniChar *chars;
47    int nranges;
48    struct Range {
49	Tcl_UniChar start;
50	Tcl_UniChar end;
51    } *ranges;
52} CharSet;
53
54/*
55 * Declarations for functions used only in this file.
56 */
57
58static char *	BuildCharSet _ANSI_ARGS_((CharSet *cset, char *format));
59static int	CharInSet _ANSI_ARGS_((CharSet *cset, int ch));
60static void	ReleaseCharSet _ANSI_ARGS_((CharSet *cset));
61static int	ValidateFormat _ANSI_ARGS_((Tcl_Interp *interp, char *format,
62		    int numVars, int *totalVars));
63
64/*
65 *----------------------------------------------------------------------
66 *
67 * BuildCharSet --
68 *
69 *	This function examines a character set format specification
70 *	and builds a CharSet containing the individual characters and
71 *	character ranges specified.
72 *
73 * Results:
74 *	Returns the next format position.
75 *
76 * Side effects:
77 *	Initializes the charset.
78 *
79 *----------------------------------------------------------------------
80 */
81
82static char *
83BuildCharSet(cset, format)
84    CharSet *cset;
85    char *format;		/* Points to first char of set. */
86{
87    Tcl_UniChar ch, start;
88    int offset, nranges;
89    char *end;
90
91    memset(cset, 0, sizeof(CharSet));
92
93    offset = Tcl_UtfToUniChar(format, &ch);
94    if (ch == '^') {
95	cset->exclude = 1;
96	format += offset;
97	offset = Tcl_UtfToUniChar(format, &ch);
98    }
99    end = format + offset;
100
101    /*
102     * Find the close bracket so we can overallocate the set.
103     */
104
105    if (ch == ']') {
106	end += Tcl_UtfToUniChar(end, &ch);
107    }
108    nranges = 0;
109    while (ch != ']') {
110	if (ch == '-') {
111	    nranges++;
112	}
113	end += Tcl_UtfToUniChar(end, &ch);
114    }
115
116    cset->chars = (Tcl_UniChar *) ckalloc(sizeof(Tcl_UniChar)
117	    * (end - format - 1));
118    if (nranges > 0) {
119	cset->ranges = (struct Range *) ckalloc(sizeof(struct Range)*nranges);
120    } else {
121	cset->ranges = NULL;
122    }
123
124    /*
125     * Now build the character set.
126     */
127
128    cset->nchars = cset->nranges = 0;
129    format += Tcl_UtfToUniChar(format, &ch);
130    start = ch;
131    if (ch == ']' || ch == '-') {
132	cset->chars[cset->nchars++] = ch;
133	format += Tcl_UtfToUniChar(format, &ch);
134    }
135    while (ch != ']') {
136	if (*format == '-') {
137	    /*
138	     * This may be the first character of a range, so don't add
139	     * it yet.
140	     */
141
142	    start = ch;
143	} else if (ch == '-') {
144	    /*
145	     * Check to see if this is the last character in the set, in which
146	     * case it is not a range and we should add the previous character
147	     * as well as the dash.
148	     */
149
150	    if (*format == ']') {
151		cset->chars[cset->nchars++] = start;
152		cset->chars[cset->nchars++] = ch;
153	    } else {
154		format += Tcl_UtfToUniChar(format, &ch);
155
156		/*
157		 * Check to see if the range is in reverse order.
158		 */
159
160		if (start < ch) {
161		    cset->ranges[cset->nranges].start = start;
162		    cset->ranges[cset->nranges].end = ch;
163		} else {
164		    cset->ranges[cset->nranges].start = ch;
165		    cset->ranges[cset->nranges].end = start;
166		}
167		cset->nranges++;
168	    }
169	} else {
170	    cset->chars[cset->nchars++] = ch;
171	}
172	format += Tcl_UtfToUniChar(format, &ch);
173    }
174    return format;
175}
176
177/*
178 *----------------------------------------------------------------------
179 *
180 * CharInSet --
181 *
182 *	Check to see if a character matches the given set.
183 *
184 * Results:
185 *	Returns non-zero if the character matches the given set.
186 *
187 * Side effects:
188 *	None.
189 *
190 *----------------------------------------------------------------------
191 */
192
193static int
194CharInSet(cset, c)
195    CharSet *cset;
196    int c;			/* Character to test, passed as int because
197				 * of non-ANSI prototypes. */
198{
199    Tcl_UniChar ch = (Tcl_UniChar) c;
200    int i, match = 0;
201    for (i = 0; i < cset->nchars; i++) {
202	if (cset->chars[i] == ch) {
203	    match = 1;
204	    break;
205	}
206    }
207    if (!match) {
208	for (i = 0; i < cset->nranges; i++) {
209	    if ((cset->ranges[i].start <= ch)
210		    && (ch <= cset->ranges[i].end)) {
211		match = 1;
212		break;
213	    }
214	}
215    }
216    return (cset->exclude ? !match : match);
217}
218
219/*
220 *----------------------------------------------------------------------
221 *
222 * ReleaseCharSet --
223 *
224 *	Free the storage associated with a character set.
225 *
226 * Results:
227 *	None.
228 *
229 * Side effects:
230 *	None.
231 *
232 *----------------------------------------------------------------------
233 */
234
235static void
236ReleaseCharSet(cset)
237    CharSet *cset;
238{
239    ckfree((char *)cset->chars);
240    if (cset->ranges) {
241	ckfree((char *)cset->ranges);
242    }
243}
244
245/*
246 *----------------------------------------------------------------------
247 *
248 * ValidateFormat --
249 *
250 *	Parse the format string and verify that it is properly formed
251 *	and that there are exactly enough variables on the command line.
252 *
253 * Results:
254 *	A standard Tcl result.
255 *
256 * Side effects:
257 *	May place an error in the interpreter result.
258 *
259 *----------------------------------------------------------------------
260 */
261
262static int
263ValidateFormat(interp, format, numVars, totalSubs)
264    Tcl_Interp *interp;		/* Current interpreter. */
265    char *format;		/* The format string. */
266    int numVars;		/* The number of variables passed to the
267				 * scan command. */
268    int *totalSubs;		/* The number of variables that will be
269				 * required. */
270{
271#define STATIC_LIST_SIZE 16
272    int gotXpg, gotSequential, value, i, flags;
273    char *end;
274    Tcl_UniChar ch;
275    int staticAssign[STATIC_LIST_SIZE];
276    int *nassign = staticAssign;
277    int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
278    char buf[TCL_UTF_MAX+1];
279
280    /*
281     * Initialize an array that records the number of times a variable
282     * is assigned to by the format string.  We use this to detect if
283     * a variable is multiply assigned or left unassigned.
284     */
285
286    if (numVars > nspace) {
287	nassign = (int*)ckalloc(sizeof(int) * numVars);
288	nspace = numVars;
289    }
290    for (i = 0; i < nspace; i++) {
291	nassign[i] = 0;
292    }
293
294    xpgSize = objIndex = gotXpg = gotSequential = 0;
295
296    while (*format != '\0') {
297	format += Tcl_UtfToUniChar(format, &ch);
298
299	flags = 0;
300
301	if (ch != '%') {
302	    continue;
303	}
304	format += Tcl_UtfToUniChar(format, &ch);
305	if (ch == '%') {
306	    continue;
307	}
308	if (ch == '*') {
309	    flags |= SCAN_SUPPRESS;
310	    format += Tcl_UtfToUniChar(format, &ch);
311	    goto xpgCheckDone;
312	}
313
314	if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
315	    /*
316	     * Check for an XPG3-style %n$ specification.  Note: there
317	     * must not be a mixture of XPG3 specs and non-XPG3 specs
318	     * in the same format string.
319	     */
320
321	    value = strtoul(format-1, &end, 10); /* INTL: "C" locale. */
322	    if (*end != '$') {
323		goto notXpg;
324	    }
325	    format = end+1;
326	    format += Tcl_UtfToUniChar(format, &ch);
327	    gotXpg = 1;
328	    if (gotSequential) {
329		goto mixedXPG;
330	    }
331	    objIndex = value - 1;
332	    if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
333		goto badIndex;
334	    } else if (numVars == 0) {
335		/*
336		 * In the case where no vars are specified, the user can
337		 * specify %9999$ legally, so we have to consider special
338		 * rules for growing the assign array.  'value' is
339		 * guaranteed to be > 0.
340		 */
341		xpgSize = (xpgSize > value) ? xpgSize : value;
342	    }
343	    goto xpgCheckDone;
344	}
345
346	notXpg:
347	gotSequential = 1;
348	if (gotXpg) {
349	    mixedXPG:
350	    Tcl_SetResult(interp,
351		    "cannot mix \"%\" and \"%n$\" conversion specifiers",
352		    TCL_STATIC);
353	    goto error;
354	}
355
356	xpgCheckDone:
357	/*
358	 * Parse any width specifier.
359	 */
360
361	if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
362	    value = strtoul(format-1, &format, 10); /* INTL: "C" locale. */
363	    flags |= SCAN_WIDTH;
364	    format += Tcl_UtfToUniChar(format, &ch);
365	}
366
367	/*
368	 * Handle any size specifier.
369	 */
370
371	switch (ch) {
372	case 'l':
373	case 'L':
374	    flags |= SCAN_LONGER;
375	case 'h':
376	    format += Tcl_UtfToUniChar(format, &ch);
377	}
378
379	if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
380	    goto badIndex;
381	}
382
383	/*
384	 * Handle the various field types.
385	 */
386
387	switch (ch) {
388	    case 'c':
389                if (flags & SCAN_WIDTH) {
390		    Tcl_SetResult(interp,
391			    "field width may not be specified in %c conversion",
392			    TCL_STATIC);
393		    goto error;
394                }
395		/*
396		 * Fall through!
397		 */
398	    case 'n':
399	    case 's':
400		if (flags & SCAN_LONGER) {
401		invalidLonger:
402		    buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
403		    Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
404			   "'l' modifier may not be specified in %", buf,
405			   " conversion", NULL);
406		    goto error;
407		}
408		/*
409		 * Fall through!
410		 */
411	    case 'd':
412	    case 'e':
413	    case 'f':
414	    case 'g':
415	    case 'i':
416	    case 'o':
417	    case 'u':
418	    case 'x':
419 		break;
420		/*
421		 * Bracket terms need special checking
422		 */
423	    case '[':
424		if (flags & SCAN_LONGER) {
425		    goto invalidLonger;
426		}
427		if (*format == '\0') {
428		    goto badSet;
429		}
430		format += Tcl_UtfToUniChar(format, &ch);
431		if (ch == '^') {
432		    if (*format == '\0') {
433			goto badSet;
434		    }
435		    format += Tcl_UtfToUniChar(format, &ch);
436		}
437		if (ch == ']') {
438		    if (*format == '\0') {
439			goto badSet;
440		    }
441		    format += Tcl_UtfToUniChar(format, &ch);
442		}
443		while (ch != ']') {
444		    if (*format == '\0') {
445			goto badSet;
446		    }
447		    format += Tcl_UtfToUniChar(format, &ch);
448		}
449		break;
450	    badSet:
451		Tcl_SetResult(interp, "unmatched [ in format string",
452			TCL_STATIC);
453		goto error;
454	    default:
455	    {
456		char buf[TCL_UTF_MAX+1];
457
458		buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
459		Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
460			"bad scan conversion character \"", buf, "\"", NULL);
461		goto error;
462	    }
463	}
464	if (!(flags & SCAN_SUPPRESS)) {
465	    if (objIndex >= nspace) {
466		/*
467		 * Expand the nassign buffer.  If we are using XPG specifiers,
468		 * make sure that we grow to a large enough size.  xpgSize is
469		 * guaranteed to be at least one larger than objIndex.
470		 */
471		value = nspace;
472		if (xpgSize) {
473		    nspace = xpgSize;
474		} else {
475		    nspace += STATIC_LIST_SIZE;
476		}
477		if (nassign == staticAssign) {
478		    nassign = (void *)ckalloc(nspace * sizeof(int));
479		    for (i = 0; i < STATIC_LIST_SIZE; ++i) {
480			nassign[i] = staticAssign[i];
481		    }
482		} else {
483		    nassign = (void *)ckrealloc((void *)nassign,
484			    nspace * sizeof(int));
485		}
486		for (i = value; i < nspace; i++) {
487		    nassign[i] = 0;
488		}
489	    }
490	    nassign[objIndex]++;
491	    objIndex++;
492	}
493    }
494
495    /*
496     * Verify that all of the variable were assigned exactly once.
497     */
498
499    if (numVars == 0) {
500	if (xpgSize) {
501	    numVars = xpgSize;
502	} else {
503	    numVars = objIndex;
504	}
505    }
506    if (totalSubs) {
507	*totalSubs = numVars;
508    }
509    for (i = 0; i < numVars; i++) {
510	if (nassign[i] > 1) {
511	    Tcl_SetResult(interp, "variable is assigned by multiple \"%n$\" conversion specifiers", TCL_STATIC);
512	    goto error;
513	} else if (!xpgSize && (nassign[i] == 0)) {
514	    /*
515	     * If the space is empty, and xpgSize is 0 (means XPG wasn't
516	     * used, and/or numVars != 0), then too many vars were given
517	     */
518	    Tcl_SetResult(interp, "variable is not assigned by any conversion specifiers", TCL_STATIC);
519	    goto error;
520	}
521    }
522
523    if (nassign != staticAssign) {
524	ckfree((char *)nassign);
525    }
526    return TCL_OK;
527
528    badIndex:
529    if (gotXpg) {
530	Tcl_SetResult(interp, "\"%n$\" argument index out of range",
531		TCL_STATIC);
532    } else {
533	Tcl_SetResult(interp,
534		"different numbers of variable names and field specifiers",
535		TCL_STATIC);
536    }
537
538    error:
539    if (nassign != staticAssign) {
540	ckfree((char *)nassign);
541    }
542    return TCL_ERROR;
543#undef STATIC_LIST_SIZE
544}
545
546/*
547 *----------------------------------------------------------------------
548 *
549 * Tcl_ScanObjCmd --
550 *
551 *	This procedure is invoked to process the "scan" Tcl command.
552 *	See the user documentation for details on what it does.
553 *
554 * Results:
555 *	A standard Tcl result.
556 *
557 * Side effects:
558 *	See the user documentation.
559 *
560 *----------------------------------------------------------------------
561 */
562
563	/* ARGSUSED */
564int
565Tcl_ScanObjCmd(dummy, interp, objc, objv)
566    ClientData dummy;    	/* Not used. */
567    Tcl_Interp *interp;		/* Current interpreter. */
568    int objc;			/* Number of arguments. */
569    Tcl_Obj *CONST objv[];	/* Argument objects. */
570{
571    char *format;
572    int numVars, nconversions, totalVars = -1;
573    int objIndex, offset, i, result, code;
574    long value;
575    char *string, *end, *baseString;
576    char op = 0;
577    int base = 0;
578    int underflow = 0;
579    size_t width;
580    long (*fn)() = NULL;
581#ifndef TCL_WIDE_INT_IS_LONG
582    Tcl_WideInt (*lfn)() = NULL;
583    Tcl_WideInt wideValue;
584#endif
585    Tcl_UniChar ch, sch;
586    Tcl_Obj **objs = NULL, *objPtr = NULL;
587    int flags;
588    char buf[513];			  /* Temporary buffer to hold scanned
589					   * number strings before they are
590					   * passed to strtoul. */
591
592    if (objc < 3) {
593        Tcl_WrongNumArgs(interp, 1, objv,
594		"string format ?varName varName ...?");
595	return TCL_ERROR;
596    }
597
598    format = Tcl_GetStringFromObj(objv[2], NULL);
599    numVars = objc-3;
600
601    /*
602     * Check for errors in the format string.
603     */
604
605    if (ValidateFormat(interp, format, numVars, &totalVars) == TCL_ERROR) {
606	return TCL_ERROR;
607    }
608
609    /*
610     * Allocate space for the result objects.
611     */
612
613    if (totalVars > 0) {
614	objs = (Tcl_Obj **) ckalloc(sizeof(Tcl_Obj*) * totalVars);
615	for (i = 0; i < totalVars; i++) {
616	    objs[i] = NULL;
617	}
618    }
619
620    string = Tcl_GetStringFromObj(objv[1], NULL);
621    baseString = string;
622
623    /*
624     * Iterate over the format string filling in the result objects until
625     * we reach the end of input, the end of the format string, or there
626     * is a mismatch.
627     */
628
629    objIndex = 0;
630    nconversions = 0;
631    while (*format != '\0') {
632	format += Tcl_UtfToUniChar(format, &ch);
633
634	flags = 0;
635
636	/*
637	 * If we see whitespace in the format, skip whitespace in the string.
638	 */
639
640	if (Tcl_UniCharIsSpace(ch)) {
641	    offset = Tcl_UtfToUniChar(string, &sch);
642	    while (Tcl_UniCharIsSpace(sch)) {
643		if (*string == '\0') {
644		    goto done;
645		}
646		string += offset;
647		offset = Tcl_UtfToUniChar(string, &sch);
648	    }
649	    continue;
650	}
651
652	if (ch != '%') {
653	    literal:
654	    if (*string == '\0') {
655		underflow = 1;
656		goto done;
657	    }
658	    string += Tcl_UtfToUniChar(string, &sch);
659	    if (ch != sch) {
660		goto done;
661	    }
662	    continue;
663	}
664
665	format += Tcl_UtfToUniChar(format, &ch);
666	if (ch == '%') {
667	    goto literal;
668	}
669
670	/*
671	 * Check for assignment suppression ('*') or an XPG3-style
672	 * assignment ('%n$').
673	 */
674
675	if (ch == '*') {
676	    flags |= SCAN_SUPPRESS;
677	    format += Tcl_UtfToUniChar(format, &ch);
678	} else if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
679	    value = strtoul(format-1, &end, 10); /* INTL: "C" locale. */
680	    if (*end == '$') {
681		format = end+1;
682		format += Tcl_UtfToUniChar(format, &ch);
683		objIndex = (int) value - 1;
684	    }
685	}
686
687	/*
688	 * Parse any width specifier.
689	 */
690
691	if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
692	    width = strtoul(format-1, &format, 10); /* INTL: "C" locale. */
693	    format += Tcl_UtfToUniChar(format, &ch);
694	} else {
695	    width = 0;
696	}
697
698	/*
699	 * Handle any size specifier.
700	 */
701
702	switch (ch) {
703	case 'l':
704	case 'L':
705	    flags |= SCAN_LONGER;
706	    /*
707	     * Fall through so we skip to the next character.
708	     */
709	case 'h':
710	    format += Tcl_UtfToUniChar(format, &ch);
711	}
712
713	/*
714	 * Handle the various field types.
715	 */
716
717	switch (ch) {
718	    case 'n':
719		if (!(flags & SCAN_SUPPRESS)) {
720		    objPtr = Tcl_NewIntObj(string - baseString);
721		    Tcl_IncrRefCount(objPtr);
722		    objs[objIndex++] = objPtr;
723		}
724		nconversions++;
725		continue;
726
727	    case 'd':
728		op = 'i';
729		base = 10;
730		fn = (long (*)())strtol;
731#ifndef TCL_WIDE_INT_IS_LONG
732		lfn = (Tcl_WideInt (*)())strtoll;
733#endif
734		break;
735	    case 'i':
736		op = 'i';
737		base = 0;
738		fn = (long (*)())strtol;
739#ifndef TCL_WIDE_INT_IS_LONG
740		lfn = (Tcl_WideInt (*)())strtoll;
741#endif
742		break;
743	    case 'o':
744		op = 'i';
745		base = 8;
746		fn = (long (*)())strtoul;
747#ifndef TCL_WIDE_INT_IS_LONG
748		lfn = (Tcl_WideInt (*)())strtoull;
749#endif
750		break;
751	    case 'x':
752		op = 'i';
753		base = 16;
754		fn = (long (*)())strtoul;
755#ifndef TCL_WIDE_INT_IS_LONG
756		lfn = (Tcl_WideInt (*)())strtoull;
757#endif
758		break;
759	    case 'u':
760		op = 'i';
761		base = 10;
762		flags |= SCAN_UNSIGNED;
763		fn = (long (*)())strtoul;
764#ifndef TCL_WIDE_INT_IS_LONG
765		lfn = (Tcl_WideInt (*)())strtoull;
766#endif
767		break;
768
769	    case 'f':
770	    case 'e':
771	    case 'g':
772		op = 'f';
773		break;
774
775	    case 's':
776		op = 's';
777		break;
778
779	    case 'c':
780		op = 'c';
781		flags |= SCAN_NOSKIP;
782		break;
783	    case '[':
784		op = '[';
785		flags |= SCAN_NOSKIP;
786		break;
787	}
788
789	/*
790	 * At this point, we will need additional characters from the
791	 * string to proceed.
792	 */
793
794	if (*string == '\0') {
795	    underflow = 1;
796	    goto done;
797	}
798
799	/*
800	 * Skip any leading whitespace at the beginning of a field unless
801	 * the format suppresses this behavior.
802	 */
803
804	if (!(flags & SCAN_NOSKIP)) {
805	    while (*string != '\0') {
806		offset = Tcl_UtfToUniChar(string, &sch);
807		if (!Tcl_UniCharIsSpace(sch)) {
808		    break;
809		}
810		string += offset;
811	    }
812	    if (*string == '\0') {
813		underflow = 1;
814		goto done;
815	    }
816	}
817
818	/*
819	 * Perform the requested scanning operation.
820	 */
821
822	switch (op) {
823	    case 's':
824		/*
825		 * Scan a string up to width characters or whitespace.
826		 */
827
828		if (width == 0) {
829		    width = (size_t) ~0;
830		}
831		end = string;
832		while (*end != '\0') {
833		    offset = Tcl_UtfToUniChar(end, &sch);
834		    if (Tcl_UniCharIsSpace(sch)) {
835			break;
836		    }
837		    end += offset;
838		    if (--width == 0) {
839			break;
840		    }
841		}
842		if (!(flags & SCAN_SUPPRESS)) {
843		    objPtr = Tcl_NewStringObj(string, end-string);
844		    Tcl_IncrRefCount(objPtr);
845		    objs[objIndex++] = objPtr;
846		}
847		string = end;
848		break;
849
850	    case '[': {
851		CharSet cset;
852
853		if (width == 0) {
854		    width = (size_t) ~0;
855		}
856		end = string;
857
858		format = BuildCharSet(&cset, format);
859		while (*end != '\0') {
860		    offset = Tcl_UtfToUniChar(end, &sch);
861		    if (!CharInSet(&cset, (int)sch)) {
862			break;
863		    }
864		    end += offset;
865		    if (--width == 0) {
866			break;
867		    }
868		}
869		ReleaseCharSet(&cset);
870
871		if (string == end) {
872		    /*
873		     * Nothing matched the range, stop processing
874		     */
875		    goto done;
876		}
877		if (!(flags & SCAN_SUPPRESS)) {
878		    objPtr = Tcl_NewStringObj(string, end-string);
879		    Tcl_IncrRefCount(objPtr);
880		    objs[objIndex++] = objPtr;
881		}
882		string = end;
883
884		break;
885	    }
886	    case 'c':
887		/*
888		 * Scan a single Unicode character.
889		 */
890
891		string += Tcl_UtfToUniChar(string, &sch);
892		if (!(flags & SCAN_SUPPRESS)) {
893		    objPtr = Tcl_NewIntObj((int)sch);
894		    Tcl_IncrRefCount(objPtr);
895		    objs[objIndex++] = objPtr;
896		}
897		break;
898
899	    case 'i':
900		/*
901		 * Scan an unsigned or signed integer.
902		 */
903
904		if ((width == 0) || (width > sizeof(buf) - 1)) {
905		    width = sizeof(buf) - 1;
906		}
907		flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
908		for (end = buf; width > 0; width--) {
909		    switch (*string) {
910			/*
911			 * The 0 digit has special meaning at the beginning of
912			 * a number.  If we are unsure of the base, it
913			 * indicates that we are in base 8 or base 16 (if it is
914			 * followed by an 'x').
915			 *
916			 * 8.1 - 8.3.4 incorrectly handled 0x... base-16
917			 * cases for %x by not reading the 0x as the
918			 * auto-prelude for base-16. [Bug #495213]
919			 */
920			case '0':
921			    if (base == 0) {
922				base = 8;
923				flags |= SCAN_XOK;
924			    }
925			    if (base == 16) {
926				flags |= SCAN_XOK;
927			    }
928			    if (flags & SCAN_NOZERO) {
929				flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS
930					| SCAN_NOZERO);
931			    } else {
932				flags &= ~(SCAN_SIGNOK | SCAN_XOK
933					| SCAN_NODIGITS);
934			    }
935			    goto addToInt;
936
937			case '1': case '2': case '3': case '4':
938			case '5': case '6': case '7':
939			    if (base == 0) {
940				base = 10;
941			    }
942			    flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
943			    goto addToInt;
944
945			case '8': case '9':
946			    if (base == 0) {
947				base = 10;
948			    }
949			    if (base <= 8) {
950				break;
951			    }
952			    flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
953			    goto addToInt;
954
955			case 'A': case 'B': case 'C':
956			case 'D': case 'E': case 'F':
957			case 'a': case 'b': case 'c':
958			case 'd': case 'e': case 'f':
959			    if (base <= 10) {
960				break;
961			    }
962			    flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
963			    goto addToInt;
964
965			case '+': case '-':
966			    if (flags & SCAN_SIGNOK) {
967				flags &= ~SCAN_SIGNOK;
968				goto addToInt;
969			    }
970			    break;
971
972			case 'x': case 'X':
973			    if ((flags & SCAN_XOK) && (end == buf+1)) {
974				base = 16;
975				flags &= ~SCAN_XOK;
976				goto addToInt;
977			    }
978			    break;
979		    }
980
981		    /*
982		     * We got an illegal character so we are done accumulating.
983		     */
984
985		    break;
986
987		    addToInt:
988		    /*
989		     * Add the character to the temporary buffer.
990		     */
991
992		    *end++ = *string++;
993		    if (*string == '\0') {
994			break;
995		    }
996		}
997
998		/*
999		 * Check to see if we need to back up because we only got a
1000		 * sign or a trailing x after a 0.
1001		 */
1002
1003		if (flags & SCAN_NODIGITS) {
1004		    if (*string == '\0') {
1005			underflow = 1;
1006		    }
1007		    goto done;
1008		} else if (end[-1] == 'x' || end[-1] == 'X') {
1009		    end--;
1010		    string--;
1011		}
1012
1013
1014		/*
1015		 * Scan the value from the temporary buffer.  If we are
1016		 * returning a large unsigned value, we have to convert it back
1017		 * to a string since Tcl only supports signed values.
1018		 */
1019
1020		if (!(flags & SCAN_SUPPRESS)) {
1021		    *end = '\0';
1022#ifndef TCL_WIDE_INT_IS_LONG
1023		    if (flags & SCAN_LONGER) {
1024			wideValue = (Tcl_WideInt) (*lfn)(buf, NULL, base);
1025			if ((flags & SCAN_UNSIGNED) && (wideValue < 0)) {
1026			    /* INTL: ISO digit */
1027			    sprintf(buf, "%" TCL_LL_MODIFIER "u",
1028				    (Tcl_WideUInt)wideValue);
1029			    objPtr = Tcl_NewStringObj(buf, -1);
1030			} else {
1031			    objPtr = Tcl_NewWideIntObj(wideValue);
1032			}
1033		    } else {
1034#endif /* !TCL_WIDE_INT_IS_LONG */
1035			value = (long) (*fn)(buf, NULL, base);
1036			if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1037			    sprintf(buf, "%lu", value); /* INTL: ISO digit */
1038			    objPtr = Tcl_NewStringObj(buf, -1);
1039			} else if ((flags & SCAN_LONGER)
1040				|| (unsigned long) value > UINT_MAX) {
1041			    objPtr = Tcl_NewLongObj(value);
1042			} else {
1043			    objPtr = Tcl_NewIntObj(value);
1044			}
1045#ifndef TCL_WIDE_INT_IS_LONG
1046		    }
1047#endif
1048		    Tcl_IncrRefCount(objPtr);
1049		    objs[objIndex++] = objPtr;
1050		}
1051
1052		break;
1053
1054	    case 'f':
1055		/*
1056		 * Scan a floating point number
1057		 */
1058
1059		if ((width == 0) || (width > sizeof(buf) - 1)) {
1060		    width = sizeof(buf) - 1;
1061		}
1062		flags &= ~SCAN_LONGER;
1063		flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1064		for (end = buf; width > 0; width--) {
1065		    switch (*string) {
1066			case '0': case '1': case '2': case '3':
1067			case '4': case '5': case '6': case '7':
1068			case '8': case '9':
1069			    flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1070			    goto addToFloat;
1071			case '+': case '-':
1072			    if (flags & SCAN_SIGNOK) {
1073				flags &= ~SCAN_SIGNOK;
1074				goto addToFloat;
1075			    }
1076			    break;
1077			case '.':
1078			    if (flags & SCAN_PTOK) {
1079				flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1080				goto addToFloat;
1081			    }
1082			    break;
1083			case 'e': case 'E':
1084			    /*
1085			     * An exponent is not allowed until there has
1086			     * been at least one digit.
1087			     */
1088
1089			    if ((flags & (SCAN_NODIGITS | SCAN_EXPOK))
1090				    == SCAN_EXPOK) {
1091				flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1092				    | SCAN_SIGNOK | SCAN_NODIGITS;
1093				goto addToFloat;
1094			    }
1095			    break;
1096		    }
1097
1098		    /*
1099		     * We got an illegal character so we are done accumulating.
1100		     */
1101
1102		    break;
1103
1104		    addToFloat:
1105		    /*
1106		     * Add the character to the temporary buffer.
1107		     */
1108
1109		    *end++ = *string++;
1110		    if (*string == '\0') {
1111			break;
1112		    }
1113		}
1114
1115		/*
1116		 * Check to see if we need to back up because we saw a
1117		 * trailing 'e' or sign.
1118		 */
1119
1120		if (flags & SCAN_NODIGITS) {
1121		    if (flags & SCAN_EXPOK) {
1122			/*
1123			 * There were no digits at all so scanning has
1124			 * failed and we are done.
1125			 */
1126			if (*string == '\0') {
1127			    underflow = 1;
1128			}
1129			goto done;
1130		    }
1131
1132		    /*
1133		     * We got a bad exponent ('e' and maybe a sign).
1134		     */
1135
1136		    end--;
1137		    string--;
1138		    if (*end != 'e' && *end != 'E') {
1139			end--;
1140			string--;
1141		    }
1142		}
1143
1144		/*
1145		 * Scan the value from the temporary buffer.
1146		 */
1147
1148		if (!(flags & SCAN_SUPPRESS)) {
1149		    double dvalue;
1150		    *end = '\0';
1151		    dvalue = strtod(buf, NULL);
1152		    objPtr = Tcl_NewDoubleObj(dvalue);
1153		    Tcl_IncrRefCount(objPtr);
1154		    objs[objIndex++] = objPtr;
1155		}
1156		break;
1157	}
1158	nconversions++;
1159    }
1160
1161    done:
1162    result = 0;
1163    code = TCL_OK;
1164
1165    if (numVars) {
1166	/*
1167	 * In this case, variables were specified (classic scan)
1168	 */
1169	for (i = 0; i < totalVars; i++) {
1170	    if (objs[i] != NULL) {
1171		Tcl_Obj *tmpPtr;
1172
1173		result++;
1174		tmpPtr = Tcl_ObjSetVar2(interp, objv[i+3], NULL, objs[i], 0);
1175		Tcl_DecrRefCount(objs[i]);
1176		if (tmpPtr == NULL) {
1177		    Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
1178			    "couldn't set variable \"",
1179			    Tcl_GetString(objv[i+3]), "\"", (char *) NULL);
1180		    code = TCL_ERROR;
1181		}
1182	    }
1183	}
1184    } else {
1185	/*
1186	 * Here no vars were specified, we want a list returned (inline scan)
1187	 */
1188	objPtr = Tcl_NewObj();
1189	for (i = 0; i < totalVars; i++) {
1190	    if (objs[i] != NULL) {
1191		Tcl_ListObjAppendElement(NULL, objPtr, objs[i]);
1192		Tcl_DecrRefCount(objs[i]);
1193	    } else {
1194		/*
1195		 * More %-specifiers than matching chars, so we
1196		 * just spit out empty strings for these
1197		 */
1198		Tcl_ListObjAppendElement(NULL, objPtr, Tcl_NewObj());
1199	    }
1200	}
1201    }
1202    if (objs != NULL) {
1203	ckfree((char*) objs);
1204    }
1205    if (code == TCL_OK) {
1206	if (underflow && (nconversions == 0)) {
1207	    if (numVars) {
1208		objPtr = Tcl_NewIntObj(-1);
1209	    } else {
1210		if (objPtr) {
1211		    Tcl_SetListObj(objPtr, 0, NULL);
1212		} else {
1213		    objPtr = Tcl_NewObj();
1214		}
1215	    }
1216	} else if (numVars) {
1217	    objPtr = Tcl_NewIntObj(result);
1218	}
1219	Tcl_SetObjResult(interp, objPtr);
1220    }
1221    return code;
1222}
1223