vfwscanf.c revision 128002
1/*-
2 * Copyright (c) 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Chris Torek.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#include <sys/cdefs.h>
38#if 0
39#if defined(LIBC_SCCS) && !defined(lint)
40static char sccsid[] = "@(#)vfscanf.c	8.1 (Berkeley) 6/4/93";
41#endif /* LIBC_SCCS and not lint */
42__FBSDID("FreeBSD: src/lib/libc/stdio/vfscanf.c,v 1.35 2004/01/31 23:16:09 das Exp ");
43#endif
44__FBSDID("$FreeBSD: head/lib/libc/stdio/vfwscanf.c 128002 2004-04-07 09:55:05Z tjr $");
45
46#include "namespace.h"
47#include <ctype.h>
48#include <inttypes.h>
49#include <stdio.h>
50#include <stdlib.h>
51#include <stddef.h>
52#include <stdarg.h>
53#include <string.h>
54#include <wchar.h>
55#include <wctype.h>
56#include "un-namespace.h"
57
58#include "libc_private.h"
59#include "local.h"
60
61#define FLOATING_POINT
62
63#ifdef FLOATING_POINT
64#include <locale.h>
65#endif
66
/* Capacity of the scratch buffer used for numeric conversions. */
enum {
	BUF		= 513	/* Maximum length of numeric string. */
};

/*
 * Bits kept in `flags' while a single conversion is processed.
 *
 * SIGNOK, NDIGITS, PFXOK, NZDIGITS and HAVESIGN are scratch state for
 * the integer scanner only; the others record length modifiers and
 * properties of the conversion specifier.
 */
enum {
	LONG		= 0x01,		/* l: long or double */
	LONGDBL		= 0x02,		/* L: long double */
	SHORT		= 0x04,		/* h: short */
	SUPPRESS	= 0x08,		/* *: suppress assignment */
	POINTER		= 0x10,		/* p: void * (as hex) */
	NOSKIP		= 0x20,		/* [ or c: do not skip blanks */
	SIGNOK		= 0x40,		/* +/- is (still) legal */
	NDIGITS		= 0x80,		/* no digits detected */
	PFXOK		= 0x100,	/* 0x prefix is (still) legal */
	NZDIGITS	= 0x200,	/* no zero digits detected */
	LONGLONG	= 0x400,	/* ll: long long (+ deprecated q: quad) */
	INTMAXT		= 0x800,	/* j: intmax_t */
	PTRDIFFT	= 0x1000,	/* t: ptrdiff_t */
	SIZET		= 0x2000,	/* z: size_t */
	SHORTSHORT	= 0x4000,	/* hh: char */
	UNSIGNED	= 0x8000,	/* %[oupxX] conversions */
	HAVESIGN	= 0x10000	/* sign detected */
};

/*
 * Conversion types.
 */
enum {
	CT_CHAR		= 0,	/* %c conversion */
	CT_CCL		= 1,	/* %[...] conversion */
	CT_STRING	= 2,	/* %s conversion */
	CT_INT		= 3,	/* %[dioupxX] conversion */
	CT_FLOAT	= 4	/* %[efgEFG] conversion */
};
static int parsefloat(FILE *fp, wchar_t *buf, wchar_t *end);

extern int __scanfdebug;

/*
 * Nonzero iff _c belongs to the current scanset.  Relies on the
 * expansion site having `ccls', `ccle' and `cclcompl' in scope: the
 * scanlist is [ccls, ccle) and cclcompl selects the complemented sense.
 */
#define	INCCL(_c)	\
	((wmemchr(ccls, (_c), ccle - ccls) == NULL) == (cclcompl != 0))
111
/*
 * MT-safe version.
 *
 * Locks fp, sets its orientation via ORIENT(fp, 1), runs the scanner
 * proper (__vfwscanf) and unlocks the stream again.  The scanner's
 * return value is passed back unchanged.
 */
int
vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap)
{
	int result;

	FLOCKFILE(fp);
	ORIENT(fp, 1);
	result = __vfwscanf(fp, fmt, ap);
	FUNLOCKFILE(fp);

	return (result);
}
126
127/*
128 * Non-MT-safe version.
129 */
130int
131__vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap)
132{
133	wint_t c;		/* character from format, or conversion */
134	size_t width;		/* field width, or 0 */
135	wchar_t *p;		/* points into all kinds of strings */
136	int n;			/* handy integer */
137	int flags;		/* flags as defined above */
138	wchar_t *p0;		/* saves original value of p when necessary */
139	int nassigned;		/* number of fields assigned */
140	int nconversions;	/* number of conversions */
141	int nread;		/* number of characters consumed from fp */
142	int base;		/* base argument to conversion function */
143	wchar_t buf[BUF];	/* buffer for numeric conversions */
144	const wchar_t *ccls;	/* character class start */
145	const wchar_t *ccle;	/* character class end */
146	int cclcompl;		/* ccl is complemented? */
147	wint_t wi;		/* handy wint_t */
148	char *mbp;		/* multibyte string pointer for %c %s %[ */
149	size_t nconv;		/* number of bytes in mb. conversion */
150	char mbbuf[MB_LEN_MAX];	/* temporary mb. character buffer */
151	static const mbstate_t initial;
152	mbstate_t mbs;
153
154	/* `basefix' is used to avoid `if' tests in the integer scanner */
155	static short basefix[17] =
156		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
157
158	nassigned = 0;
159	nconversions = 0;
160	nread = 0;
161	ccls = ccle = NULL;
162	for (;;) {
163		c = *fmt++;
164		if (c == 0)
165			return (nassigned);
166		if (iswspace(c)) {
167			while ((c = __fgetwc(fp)) != WEOF &&
168			    iswspace(c))
169				;
170			if (c != WEOF)
171				__ungetwc(c, fp);
172			continue;
173		}
174		if (c != '%')
175			goto literal;
176		width = 0;
177		flags = 0;
178		/*
179		 * switch on the format.  continue if done;
180		 * break once format type is derived.
181		 */
182again:		c = *fmt++;
183		switch (c) {
184		case '%':
185literal:
186			if ((wi = __fgetwc(fp)) == WEOF)
187				goto input_failure;
188			if (wi != c) {
189				__ungetwc(wi, fp);
190				goto input_failure;
191			}
192			nread++;
193			continue;
194
195		case '*':
196			flags |= SUPPRESS;
197			goto again;
198		case 'j':
199			flags |= INTMAXT;
200			goto again;
201		case 'l':
202			if (flags & LONG) {
203				flags &= ~LONG;
204				flags |= LONGLONG;
205			} else
206				flags |= LONG;
207			goto again;
208		case 'q':
209			flags |= LONGLONG;	/* not quite */
210			goto again;
211		case 't':
212			flags |= PTRDIFFT;
213			goto again;
214		case 'z':
215			flags |= SIZET;
216			goto again;
217		case 'L':
218			flags |= LONGDBL;
219			goto again;
220		case 'h':
221			if (flags & SHORT) {
222				flags &= ~SHORT;
223				flags |= SHORTSHORT;
224			} else
225				flags |= SHORT;
226			goto again;
227
228		case '0': case '1': case '2': case '3': case '4':
229		case '5': case '6': case '7': case '8': case '9':
230			width = width * 10 + c - '0';
231			goto again;
232
233		/*
234		 * Conversions.
235		 */
236		case 'd':
237			c = CT_INT;
238			base = 10;
239			break;
240
241		case 'i':
242			c = CT_INT;
243			base = 0;
244			break;
245
246		case 'o':
247			c = CT_INT;
248			flags |= UNSIGNED;
249			base = 8;
250			break;
251
252		case 'u':
253			c = CT_INT;
254			flags |= UNSIGNED;
255			base = 10;
256			break;
257
258		case 'X':
259		case 'x':
260			flags |= PFXOK;	/* enable 0x prefixing */
261			c = CT_INT;
262			flags |= UNSIGNED;
263			base = 16;
264			break;
265
266#ifdef FLOATING_POINT
267		case 'A': case 'E': case 'F': case 'G':
268		case 'a': case 'e': case 'f': case 'g':
269			c = CT_FLOAT;
270			break;
271#endif
272
273		case 'S':
274			flags |= LONG;
275			/* FALLTHROUGH */
276		case 's':
277			c = CT_STRING;
278			break;
279
280		case '[':
281			ccls = fmt;
282			if (*fmt == '^') {
283				cclcompl = 1;
284				fmt++;
285			} else
286				cclcompl = 0;
287			if (*fmt == ']')
288				fmt++;
289			while (*fmt != '\0' && *fmt != ']')
290				fmt++;
291			ccle = fmt;
292			fmt++;
293			flags |= NOSKIP;
294			c = CT_CCL;
295			break;
296
297		case 'C':
298			flags |= LONG;
299			/* FALLTHROUGH */
300		case 'c':
301			flags |= NOSKIP;
302			c = CT_CHAR;
303			break;
304
305		case 'p':	/* pointer format is like hex */
306			flags |= POINTER | PFXOK;
307			c = CT_INT;		/* assumes sizeof(uintmax_t) */
308			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
309			base = 16;
310			break;
311
312		case 'n':
313			nconversions++;
314			if (flags & SUPPRESS)	/* ??? */
315				continue;
316			if (flags & SHORTSHORT)
317				*va_arg(ap, char *) = nread;
318			else if (flags & SHORT)
319				*va_arg(ap, short *) = nread;
320			else if (flags & LONG)
321				*va_arg(ap, long *) = nread;
322			else if (flags & LONGLONG)
323				*va_arg(ap, long long *) = nread;
324			else if (flags & INTMAXT)
325				*va_arg(ap, intmax_t *) = nread;
326			else if (flags & SIZET)
327				*va_arg(ap, size_t *) = nread;
328			else if (flags & PTRDIFFT)
329				*va_arg(ap, ptrdiff_t *) = nread;
330			else
331				*va_arg(ap, int *) = nread;
332			continue;
333
334		default:
335			goto match_failure;
336
337		/*
338		 * Disgusting backwards compatibility hack.	XXX
339		 */
340		case '\0':	/* compat */
341			return (EOF);
342		}
343
344		/*
345		 * Consume leading white space, except for formats
346		 * that suppress this.
347		 */
348		if ((flags & NOSKIP) == 0) {
349			while ((wi = __fgetwc(fp)) != WEOF && iswspace(wi))
350				nread++;
351			if (wi == WEOF)
352				goto input_failure;
353			__ungetwc(wi, fp);
354		}
355
356		/*
357		 * Do the conversion.
358		 */
359		switch (c) {
360
361		case CT_CHAR:
362			/* scan arbitrary characters (sets NOSKIP) */
363			if (width == 0)
364				width = 1;
365			if (flags & LONG) {
366				if (!(flags & SUPPRESS))
367					p = va_arg(ap, wchar_t *);
368				n = 0;
369				while (width-- != 0 &&
370				    (wi = __fgetwc(fp)) != WEOF) {
371					if (!(flags & SUPPRESS))
372						*p++ = (wchar_t)wi;
373					n++;
374				}
375				if (n == 0)
376					goto input_failure;
377				nread += n;
378				if (!(flags & SUPPRESS))
379					nassigned++;
380			} else {
381				if (!(flags & SUPPRESS))
382					mbp = va_arg(ap, char *);
383				n = 0;
384				mbs = initial;
385				while (width != 0 &&
386				    (wi = __fgetwc(fp)) != WEOF) {
387					if (width >= MB_CUR_MAX &&
388					    !(flags & SUPPRESS)) {
389						nconv = wcrtomb(mbp, wi, &mbs);
390						if (nconv == (size_t)-1)
391							goto input_failure;
392					} else {
393						nconv = wcrtomb(mbbuf, wi,
394						    &mbs);
395						if (nconv == (size_t)-1)
396							goto input_failure;
397						if (nconv > width) {
398							__ungetwc(wi, fp);
399							break;
400						}
401						if (!(flags & SUPPRESS))
402							memcpy(mbp, mbbuf,
403							    nconv);
404					}
405					if (!(flags & SUPPRESS))
406						mbp += nconv;
407					width -= nconv;
408					n++;
409				}
410				if (n == 0)
411					goto input_failure;
412				nread += n;
413				if (!(flags & SUPPRESS))
414					nassigned++;
415			}
416			nconversions++;
417			break;
418
419		case CT_CCL:
420			/* scan a (nonempty) character class (sets NOSKIP) */
421			if (width == 0)
422				width = (size_t)~0;	/* `infinity' */
423			/* take only those things in the class */
424			if ((flags & SUPPRESS) && (flags & LONG)) {
425				n = 0;
426				while ((wi = __fgetwc(fp)) != WEOF &&
427				    width-- != 0 && INCCL(wi))
428					n++;
429				if (wi != WEOF)
430					__ungetwc(wi, fp);
431				if (n == 0)
432					goto match_failure;
433			} else if (flags & LONG) {
434				p0 = p = va_arg(ap, wchar_t *);
435				while ((wi = __fgetwc(fp)) != WEOF &&
436				    width-- != 0 && INCCL(wi))
437					*p++ = (wchar_t)wi;
438				if (wi != WEOF)
439					__ungetwc(wi, fp);
440				n = p - p0;
441				if (n == 0)
442					goto match_failure;
443				*p = 0;
444				nassigned++;
445			} else {
446				if (!(flags & SUPPRESS))
447					mbp = va_arg(ap, char *);
448				n = 0;
449				mbs = initial;
450				while ((wi = __fgetwc(fp)) != WEOF &&
451				    width != 0 && INCCL(wi)) {
452					if (width >= MB_CUR_MAX &&
453					   !(flags & SUPPRESS)) {
454						nconv = wcrtomb(mbp, wi, &mbs);
455						if (nconv == (size_t)-1)
456							goto input_failure;
457					} else {
458						nconv = wcrtomb(mbbuf, wi,
459						    &mbs);
460						if (nconv == (size_t)-1)
461							goto input_failure;
462						if (nconv > width)
463							break;
464						if (!(flags & SUPPRESS))
465							memcpy(mbp, mbbuf,
466							    nconv);
467					}
468					if (!(flags & SUPPRESS))
469						mbp += nconv;
470					width -= nconv;
471					n++;
472				}
473				if (wi != WEOF)
474					__ungetwc(wi, fp);
475				if (!(flags & SUPPRESS)) {
476					*mbp = 0;
477					nassigned++;
478				}
479			}
480			nread += n;
481			nconversions++;
482			break;
483
484		case CT_STRING:
485			/* like CCL, but zero-length string OK, & no NOSKIP */
486			if (width == 0)
487				width = (size_t)~0;
488			if ((flags & SUPPRESS) && (flags & LONG)) {
489				while ((wi = __fgetwc(fp)) != WEOF &&
490				    width-- != 0 &&
491				    !iswspace(wi))
492					nread++;
493				if (wi != WEOF)
494					__ungetwc(wi, fp);
495			} else if (flags & LONG) {
496				p0 = p = va_arg(ap, wchar_t *);
497				while ((wi = __fgetwc(fp)) != WEOF &&
498				    width-- != 0 &&
499				    !iswspace(wi)) {
500					*p++ = (wchar_t)wi;
501					nread++;
502				}
503				if (wi != WEOF)
504					__ungetwc(wi, fp);
505				*p = '\0';
506				nassigned++;
507			} else {
508				if (!(flags & SUPPRESS))
509					mbp = va_arg(ap, char *);
510				mbs = initial;
511				while ((wi = __fgetwc(fp)) != WEOF &&
512				    width != 0 &&
513				    !iswspace(wi)) {
514					if (width >= MB_CUR_MAX &&
515					    !(flags & SUPPRESS)) {
516						nconv = wcrtomb(mbp, wi, &mbs);
517						if (nconv == (size_t)-1)
518							goto input_failure;
519					} else {
520						nconv = wcrtomb(mbbuf, wi,
521						    &mbs);
522						if (nconv == (size_t)-1)
523							goto input_failure;
524						if (nconv > width)
525							break;
526						if (!(flags & SUPPRESS))
527							memcpy(mbp, mbbuf,
528							    nconv);
529					}
530					if (!(flags & SUPPRESS))
531						mbp += nconv;
532					width -= nconv;
533					nread++;
534				}
535				if (wi != WEOF)
536					__ungetwc(wi, fp);
537				if (!(flags & SUPPRESS)) {
538					*mbp = 0;
539					nassigned++;
540				}
541			}
542			nconversions++;
543			continue;
544
545		case CT_INT:
546			/* scan an integer as if by the conversion function */
547			if (width == 0 || width > sizeof(buf) /
548			    sizeof(*buf) - 1)
549				width = sizeof(buf) / sizeof(*buf) - 1;
550			flags |= SIGNOK | NDIGITS | NZDIGITS;
551			for (p = buf; width; width--) {
552				c = __fgetwc(fp);
553				/*
554				 * Switch on the character; `goto ok'
555				 * if we accept it as a part of number.
556				 */
557				switch (c) {
558
559				/*
560				 * The digit 0 is always legal, but is
561				 * special.  For %i conversions, if no
562				 * digits (zero or nonzero) have been
563				 * scanned (only signs), we will have
564				 * base==0.  In that case, we should set
565				 * it to 8 and enable 0x prefixing.
566				 * Also, if we have not scanned zero digits
567				 * before this, do not turn off prefixing
568				 * (someone else will turn it off if we
569				 * have scanned any nonzero digits).
570				 */
571				case '0':
572					if (base == 0) {
573						base = 8;
574						flags |= PFXOK;
575					}
576					if (flags & NZDIGITS)
577					    flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
578					else
579					    flags &= ~(SIGNOK|PFXOK|NDIGITS);
580					goto ok;
581
582				/* 1 through 7 always legal */
583				case '1': case '2': case '3':
584				case '4': case '5': case '6': case '7':
585					base = basefix[base];
586					flags &= ~(SIGNOK | PFXOK | NDIGITS);
587					goto ok;
588
589				/* digits 8 and 9 ok iff decimal or hex */
590				case '8': case '9':
591					base = basefix[base];
592					if (base <= 8)
593						break;	/* not legal here */
594					flags &= ~(SIGNOK | PFXOK | NDIGITS);
595					goto ok;
596
597				/* letters ok iff hex */
598				case 'A': case 'B': case 'C':
599				case 'D': case 'E': case 'F':
600				case 'a': case 'b': case 'c':
601				case 'd': case 'e': case 'f':
602					/* no need to fix base here */
603					if (base <= 10)
604						break;	/* not legal here */
605					flags &= ~(SIGNOK | PFXOK | NDIGITS);
606					goto ok;
607
608				/* sign ok only as first character */
609				case '+': case '-':
610					if (flags & SIGNOK) {
611						flags &= ~SIGNOK;
612						flags |= HAVESIGN;
613						goto ok;
614					}
615					break;
616
617				/*
618				 * x ok iff flag still set & 2nd char (or
619				 * 3rd char if we have a sign).
620				 */
621				case 'x': case 'X':
622					if (flags & PFXOK && p ==
623					    buf + 1 + !!(flags & HAVESIGN)) {
624						base = 16;	/* if %i */
625						flags &= ~PFXOK;
626						goto ok;
627					}
628					break;
629				}
630
631				/*
632				 * If we got here, c is not a legal character
633				 * for a number.  Stop accumulating digits.
634				 */
635				if (c != WEOF)
636					__ungetwc(c, fp);
637				break;
638		ok:
639				/*
640				 * c is legal: store it and look at the next.
641				 */
642				*p++ = (wchar_t)c;
643			}
644			/*
645			 * If we had only a sign, it is no good; push
646			 * back the sign.  If the number ends in `x',
647			 * it was [sign] '0' 'x', so push back the x
648			 * and treat it as [sign] '0'.
649			 */
650			if (flags & NDIGITS) {
651				if (p > buf)
652					__ungetwc(*--p, fp);
653				goto match_failure;
654			}
655			c = p[-1];
656			if (c == 'x' || c == 'X') {
657				--p;
658				__ungetwc(c, fp);
659			}
660			if ((flags & SUPPRESS) == 0) {
661				uintmax_t res;
662
663				*p = 0;
664				if ((flags & UNSIGNED) == 0)
665				    res = wcstoimax(buf, NULL, base);
666				else
667				    res = wcstoumax(buf, NULL, base);
668				if (flags & POINTER)
669					*va_arg(ap, void **) =
670							(void *)(uintptr_t)res;
671				else if (flags & SHORTSHORT)
672					*va_arg(ap, char *) = res;
673				else if (flags & SHORT)
674					*va_arg(ap, short *) = res;
675				else if (flags & LONG)
676					*va_arg(ap, long *) = res;
677				else if (flags & LONGLONG)
678					*va_arg(ap, long long *) = res;
679				else if (flags & INTMAXT)
680					*va_arg(ap, intmax_t *) = res;
681				else if (flags & PTRDIFFT)
682					*va_arg(ap, ptrdiff_t *) = res;
683				else if (flags & SIZET)
684					*va_arg(ap, size_t *) = res;
685				else
686					*va_arg(ap, int *) = res;
687				nassigned++;
688			}
689			nread += p - buf;
690			nconversions++;
691			break;
692
693#ifdef FLOATING_POINT
694		case CT_FLOAT:
695			/* scan a floating point number as if by strtod */
696			if (width == 0 || width > sizeof(buf) /
697			    sizeof(*buf) - 1)
698				width = sizeof(buf) / sizeof(*buf) - 1;
699			if ((width = parsefloat(fp, buf, buf + width)) == 0)
700				goto match_failure;
701			if ((flags & SUPPRESS) == 0) {
702				if (flags & LONGDBL) {
703					long double res = wcstold(buf, &p);
704					*va_arg(ap, long double *) = res;
705				} else if (flags & LONG) {
706					double res = wcstod(buf, &p);
707					*va_arg(ap, double *) = res;
708				} else {
709					float res = wcstof(buf, &p);
710					*va_arg(ap, float *) = res;
711				}
712				if (__scanfdebug && p - buf != width)
713					abort();
714				nassigned++;
715			}
716			nread += width;
717			nconversions++;
718			break;
719#endif /* FLOATING_POINT */
720		}
721	}
722input_failure:
723	return (nconversions != 0 ? nassigned : EOF);
724match_failure:
725	return (nassigned);
726}
727
728#ifdef FLOATING_POINT
/*
 * Read from fp the longest sequence of wide characters that forms a
 * floating point number: an optionally signed decimal or hexadecimal
 * number, "inf"/"infinity", or "nan" with an optional "(...)" suffix
 * (letters matched without regard to case).
 *
 * At most end - buf characters are stored starting at buf, followed by
 * a terminating nul, so buf needs room for one element more than that.
 * Characters that were read but turn out not to belong to the number
 * are pushed back onto fp.
 *
 * Returns the number of wide characters stored, excluding the nul;
 * 0 means the input did not begin with a valid number.
 */
static int
parsefloat(FILE *fp, wchar_t *buf, wchar_t *end)
{
	static const mbstate_t initial;
	mbstate_t mbs;
	size_t nconv;
	wchar_t *commit, *p;
	int infnanpos = 0;
	enum {
		S_START, S_GOTSIGN, S_INF, S_NAN, S_MAYBEHEX,
		S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
	} state = S_START;
	wint_t c;		/* wint_t so that WEOF is representable */
	wchar_t decpt;
	_Bool gotmantdig = 0, ishex = 0;

	/*
	 * The locale's radix character may be multibyte, so convert it
	 * properly rather than taking its first byte.  Fall back to '.'
	 * if it cannot be converted.
	 */
	mbs = initial;
	nconv = mbrtowc(&decpt, localeconv()->decimal_point, MB_CUR_MAX, &mbs);
	if (nconv == (size_t)-1 || nconv == (size_t)-2 || nconv == 0)
		decpt = '.';	/* failsafe */

	/*
	 * We advance commit to just past the current character whenever
	 * the string we have read so far constitutes a valid
	 * representation of a floating point number by itself.  At some
	 * point, the parse will complete or fail, and we will ungetc()
	 * back to the last commit point.  To ensure that the file offset
	 * gets updated properly, it is always necessary to read at least
	 * one character that doesn't match; thus, we can't short-circuit
	 * "infinity" or "nan(...)".
	 *
	 * commit is kept as a one-past-the-end pointer so that it never
	 * has to point before the start of buf.
	 */
	commit = buf;
	c = WEOF;
	for (p = buf; p < end; ) {
		if ((c = __fgetwc(fp)) == WEOF)
			break;
reswitch:
		switch (state) {
		case S_START:
			state = S_GOTSIGN;
			if (c == '-' || c == '+')
				break;
			else
				goto reswitch;
		case S_GOTSIGN:
			switch (c) {
			case '0':
				state = S_MAYBEHEX;
				commit = p + 1;
				break;
			case 'I':
			case 'i':
				state = S_INF;
				break;
			case 'N':
			case 'n':
				state = S_NAN;
				break;
			default:
				state = S_DIGITS;
				goto reswitch;
			}
			break;
		case S_INF:
			/* infnanpos indexes the letters after the 'i'. */
			if (infnanpos > 6 ||
			    (c != "nfinity"[infnanpos] &&
			     c != "NFINITY"[infnanpos]))
				goto parsedone;
			if (infnanpos == 1 || infnanpos == 6)
				commit = p + 1;	/* inf or infinity */
			infnanpos++;
			break;
		case S_NAN:
			switch (infnanpos) {
			case -1:	/* XXX kludge to deal with nan(...) */
				goto parsedone;
			case 0:
				if (c != 'A' && c != 'a')
					goto parsedone;
				break;
			case 1:
				if (c != 'N' && c != 'n')
					goto parsedone;
				else
					commit = p + 1;
				break;
			case 2:
				if (c != '(')
					goto parsedone;
				break;
			default:
				if (c == ')') {
					commit = p + 1;
					/* becomes -1 after the ++ below */
					infnanpos = -2;
				} else if (!iswalnum(c) && c != '_')
					goto parsedone;
				break;
			}
			infnanpos++;
			break;
		case S_MAYBEHEX:
			state = S_DIGITS;
			if (c == 'X' || c == 'x') {
				ishex = 1;
				break;
			} else {	/* we saw a '0', but no 'x' */
				gotmantdig = 1;
				goto reswitch;
			}
		case S_DIGITS:
			if ((ishex && iswxdigit(c)) || iswdigit(c))
				gotmantdig = 1;
			else {
				state = S_FRAC;
				if (c != decpt)
					goto reswitch;
			}
			if (gotmantdig)
				commit = p + 1;
			break;
		case S_FRAC:
			if (((c == 'E' || c == 'e') && !ishex) ||
			    ((c == 'P' || c == 'p') && ishex)) {
				if (!gotmantdig)
					goto parsedone;
				else
					state = S_EXP;
			} else if ((ishex && iswxdigit(c)) || iswdigit(c)) {
				commit = p + 1;
				gotmantdig = 1;
			} else
				goto parsedone;
			break;
		case S_EXP:
			state = S_EXPDIGITS;
			if (c == '-' || c == '+')
				break;
			else
				goto reswitch;
		case S_EXPDIGITS:
			if (iswdigit(c))
				commit = p + 1;
			else
				goto parsedone;
			break;
		default:
			abort();
		}
		/* c was accepted: store it; nothing is pending any more. */
		*p++ = (wchar_t)c;
		c = WEOF;
	}

parsedone:
	/* Return the rejected character, if any, to the stream ... */
	if (c != WEOF)
		__ungetwc(c, fp);
	/* ... followed by everything stored after the last commit point. */
	while (p > commit)
		__ungetwc(*--p, fp);
	*commit = '\0';
	return (commit - buf);
}
880#endif
881