vfwscanf.c revision 117249
1/*-
2 * Copyright (c) 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Chris Torek.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#include <sys/cdefs.h>
38#if 0
39#if defined(LIBC_SCCS) && !defined(lint)
40static char sccsid[] = "@(#)vfscanf.c	8.1 (Berkeley) 6/4/93";
41#endif /* LIBC_SCCS and not lint */
42__FBSDID("FreeBSD: src/lib/libc/stdio/vfscanf.c,v 1.32 2003/06/28 09:03:05 das Exp ");
43#endif
44__FBSDID("$FreeBSD: head/lib/libc/stdio/vfwscanf.c 117249 2003-07-05 02:35:06Z tjr $");
45
46#include "namespace.h"
47#include <ctype.h>
48#include <inttypes.h>
49#include <stdio.h>
50#include <stdlib.h>
51#include <stddef.h>
52#include <stdarg.h>
53#include <string.h>
54#include <wchar.h>
55#include <wctype.h>
56#include "un-namespace.h"
57
58#include "libc_private.h"
59#include "local.h"
60
61#define FLOATING_POINT
62
63#ifdef FLOATING_POINT
64#include <locale.h>
65#endif
66
67#define	BUF		513	/* Size of the numeric scan buffer buf[], in wchar_t elements. */
68
69/*
70 * Flags used during conversion.  Each flag is a distinct bit of `flags'.
71 */
72#define	LONG		0x01	/* l: long or double; also set by %C and %S */
73#define	LONGDBL		0x02	/* L: long double */
74#define	SHORT		0x04	/* h: short */
75#define	SUPPRESS	0x08	/* *: suppress assignment */
76#define	POINTER		0x10	/* p: void * (as hex) */
77#define	NOSKIP		0x20	/* [ or c: do not skip leading white space */
78#define	LONGLONG	0x400	/* ll: long long (+ deprecated q: quad) */
79#define	INTMAXT		0x800	/* j: intmax_t */
80#define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
81#define	SIZET		0x2000	/* z: size_t */
82#define	SHORTSHORT	0x4000	/* hh: char */
83#define	UNSIGNED	0x8000	/* %[oupxX] conversions */
84
85/*
86 * The following are used in integral conversions only:
87 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS
88 */
89#define	SIGNOK		0x40	/* +/- is (still) legal */
90#define	NDIGITS		0x80	/* no digits detected (cleared by the first digit) */
91#define	PFXOK		0x100	/* 0x prefix is (still) legal */
92#define	NZDIGITS	0x200	/* no zero digits detected (cleared by the first '0') */
93
94/*
95 * Conversion types.  The format character is replaced by one of these.
96 */
97#define	CT_CHAR		0	/* %c and %C conversion */
98#define	CT_CCL		1	/* %[...] conversion */
99#define	CT_STRING	2	/* %s and %S conversion */
100#define	CT_INT		3	/* %[dioupxX] conversion */
101#define	CT_FLOAT	4	/* %[aefgAEFG] conversion */
102
103static int parsefloat(FILE *, wchar_t *, wchar_t *);
104
105extern int __scanfdebug;
106
/*
 * True if _c belongs to the current character class: the set
 * [ccls, ccle) when cclcompl is zero, its complement otherwise.
 * Relies on the locals ccls, ccle and cclcompl of the caller.
 */
#define	INCCL(_c)	\
	((wmemchr(ccls, (_c), ccle - ccls) == NULL) == (cclcompl != 0))
110
/*
 * MT-safe version: locks the stream, marks it wide-oriented, runs
 * __vfwscanf() and hands back its result after unlocking.
 */
int
vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap)
{
	int nmatched;

	FLOCKFILE(fp);
	ORIENT(fp, 1);
	nmatched = __vfwscanf(fp, fmt, ap);
	FUNLOCKFILE(fp);

	return (nmatched);
}
125
126/*
127 * Non-MT-safe version.
128 */
129int
130__vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap)
131{
132	wint_t c;		/* character from format, or conversion */
133	size_t width;		/* field width, or 0 */
134	wchar_t *p;		/* points into all kinds of strings */
135	int n;			/* handy integer */
136	int flags;		/* flags as defined above */
137	wchar_t *p0;		/* saves original value of p when necessary */
138	int nassigned;		/* number of fields assigned */
139	int nconversions;	/* number of conversions */
140	int nread;		/* number of characters consumed from fp */
141	int base;		/* base argument to conversion function */
142	wchar_t buf[BUF];	/* buffer for numeric conversions */
143	const wchar_t *ccls;	/* character class start */
144	const wchar_t *ccle;	/* character class end */
145	int cclcompl;		/* ccl is complemented? */
146	wint_t wi;		/* handy wint_t */
147	char *mbp;		/* multibyte string pointer for %c %s %[ */
148	size_t nconv;		/* number of bytes in mb. conversion */
149	mbstate_t mbs;		/* multibyte state */
150	char mbbuf[MB_LEN_MAX];	/* temporary mb. character buffer */
151
152	/* `basefix' is used to avoid `if' tests in the integer scanner */
153	static short basefix[17] =
154		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
155
156	nassigned = 0;
157	nconversions = 0;
158	nread = 0;
159	ccls = ccle = NULL;
160	for (;;) {
161		c = *fmt++;
162		if (c == 0)
163			return (nassigned);
164		if (iswspace(c)) {
165			while ((c = __fgetwc(fp)) != WEOF &&
166			    iswspace(c))
167				;
168			if (c != WEOF)
169				__ungetwc(c, fp);
170			continue;
171		}
172		if (c != '%')
173			goto literal;
174		width = 0;
175		flags = 0;
176		/*
177		 * switch on the format.  continue if done;
178		 * break once format type is derived.
179		 */
180again:		c = *fmt++;
181		switch (c) {
182		case '%':
183literal:
184			if ((wi = __fgetwc(fp)) == WEOF)
185				goto input_failure;
186			if (wi != c) {
187				__ungetwc(wi, fp);
188				goto input_failure;
189			}
190			nread++;
191			continue;
192
193		case '*':
194			flags |= SUPPRESS;
195			goto again;
196		case 'j':
197			flags |= INTMAXT;
198			goto again;
199		case 'l':
200			if (flags & LONG) {
201				flags &= ~LONG;
202				flags |= LONGLONG;
203			} else
204				flags |= LONG;
205			goto again;
206		case 'q':
207			flags |= LONGLONG;	/* not quite */
208			goto again;
209		case 't':
210			flags |= PTRDIFFT;
211			goto again;
212		case 'z':
213			flags |= SIZET;
214			goto again;
215		case 'L':
216			flags |= LONGDBL;
217			goto again;
218		case 'h':
219			if (flags & SHORT) {
220				flags &= ~SHORT;
221				flags |= SHORTSHORT;
222			} else
223				flags |= SHORT;
224			goto again;
225
226		case '0': case '1': case '2': case '3': case '4':
227		case '5': case '6': case '7': case '8': case '9':
228			width = width * 10 + c - '0';
229			goto again;
230
231		/*
232		 * Conversions.
233		 */
234		case 'd':
235			c = CT_INT;
236			base = 10;
237			break;
238
239		case 'i':
240			c = CT_INT;
241			base = 0;
242			break;
243
244		case 'o':
245			c = CT_INT;
246			flags |= UNSIGNED;
247			base = 8;
248			break;
249
250		case 'u':
251			c = CT_INT;
252			flags |= UNSIGNED;
253			base = 10;
254			break;
255
256		case 'X':
257		case 'x':
258			flags |= PFXOK;	/* enable 0x prefixing */
259			c = CT_INT;
260			flags |= UNSIGNED;
261			base = 16;
262			break;
263
264#ifdef FLOATING_POINT
265		case 'A': case 'E': case 'F': case 'G':
266		case 'a': case 'e': case 'f': case 'g':
267			c = CT_FLOAT;
268			break;
269#endif
270
271		case 'S':
272			flags |= LONG;
273			/* FALLTHROUGH */
274		case 's':
275			c = CT_STRING;
276			break;
277
278		case '[':
279			ccls = fmt;
280			if (*fmt == '^') {
281				cclcompl = 1;
282				fmt++;
283			} else
284				cclcompl = 0;
285			if (*fmt == ']')
286				fmt++;
287			while (*fmt != '\0' && *fmt != ']')
288				fmt++;
289			ccle = fmt;
290			fmt++;
291			flags |= NOSKIP;
292			c = CT_CCL;
293			break;
294
295		case 'C':
296			flags |= LONG;
297			/* FALLTHROUGH */
298		case 'c':
299			flags |= NOSKIP;
300			c = CT_CHAR;
301			break;
302
303		case 'p':	/* pointer format is like hex */
304			flags |= POINTER | PFXOK;
305			c = CT_INT;		/* assumes sizeof(uintmax_t) */
306			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
307			base = 16;
308			break;
309
310		case 'n':
311			nconversions++;
312			if (flags & SUPPRESS)	/* ??? */
313				continue;
314			if (flags & SHORTSHORT)
315				*va_arg(ap, char *) = nread;
316			else if (flags & SHORT)
317				*va_arg(ap, short *) = nread;
318			else if (flags & LONG)
319				*va_arg(ap, long *) = nread;
320			else if (flags & LONGLONG)
321				*va_arg(ap, long long *) = nread;
322			else if (flags & INTMAXT)
323				*va_arg(ap, intmax_t *) = nread;
324			else if (flags & SIZET)
325				*va_arg(ap, size_t *) = nread;
326			else if (flags & PTRDIFFT)
327				*va_arg(ap, ptrdiff_t *) = nread;
328			else
329				*va_arg(ap, int *) = nread;
330			continue;
331
332		default:
333			goto match_failure;
334
335		/*
336		 * Disgusting backwards compatibility hack.	XXX
337		 */
338		case '\0':	/* compat */
339			return (EOF);
340		}
341
342		/*
343		 * Consume leading white space, except for formats
344		 * that suppress this.
345		 */
346		if ((flags & NOSKIP) == 0) {
347			while ((wi = __fgetwc(fp)) != WEOF && iswspace(wi))
348				nread++;
349			if (wi == WEOF)
350				goto input_failure;
351			__ungetwc(wi, fp);
352		}
353
354		/*
355		 * Do the conversion.
356		 */
357		switch (c) {
358
359		case CT_CHAR:
360			/* scan arbitrary characters (sets NOSKIP) */
361			if (width == 0)
362				width = 1;
363			if (flags & LONG) {
364				if (!(flags & SUPPRESS))
365					p = va_arg(ap, wchar_t *);
366				n = 0;
367				while (width-- != 0 &&
368				    (wi = __fgetwc(fp)) != WEOF) {
369					if (!(flags & SUPPRESS))
370						*p++ = (wchar_t)wi;
371					n++;
372				}
373				if (n == 0)
374					goto input_failure;
375				nread += n;
376				if (!(flags & SUPPRESS))
377					nassigned++;
378			} else {
379				if (!(flags & SUPPRESS))
380					mbp = va_arg(ap, char *);
381				n = 0;
382				memset(&mbs, 0, sizeof(mbs));
383				while (width != 0 &&
384				    (wi = __fgetwc(fp)) != WEOF) {
385					if (width >= MB_CUR_MAX &&
386					    !(flags & SUPPRESS)) {
387						nconv = wcrtomb(mbp, wi, &mbs);
388						if (nconv == (size_t)-1)
389							goto input_failure;
390					} else {
391						nconv = wcrtomb(mbbuf, wi,
392						    &mbs);
393						if (nconv == (size_t)-1)
394							goto input_failure;
395						if (nconv > width) {
396							__ungetwc(wi, fp);
397							break;
398						}
399						if (!(flags & SUPPRESS))
400							memcpy(mbp, mbbuf,
401							    nconv);
402					}
403					if (!(flags & SUPPRESS))
404						mbp += nconv;
405					width -= nconv;
406					n++;
407				}
408				if (n == 0)
409					goto input_failure;
410				nread += n;
411				if (!(flags & SUPPRESS))
412					nassigned++;
413			}
414			nconversions++;
415			break;
416
417		case CT_CCL:
418			/* scan a (nonempty) character class (sets NOSKIP) */
419			if (width == 0)
420				width = (size_t)~0;	/* `infinity' */
421			/* take only those things in the class */
422			if ((flags & SUPPRESS) && (flags & LONG)) {
423				n = 0;
424				while ((wi = __fgetwc(fp)) != WEOF &&
425				    width-- != 0 && INCCL(wi))
426					n++;
427				if (wi != WEOF)
428					__ungetwc(wi, fp);
429				if (n == 0)
430					goto match_failure;
431			} else if (flags & LONG) {
432				p0 = p = va_arg(ap, wchar_t *);
433				while ((wi = __fgetwc(fp)) != WEOF &&
434				    width-- != 0 && INCCL(wi))
435					*p++ = (wchar_t)wi;
436				if (wi != WEOF)
437					__ungetwc(wi, fp);
438				n = p - p0;
439				if (n == 0)
440					goto match_failure;
441				*p = 0;
442				nassigned++;
443			} else {
444				if (!(flags & SUPPRESS))
445					mbp = va_arg(ap, char *);
446				n = 0;
447				memset(&mbs, 0, sizeof(mbs));
448				while ((wi = __fgetwc(fp)) != WEOF &&
449				    width != 0 && INCCL(wi)) {
450					if (width >= MB_CUR_MAX &&
451					   !(flags & SUPPRESS)) {
452						nconv = wcrtomb(mbp, wi, &mbs);
453						if (nconv == (size_t)-1)
454							goto input_failure;
455					} else {
456						nconv = wcrtomb(mbbuf, wi,
457						    &mbs);
458						if (nconv == (size_t)-1)
459							goto input_failure;
460						if (nconv > width)
461							break;
462						if (!(flags & SUPPRESS))
463							memcpy(mbp, mbbuf,
464							    nconv);
465					}
466					if (!(flags & SUPPRESS))
467						mbp += nconv;
468					width -= nconv;
469					n++;
470				}
471				if (wi != WEOF)
472					__ungetwc(wi, fp);
473				if (!(flags & SUPPRESS)) {
474					*mbp = 0;
475					nassigned++;
476				}
477			}
478			nread += n;
479			nconversions++;
480			break;
481
482		case CT_STRING:
483			/* like CCL, but zero-length string OK, & no NOSKIP */
484			if (width == 0)
485				width = (size_t)~0;
486			if ((flags & SUPPRESS) && (flags & LONG)) {
487				while ((wi = __fgetwc(fp)) != WEOF &&
488				    width-- != 0 &&
489				    !iswspace(wi))
490					nread++;
491				if (wi != WEOF)
492					__ungetwc(wi, fp);
493			} else if (flags & LONG) {
494				p0 = p = va_arg(ap, wchar_t *);
495				while ((wi = __fgetwc(fp)) != WEOF &&
496				    width-- != 0 &&
497				    !iswspace(wi)) {
498					*p++ = (wchar_t)wi;
499					nread++;
500				}
501				if (wi != WEOF)
502					__ungetwc(wi, fp);
503				*p = '\0';
504				nassigned++;
505			} else {
506				if (!(flags & SUPPRESS))
507					mbp = va_arg(ap, char *);
508				memset(&mbs, 0, sizeof(mbs));
509				while ((wi = __fgetwc(fp)) != WEOF &&
510				    width != 0 &&
511				    !iswspace(wi)) {
512					if (width >= MB_CUR_MAX &&
513					    !(flags & SUPPRESS)) {
514						nconv = wcrtomb(mbp, wi, &mbs);
515						if (nconv == (size_t)-1)
516							goto input_failure;
517					} else {
518						nconv = wcrtomb(mbbuf, wi,
519						    &mbs);
520						if (nconv == (size_t)-1)
521							goto input_failure;
522						if (nconv > width)
523							break;
524						if (!(flags & SUPPRESS))
525							memcpy(mbp, mbbuf,
526							    nconv);
527					}
528					if (!(flags & SUPPRESS))
529						mbp += nconv;
530					width -= nconv;
531					nread++;
532				}
533				if (wi != WEOF)
534					__ungetwc(wi, fp);
535				if (!(flags & SUPPRESS)) {
536					*mbp = 0;
537					nassigned++;
538				}
539			}
540			nconversions++;
541			continue;
542
543		case CT_INT:
544			/* scan an integer as if by the conversion function */
545#ifdef hardway
546			if (width == 0 || width > sizeof(buf) - 1)
547				width = sizeof(buf) - 1;
548#else
549			/* size_t is unsigned, hence this optimisation */
550			if (--width > sizeof(buf) - 2)
551				width = sizeof(buf) - 2;
552			width++;
553#endif
554			flags |= SIGNOK | NDIGITS | NZDIGITS;
555			for (p = buf; width; width--) {
556				c = __fgetwc(fp);
557				/*
558				 * Switch on the character; `goto ok'
559				 * if we accept it as a part of number.
560				 */
561				switch (c) {
562
563				/*
564				 * The digit 0 is always legal, but is
565				 * special.  For %i conversions, if no
566				 * digits (zero or nonzero) have been
567				 * scanned (only signs), we will have
568				 * base==0.  In that case, we should set
569				 * it to 8 and enable 0x prefixing.
570				 * Also, if we have not scanned zero digits
571				 * before this, do not turn off prefixing
572				 * (someone else will turn it off if we
573				 * have scanned any nonzero digits).
574				 */
575				case '0':
576					if (base == 0) {
577						base = 8;
578						flags |= PFXOK;
579					}
580					if (flags & NZDIGITS)
581					    flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
582					else
583					    flags &= ~(SIGNOK|PFXOK|NDIGITS);
584					goto ok;
585
586				/* 1 through 7 always legal */
587				case '1': case '2': case '3':
588				case '4': case '5': case '6': case '7':
589					base = basefix[base];
590					flags &= ~(SIGNOK | PFXOK | NDIGITS);
591					goto ok;
592
593				/* digits 8 and 9 ok iff decimal or hex */
594				case '8': case '9':
595					base = basefix[base];
596					if (base <= 8)
597						break;	/* not legal here */
598					flags &= ~(SIGNOK | PFXOK | NDIGITS);
599					goto ok;
600
601				/* letters ok iff hex */
602				case 'A': case 'B': case 'C':
603				case 'D': case 'E': case 'F':
604				case 'a': case 'b': case 'c':
605				case 'd': case 'e': case 'f':
606					/* no need to fix base here */
607					if (base <= 10)
608						break;	/* not legal here */
609					flags &= ~(SIGNOK | PFXOK | NDIGITS);
610					goto ok;
611
612				/* sign ok only as first character */
613				case '+': case '-':
614					if (flags & SIGNOK) {
615						flags &= ~SIGNOK;
616						goto ok;
617					}
618					break;
619
620				/* x ok iff flag still set & 2nd char */
621				case 'x': case 'X':
622					if (flags & PFXOK && p == buf + 1) {
623						base = 16;	/* if %i */
624						flags &= ~PFXOK;
625						goto ok;
626					}
627					break;
628				}
629
630				/*
631				 * If we got here, c is not a legal character
632				 * for a number.  Stop accumulating digits.
633				 */
634				if (c != WEOF)
635					__ungetwc(c, fp);
636				break;
637		ok:
638				/*
639				 * c is legal: store it and look at the next.
640				 */
641				*p++ = (wchar_t)c;
642			}
643			/*
644			 * If we had only a sign, it is no good; push
645			 * back the sign.  If the number ends in `x',
646			 * it was [sign] '0' 'x', so push back the x
647			 * and treat it as [sign] '0'.
648			 */
649			if (flags & NDIGITS) {
650				if (p > buf)
651					__ungetwc(*--p, fp);
652				goto match_failure;
653			}
654			c = p[-1];
655			if (c == 'x' || c == 'X') {
656				--p;
657				__ungetwc(c, fp);
658			}
659			if ((flags & SUPPRESS) == 0) {
660				uintmax_t res;
661
662				*p = 0;
663				if ((flags & UNSIGNED) == 0)
664				    res = wcstoimax(buf, NULL, base);
665				else
666				    res = wcstoumax(buf, NULL, base);
667				if (flags & POINTER)
668					*va_arg(ap, void **) =
669							(void *)(uintptr_t)res;
670				else if (flags & SHORTSHORT)
671					*va_arg(ap, char *) = res;
672				else if (flags & SHORT)
673					*va_arg(ap, short *) = res;
674				else if (flags & LONG)
675					*va_arg(ap, long *) = res;
676				else if (flags & LONGLONG)
677					*va_arg(ap, long long *) = res;
678				else if (flags & INTMAXT)
679					*va_arg(ap, intmax_t *) = res;
680				else if (flags & PTRDIFFT)
681					*va_arg(ap, ptrdiff_t *) = res;
682				else if (flags & SIZET)
683					*va_arg(ap, size_t *) = res;
684				else
685					*va_arg(ap, int *) = res;
686				nassigned++;
687			}
688			nread += p - buf;
689			nconversions++;
690			break;
691
692#ifdef FLOATING_POINT
693		case CT_FLOAT:
694			/* scan a floating point number as if by strtod */
695			if (width == 0 || width > sizeof(buf) - 1)
696				width = sizeof(buf) - 1;
697			if ((width = parsefloat(fp, buf, buf + width)) == 0)
698				goto match_failure;
699			if ((flags & SUPPRESS) == 0) {
700				if (flags & LONGDBL) {
701					long double res = wcstold(buf, &p);
702					*va_arg(ap, long double *) = res;
703				} else if (flags & LONG) {
704					double res = wcstod(buf, &p);
705					*va_arg(ap, double *) = res;
706				} else {
707					float res = wcstof(buf, &p);
708					*va_arg(ap, float *) = res;
709				}
710				if (__scanfdebug && p - buf != width)
711					abort();
712				nassigned++;
713			}
714			nread += width;
715			nconversions++;
716			break;
717#endif /* FLOATING_POINT */
718		}
719	}
720input_failure:
721	return (nconversions != 0 ? nassigned : EOF);
722match_failure:
723	return (nassigned);
724}
725
726#ifdef FLOATING_POINT
/*
 * Read the longest prefix of the input on fp that forms a valid
 * floating point number (decimal or hex, with optional sign and
 * exponent, or inf/infinity/nan/nan(...)) into buf.
 *
 * buf is the start of the destination and end marks the limit on
 * stored characters; the terminating NUL may be written at *end, so
 * the caller must leave room for it.  Characters read beyond the
 * accepted prefix are pushed back with __ungetwc().
 *
 * Returns the number of wide characters accepted (0 if the input does
 * not start with a number); buf is NUL-terminated at that length.
 */
static int
parsefloat(FILE *fp, wchar_t *buf, wchar_t *end)
{
	mbstate_t mbs;
	size_t nconv;
	wchar_t *commit, *p;
	int infnanpos = 0;
	enum {
		S_START, S_GOTSIGN, S_INF, S_NAN, S_MAYBEHEX,
		S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
	} state = S_START;
	wint_t c;		/* wint_t, so that WEOF is representable */
	wchar_t decpt;
	_Bool gotmantdig = 0, ishex = 0;

	/*
	 * Convert the locale's decimal point to a wide character; it
	 * need not be a single byte.  Fall back to `.' if that fails.
	 */
	memset(&mbs, 0, sizeof(mbs));
	nconv = mbrtowc(&decpt, localeconv()->decimal_point, MB_CUR_MAX,
	    &mbs);
	if (nconv == (size_t)-1 || nconv == (size_t)-2 || nconv == 0)
		decpt = L'.';

	/*
	 * We set commit = p + 1 whenever the string we have read so far,
	 * including the character about to be stored at p, constitutes
	 * a valid representation of a floating point number by itself;
	 * commit thus points one past the last committed character.
	 * At some point, the parse will complete or fail, and we will
	 * ungetc() back to the last commit point.
	 * To ensure that the file offset gets updated properly, it is
	 * always necessary to read at least one character that doesn't
	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
	 */
	commit = buf;
	c = WEOF;
	for (p = buf; p < end; ) {
		if ((c = __fgetwc(fp)) == WEOF)
			break;
reswitch:
		switch (state) {
		case S_START:
			state = S_GOTSIGN;
			if (c == '-' || c == '+')
				break;
			else
				goto reswitch;
		case S_GOTSIGN:
			switch (c) {
			case '0':
				state = S_MAYBEHEX;
				commit = p + 1;
				break;
			case 'I':
			case 'i':
				state = S_INF;
				break;
			case 'N':
			case 'n':
				state = S_NAN;
				break;
			default:
				state = S_DIGITS;
				goto reswitch;
			}
			break;
		case S_INF:
			/* infnanpos indexes the text after the initial `i'. */
			if (infnanpos > 6 ||
			    (c != "nfinity"[infnanpos] &&
			     c != "NFINITY"[infnanpos]))
				goto parsedone;
			if (infnanpos == 1 || infnanpos == 6)
				commit = p + 1;	/* inf or infinity */
			infnanpos++;
			break;
		case S_NAN:
			switch (infnanpos) {
			case -1:	/* XXX kludge to deal with nan(...) */
				goto parsedone;
			case 0:
				if (c != 'A' && c != 'a')
					goto parsedone;
				break;
			case 1:
				if (c != 'N' && c != 'n')
					goto parsedone;
				else
					commit = p + 1;
				break;
			case 2:
				if (c != '(')
					goto parsedone;
				break;
			default:
				if (c == ')') {
					commit = p + 1;
					/* becomes -1 below: stop on next char */
					infnanpos = -2;
				} else if (!iswalnum(c) && c != '_')
					goto parsedone;
				break;
			}
			infnanpos++;
			break;
		case S_MAYBEHEX:
			state = S_DIGITS;
			if (c == 'X' || c == 'x') {
				ishex = 1;
				break;
			} else {	/* we saw a '0', but no 'x' */
				gotmantdig = 1;
				goto reswitch;
			}
		case S_DIGITS:
			if ((ishex && iswxdigit(c)) || iswdigit(c))
				gotmantdig = 1;
			else {
				state = S_FRAC;
				if (c != (wint_t)decpt)
					goto reswitch;
			}
			if (gotmantdig)
				commit = p + 1;
			break;
		case S_FRAC:
			if (((c == 'E' || c == 'e') && !ishex) ||
			    ((c == 'P' || c == 'p') && ishex)) {
				if (!gotmantdig)
					goto parsedone;
				else
					state = S_EXP;
			} else if ((ishex && iswxdigit(c)) || iswdigit(c)) {
				commit = p + 1;
				gotmantdig = 1;
			} else
				goto parsedone;
			break;
		case S_EXP:
			state = S_EXPDIGITS;
			if (c == '-' || c == '+')
				break;
			else
				goto reswitch;
		case S_EXPDIGITS:
			if (iswdigit(c))
				commit = p + 1;
			else
				goto parsedone;
			break;
		default:
			abort();
		}
		*p++ = (wchar_t)c;
		c = WEOF;	/* stored, so nothing pending to push back */
	}

parsedone:
	/* Push back the rejected character, then everything uncommitted. */
	if (c != WEOF)
		__ungetwc(c, fp);
	while (commit < p)
		__ungetwc(*--p, fp);
	*commit = '\0';
	return ((int)(commit - buf));
}
878#endif
879