vfwscanf.c revision 165903
1/*-
2 * Copyright (c) 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Chris Torek.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#if 0
34#if defined(LIBC_SCCS) && !defined(lint)
35static char sccsid[] = "@(#)vfscanf.c	8.1 (Berkeley) 6/4/93";
36#endif /* LIBC_SCCS and not lint */
37#endif
38#include <sys/cdefs.h>
39__FBSDID("$FreeBSD: head/lib/libc/stdio/vfwscanf.c 165903 2007-01-09 00:28:16Z imp $");
40
41#include "namespace.h"
42#include <ctype.h>
43#include <inttypes.h>
44#include <limits.h>
45#include <stdio.h>
46#include <stdlib.h>
47#include <stddef.h>
48#include <stdarg.h>
49#include <string.h>
50#include <wchar.h>
51#include <wctype.h>
52#include "un-namespace.h"
53
54#include "libc_private.h"
55#include "local.h"
56
57#ifndef NO_FLOATING_POINT
58#include <locale.h>
59#endif
60
/* Capacity, in wide characters, of the buffer that holds a numeric field. */
#define	BUF		513

/*
 * Bits kept in `flags' while a single conversion specification is
 * being processed.  Length modifiers and general options:
 */
#define	LONG		(1 << 0)	/* l: long or double */
#define	LONGDBL		(1 << 1)	/* L: long double */
#define	SHORT		(1 << 2)	/* h: short */
#define	SUPPRESS	(1 << 3)	/* *: suppress assignment */
#define	POINTER		(1 << 4)	/* p: void * (as hex) */
#define	NOSKIP		(1 << 5)	/* [ or c: do not skip blanks */
#define	LONGLONG	(1 << 10)	/* ll: long long (+ deprecated q: quad) */
#define	INTMAXT		(1 << 11)	/* j: intmax_t */
#define	PTRDIFFT	(1 << 12)	/* t: ptrdiff_t */
#define	SIZET		(1 << 13)	/* z: size_t */
#define	SHORTSHORT	(1 << 14)	/* hh: char */
#define	UNSIGNED	(1 << 15)	/* %[oupxX] conversions */

/*
 * Scanner state bits, meaningful only while an integer field is read:
 * SIGNOK, NDIGITS, PFXOK, NZDIGITS, and HAVESIGN.
 */
#define	SIGNOK		(1 << 6)	/* +/- is (still) legal */
#define	NDIGITS		(1 << 7)	/* no digits detected */
#define	PFXOK		(1 << 8)	/* 0x prefix is (still) legal */
#define	NZDIGITS	(1 << 9)	/* no zero digits detected */
#define	HAVESIGN	(1 << 16)	/* sign detected */

/*
 * Kind of conversion selected by the format character.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* %[dioupxX] conversion */
#define	CT_FLOAT	4	/* %[efgEFG] conversion */
97
#ifndef NO_FLOATING_POINT
/* Floating-point field reader, defined at the end of this file. */
static int parsefloat(FILE *fp, wchar_t *buf, wchar_t *end);
#endif

/* Debug switch defined elsewhere; consulted by the floating-point conversion. */
extern int __scanfdebug;

/*
 * INCCL(_c) is nonzero when the wide character _c is accepted by the
 * current scanset.  It reads the caller's locals: the set occupies
 * [ccls, ccle) and cclcompl is nonzero when the set is complemented.
 */
#define	INCCL(_c)	\
	((wmemchr(ccls, (_c), ccle - ccls) != NULL) != (cclcompl != 0))
107
/*
 * MT-safe version.
 *
 * Brackets a call to __vfwscanf() with FLOCKFILE()/FUNLOCKFILE() on the
 * stream and applies ORIENT(fp, 1) first (presumably marking the stream
 * wide-oriented; see the macro's definition).  The value returned is
 * whatever __vfwscanf() returned.
 */
int
vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap)
{
	int nassigned;

	FLOCKFILE(fp);
	ORIENT(fp, 1);
	nassigned = __vfwscanf(fp, fmt, ap);
	FUNLOCKFILE(fp);

	return (nassigned);
}
122
123/*
124 * Non-MT-safe version.
125 */
126int
127__vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap)
128{
129	wint_t c;		/* character from format, or conversion */
130	size_t width;		/* field width, or 0 */
131	wchar_t *p;		/* points into all kinds of strings */
132	int n;			/* handy integer */
133	int flags;		/* flags as defined above */
134	wchar_t *p0;		/* saves original value of p when necessary */
135	int nassigned;		/* number of fields assigned */
136	int nconversions;	/* number of conversions */
137	int nread;		/* number of characters consumed from fp */
138	int base;		/* base argument to conversion function */
139	wchar_t buf[BUF];	/* buffer for numeric conversions */
140	const wchar_t *ccls;	/* character class start */
141	const wchar_t *ccle;	/* character class end */
142	int cclcompl;		/* ccl is complemented? */
143	wint_t wi;		/* handy wint_t */
144	char *mbp;		/* multibyte string pointer for %c %s %[ */
145	size_t nconv;		/* number of bytes in mb. conversion */
146	char mbbuf[MB_LEN_MAX];	/* temporary mb. character buffer */
147	static const mbstate_t initial;
148	mbstate_t mbs;
149
150	/* `basefix' is used to avoid `if' tests in the integer scanner */
151	static short basefix[17] =
152		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
153
154	nassigned = 0;
155	nconversions = 0;
156	nread = 0;
157	ccls = ccle = NULL;
158	for (;;) {
159		c = *fmt++;
160		if (c == 0)
161			return (nassigned);
162		if (iswspace(c)) {
163			while ((c = __fgetwc(fp)) != WEOF &&
164			    iswspace(c))
165				;
166			if (c != WEOF)
167				__ungetwc(c, fp);
168			continue;
169		}
170		if (c != '%')
171			goto literal;
172		width = 0;
173		flags = 0;
174		/*
175		 * switch on the format.  continue if done;
176		 * break once format type is derived.
177		 */
178again:		c = *fmt++;
179		switch (c) {
180		case '%':
181literal:
182			if ((wi = __fgetwc(fp)) == WEOF)
183				goto input_failure;
184			if (wi != c) {
185				__ungetwc(wi, fp);
186				goto input_failure;
187			}
188			nread++;
189			continue;
190
191		case '*':
192			flags |= SUPPRESS;
193			goto again;
194		case 'j':
195			flags |= INTMAXT;
196			goto again;
197		case 'l':
198			if (flags & LONG) {
199				flags &= ~LONG;
200				flags |= LONGLONG;
201			} else
202				flags |= LONG;
203			goto again;
204		case 'q':
205			flags |= LONGLONG;	/* not quite */
206			goto again;
207		case 't':
208			flags |= PTRDIFFT;
209			goto again;
210		case 'z':
211			flags |= SIZET;
212			goto again;
213		case 'L':
214			flags |= LONGDBL;
215			goto again;
216		case 'h':
217			if (flags & SHORT) {
218				flags &= ~SHORT;
219				flags |= SHORTSHORT;
220			} else
221				flags |= SHORT;
222			goto again;
223
224		case '0': case '1': case '2': case '3': case '4':
225		case '5': case '6': case '7': case '8': case '9':
226			width = width * 10 + c - '0';
227			goto again;
228
229		/*
230		 * Conversions.
231		 */
232		case 'd':
233			c = CT_INT;
234			base = 10;
235			break;
236
237		case 'i':
238			c = CT_INT;
239			base = 0;
240			break;
241
242		case 'o':
243			c = CT_INT;
244			flags |= UNSIGNED;
245			base = 8;
246			break;
247
248		case 'u':
249			c = CT_INT;
250			flags |= UNSIGNED;
251			base = 10;
252			break;
253
254		case 'X':
255		case 'x':
256			flags |= PFXOK;	/* enable 0x prefixing */
257			c = CT_INT;
258			flags |= UNSIGNED;
259			base = 16;
260			break;
261
262#ifndef NO_FLOATING_POINT
263		case 'A': case 'E': case 'F': case 'G':
264		case 'a': case 'e': case 'f': case 'g':
265			c = CT_FLOAT;
266			break;
267#endif
268
269		case 'S':
270			flags |= LONG;
271			/* FALLTHROUGH */
272		case 's':
273			c = CT_STRING;
274			break;
275
276		case '[':
277			ccls = fmt;
278			if (*fmt == '^') {
279				cclcompl = 1;
280				fmt++;
281			} else
282				cclcompl = 0;
283			if (*fmt == ']')
284				fmt++;
285			while (*fmt != '\0' && *fmt != ']')
286				fmt++;
287			ccle = fmt;
288			fmt++;
289			flags |= NOSKIP;
290			c = CT_CCL;
291			break;
292
293		case 'C':
294			flags |= LONG;
295			/* FALLTHROUGH */
296		case 'c':
297			flags |= NOSKIP;
298			c = CT_CHAR;
299			break;
300
301		case 'p':	/* pointer format is like hex */
302			flags |= POINTER | PFXOK;
303			c = CT_INT;		/* assumes sizeof(uintmax_t) */
304			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
305			base = 16;
306			break;
307
308		case 'n':
309			nconversions++;
310			if (flags & SUPPRESS)	/* ??? */
311				continue;
312			if (flags & SHORTSHORT)
313				*va_arg(ap, char *) = nread;
314			else if (flags & SHORT)
315				*va_arg(ap, short *) = nread;
316			else if (flags & LONG)
317				*va_arg(ap, long *) = nread;
318			else if (flags & LONGLONG)
319				*va_arg(ap, long long *) = nread;
320			else if (flags & INTMAXT)
321				*va_arg(ap, intmax_t *) = nread;
322			else if (flags & SIZET)
323				*va_arg(ap, size_t *) = nread;
324			else if (flags & PTRDIFFT)
325				*va_arg(ap, ptrdiff_t *) = nread;
326			else
327				*va_arg(ap, int *) = nread;
328			continue;
329
330		default:
331			goto match_failure;
332
333		/*
334		 * Disgusting backwards compatibility hack.	XXX
335		 */
336		case '\0':	/* compat */
337			return (EOF);
338		}
339
340		/*
341		 * Consume leading white space, except for formats
342		 * that suppress this.
343		 */
344		if ((flags & NOSKIP) == 0) {
345			while ((wi = __fgetwc(fp)) != WEOF && iswspace(wi))
346				nread++;
347			if (wi == WEOF)
348				goto input_failure;
349			__ungetwc(wi, fp);
350		}
351
352		/*
353		 * Do the conversion.
354		 */
355		switch (c) {
356
357		case CT_CHAR:
358			/* scan arbitrary characters (sets NOSKIP) */
359			if (width == 0)
360				width = 1;
361			if (flags & LONG) {
362				if (!(flags & SUPPRESS))
363					p = va_arg(ap, wchar_t *);
364				n = 0;
365				while (width-- != 0 &&
366				    (wi = __fgetwc(fp)) != WEOF) {
367					if (!(flags & SUPPRESS))
368						*p++ = (wchar_t)wi;
369					n++;
370				}
371				if (n == 0)
372					goto input_failure;
373				nread += n;
374				if (!(flags & SUPPRESS))
375					nassigned++;
376			} else {
377				if (!(flags & SUPPRESS))
378					mbp = va_arg(ap, char *);
379				n = 0;
380				mbs = initial;
381				while (width != 0 &&
382				    (wi = __fgetwc(fp)) != WEOF) {
383					if (width >= MB_CUR_MAX &&
384					    !(flags & SUPPRESS)) {
385						nconv = wcrtomb(mbp, wi, &mbs);
386						if (nconv == (size_t)-1)
387							goto input_failure;
388					} else {
389						nconv = wcrtomb(mbbuf, wi,
390						    &mbs);
391						if (nconv == (size_t)-1)
392							goto input_failure;
393						if (nconv > width) {
394							__ungetwc(wi, fp);
395							break;
396						}
397						if (!(flags & SUPPRESS))
398							memcpy(mbp, mbbuf,
399							    nconv);
400					}
401					if (!(flags & SUPPRESS))
402						mbp += nconv;
403					width -= nconv;
404					n++;
405				}
406				if (n == 0)
407					goto input_failure;
408				nread += n;
409				if (!(flags & SUPPRESS))
410					nassigned++;
411			}
412			nconversions++;
413			break;
414
415		case CT_CCL:
416			/* scan a (nonempty) character class (sets NOSKIP) */
417			if (width == 0)
418				width = (size_t)~0;	/* `infinity' */
419			/* take only those things in the class */
420			if ((flags & SUPPRESS) && (flags & LONG)) {
421				n = 0;
422				while ((wi = __fgetwc(fp)) != WEOF &&
423				    width-- != 0 && INCCL(wi))
424					n++;
425				if (wi != WEOF)
426					__ungetwc(wi, fp);
427				if (n == 0)
428					goto match_failure;
429			} else if (flags & LONG) {
430				p0 = p = va_arg(ap, wchar_t *);
431				while ((wi = __fgetwc(fp)) != WEOF &&
432				    width-- != 0 && INCCL(wi))
433					*p++ = (wchar_t)wi;
434				if (wi != WEOF)
435					__ungetwc(wi, fp);
436				n = p - p0;
437				if (n == 0)
438					goto match_failure;
439				*p = 0;
440				nassigned++;
441			} else {
442				if (!(flags & SUPPRESS))
443					mbp = va_arg(ap, char *);
444				n = 0;
445				mbs = initial;
446				while ((wi = __fgetwc(fp)) != WEOF &&
447				    width != 0 && INCCL(wi)) {
448					if (width >= MB_CUR_MAX &&
449					   !(flags & SUPPRESS)) {
450						nconv = wcrtomb(mbp, wi, &mbs);
451						if (nconv == (size_t)-1)
452							goto input_failure;
453					} else {
454						nconv = wcrtomb(mbbuf, wi,
455						    &mbs);
456						if (nconv == (size_t)-1)
457							goto input_failure;
458						if (nconv > width)
459							break;
460						if (!(flags & SUPPRESS))
461							memcpy(mbp, mbbuf,
462							    nconv);
463					}
464					if (!(flags & SUPPRESS))
465						mbp += nconv;
466					width -= nconv;
467					n++;
468				}
469				if (wi != WEOF)
470					__ungetwc(wi, fp);
471				if (!(flags & SUPPRESS)) {
472					*mbp = 0;
473					nassigned++;
474				}
475			}
476			nread += n;
477			nconversions++;
478			break;
479
480		case CT_STRING:
481			/* like CCL, but zero-length string OK, & no NOSKIP */
482			if (width == 0)
483				width = (size_t)~0;
484			if ((flags & SUPPRESS) && (flags & LONG)) {
485				while ((wi = __fgetwc(fp)) != WEOF &&
486				    width-- != 0 &&
487				    !iswspace(wi))
488					nread++;
489				if (wi != WEOF)
490					__ungetwc(wi, fp);
491			} else if (flags & LONG) {
492				p0 = p = va_arg(ap, wchar_t *);
493				while ((wi = __fgetwc(fp)) != WEOF &&
494				    width-- != 0 &&
495				    !iswspace(wi)) {
496					*p++ = (wchar_t)wi;
497					nread++;
498				}
499				if (wi != WEOF)
500					__ungetwc(wi, fp);
501				*p = '\0';
502				nassigned++;
503			} else {
504				if (!(flags & SUPPRESS))
505					mbp = va_arg(ap, char *);
506				mbs = initial;
507				while ((wi = __fgetwc(fp)) != WEOF &&
508				    width != 0 &&
509				    !iswspace(wi)) {
510					if (width >= MB_CUR_MAX &&
511					    !(flags & SUPPRESS)) {
512						nconv = wcrtomb(mbp, wi, &mbs);
513						if (nconv == (size_t)-1)
514							goto input_failure;
515					} else {
516						nconv = wcrtomb(mbbuf, wi,
517						    &mbs);
518						if (nconv == (size_t)-1)
519							goto input_failure;
520						if (nconv > width)
521							break;
522						if (!(flags & SUPPRESS))
523							memcpy(mbp, mbbuf,
524							    nconv);
525					}
526					if (!(flags & SUPPRESS))
527						mbp += nconv;
528					width -= nconv;
529					nread++;
530				}
531				if (wi != WEOF)
532					__ungetwc(wi, fp);
533				if (!(flags & SUPPRESS)) {
534					*mbp = 0;
535					nassigned++;
536				}
537			}
538			nconversions++;
539			continue;
540
541		case CT_INT:
542			/* scan an integer as if by the conversion function */
543			if (width == 0 || width > sizeof(buf) /
544			    sizeof(*buf) - 1)
545				width = sizeof(buf) / sizeof(*buf) - 1;
546			flags |= SIGNOK | NDIGITS | NZDIGITS;
547			for (p = buf; width; width--) {
548				c = __fgetwc(fp);
549				/*
550				 * Switch on the character; `goto ok'
551				 * if we accept it as a part of number.
552				 */
553				switch (c) {
554
555				/*
556				 * The digit 0 is always legal, but is
557				 * special.  For %i conversions, if no
558				 * digits (zero or nonzero) have been
559				 * scanned (only signs), we will have
560				 * base==0.  In that case, we should set
561				 * it to 8 and enable 0x prefixing.
562				 * Also, if we have not scanned zero digits
563				 * before this, do not turn off prefixing
564				 * (someone else will turn it off if we
565				 * have scanned any nonzero digits).
566				 */
567				case '0':
568					if (base == 0) {
569						base = 8;
570						flags |= PFXOK;
571					}
572					if (flags & NZDIGITS)
573					    flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
574					else
575					    flags &= ~(SIGNOK|PFXOK|NDIGITS);
576					goto ok;
577
578				/* 1 through 7 always legal */
579				case '1': case '2': case '3':
580				case '4': case '5': case '6': case '7':
581					base = basefix[base];
582					flags &= ~(SIGNOK | PFXOK | NDIGITS);
583					goto ok;
584
585				/* digits 8 and 9 ok iff decimal or hex */
586				case '8': case '9':
587					base = basefix[base];
588					if (base <= 8)
589						break;	/* not legal here */
590					flags &= ~(SIGNOK | PFXOK | NDIGITS);
591					goto ok;
592
593				/* letters ok iff hex */
594				case 'A': case 'B': case 'C':
595				case 'D': case 'E': case 'F':
596				case 'a': case 'b': case 'c':
597				case 'd': case 'e': case 'f':
598					/* no need to fix base here */
599					if (base <= 10)
600						break;	/* not legal here */
601					flags &= ~(SIGNOK | PFXOK | NDIGITS);
602					goto ok;
603
604				/* sign ok only as first character */
605				case '+': case '-':
606					if (flags & SIGNOK) {
607						flags &= ~SIGNOK;
608						flags |= HAVESIGN;
609						goto ok;
610					}
611					break;
612
613				/*
614				 * x ok iff flag still set & 2nd char (or
615				 * 3rd char if we have a sign).
616				 */
617				case 'x': case 'X':
618					if (flags & PFXOK && p ==
619					    buf + 1 + !!(flags & HAVESIGN)) {
620						base = 16;	/* if %i */
621						flags &= ~PFXOK;
622						goto ok;
623					}
624					break;
625				}
626
627				/*
628				 * If we got here, c is not a legal character
629				 * for a number.  Stop accumulating digits.
630				 */
631				if (c != WEOF)
632					__ungetwc(c, fp);
633				break;
634		ok:
635				/*
636				 * c is legal: store it and look at the next.
637				 */
638				*p++ = (wchar_t)c;
639			}
640			/*
641			 * If we had only a sign, it is no good; push
642			 * back the sign.  If the number ends in `x',
643			 * it was [sign] '0' 'x', so push back the x
644			 * and treat it as [sign] '0'.
645			 */
646			if (flags & NDIGITS) {
647				if (p > buf)
648					__ungetwc(*--p, fp);
649				goto match_failure;
650			}
651			c = p[-1];
652			if (c == 'x' || c == 'X') {
653				--p;
654				__ungetwc(c, fp);
655			}
656			if ((flags & SUPPRESS) == 0) {
657				uintmax_t res;
658
659				*p = 0;
660				if ((flags & UNSIGNED) == 0)
661				    res = wcstoimax(buf, NULL, base);
662				else
663				    res = wcstoumax(buf, NULL, base);
664				if (flags & POINTER)
665					*va_arg(ap, void **) =
666							(void *)(uintptr_t)res;
667				else if (flags & SHORTSHORT)
668					*va_arg(ap, char *) = res;
669				else if (flags & SHORT)
670					*va_arg(ap, short *) = res;
671				else if (flags & LONG)
672					*va_arg(ap, long *) = res;
673				else if (flags & LONGLONG)
674					*va_arg(ap, long long *) = res;
675				else if (flags & INTMAXT)
676					*va_arg(ap, intmax_t *) = res;
677				else if (flags & PTRDIFFT)
678					*va_arg(ap, ptrdiff_t *) = res;
679				else if (flags & SIZET)
680					*va_arg(ap, size_t *) = res;
681				else
682					*va_arg(ap, int *) = res;
683				nassigned++;
684			}
685			nread += p - buf;
686			nconversions++;
687			break;
688
689#ifndef NO_FLOATING_POINT
690		case CT_FLOAT:
691			/* scan a floating point number as if by strtod */
692			if (width == 0 || width > sizeof(buf) /
693			    sizeof(*buf) - 1)
694				width = sizeof(buf) / sizeof(*buf) - 1;
695			if ((width = parsefloat(fp, buf, buf + width)) == 0)
696				goto match_failure;
697			if ((flags & SUPPRESS) == 0) {
698				if (flags & LONGDBL) {
699					long double res = wcstold(buf, &p);
700					*va_arg(ap, long double *) = res;
701				} else if (flags & LONG) {
702					double res = wcstod(buf, &p);
703					*va_arg(ap, double *) = res;
704				} else {
705					float res = wcstof(buf, &p);
706					*va_arg(ap, float *) = res;
707				}
708				if (__scanfdebug && p - buf != width)
709					abort();
710				nassigned++;
711			}
712			nread += width;
713			nconversions++;
714			break;
715#endif /* !NO_FLOATING_POINT */
716		}
717	}
718input_failure:
719	return (nconversions != 0 ? nassigned : EOF);
720match_failure:
721	return (nassigned);
722}
723
#ifndef NO_FLOATING_POINT
/*
 * Read the longest prefix of the input on fp that forms a floating-point
 * number (decimal or hexadecimal, "inf"/"infinity", "nan" or "nan(...)",
 * each optionally signed) and store it, NUL-terminated, in buf.  At most
 * end - buf characters are examined; the terminator is written at or
 * before *end, so the caller must leave room for it there.
 *
 * Characters read beyond the accepted prefix are pushed back onto fp.
 * Returns the number of wide characters stored, not counting the
 * terminator; 0 means that no number was found.
 */
static int
parsefloat(FILE *fp, wchar_t *buf, wchar_t *end)
{
	static const mbstate_t initial;
	mbstate_t mbs;
	size_t nconv;
	wchar_t *commit, *p;
	int infnanpos = 0;
	enum {
		S_START, S_GOTSIGN, S_INF, S_NAN, S_MAYBEHEX,
		S_DIGITS, S_FRAC, S_EXP, S_EXPDIGITS
	} state = S_START;
	wint_t c;	/* wint_t, so that WEOF is distinct from any character */
	wchar_t decpt;
	_Bool gotmantdig = 0, ishex = 0;

	/*
	 * The locale's radix character is a multibyte string; convert it
	 * rather than assuming that it is a single byte.
	 */
	mbs = initial;
	nconv = mbrtowc(&decpt, localeconv()->decimal_point, MB_CUR_MAX, &mbs);
	if (nconv == 0 || nconv == (size_t)-1 || nconv == (size_t)-2)
		decpt = '.';	/* failsafe */

	/*
	 * `commit' points one past the last character of the longest
	 * prefix read so far that constitutes a valid representation of
	 * a floating point number by itself; the character currently
	 * being examined will be stored at *p, hence `commit = p + 1'
	 * accepts everything up to and including it.  At some point, the
	 * parse will complete or fail, and we will ungetc() back to the
	 * last commit point.  To ensure that the file offset gets updated
	 * properly, it is always necessary to read at least one character
	 * that doesn't match; thus, we can't short-circuit "infinity" or
	 * "nan(...)".
	 */
	commit = buf;
	c = WEOF;
	for (p = buf; p < end; ) {
		if ((c = __fgetwc(fp)) == WEOF)
			break;
reswitch:
		switch (state) {
		case S_START:
			state = S_GOTSIGN;
			if (c == '-' || c == '+')
				break;
			else
				goto reswitch;
		case S_GOTSIGN:
			switch (c) {
			case '0':
				state = S_MAYBEHEX;
				commit = p + 1;
				break;
			case 'I':
			case 'i':
				state = S_INF;
				break;
			case 'N':
			case 'n':
				state = S_NAN;
				break;
			default:
				state = S_DIGITS;
				goto reswitch;
			}
			break;
		case S_INF:
			if (infnanpos > 6 ||
			    (c != "nfinity"[infnanpos] &&
			     c != "NFINITY"[infnanpos]))
				goto parsedone;
			if (infnanpos == 1 || infnanpos == 6)
				commit = p + 1;	/* inf or infinity */
			infnanpos++;
			break;
		case S_NAN:
			switch (infnanpos) {
			case -1:	/* XXX kludge to deal with nan(...) */
				goto parsedone;
			case 0:
				if (c != 'A' && c != 'a')
					goto parsedone;
				break;
			case 1:
				if (c != 'N' && c != 'n')
					goto parsedone;
				else
					commit = p + 1;
				break;
			case 2:
				if (c != '(')
					goto parsedone;
				break;
			default:
				if (c == ')') {
					commit = p + 1;
					/* Becomes -1 below: stop at the next character. */
					infnanpos = -2;
				} else if (!iswalnum(c) && c != '_')
					goto parsedone;
				break;
			}
			infnanpos++;
			break;
		case S_MAYBEHEX:
			state = S_DIGITS;
			if (c == 'X' || c == 'x') {
				ishex = 1;
				break;
			} else {	/* we saw a '0', but no 'x' */
				gotmantdig = 1;
				goto reswitch;
			}
		case S_DIGITS:
			if ((ishex && iswxdigit(c)) || iswdigit(c))
				gotmantdig = 1;
			else {
				state = S_FRAC;
				if (c != (wint_t)decpt)
					goto reswitch;
			}
			if (gotmantdig)
				commit = p + 1;
			break;
		case S_FRAC:
			if (((c == 'E' || c == 'e') && !ishex) ||
			    ((c == 'P' || c == 'p') && ishex)) {
				if (!gotmantdig)
					goto parsedone;
				else
					state = S_EXP;
			} else if ((ishex && iswxdigit(c)) || iswdigit(c)) {
				commit = p + 1;
				gotmantdig = 1;
			} else
				goto parsedone;
			break;
		case S_EXP:
			state = S_EXPDIGITS;
			if (c == '-' || c == '+')
				break;
			else
				goto reswitch;
		case S_EXPDIGITS:
			if (iswdigit(c))
				commit = p + 1;
			else
				goto parsedone;
			break;
		default:
			abort();
		}
		*p++ = (wchar_t)c;
		c = WEOF;
	}

parsedone:
	/* First return the rejected character, if there is one ... */
	if (c != WEOF)
		__ungetwc(c, fp);
	/* ... then everything stored after the last commit point. */
	while (p > commit)
		__ungetwc(*--p, fp);
	*commit = '\0';
	return (commit - buf);
}
#endif
877