vfscanf.c revision 187422
162143Sarchie/*-
262143Sarchie * Copyright (c) 1990, 1993
362143Sarchie *	The Regents of the University of California.  All rights reserved.
4139823Simp *
5139823Simp * This code is derived from software contributed to Berkeley by
6139823Simp * Chris Torek.
762143Sarchie *
862143Sarchie * Redistribution and use in source and binary forms, with or without
962143Sarchie * modification, are permitted provided that the following conditions
1062143Sarchie * are met:
1162143Sarchie * 1. Redistributions of source code must retain the above copyright
1262143Sarchie *    notice, this list of conditions and the following disclaimer.
1362143Sarchie * 2. Redistributions in binary form must reproduce the above copyright
1462143Sarchie *    notice, this list of conditions and the following disclaimer in the
1562143Sarchie *    documentation and/or other materials provided with the distribution.
1662143Sarchie * 4. Neither the name of the University nor the names of its contributors
1762143Sarchie *    may be used to endorse or promote products derived from this software
1862143Sarchie *    without specific prior written permission.
1962143Sarchie *
2062143Sarchie * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2162143Sarchie * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2262143Sarchie * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2362143Sarchie * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2462143Sarchie * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2562143Sarchie * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2662143Sarchie * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2762143Sarchie * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2862143Sarchie * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2962143Sarchie * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3062143Sarchie * SUCH DAMAGE.
3162143Sarchie */
3262143Sarchie
3362143Sarchie#if defined(LIBC_SCCS) && !defined(lint)
3462143Sarchiestatic char sccsid[] = "@(#)vfscanf.c	8.1 (Berkeley) 6/4/93";
3562143Sarchie#endif /* LIBC_SCCS and not lint */
3662143Sarchie#include <sys/cdefs.h>
3762143Sarchie__FBSDID("$FreeBSD: head/lib/libc/stdio/vfscanf.c 187422 2009-01-19 06:19:51Z das $");
3862143Sarchie
3962143Sarchie#include "namespace.h"
4062143Sarchie#include <ctype.h>
4162143Sarchie#include <inttypes.h>
4262143Sarchie#include <stdio.h>
4362143Sarchie#include <stdlib.h>
4462143Sarchie#include <stddef.h>
4562143Sarchie#include <stdarg.h>
4662143Sarchie#include <string.h>
4762143Sarchie#include <wchar.h>
4862143Sarchie#include <wctype.h>
4962143Sarchie#include "un-namespace.h"
5062143Sarchie
5162143Sarchie#include "collate.h"
5262143Sarchie#include "libc_private.h"
5362143Sarchie#include "local.h"
5462143Sarchie
55196019Srwatson#ifndef NO_FLOATING_POINT
5662143Sarchie#include <locale.h>
5762143Sarchie#endif
5862143Sarchie
#define	BUF		513	/* Maximum length of numeric string. */

/*
 * Conversion flags.  Each flag occupies one bit of the `flags' word
 * that is built up while a conversion specification is parsed.
 */
#define	LONG		(1 << 0)	/* l: long or double */
#define	LONGDBL		(1 << 1)	/* L: long double */
#define	SHORT		(1 << 2)	/* h: short */
#define	SUPPRESS	(1 << 3)	/* *: suppress assignment */
#define	POINTER		(1 << 4)	/* p: void * (as hex) */
#define	NOSKIP		(1 << 5)	/* [ or c: do not skip blanks */
#define	LONGLONG	(1 << 10)	/* ll: long long (+ deprecated q: quad) */
#define	INTMAXT		(1 << 11)	/* j: intmax_t */
#define	PTRDIFFT	(1 << 12)	/* t: ptrdiff_t */
#define	SIZET		(1 << 13)	/* z: size_t */
#define	SHORTSHORT	(1 << 14)	/* hh: char */
#define	UNSIGNED	(1 << 15)	/* %[oupxX] conversions */

/*
 * Scanner state bits, meaningful during integral conversions only:
 * SIGNOK, NDIGITS, PFXOK, NZDIGITS, and HAVESIGN
 */
#define	SIGNOK		(1 << 6)	/* +/- is (still) legal */
#define	NDIGITS		(1 << 7)	/* no digits detected */
#define	PFXOK		(1 << 8)	/* 0x prefix is (still) legal */
#define	NZDIGITS	(1 << 9)	/* no zero digits detected */
#define	HAVESIGN	(1 << 16)	/* sign detected */

/*
 * Conversion types: the class of conversion selected by the
 * specifier character.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* %[dioupxX] conversion */
#define	CT_FLOAT	4	/* %[efgEFG] conversion */
95106933Ssam
9662143Sarchiestatic const u_char *__sccl(char *, const u_char *);
97106933Ssam#ifndef NO_FLOATING_POINT
98106933Ssamstatic int parsefloat(FILE *, char *, char *);
9962143Sarchie#endif
10062143Sarchie
10162143Sarchie__weak_reference(__vfscanf, vfscanf);
102139903Sglebius
/*
 * __vfscanf - MT-safe version
 *
 * Brackets a call to __svfscanf() with FLOCKFILE/FUNLOCKFILE on fp and
 * hands back whatever __svfscanf() returned.
 */
int
__vfscanf(FILE *fp, char const *fmt0, va_list ap)
{
	int nmatched;

	FLOCKFILE(fp);
	nmatched = __svfscanf(fp, fmt0, ap);
	FUNLOCKFILE(fp);

	return (nmatched);
}
11662143Sarchie
11762143Sarchie/*
11862143Sarchie * __svfscanf - non-MT-safe version of __vfscanf
11962143Sarchie */
12062143Sarchieint
12162143Sarchie__svfscanf(FILE *fp, const char *fmt0, va_list ap)
12262143Sarchie{
12362143Sarchie	const u_char *fmt = (const u_char *)fmt0;
12462143Sarchie	int c;			/* character from format, or conversion */
12562143Sarchie	size_t width;		/* field width, or 0 */
12662143Sarchie	char *p;		/* points into all kinds of strings */
12762143Sarchie	int n;			/* handy integer */
12862143Sarchie	int flags;		/* flags as defined above */
12962143Sarchie	char *p0;		/* saves original value of p when necessary */
13062143Sarchie	int nassigned;		/* number of fields assigned */
13162143Sarchie	int nconversions;	/* number of conversions */
13262143Sarchie	int nread;		/* number of characters consumed from fp */
13364358Sarchie	int base;		/* base argument to conversion function */
13464358Sarchie	char ccltab[256];	/* character class table for %[...] */
13564358Sarchie	char buf[BUF];		/* buffer for numeric and mb conversions */
13664358Sarchie	wchar_t *wcp;		/* handy wide character pointer */
13764358Sarchie	size_t nconv;		/* length of multibyte sequence converted */
138123600Sru	static const mbstate_t initial;
13964358Sarchie	mbstate_t mbs;
14064358Sarchie
14164358Sarchie	/* `basefix' is used to avoid `if' tests in the integer scanner */
14264653Sarchie	static short basefix[17] =
14364653Sarchie		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
144123600Sru
14564653Sarchie	ORIENT(fp, -1);
14664653Sarchie
14764653Sarchie	nassigned = 0;
14864653Sarchie	nconversions = 0;
14964653Sarchie	nread = 0;
15064653Sarchie	for (;;) {
15164653Sarchie		c = *fmt++;
15264653Sarchie		if (c == 0)
15364653Sarchie			return (nassigned);
15464653Sarchie		if (isspace(c)) {
15564653Sarchie			while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p))
15664358Sarchie				nread++, fp->_r--, fp->_p++;
15764358Sarchie			continue;
15864358Sarchie		}
15964358Sarchie		if (c != '%')
16064358Sarchie			goto literal;
16164358Sarchie		width = 0;
16264358Sarchie		flags = 0;
16364653Sarchie		/*
16464653Sarchie		 * switch on the format.  continue if done;
16564653Sarchie		 * break once format type is derived.
16664653Sarchie		 */
16764653Sarchieagain:		c = *fmt++;
16864653Sarchie		switch (c) {
16964653Sarchie		case '%':
17064358Sarchieliteral:
17164358Sarchie			if (fp->_r <= 0 && __srefill(fp))
17264358Sarchie				goto input_failure;
17364358Sarchie			if (*fp->_p != c)
17464358Sarchie				goto match_failure;
175141721Sglebius			fp->_r--, fp->_p++;
176141721Sglebius			nread++;
177141721Sglebius			continue;
178141721Sglebius
179141721Sglebius		case '*':
180141721Sglebius			flags |= SUPPRESS;
181141721Sglebius			goto again;
182141721Sglebius		case 'j':
183141721Sglebius			flags |= INTMAXT;
184141721Sglebius			goto again;
185141721Sglebius		case 'l':
186141721Sglebius			if (flags & LONG) {
187141721Sglebius				flags &= ~LONG;
188141721Sglebius				flags |= LONGLONG;
189141910Sglebius			} else
190141910Sglebius				flags |= LONG;
191141910Sglebius			goto again;
192141910Sglebius		case 'q':
193141910Sglebius			flags |= LONGLONG;	/* not quite */
194141910Sglebius			goto again;
195141910Sglebius		case 't':
19662143Sarchie			flags |= PTRDIFFT;
19762143Sarchie			goto again;
19862143Sarchie		case 'z':
19962143Sarchie			flags |= SIZET;
200129823Sjulian			goto again;
201129823Sjulian		case 'L':
202129823Sjulian			flags |= LONGDBL;
203129823Sjulian			goto again;
204129823Sjulian		case 'h':
205129823Sjulian			if (flags & SHORT) {
206129823Sjulian				flags &= ~SHORT;
207129823Sjulian				flags |= SHORTSHORT;
208129823Sjulian			} else
209129823Sjulian				flags |= SHORT;
21062143Sarchie			goto again;
21162143Sarchie
21262143Sarchie		case '0': case '1': case '2': case '3': case '4':
21362143Sarchie		case '5': case '6': case '7': case '8': case '9':
21462143Sarchie			width = width * 10 + c - '0';
21562143Sarchie			goto again;
21662143Sarchie
21762143Sarchie		/*
21862143Sarchie		 * Conversions.
21962143Sarchie		 */
22062143Sarchie		case 'd':
22162143Sarchie			c = CT_INT;
22262143Sarchie			base = 10;
22362143Sarchie			break;
224106933Ssam
22562143Sarchie		case 'i':
22662143Sarchie			c = CT_INT;
22770784Sjulian			base = 0;
228129281Sarchie			break;
22962143Sarchie
23062143Sarchie		case 'o':
231129281Sarchie			c = CT_INT;
23262143Sarchie			flags |= UNSIGNED;
233129281Sarchie			base = 8;
23462143Sarchie			break;
23562143Sarchie
23662143Sarchie		case 'u':
23762143Sarchie			c = CT_INT;
23862143Sarchie			flags |= UNSIGNED;
23962143Sarchie			base = 10;
24062143Sarchie			break;
24162143Sarchie
24262143Sarchie		case 'X':
243106933Ssam		case 'x':
24462143Sarchie			flags |= PFXOK;	/* enable 0x prefixing */
24562143Sarchie			c = CT_INT;
24670784Sjulian			flags |= UNSIGNED;
247129281Sarchie			base = 16;
24862143Sarchie			break;
249129281Sarchie
250129281Sarchie#ifndef NO_FLOATING_POINT
25162143Sarchie		case 'A': case 'E': case 'F': case 'G':
25262143Sarchie		case 'a': case 'e': case 'f': case 'g':
25362143Sarchie			c = CT_FLOAT;
254129281Sarchie			break;
25562143Sarchie#endif
25662143Sarchie
25762143Sarchie		case 'S':
25862143Sarchie			flags |= LONG;
25962143Sarchie			/* FALLTHROUGH */
26062143Sarchie		case 's':
26162143Sarchie			c = CT_STRING;
26262143Sarchie			break;
26362143Sarchie
26462143Sarchie		case '[':
26570784Sjulian			fmt = __sccl(ccltab, fmt);
26662143Sarchie			flags |= NOSKIP;
26762143Sarchie			c = CT_CCL;
26862143Sarchie			break;
26962143Sarchie
27062143Sarchie		case 'C':
27162143Sarchie			flags |= LONG;
27262143Sarchie			/* FALLTHROUGH */
273194012Szec		case 'c':
27470700Sjulian			flags |= NOSKIP;
275194012Szec			c = CT_CHAR;
27662143Sarchie			break;
27762143Sarchie
27862143Sarchie		case 'p':	/* pointer format is like hex */
27962143Sarchie			flags |= POINTER | PFXOK;
28062143Sarchie			c = CT_INT;		/* assumes sizeof(uintmax_t) */
28162143Sarchie			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
28262143Sarchie			base = 16;
28362143Sarchie			break;
28462143Sarchie
28562143Sarchie		case 'n':
28662143Sarchie			nconversions++;
28762143Sarchie			if (flags & SUPPRESS)	/* ??? */
28862143Sarchie				continue;
289191510Szec			if (flags & SHORTSHORT)
290191510Szec				*va_arg(ap, char *) = nread;
291191510Szec			else if (flags & SHORT)
292191510Szec				*va_arg(ap, short *) = nread;
293191510Szec			else if (flags & LONG)
294191510Szec				*va_arg(ap, long *) = nread;
295191510Szec			else if (flags & LONGLONG)
296191510Szec				*va_arg(ap, long long *) = nread;
297191510Szec			else if (flags & INTMAXT)
298191510Szec				*va_arg(ap, intmax_t *) = nread;
299191510Szec			else if (flags & SIZET)
300191510Szec				*va_arg(ap, size_t *) = nread;
30162143Sarchie			else if (flags & PTRDIFFT)
30287599Sobrien				*va_arg(ap, ptrdiff_t *) = nread;
30362143Sarchie			else
30462143Sarchie				*va_arg(ap, int *) = nread;
305121816Sbrooks			continue;
30662143Sarchie
30762143Sarchie		default:
30862143Sarchie			goto match_failure;
30962143Sarchie
310184205Sdes		/*
31162143Sarchie		 * Disgusting backwards compatibility hack.	XXX
31262143Sarchie		 */
313121816Sbrooks		case '\0':	/* compat */
31470784Sjulian			return (EOF);
31562143Sarchie		}
31662143Sarchie
31770784Sjulian		/*
31862143Sarchie		 * We have a conversion that requires input.
319152243Sru		 */
32090249Sarchie		if (fp->_r <= 0 && __srefill(fp))
32162143Sarchie			goto input_failure;
32262143Sarchie
323121816Sbrooks		/*
32462143Sarchie		 * Consume leading white space, except for formats
325121816Sbrooks		 * that suppress this.
32662143Sarchie		 */
32762143Sarchie		if ((flags & NOSKIP) == 0) {
32862143Sarchie			while (isspace(*fp->_p)) {
32962143Sarchie				nread++;
33062143Sarchie				if (--fp->_r > 0)
33171849Sjulian					fp->_p++;
33262143Sarchie				else if (__srefill(fp))
33362143Sarchie					goto input_failure;
33462143Sarchie			}
33562143Sarchie			/*
33662143Sarchie			 * Note that there is at least one character in
33771849Sjulian			 * the buffer, so conversions that do not set NOSKIP
33862143Sarchie			 * ca no longer result in an input failure.
33971849Sjulian			 */
34071849Sjulian		}
34171849Sjulian
34271849Sjulian		/*
34371849Sjulian		 * Do the conversion.
34471849Sjulian		 */
345152243Sru		switch (c) {
34671849Sjulian
34771849Sjulian		case CT_CHAR:
34862143Sarchie			/* scan arbitrary characters (sets NOSKIP) */
34962143Sarchie			if (width == 0)
350139903Sglebius				width = 1;
351139903Sglebius			if (flags & LONG) {
352139903Sglebius				if ((flags & SUPPRESS) == 0)
353139903Sglebius					wcp = va_arg(ap, wchar_t *);
354139903Sglebius				else
355139903Sglebius					wcp = NULL;
356139903Sglebius				n = 0;
357139903Sglebius				while (width != 0) {
358139903Sglebius					if (n == MB_CUR_MAX) {
359139903Sglebius						fp->_flags |= __SERR;
360139903Sglebius						goto input_failure;
361139903Sglebius					}
362139903Sglebius					buf[n++] = *fp->_p;
363139903Sglebius					fp->_p++;
364139903Sglebius					fp->_r--;
365139903Sglebius					mbs = initial;
366139903Sglebius					nconv = mbrtowc(wcp, buf, n, &mbs);
367139903Sglebius					if (nconv == (size_t)-1) {
368139903Sglebius						fp->_flags |= __SERR;
369201924Sfjoe						goto input_failure;
370201924Sfjoe					}
371201924Sfjoe					if (nconv == 0 && !(flags & SUPPRESS))
372201924Sfjoe						*wcp = L'\0';
373201924Sfjoe					if (nconv != (size_t)-2) {
374201924Sfjoe						nread += n;
375201924Sfjoe						width--;
376201924Sfjoe						if (!(flags & SUPPRESS))
377201924Sfjoe							wcp++;
378201924Sfjoe						n = 0;
379139903Sglebius					}
380139903Sglebius					if (fp->_r <= 0 && __srefill(fp)) {
38162143Sarchie						if (n != 0) {
38262143Sarchie							fp->_flags |= __SERR;
38362143Sarchie							goto input_failure;
38462143Sarchie						}
38562143Sarchie						break;
38662143Sarchie					}
38762143Sarchie				}
38862143Sarchie				if (!(flags & SUPPRESS))
38962143Sarchie					nassigned++;
39062143Sarchie			} else if (flags & SUPPRESS) {
39170700Sjulian				size_t sum = 0;
39262143Sarchie				for (;;) {
39362143Sarchie					if ((n = fp->_r) < width) {
39462143Sarchie						sum += n;
39562143Sarchie						width -= n;
39662143Sarchie						fp->_p += n;
39762143Sarchie						if (__srefill(fp)) {
39862143Sarchie							if (sum == 0)
39962143Sarchie							    goto input_failure;
40062143Sarchie							break;
40162143Sarchie						}
40270784Sjulian					} else {
40362143Sarchie						sum += width;
40462143Sarchie						fp->_r -= width;
40562143Sarchie						fp->_p += width;
40662143Sarchie						break;
40762143Sarchie					}
40862143Sarchie				}
40962143Sarchie				nread += sum;
410186488Sjulian			} else {
41162143Sarchie				size_t r = __fread((void *)va_arg(ap, char *), 1,
412186488Sjulian				    width, fp);
413194012Szec
414186488Sjulian				if (r == 0)
41562143Sarchie					goto input_failure;
416186488Sjulian				nread += r;
417186488Sjulian				nassigned++;
418129281Sarchie			}
419186488Sjulian			nconversions++;
420186488Sjulian			break;
42162143Sarchie
42262143Sarchie		case CT_CCL:
42362143Sarchie			/* scan a (nonempty) character class (sets NOSKIP) */
42462143Sarchie			if (width == 0)
42562143Sarchie				width = (size_t)~0;	/* `infinity' */
42662143Sarchie			/* take only those things in the class */
42790249Sarchie			if (flags & LONG) {
42890249Sarchie				wchar_t twc;
42990249Sarchie				int nchars;
430194699Smav
43162143Sarchie				if ((flags & SUPPRESS) == 0)
43262143Sarchie					wcp = va_arg(ap, wchar_t *);
43362143Sarchie				else
43462143Sarchie					wcp = &twc;
43562143Sarchie				n = 0;
43662143Sarchie				nchars = 0;
43762143Sarchie				while (width != 0) {
43862143Sarchie					if (n == MB_CUR_MAX) {
43962143Sarchie						fp->_flags |= __SERR;
44070700Sjulian						goto input_failure;
44162143Sarchie					}
44270784Sjulian					buf[n++] = *fp->_p;
44362143Sarchie					fp->_p++;
44462143Sarchie					fp->_r--;
44570700Sjulian					mbs = initial;
44662143Sarchie					nconv = mbrtowc(wcp, buf, n, &mbs);
44770700Sjulian					if (nconv == (size_t)-1) {
44862143Sarchie						fp->_flags |= __SERR;
44962143Sarchie						goto input_failure;
45062143Sarchie					}
45162143Sarchie					if (nconv == 0)
452141195Sru						*wcp = L'\0';
45362143Sarchie					if (nconv != (size_t)-2) {
45462143Sarchie						if (wctob(*wcp) != EOF &&
45562143Sarchie						    !ccltab[wctob(*wcp)]) {
45662143Sarchie							while (n != 0) {
457141195Sru								n--;
45862143Sarchie								__ungetc(buf[n],
45962143Sarchie								    fp);
46062143Sarchie							}
46162143Sarchie							break;
46262143Sarchie						}
46362143Sarchie						nread += n;
46462143Sarchie						width--;
46562143Sarchie						if (!(flags & SUPPRESS))
46662143Sarchie							wcp++;
46764358Sarchie						nchars++;
46864358Sarchie						n = 0;
46964358Sarchie					}
47064358Sarchie					if (fp->_r <= 0 && __srefill(fp)) {
47164358Sarchie						if (n != 0) {
47264358Sarchie							fp->_flags |= __SERR;
473152315Sru							goto input_failure;
47464358Sarchie						}
47564358Sarchie						break;
47664653Sarchie					}
47764653Sarchie				}
47864653Sarchie				if (n != 0) {
47964653Sarchie					fp->_flags |= __SERR;
48064653Sarchie					goto input_failure;
48164653Sarchie				}
48264653Sarchie				n = nchars;
48364653Sarchie				if (n == 0)
484202588Sthompsa					goto match_failure;
48564653Sarchie				if (!(flags & SUPPRESS)) {
48664653Sarchie					*wcp = L'\0';
48764653Sarchie					nassigned++;
48864653Sarchie				}
48964653Sarchie			} else if (flags & SUPPRESS) {
49064653Sarchie				n = 0;
49164653Sarchie				while (ccltab[*fp->_p]) {
49264653Sarchie					n++, fp->_r--, fp->_p++;
49364653Sarchie					if (--width == 0)
49464653Sarchie						break;
49564358Sarchie					if (fp->_r <= 0 && __srefill(fp)) {
49664358Sarchie						if (n == 0)
49764358Sarchie							goto input_failure;
49864358Sarchie						break;
49964358Sarchie					}
50064358Sarchie				}
50164358Sarchie				if (n == 0)
50264358Sarchie					goto match_failure;
50364358Sarchie			} else {
50464358Sarchie				p0 = p = va_arg(ap, char *);
50564358Sarchie				while (ccltab[*fp->_p]) {
50664358Sarchie					fp->_r--;
50764358Sarchie					*p++ = *fp->_p++;
50864358Sarchie					if (--width == 0)
50964358Sarchie						break;
51064358Sarchie					if (fp->_r <= 0 && __srefill(fp)) {
51164653Sarchie						if (p == p0)
51264653Sarchie							goto input_failure;
51364653Sarchie						break;
51464653Sarchie					}
51564653Sarchie				}
51664653Sarchie				n = p - p0;
51764653Sarchie				if (n == 0)
51864653Sarchie					goto match_failure;
51964358Sarchie				*p = 0;
52064358Sarchie				nassigned++;
52164358Sarchie			}
52264358Sarchie			nread += n;
52364358Sarchie			nconversions++;
52464358Sarchie			break;
52564358Sarchie
526141721Sglebius		case CT_STRING:
527141721Sglebius			/* like CCL, but zero-length string OK, & no NOSKIP */
528141721Sglebius			if (width == 0)
529167729Sbms				width = (size_t)~0;
530141721Sglebius			if (flags & LONG) {
531141721Sglebius				wchar_t twc;
532141721Sglebius
533141721Sglebius				if ((flags & SUPPRESS) == 0)
534141721Sglebius					wcp = va_arg(ap, wchar_t *);
535141755Sglebius				else
536141721Sglebius					wcp = &twc;
537141721Sglebius				n = 0;
538141755Sglebius				while (!isspace(*fp->_p) && width != 0) {
539141721Sglebius					if (n == MB_CUR_MAX) {
540141721Sglebius						fp->_flags |= __SERR;
541167729Sbms						goto input_failure;
542167729Sbms					}
543167729Sbms					buf[n++] = *fp->_p;
544167729Sbms					fp->_p++;
545167729Sbms					fp->_r--;
546167729Sbms					mbs = initial;
547167729Sbms					nconv = mbrtowc(wcp, buf, n, &mbs);
548195049Srwatson					if (nconv == (size_t)-1) {
549167729Sbms						fp->_flags |= __SERR;
550167729Sbms						goto input_failure;
551195049Srwatson					}
552167729Sbms					if (nconv == 0)
553167729Sbms						*wcp = L'\0';
554167729Sbms					if (nconv != (size_t)-2) {
555167729Sbms						if (iswspace(*wcp)) {
556167729Sbms							while (n != 0) {
557167729Sbms								n--;
558141721Sglebius								__ungetc(buf[n],
559141721Sglebius								    fp);
560141721Sglebius							}
561141721Sglebius							break;
562141721Sglebius						}
563141721Sglebius						nread += n;
564141721Sglebius						width--;
565141721Sglebius						if (!(flags & SUPPRESS))
566141721Sglebius							wcp++;
567141721Sglebius						n = 0;
568141755Sglebius					}
569141721Sglebius					if (fp->_r <= 0 && __srefill(fp)) {
570141721Sglebius						if (n != 0) {
571141755Sglebius							fp->_flags |= __SERR;
572141721Sglebius							goto input_failure;
573141721Sglebius						}
574141721Sglebius						break;
575141721Sglebius					}
576141721Sglebius				}
577141721Sglebius				if (!(flags & SUPPRESS)) {
578141910Sglebius					*wcp = L'\0';
579141910Sglebius					nassigned++;
580141910Sglebius				}
58162143Sarchie			} else if (flags & SUPPRESS) {
58262143Sarchie				n = 0;
58362143Sarchie				while (!isspace(*fp->_p)) {
58462143Sarchie					n++, fp->_r--, fp->_p++;
58562143Sarchie					if (--width == 0)
58662143Sarchie						break;
58762143Sarchie					if (fp->_r <= 0 && __srefill(fp))
58862143Sarchie						break;
58962143Sarchie				}
59070700Sjulian				nread += n;
59170700Sjulian			} else {
59262143Sarchie				p0 = p = va_arg(ap, char *);
59362143Sarchie				while (!isspace(*fp->_p)) {
59462143Sarchie					fp->_r--;
59562143Sarchie					*p++ = *fp->_p++;
59662143Sarchie					if (--width == 0)
597186488Sjulian						break;
59862143Sarchie					if (fp->_r <= 0 && __srefill(fp))
59962143Sarchie						break;
60070700Sjulian				}
60162143Sarchie				*p = 0;
60270700Sjulian				nread += p - p0;
603131155Sjulian				nassigned++;
60487599Sobrien			}
605129281Sarchie			nconversions++;
606136312Sdes			continue;
60783366Sjulian
60862143Sarchie		case CT_INT:
60962143Sarchie			/* scan an integer as if by the conversion function */
61062143Sarchie#ifdef hardway
611129281Sarchie			if (width == 0 || width > sizeof(buf) - 1)
61262143Sarchie				width = sizeof(buf) - 1;
61362143Sarchie#else
614186488Sjulian			/* size_t is unsigned, hence this optimisation */
61562143Sarchie			if (--width > sizeof(buf) - 2)
616186488Sjulian				width = sizeof(buf) - 2;
617186488Sjulian			width++;
61870784Sjulian#endif
61996265Sarchie			flags |= SIGNOK | NDIGITS | NZDIGITS;
62062143Sarchie			for (p = buf; width; width--) {
621186488Sjulian				c = *fp->_p;
622186488Sjulian				/*
623186488Sjulian				 * Switch on the character; `goto ok'
62496265Sarchie				 * if we accept it as a part of number.
625186488Sjulian				 */
626148887Srwatson				switch (c) {
627148887Srwatson
62896265Sarchie				/*
62996265Sarchie				 * The digit 0 is always legal, but is
63096265Sarchie				 * special.  For %i conversions, if no
63196265Sarchie				 * digits (zero or nonzero) have been
63262143Sarchie				 * scanned (only signs), we will have
63362143Sarchie				 * base==0.  In that case, we should set
63470700Sjulian				 * it to 8 and enable 0x prefixing.
63562143Sarchie				 * Also, if we have not scanned zero digits
63662143Sarchie				 * before this, do not turn off prefixing
63762143Sarchie				 * (someone else will turn it off if we
63897896Sarchie				 * have scanned any nonzero digits).
63962143Sarchie				 */
64062143Sarchie				case '0':
64164358Sarchie					if (base == 0) {
64264358Sarchie						base = 8;
64397896Sarchie						flags |= PFXOK;
64497896Sarchie					}
64597896Sarchie					if (flags & NZDIGITS)
64697896Sarchie					    flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
64797896Sarchie					else
64897896Sarchie					    flags &= ~(SIGNOK|PFXOK|NDIGITS);
64997896Sarchie					goto ok;
650152315Sru
65164358Sarchie				/* 1 through 7 always legal */
65264358Sarchie				case '1': case '2': case '3':
65364358Sarchie				case '4': case '5': case '6': case '7':
65462678Sjulian					base = basefix[base];
65562143Sarchie					flags &= ~(SIGNOK | PFXOK | NDIGITS);
65696265Sarchie					goto ok;
65762143Sarchie
65862143Sarchie				/* digits 8 and 9 ok iff decimal or hex */
65962143Sarchie				case '8': case '9':
66062143Sarchie					base = basefix[base];
66162143Sarchie					if (base <= 8)
66262143Sarchie						break;	/* not legal here */
663186488Sjulian					flags &= ~(SIGNOK | PFXOK | NDIGITS);
66462143Sarchie					goto ok;
665186488Sjulian
666186488Sjulian				/* letters ok iff hex */
66770784Sjulian				case 'A': case 'B': case 'C':
668151063Sglebius				case 'D': case 'E': case 'F':
66962143Sarchie				case 'a': case 'b': case 'c':
670186488Sjulian				case 'd': case 'e': case 'f':
671186488Sjulian					/* no need to fix base here */
672186488Sjulian					if (base <= 10)
673152001Sru						break;	/* not legal here */
674152001Sru					flags &= ~(SIGNOK | PFXOK | NDIGITS);
675152001Sru					goto ok;
676152001Sru
677152001Sru				/* sign ok only as first character */
678152001Sru				case '+': case '-':
679152001Sru					if (flags & SIGNOK) {
680152001Sru						flags &= ~SIGNOK;
681152001Sru						flags |= HAVESIGN;
682151063Sglebius						goto ok;
68362143Sarchie					}
684151305Sthompsa					break;
685151063Sglebius
686151305Sthompsa				/*
687151063Sglebius				 * x ok iff flag still set & 2nd char (or
688151063Sglebius				 * 3rd char if we have a sign).
689151063Sglebius				 */
690151063Sglebius				case 'x': case 'X':
69162143Sarchie					if (flags & PFXOK && p ==
692151305Sthompsa					    buf + 1 + !!(flags & HAVESIGN)) {
69362143Sarchie						base = 16;	/* if %i */
69462143Sarchie						flags &= ~PFXOK;
69562143Sarchie						goto ok;
69662143Sarchie					}
69771849Sjulian					break;
69871849Sjulian				}
69962143Sarchie
70062143Sarchie				/*
70170700Sjulian				 * If we got here, c is not a legal character
70262143Sarchie				 * for a number.  Stop accumulating digits.
70370784Sjulian				 */
70464358Sarchie				break;
705132464Sjulian		ok:
70671849Sjulian				/*
70771849Sjulian				 * c is legal: store it and look at the next.
70871849Sjulian				 */
70971849Sjulian				*p++ = c;
71071849Sjulian				if (--fp->_r > 0)
71171849Sjulian					fp->_p++;
71271849Sjulian				else if (__srefill(fp))
713184205Sdes					break;		/* EOF */
71471849Sjulian			}
71571849Sjulian			/*
71670700Sjulian			 * If we had only a sign, it is no good; push
717124269Sgreen			 * back the sign.  If the number ends in `x',
718124269Sgreen			 * it was [sign] '0' 'x', so push back the x
719124269Sgreen			 * and treat it as [sign] '0'.
720124269Sgreen			 */
72164358Sarchie			if (flags & NDIGITS) {
722132464Sjulian				if (p > buf)
723132464Sjulian					(void) __ungetc(*(u_char *)--p, fp);
72462143Sarchie				goto match_failure;
72562143Sarchie			}
72662143Sarchie			c = ((u_char *)p)[-1];
72762143Sarchie			if (c == 'x' || c == 'X') {
72862143Sarchie				--p;
72962143Sarchie				(void) __ungetc(c, fp);
73062143Sarchie			}
73162143Sarchie			if ((flags & SUPPRESS) == 0) {
73262143Sarchie				uintmax_t res;
73370784Sjulian
73462143Sarchie				*p = 0;
73590249Sarchie				if ((flags & UNSIGNED) == 0)
73662143Sarchie				    res = strtoimax(buf, (char **)NULL, base);
737124270Sgreen				else
738124270Sgreen				    res = strtoumax(buf, (char **)NULL, base);
739129281Sarchie				if (flags & POINTER)
74062143Sarchie					*va_arg(ap, void **) =
741129281Sarchie							(void *)(uintptr_t)res;
742129281Sarchie				else if (flags & SHORTSHORT)
743129281Sarchie					*va_arg(ap, char *) = res;
74487599Sobrien				else if (flags & SHORT)
74570784Sjulian					*va_arg(ap, short *) = res;
74670784Sjulian				else if (flags & LONG)
74770784Sjulian					*va_arg(ap, long *) = res;
74862143Sarchie				else if (flags & LONGLONG)
74962143Sarchie					*va_arg(ap, long long *) = res;
75062143Sarchie				else if (flags & INTMAXT)
75162143Sarchie					*va_arg(ap, intmax_t *) = res;
75262143Sarchie				else if (flags & PTRDIFFT)
75362143Sarchie					*va_arg(ap, ptrdiff_t *) = res;
75462143Sarchie				else if (flags & SIZET)
75562143Sarchie					*va_arg(ap, size_t *) = res;
75662143Sarchie				else
75762143Sarchie					*va_arg(ap, int *) = res;
75862143Sarchie				nassigned++;
75962143Sarchie			}
76062143Sarchie			nread += p - buf;
76162143Sarchie			nconversions++;
76262143Sarchie			break;
76362143Sarchie
76462143Sarchie#ifndef NO_FLOATING_POINT
76562143Sarchie		case CT_FLOAT:
76662143Sarchie			/* scan a floating point number as if by strtod */
76762143Sarchie			if (width == 0 || width > sizeof(buf) - 1)
76862143Sarchie				width = sizeof(buf) - 1;
76962143Sarchie			if ((width = parsefloat(fp, buf, buf + width)) == 0)
77062143Sarchie				goto match_failure;
77162143Sarchie			if ((flags & SUPPRESS) == 0) {
77262143Sarchie				if (flags & LONGDBL) {
77362143Sarchie					long double res = strtold(buf, &p);
77462143Sarchie					*va_arg(ap, long double *) = res;
77562143Sarchie				} else if (flags & LONG) {
77662143Sarchie					double res = strtod(buf, &p);
77762143Sarchie					*va_arg(ap, double *) = res;
778139903Sglebius				} else {
77962143Sarchie					float res = strtof(buf, &p);
78062143Sarchie					*va_arg(ap, float *) = res;
78162143Sarchie				}
78262143Sarchie				nassigned++;
78362143Sarchie			}
78462143Sarchie			nread += width;
78562143Sarchie			nconversions++;
78662143Sarchie			break;
78762143Sarchie#endif /* !NO_FLOATING_POINT */
78862143Sarchie		}
78962143Sarchie	}
79062143Sarchieinput_failure:
79162143Sarchie	return (nconversions != 0 ? nassigned : EOF);
79262143Sarchiematch_failure:
79362143Sarchie	return (nassigned);
79462143Sarchie}
79562143Sarchie
79662143Sarchie/*
79762143Sarchie * Fill in the given table from the scanset at the given format
798139903Sglebius * (just after `[').  Return a pointer to the character past the
79962143Sarchie * closing `]'.  The table has a 1 wherever characters should be
80062143Sarchie * considered part of the scanset.
80162143Sarchie */
80262143Sarchiestatic const u_char *
80362143Sarchie__sccl(tab, fmt)
80462143Sarchie	char *tab;
80562143Sarchie	const u_char *fmt;
80662143Sarchie{
80762143Sarchie	int c, n, v, i;
80862143Sarchie
809195837Srwatson	/* first `clear' the whole table */
810195837Srwatson	c = *fmt++;		/* first char hat => negated scanset */
811191510Szec	if (c == '^') {
812191510Szec		v = 1;		/* default => accept */
813191510Szec		c = *fmt++;	/* get new first char */
814195837Srwatson	} else
815195837Srwatson		v = 0;		/* default => reject */
816195837Srwatson
817195837Srwatson	/* XXX: Will not work if sizeof(tab*) > sizeof(char) */
818191510Szec	(void) memset(tab, v, 256);
819191510Szec
820191510Szec	if (c == 0)
821191510Szec		return (fmt - 1);/* format ended before closing ] */
822191510Szec
823191510Szec	/*
824191510Szec	 * Now set the entries corresponding to the actual scanset
825191510Szec	 * to the opposite of the above.
826191510Szec	 *
827195837Srwatson	 * The first character may be ']' (or '-') without being special;
828195837Srwatson	 * the last character may be '-'.
829	 */
830	v = 1 - v;
831	for (;;) {
832		tab[c] = v;		/* take character c */
833doswitch:
834		n = *fmt++;		/* and examine the next */
835		switch (n) {
836
837		case 0:			/* format ended too soon */
838			return (fmt - 1);
839
840		case '-':
841			/*
842			 * A scanset of the form
843			 *	[01+-]
844			 * is defined as `the digit 0, the digit 1,
845			 * the character +, the character -', but
846			 * the effect of a scanset such as
847			 *	[a-zA-Z0-9]
848			 * is implementation defined.  The V7 Unix
849			 * scanf treats `a-z' as `the letters a through
850			 * z', but treats `a-a' as `the letter a, the
851			 * character -, and the letter a'.
852			 *
853			 * For compatibility, the `-' is not considerd
854			 * to define a range if the character following
855			 * it is either a close bracket (required by ANSI)
856			 * or is not numerically greater than the character
857			 * we just stored in the table (c).
858			 */
859			n = *fmt;
860			if (n == ']'
861			    || (__collate_load_error ? n < c :
862				__collate_range_cmp (n, c) < 0
863			       )
864			   ) {
865				c = '-';
866				break;	/* resume the for(;;) */
867			}
868			fmt++;
869			/* fill in the range */
870			if (__collate_load_error) {
871				do {
872					tab[++c] = v;
873				} while (c < n);
874			} else {
875				for (i = 0; i < 256; i ++)
876					if (   __collate_range_cmp (c, i) < 0
877					    && __collate_range_cmp (i, n) <= 0
878					   )
879						tab[i] = v;
880			}
881#if 1	/* XXX another disgusting compatibility hack */
882			c = n;
883			/*
884			 * Alas, the V7 Unix scanf also treats formats
885			 * such as [a-c-e] as `the letters a through e'.
886			 * This too is permitted by the standard....
887			 */
888			goto doswitch;
889#else
890			c = *fmt++;
891			if (c == 0)
892				return (fmt - 1);
893			if (c == ']')
894				return (fmt);
895#endif
896			break;
897
898		case ']':		/* end of scanset */
899			return (fmt);
900
901		default:		/* just another character */
902			c = n;
903			break;
904		}
905	}
906	/* NOTREACHED */
907}
908
909#ifndef NO_FLOATING_POINT
910static int
911parsefloat(FILE *fp, char *buf, char *end)
912{
913	char *commit, *p;
914	int infnanpos = 0, decptpos = 0;
915	enum {
916		S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX,
917		S_DIGITS, S_DECPT, S_FRAC, S_EXP, S_EXPDIGITS
918	} state = S_START;
919	unsigned char c;
920	const char *decpt = localeconv()->decimal_point;
921	_Bool gotmantdig = 0, ishex = 0;
922
923	/*
924	 * We set commit = p whenever the string we have read so far
925	 * constitutes a valid representation of a floating point
926	 * number by itself.  At some point, the parse will complete
927	 * or fail, and we will ungetc() back to the last commit point.
928	 * To ensure that the file offset gets updated properly, it is
929	 * always necessary to read at least one character that doesn't
930	 * match; thus, we can't short-circuit "infinity" or "nan(...)".
931	 */
932	commit = buf - 1;
933	for (p = buf; p < end; ) {
934		c = *fp->_p;
935reswitch:
936		switch (state) {
937		case S_START:
938			state = S_GOTSIGN;
939			if (c == '-' || c == '+')
940				break;
941			else
942				goto reswitch;
943		case S_GOTSIGN:
944			switch (c) {
945			case '0':
946				state = S_MAYBEHEX;
947				commit = p;
948				break;
949			case 'I':
950			case 'i':
951				state = S_INF;
952				break;
953			case 'N':
954			case 'n':
955				state = S_NAN;
956				break;
957			default:
958				state = S_DIGITS;
959				goto reswitch;
960			}
961			break;
962		case S_INF:
963			if (infnanpos > 6 ||
964			    (c != "nfinity"[infnanpos] &&
965			     c != "NFINITY"[infnanpos]))
966				goto parsedone;
967			if (infnanpos == 1 || infnanpos == 6)
968				commit = p;	/* inf or infinity */
969			infnanpos++;
970			break;
971		case S_NAN:
972			switch (infnanpos) {
973			case 0:
974				if (c != 'A' && c != 'a')
975					goto parsedone;
976				break;
977			case 1:
978				if (c != 'N' && c != 'n')
979					goto parsedone;
980				else
981					commit = p;
982				break;
983			case 2:
984				if (c != '(')
985					goto parsedone;
986				break;
987			default:
988				if (c == ')') {
989					commit = p;
990					state = S_DONE;
991				} else if (!isalnum(c) && c != '_')
992					goto parsedone;
993				break;
994			}
995			infnanpos++;
996			break;
997		case S_DONE:
998			goto parsedone;
999		case S_MAYBEHEX:
1000			state = S_DIGITS;
1001			if (c == 'X' || c == 'x') {
1002				ishex = 1;
1003				break;
1004			} else {	/* we saw a '0', but no 'x' */
1005				gotmantdig = 1;
1006				goto reswitch;
1007			}
1008		case S_DIGITS:
1009			if ((ishex && isxdigit(c)) || isdigit(c)) {
1010				gotmantdig = 1;
1011				commit = p;
1012				break;
1013			} else {
1014				state = S_DECPT;
1015				goto reswitch;
1016			}
1017		case S_DECPT:
1018			if (c == decpt[decptpos]) {
1019				if (decpt[++decptpos] == '\0') {
1020					/* We read the complete decpt seq. */
1021					state = S_FRAC;
1022					if (gotmantdig)
1023						commit = p;
1024				}
1025				break;
1026			} else if (!decptpos) {
1027				/* We didn't read any decpt characters. */
1028				state = S_FRAC;
1029				goto reswitch;
1030			} else {
1031				/*
1032				 * We read part of a multibyte decimal point,
1033				 * but the rest is invalid, so bail.
1034				 */
1035				goto parsedone;
1036			}
1037		case S_FRAC:
1038			if (((c == 'E' || c == 'e') && !ishex) ||
1039			    ((c == 'P' || c == 'p') && ishex)) {
1040				if (!gotmantdig)
1041					goto parsedone;
1042				else
1043					state = S_EXP;
1044			} else if ((ishex && isxdigit(c)) || isdigit(c)) {
1045				commit = p;
1046				gotmantdig = 1;
1047			} else
1048				goto parsedone;
1049			break;
1050		case S_EXP:
1051			state = S_EXPDIGITS;
1052			if (c == '-' || c == '+')
1053				break;
1054			else
1055				goto reswitch;
1056		case S_EXPDIGITS:
1057			if (isdigit(c))
1058				commit = p;
1059			else
1060				goto parsedone;
1061			break;
1062		default:
1063			abort();
1064		}
1065		*p++ = c;
1066		if (--fp->_r > 0)
1067			fp->_p++;
1068		else if (__srefill(fp))
1069			break;	/* EOF */
1070	}
1071
1072parsedone:
1073	while (commit < --p)
1074		__ungetc(*(u_char *)p, fp);
1075	*++commit = '\0';
1076	return (commit - buf);
1077}
1078#endif
1079