1/*-
2 * Copyright (c) 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Chris Torek.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#include <stdio.h>
38#include <stdlib.h>
39#include <ctype.h>
40#include <stdarg.h>
41#include "local.h"
42
43#ifdef FLOATING_POINT
44#	include "floatio.h"
45#endif
46
/*
 * Size of the scratch buffer used by the numeric conversions.  The
 * integer and floating point scanners clamp the field width to
 * sizeof(buf) - 1 so that the collected text plus its terminating NUL
 * always fits.
 */
#define	BUF		513	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 *
 * These are accumulated in `flags' while a conversion specification is
 * being parsed and are consulted when the converted value is stored.
 */
#define	LONG		0x01	/* l: long or double */
#define	LONGDBL		0x02	/* L: long double (stored via long double *) */
#define	SHORT		0x04	/* h: short */
#define QUAD		0x08	/* q or ll: quad (64-bit integer) */
#define	SUPPRESS	0x10	/* suppress assignment */
#define	POINTER		0x20	/* weird %p pointer (`fake hex') */
#define	NOSKIP		0x40	/* do not skip blanks */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
 *
 * DPTOK/PFXOK and EXPOK/NZDIGITS deliberately share bit values: a
 * single conversion is either integral or floating, never both.
 */
#define	SIGNOK		0x080	/* +/- is (still) legal */
#define	NDIGITS		0x100	/* no digits detected */

#define	DPTOK		0x200	/* (float) decimal point is still legal */
#define	EXPOK		0x400	/* (float) exponent (e+3, etc) still legal */

#define	PFXOK		0x200	/* 0x prefix is (still) legal */
#define	NZDIGITS	0x400	/* no zero digits detected */

/*
 * Conversion types.
 *
 * Once the conversion character has been identified, `c' is overwritten
 * with one of these values to select the scanning code.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* integer, i.e., strtoll or strtoull */
#define	CT_FLOAT	4	/* floating, i.e., strtod */

/* Local shorthands for the unsigned types used below. */
#define u_char unsigned char
#define u_long unsigned long

/* Builds the %[...] membership table; defined at the end of this file. */
static u_char *__sccl(char *tab, u_char *fmt);
87
88/*
89 * vfscanf
90 */
91int
92__svfscanf(fp, fmt0, ap)
93	register FILE *fp;
94	char const *fmt0;
95	va_list ap;
96{
97	register u_char *fmt = (u_char *)fmt0;
98	register int c;		/* character from format, or conversion */
99	register size_t width;	/* field width, or 0 */
100	register char *p;	/* points into all kinds of strings */
101	register int n;		/* handy integer */
102	register int flags;	/* flags as defined above */
103	register char *p0;	/* saves original value of p when necessary */
104	int nassigned;		/* number of fields assigned */
105	int nread;		/* number of characters consumed from fp */
106	int base;		/* base argument to strtoll/strtoull */
107	uint64 (*ccfn)();	/* conversion function (strtoll/strtoull) */
108	char ccltab[256];	/* character class table for %[...] */
109	char buf[BUF];		/* buffer for numeric conversions */
110
111	/* `basefix' is used to avoid `if' tests in the integer scanner */
112	static short basefix[17] =
113		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
114
115	nassigned = 0;
116	nread = 0;
117	base = 0;		/* XXX just to keep gcc happy */
118	ccfn = NULL;		/* XXX just to keep gcc happy */
119	for (;;) {
120		c = *fmt++;
121		if (c == 0)
122			return (nassigned);
123		if (isspace(c)) {
124			while ((fp->_r > 0 || __srefill(fp) == 0) &&
125			    isspace(*fp->_p))
126				nread++, fp->_r--, fp->_p++;
127			continue;
128		}
129		if (c != '%')
130			goto literal;
131		width = 0;
132		flags = 0;
133		/*
134		 * switch on the format.  continue if done;
135		 * break once format type is derived.
136		 */
137again:		c = *fmt++;
138		switch (c) {
139		case '%':
140literal:
141			if (fp->_r <= 0 && __srefill(fp))
142				goto input_failure;
143			if (*fp->_p != c)
144				goto match_failure;
145			fp->_r--, fp->_p++;
146			nread++;
147			continue;
148
149		case '*':
150			flags |= SUPPRESS;
151			goto again;
152		case 'L':
153			flags |= LONGDBL | QUAD;
154			goto again;
155		case 'h':
156			flags |= SHORT;
157			goto again;
158		case 'l':
159			if (*fmt == 'l') {
160				fmt++;
161				flags |= QUAD;
162			} else {
163				flags |= LONG;
164			}
165			goto again;
166		case 'q':
167			flags |= QUAD;
168			goto again;
169
170		case '0': case '1': case '2': case '3': case '4':
171		case '5': case '6': case '7': case '8': case '9':
172			width = width * 10 + c - '0';
173			goto again;
174
175		/*
176		 * Conversions.
177		 * Those marked `compat' are for 4.[123]BSD compatibility.
178		 *
179		 * (According to ANSI, E and X formats are supposed
180		 * to the same as e and x.  Sorry about that.)
181		 */
182		case 'D':	/* compat */
183			flags |= LONG;
184			/* FALLTHROUGH */
185		case 'd':
186			c = CT_INT;
187			ccfn = (uint64 (*)())strtoll;
188			base = 10;
189			break;
190
191		case 'i':
192			c = CT_INT;
193			ccfn = (uint64 (*)())strtoll;
194			base = 0;
195			break;
196
197		case 'O':	/* compat */
198			flags |= LONG;
199			/* FALLTHROUGH */
200		case 'o':
201			c = CT_INT;
202			ccfn = strtoull;
203			base = 8;
204			break;
205
206		case 'u':
207			c = CT_INT;
208			ccfn = strtoull;
209			base = 10;
210			break;
211
212		case 'X':
213		case 'x':
214			flags |= PFXOK;	/* enable 0x prefixing */
215			c = CT_INT;
216			ccfn = strtoull;
217			base = 16;
218			break;
219
220#ifdef FLOATING_POINT
221		case 'E':
222		case 'G':
223		case 'e':
224		case 'f':
225		case 'g':
226			c = CT_FLOAT;
227			break;
228#endif
229
230		case 's':
231			c = CT_STRING;
232			break;
233
234		case '[':
235			fmt = __sccl(ccltab, fmt);
236			flags |= NOSKIP;
237			c = CT_CCL;
238			break;
239
240		case 'c':
241			flags |= NOSKIP;
242			c = CT_CHAR;
243			break;
244
245		case 'p':	/* pointer format is like hex */
246			flags |= POINTER | PFXOK;
247			c = CT_INT;
248			ccfn = strtoull;
249			base = 16;
250			break;
251
252		case 'n':
253			if (flags & SUPPRESS)	/* ??? */
254				continue;
255			if (flags & SHORT)
256				*va_arg(ap, short *) = nread;
257			else if (flags & LONG)
258				*va_arg(ap, long *) = nread;
259			else
260				*va_arg(ap, int *) = nread;
261			continue;
262
263		/*
264		 * Disgusting backwards compatibility hacks.	XXX
265		 */
266		case '\0':	/* compat */
267			return (EOF);
268
269		default:	/* compat */
270			if (isupper(c))
271				flags |= LONG;
272			c = CT_INT;
273			ccfn = (uint64 (*)())strtoll;
274			base = 10;
275			break;
276		}
277
278		/*
279		 * We have a conversion that requires input.
280		 */
281		if (fp->_r <= 0 && __srefill(fp))
282			goto input_failure;
283
284		/*
285		 * Consume leading white space, except for formats
286		 * that suppress this.
287		 */
288		if ((flags & NOSKIP) == 0) {
289			while (isspace(*fp->_p)) {
290				nread++;
291				if (--fp->_r > 0)
292					fp->_p++;
293				else if (__srefill(fp))
294					goto input_failure;
295			}
296			/*
297			 * Note that there is at least one character in
298			 * the buffer, so conversions that do not set NOSKIP
299			 * ca no longer result in an input failure.
300			 */
301		}
302
303		/*
304		 * Do the conversion.
305		 */
306		switch (c) {
307
308		case CT_CHAR:
309			/* scan arbitrary characters (sets NOSKIP) */
310			if (width == 0)
311				width = 1;
312			if (flags & SUPPRESS) {
313				size_t sum = 0;
314				for (;;) {
315					if ((n = fp->_r) < width) {
316						sum += n;
317						width -= n;
318						fp->_p += n;
319						if (__srefill(fp)) {
320							if (sum == 0)
321							    goto input_failure;
322							break;
323						}
324					} else {
325						sum += width;
326						fp->_r -= width;
327						fp->_p += width;
328						break;
329					}
330				}
331				nread += sum;
332			} else {
333				size_t r = fread((void *)va_arg(ap, char *), 1,
334				    width, fp);
335
336				if (r == 0)
337					goto input_failure;
338				nread += r;
339				nassigned++;
340			}
341			break;
342
343		case CT_CCL:
344			/* scan a (nonempty) character class (sets NOSKIP) */
345			if (width == 0)
346				width = (size_t)~0;	/* `infinity' */
347			/* take only those things in the class */
348			if (flags & SUPPRESS) {
349				n = 0;
350				while (ccltab[*fp->_p]) {
351					n++, fp->_r--, fp->_p++;
352					if (--width == 0)
353						break;
354					if (fp->_r <= 0 && __srefill(fp)) {
355						if (n == 0)
356							goto input_failure;
357						break;
358					}
359				}
360				if (n == 0)
361					goto match_failure;
362			} else {
363				p0 = p = va_arg(ap, char *);
364				while (ccltab[*fp->_p]) {
365					fp->_r--;
366					*p++ = *fp->_p++;
367					if (--width == 0)
368						break;
369					if (fp->_r <= 0 && __srefill(fp)) {
370						if (p == p0)
371							goto input_failure;
372						break;
373					}
374				}
375				n = p - p0;
376				if (n == 0)
377					goto match_failure;
378				*p = 0;
379				nassigned++;
380			}
381			nread += n;
382			break;
383
384		case CT_STRING:
385			/* like CCL, but zero-length string OK, & no NOSKIP */
386			if (width == 0)
387				width = (size_t)~0;
388			if (flags & SUPPRESS) {
389				n = 0;
390				while (!isspace(*fp->_p)) {
391					n++, fp->_r--, fp->_p++;
392					if (--width == 0)
393						break;
394					if (fp->_r <= 0 && __srefill(fp))
395						break;
396				}
397				nread += n;
398			} else {
399				p0 = p = va_arg(ap, char *);
400				while (!isspace(*fp->_p)) {
401					fp->_r--;
402					*p++ = *fp->_p++;
403					if (--width == 0)
404						break;
405					if (fp->_r <= 0 && __srefill(fp))
406						break;
407				}
408				*p = 0;
409				nread += p - p0;
410				nassigned++;
411			}
412			continue;
413
414		case CT_INT:
415			/* scan an integer as if by strtoll/strtoull */
416#ifdef hardway
417			if (width == 0 || width > sizeof(buf) - 1)
418				width = sizeof(buf) - 1;
419#else
420			/* size_t is unsigned, hence this optimisation */
421			if (--width > sizeof(buf) - 2)
422				width = sizeof(buf) - 2;
423			width++;
424#endif
425			flags |= SIGNOK | NDIGITS | NZDIGITS;
426			for (p = buf; width; width--) {
427				c = *fp->_p;
428				/*
429				 * Switch on the character; `goto ok'
430				 * if we accept it as a part of number.
431				 */
432				switch (c) {
433
434				/*
435				 * The digit 0 is always legal, but is
436				 * special.  For %i conversions, if no
437				 * digits (zero or nonzero) have been
438				 * scanned (only signs), we will have
439				 * base==0.  In that case, we should set
440				 * it to 8 and enable 0x prefixing.
441				 * Also, if we have not scanned zero digits
442				 * before this, do not turn off prefixing
443				 * (someone else will turn it off if we
444				 * have scanned any nonzero digits).
445				 */
446				case '0':
447					if (base == 0) {
448						base = 8;
449						flags |= PFXOK;
450					}
451					if (flags & NZDIGITS)
452					    flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
453					else
454					    flags &= ~(SIGNOK|PFXOK|NDIGITS);
455					goto ok;
456
457				/* 1 through 7 always legal */
458				case '1': case '2': case '3':
459				case '4': case '5': case '6': case '7':
460					base = basefix[base];
461					flags &= ~(SIGNOK | PFXOK | NDIGITS);
462					goto ok;
463
464				/* digits 8 and 9 ok iff decimal or hex */
465				case '8': case '9':
466					base = basefix[base];
467					if (base <= 8)
468						break;	/* not legal here */
469					flags &= ~(SIGNOK | PFXOK | NDIGITS);
470					goto ok;
471
472				/* letters ok iff hex */
473				case 'A': case 'B': case 'C':
474				case 'D': case 'E': case 'F':
475				case 'a': case 'b': case 'c':
476				case 'd': case 'e': case 'f':
477					/* no need to fix base here */
478					if (base <= 10)
479						break;	/* not legal here */
480					flags &= ~(SIGNOK | PFXOK | NDIGITS);
481					goto ok;
482
483				/* sign ok only as first character */
484				case '+': case '-':
485					if (flags & SIGNOK) {
486						flags &= ~SIGNOK;
487						goto ok;
488					}
489					break;
490
491				/* x ok iff flag still set & 2nd char */
492				case 'x': case 'X':
493					if (flags & PFXOK && p == buf + 1) {
494						base = 16;	/* if %i */
495						flags &= ~PFXOK;
496						goto ok;
497					}
498					break;
499				}
500
501				/*
502				 * If we got here, c is not a legal character
503				 * for a number.  Stop accumulating digits.
504				 */
505				break;
506		ok:
507				/*
508				 * c is legal: store it and look at the next.
509				 */
510				*p++ = c;
511				if (--fp->_r > 0)
512					fp->_p++;
513				else if (__srefill(fp))
514					break;		/* EOF */
515			}
516			/*
517			 * If we had only a sign, it is no good; push
518			 * back the sign.  If the number ends in `x',
519			 * it was [sign] '0' 'x', so push back the x
520			 * and treat it as [sign] '0'.
521			 */
522			if (flags & NDIGITS) {
523				if (p > buf)
524					(void) ungetc(*(u_char *)--p, fp);
525				goto match_failure;
526			}
527			c = ((u_char *)p)[-1];
528			if (c == 'x' || c == 'X') {
529				--p;
530				(void) ungetc(c, fp);
531			}
532			if ((flags & SUPPRESS) == 0) {
533				uint64 res;
534
535				*p = 0;
536				res = (*ccfn)(buf, (char **)NULL, base);
537				if (flags & POINTER)
538					*va_arg(ap, void **) =
539					    (void *)(long)res;
540				else if (flags & QUAD)
541					*va_arg(ap, int64 *) = res;
542				else if (flags & LONG)
543					*va_arg(ap, long *) = res;
544				else if (flags & SHORT)
545					*va_arg(ap, short *) = res;
546				else
547					*va_arg(ap, int *) = res;
548				nassigned++;
549			}
550			nread += p - buf;
551			break;
552
553#ifdef FLOATING_POINT
554		case CT_FLOAT:
555			/* scan a floating point number as if by strtod */
556#ifdef hardway
557			if (width == 0 || width > sizeof(buf) - 1)
558				width = sizeof(buf) - 1;
559#else
560			/* size_t is unsigned, hence this optimisation */
561			if (--width > sizeof(buf) - 2)
562				width = sizeof(buf) - 2;
563			width++;
564#endif
565			flags |= SIGNOK | NDIGITS | DPTOK | EXPOK;
566			for (p = buf; width; width--) {
567				c = *fp->_p;
568				/*
569				 * This code mimicks the integer conversion
570				 * code, but is much simpler.
571				 */
572				switch (c) {
573
574				case '0': case '1': case '2': case '3':
575				case '4': case '5': case '6': case '7':
576				case '8': case '9':
577					flags &= ~(SIGNOK | NDIGITS);
578					goto fok;
579
580				case '+': case '-':
581					if (flags & SIGNOK) {
582						flags &= ~SIGNOK;
583						goto fok;
584					}
585					break;
586				case '.':
587					if (flags & DPTOK) {
588						flags &= ~(SIGNOK | DPTOK);
589						goto fok;
590					}
591					break;
592				case 'e': case 'E':
593					/* no exponent without some digits */
594					if ((flags&(NDIGITS|EXPOK)) == EXPOK) {
595						flags =
596						    (flags & ~(EXPOK|DPTOK)) |
597						    SIGNOK | NDIGITS;
598						goto fok;
599					}
600					break;
601				}
602				break;
603		fok:
604				*p++ = c;
605				if (--fp->_r > 0)
606					fp->_p++;
607				else if (__srefill(fp))
608					break;	/* EOF */
609			}
610			/*
611			 * If no digits, might be missing exponent digits
612			 * (just give back the exponent) or might be missing
613			 * regular digits, but had sign and/or decimal point.
614			 */
615			if (flags & NDIGITS) {
616				if (flags & EXPOK) {
617					/* no digits at all */
618					while (p > buf)
619						ungetc(*(u_char *)--p, fp);
620					goto match_failure;
621				}
622				/* just a bad exponent (e and maybe sign) */
623				c = *(u_char *)--p;
624				if (c != 'e' && c != 'E') {
625					(void) ungetc(c, fp);/* sign */
626					c = *(u_char *)--p;
627				}
628				(void) ungetc(c, fp);
629			}
630			if ((flags & SUPPRESS) == 0) {
631				double res;
632
633				*p = 0;
634				res = strtod(buf, (char **) NULL);
635				if (flags & LONGDBL)
636					*va_arg(ap, long double *) = res;
637				else if (flags & LONG)
638					*va_arg(ap, double *) = res;
639				else
640					*va_arg(ap, float *) = res;
641				nassigned++;
642			}
643			nread += p - buf;
644			break;
645#endif /* FLOATING_POINT */
646		}
647	}
648input_failure:
649	return (nassigned ? nassigned : -1);
650match_failure:
651	return (nassigned);
652}
653
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]', or to the terminating NUL if the format ends before
 * a closing `]' is seen.  The table must have 256 entries, indexed
 * by character value; on return it has a 1 wherever characters
 * should be considered part of the scanset and a 0 elsewhere.
 */
static unsigned char *
__sccl(char *tab, unsigned char *fmt)
{
	int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */
	for (n = 0; n < 256; n++)
		tab[n] = v;
	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The comparison must be `<=': if an equal end
			 * point were let through, the fill loop below
			 * would mark c + 1, which is outside the requested
			 * set and, for c == 255, outside the table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			do {		/* fill in the range (c, n] */
				tab[++c] = v;
			} while (c < n);
#if 1	/* XXX another disgusting compatibility hack */
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;
#else
			c = *fmt++;
			if (c == 0)
				return (fmt - 1);
			if (c == ']')
				return (fmt);
#endif
			break;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
751