vfscanf.c revision 50476
1/*-
2 * Copyright (c) 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Chris Torek.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#if defined(LIBC_SCCS) && !defined(lint)
38#if 0
39static char sccsid[] = "@(#)vfscanf.c	8.1 (Berkeley) 6/4/93";
40#endif
41static const char rcsid[] =
42  "$FreeBSD: head/lib/libc/stdio/vfscanf.c 50476 1999-08-28 00:22:10Z peter $";
43#endif /* LIBC_SCCS and not lint */
44
45#include <stdio.h>
46#include <stdlib.h>
47#include <ctype.h>
48#if __STDC__
49#include <stdarg.h>
50#else
51#include <varargs.h>
52#endif
53#include <string.h>
54
55#include "collate.h"
56#include "local.h"
57
58#define FLOATING_POINT
59
60#include "floatio.h"
#define	BUF		513	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 */
#define	LONG		0x01	/* l: long or double */
#define	LONGDBL		0x02	/* L: long double */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* suppress assignment */
#define	POINTER		0x10	/* weird %p pointer (`fake hex') */
#define	NOSKIP		0x20	/* do not skip blanks */
#define	QUAD		0x400	/* q: quad_t */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
 *
 * The floating-point-only and integer-only flags deliberately share
 * bit values (0x100 and 0x200): a single conversion is either one or
 * the other, so the two meanings are never live at the same time.
 */
#define	SIGNOK		0x40	/* +/- is (still) legal */
#define	NDIGITS		0x80	/* no digits detected */

#define	DPTOK		0x100	/* (float) decimal point is still legal */
#define	EXPOK		0x200	/* (float) exponent (e+3, etc) still legal */

#define	PFXOK		0x100	/* 0x prefix is (still) legal */
#define	NZDIGITS	0x200	/* no zero digits detected */

/*
 * Conversion types.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* integer, i.e., strtoq or strtouq */
#define	CT_FLOAT	4	/* floating, i.e., strtod */

#define u_char unsigned char
#define u_long unsigned long

/* Scanset parser for %[...]; defined at the end of this file. */
static u_char *__sccl(char *, u_char *);
101
102/*
103 * vfscanf
104 */
105int
106__svfscanf(fp, fmt0, ap)
107	register FILE *fp;
108	char const *fmt0;
109	va_list ap;
110{
111	register u_char *fmt = (u_char *)fmt0;
112	register int c;		/* character from format, or conversion */
113	register size_t width;	/* field width, or 0 */
114	register char *p;	/* points into all kinds of strings */
115	register int n;		/* handy integer */
116	register int flags;	/* flags as defined above */
117	register char *p0;	/* saves original value of p when necessary */
118	int nassigned;		/* number of fields assigned */
119	int nconversions;	/* number of conversions */
120	int nread;		/* number of characters consumed from fp */
121	int base;		/* base argument to strtoq/strtouq */
122	u_quad_t(*ccfn)();	/* conversion function (strtoq/strtouq) */
123	char ccltab[256];	/* character class table for %[...] */
124	char buf[BUF];		/* buffer for numeric conversions */
125
126	/* `basefix' is used to avoid `if' tests in the integer scanner */
127	static short basefix[17] =
128		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
129
130	nassigned = 0;
131	nconversions = 0;
132	nread = 0;
133	base = 0;		/* XXX just to keep gcc happy */
134	ccfn = NULL;		/* XXX just to keep gcc happy */
135	for (;;) {
136		c = *fmt++;
137		if (c == 0)
138			return (nassigned);
139		if (isspace(c)) {
140			while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p))
141				nread++, fp->_r--, fp->_p++;
142			continue;
143		}
144		if (c != '%')
145			goto literal;
146		width = 0;
147		flags = 0;
148		/*
149		 * switch on the format.  continue if done;
150		 * break once format type is derived.
151		 */
152again:		c = *fmt++;
153		switch (c) {
154		case '%':
155literal:
156			if (fp->_r <= 0 && __srefill(fp))
157				goto input_failure;
158			if (*fp->_p != c)
159				goto match_failure;
160			fp->_r--, fp->_p++;
161			nread++;
162			continue;
163
164		case '*':
165			flags |= SUPPRESS;
166			goto again;
167		case 'l':
168			flags |= LONG;
169			goto again;
170		case 'q':
171			flags |= QUAD;
172			goto again;
173		case 'L':
174			flags |= LONGDBL;
175			goto again;
176		case 'h':
177			flags |= SHORT;
178			goto again;
179
180		case '0': case '1': case '2': case '3': case '4':
181		case '5': case '6': case '7': case '8': case '9':
182			width = width * 10 + c - '0';
183			goto again;
184
185		/*
186		 * Conversions.
187		 * Those marked `compat' are for 4.[123]BSD compatibility.
188		 *
189		 * (According to ANSI, E and X formats are supposed
190		 * to the same as e and x.  Sorry about that.)
191		 */
192		case 'D':	/* compat */
193			flags |= LONG;
194			/* FALLTHROUGH */
195		case 'd':
196			c = CT_INT;
197			ccfn = (u_quad_t (*)())strtoq;
198			base = 10;
199			break;
200
201		case 'i':
202			c = CT_INT;
203			ccfn = (u_quad_t (*)())strtoq;
204			base = 0;
205			break;
206
207		case 'O':	/* compat */
208			flags |= LONG;
209			/* FALLTHROUGH */
210		case 'o':
211			c = CT_INT;
212			ccfn = strtouq;
213			base = 8;
214			break;
215
216		case 'u':
217			c = CT_INT;
218			ccfn = strtouq;
219			base = 10;
220			break;
221
222		case 'X':	/* compat   XXX */
223			flags |= LONG;
224			/* FALLTHROUGH */
225		case 'x':
226			flags |= PFXOK;	/* enable 0x prefixing */
227			c = CT_INT;
228			ccfn = strtouq;
229			base = 16;
230			break;
231
232#ifdef FLOATING_POINT
233		case 'E':	/* compat   XXX */
234		case 'F':	/* compat */
235			flags |= LONG;
236			/* FALLTHROUGH */
237		case 'e': case 'f': case 'g':
238			c = CT_FLOAT;
239			break;
240#endif
241
242		case 's':
243			c = CT_STRING;
244			break;
245
246		case '[':
247			fmt = __sccl(ccltab, fmt);
248			flags |= NOSKIP;
249			c = CT_CCL;
250			break;
251
252		case 'c':
253			flags |= NOSKIP;
254			c = CT_CHAR;
255			break;
256
257		case 'p':	/* pointer format is like hex */
258			flags |= POINTER | PFXOK;
259			c = CT_INT;
260			ccfn = strtouq;
261			base = 16;
262			break;
263
264		case 'n':
265			nconversions++;
266			if (flags & SUPPRESS)	/* ??? */
267				continue;
268			if (flags & SHORT)
269				*va_arg(ap, short *) = nread;
270			else if (flags & LONG)
271				*va_arg(ap, long *) = nread;
272			else if (flags & QUAD)
273				*va_arg(ap, quad_t *) = nread;
274			else
275				*va_arg(ap, int *) = nread;
276			continue;
277
278		/*
279		 * Disgusting backwards compatibility hacks.	XXX
280		 */
281		case '\0':	/* compat */
282			return (EOF);
283
284		default:	/* compat */
285			if (isupper(c))
286				flags |= LONG;
287			c = CT_INT;
288			ccfn = (u_quad_t (*)())strtoq;
289			base = 10;
290			break;
291		}
292
293		/*
294		 * We have a conversion that requires input.
295		 */
296		if (fp->_r <= 0 && __srefill(fp))
297			goto input_failure;
298
299		/*
300		 * Consume leading white space, except for formats
301		 * that suppress this.
302		 */
303		if ((flags & NOSKIP) == 0) {
304			while (isspace(*fp->_p)) {
305				nread++;
306				if (--fp->_r > 0)
307					fp->_p++;
308				else if (__srefill(fp))
309					goto input_failure;
310			}
311			/*
312			 * Note that there is at least one character in
313			 * the buffer, so conversions that do not set NOSKIP
314			 * ca no longer result in an input failure.
315			 */
316		}
317
318		/*
319		 * Do the conversion.
320		 */
321		switch (c) {
322
323		case CT_CHAR:
324			/* scan arbitrary characters (sets NOSKIP) */
325			if (width == 0)
326				width = 1;
327			if (flags & SUPPRESS) {
328				size_t sum = 0;
329				for (;;) {
330					if ((n = fp->_r) < width) {
331						sum += n;
332						width -= n;
333						fp->_p += n;
334						if (__srefill(fp)) {
335							if (sum == 0)
336							    goto input_failure;
337							break;
338						}
339					} else {
340						sum += width;
341						fp->_r -= width;
342						fp->_p += width;
343						break;
344					}
345				}
346				nread += sum;
347			} else {
348				size_t r = fread((void *)va_arg(ap, char *), 1,
349				    width, fp);
350
351				if (r == 0)
352					goto input_failure;
353				nread += r;
354				nassigned++;
355			}
356			nconversions++;
357			break;
358
359		case CT_CCL:
360			/* scan a (nonempty) character class (sets NOSKIP) */
361			if (width == 0)
362				width = (size_t)~0;	/* `infinity' */
363			/* take only those things in the class */
364			if (flags & SUPPRESS) {
365				n = 0;
366				while (ccltab[*fp->_p]) {
367					n++, fp->_r--, fp->_p++;
368					if (--width == 0)
369						break;
370					if (fp->_r <= 0 && __srefill(fp)) {
371						if (n == 0)
372							goto input_failure;
373						break;
374					}
375				}
376				if (n == 0)
377					goto match_failure;
378			} else {
379				p0 = p = va_arg(ap, char *);
380				while (ccltab[*fp->_p]) {
381					fp->_r--;
382					*p++ = *fp->_p++;
383					if (--width == 0)
384						break;
385					if (fp->_r <= 0 && __srefill(fp)) {
386						if (p == p0)
387							goto input_failure;
388						break;
389					}
390				}
391				n = p - p0;
392				if (n == 0)
393					goto match_failure;
394				*p = 0;
395				nassigned++;
396			}
397			nread += n;
398			nconversions++;
399			break;
400
401		case CT_STRING:
402			/* like CCL, but zero-length string OK, & no NOSKIP */
403			if (width == 0)
404				width = (size_t)~0;
405			if (flags & SUPPRESS) {
406				n = 0;
407				while (!isspace(*fp->_p)) {
408					n++, fp->_r--, fp->_p++;
409					if (--width == 0)
410						break;
411					if (fp->_r <= 0 && __srefill(fp))
412						break;
413				}
414				nread += n;
415			} else {
416				p0 = p = va_arg(ap, char *);
417				while (!isspace(*fp->_p)) {
418					fp->_r--;
419					*p++ = *fp->_p++;
420					if (--width == 0)
421						break;
422					if (fp->_r <= 0 && __srefill(fp))
423						break;
424				}
425				*p = 0;
426				nread += p - p0;
427				nassigned++;
428			}
429			nconversions++;
430			continue;
431
432		case CT_INT:
433			/* scan an integer as if by strtoq/strtouq */
434#ifdef hardway
435			if (width == 0 || width > sizeof(buf) - 1)
436				width = sizeof(buf) - 1;
437#else
438			/* size_t is unsigned, hence this optimisation */
439			if (--width > sizeof(buf) - 2)
440				width = sizeof(buf) - 2;
441			width++;
442#endif
443			flags |= SIGNOK | NDIGITS | NZDIGITS;
444			for (p = buf; width; width--) {
445				c = *fp->_p;
446				/*
447				 * Switch on the character; `goto ok'
448				 * if we accept it as a part of number.
449				 */
450				switch (c) {
451
452				/*
453				 * The digit 0 is always legal, but is
454				 * special.  For %i conversions, if no
455				 * digits (zero or nonzero) have been
456				 * scanned (only signs), we will have
457				 * base==0.  In that case, we should set
458				 * it to 8 and enable 0x prefixing.
459				 * Also, if we have not scanned zero digits
460				 * before this, do not turn off prefixing
461				 * (someone else will turn it off if we
462				 * have scanned any nonzero digits).
463				 */
464				case '0':
465					if (base == 0) {
466						base = 8;
467						flags |= PFXOK;
468					}
469					if (flags & NZDIGITS)
470					    flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
471					else
472					    flags &= ~(SIGNOK|PFXOK|NDIGITS);
473					goto ok;
474
475				/* 1 through 7 always legal */
476				case '1': case '2': case '3':
477				case '4': case '5': case '6': case '7':
478					base = basefix[base];
479					flags &= ~(SIGNOK | PFXOK | NDIGITS);
480					goto ok;
481
482				/* digits 8 and 9 ok iff decimal or hex */
483				case '8': case '9':
484					base = basefix[base];
485					if (base <= 8)
486						break;	/* not legal here */
487					flags &= ~(SIGNOK | PFXOK | NDIGITS);
488					goto ok;
489
490				/* letters ok iff hex */
491				case 'A': case 'B': case 'C':
492				case 'D': case 'E': case 'F':
493				case 'a': case 'b': case 'c':
494				case 'd': case 'e': case 'f':
495					/* no need to fix base here */
496					if (base <= 10)
497						break;	/* not legal here */
498					flags &= ~(SIGNOK | PFXOK | NDIGITS);
499					goto ok;
500
501				/* sign ok only as first character */
502				case '+': case '-':
503					if (flags & SIGNOK) {
504						flags &= ~SIGNOK;
505						goto ok;
506					}
507					break;
508
509				/* x ok iff flag still set & 2nd char */
510				case 'x': case 'X':
511					if (flags & PFXOK && p == buf + 1) {
512						base = 16;	/* if %i */
513						flags &= ~PFXOK;
514						goto ok;
515					}
516					break;
517				}
518
519				/*
520				 * If we got here, c is not a legal character
521				 * for a number.  Stop accumulating digits.
522				 */
523				break;
524		ok:
525				/*
526				 * c is legal: store it and look at the next.
527				 */
528				*p++ = c;
529				if (--fp->_r > 0)
530					fp->_p++;
531				else if (__srefill(fp))
532					break;		/* EOF */
533			}
534			/*
535			 * If we had only a sign, it is no good; push
536			 * back the sign.  If the number ends in `x',
537			 * it was [sign] '0' 'x', so push back the x
538			 * and treat it as [sign] '0'.
539			 */
540			if (flags & NDIGITS) {
541				if (p > buf)
542					(void) ungetc(*(u_char *)--p, fp);
543				goto match_failure;
544			}
545			c = ((u_char *)p)[-1];
546			if (c == 'x' || c == 'X') {
547				--p;
548				(void) ungetc(c, fp);
549			}
550			if ((flags & SUPPRESS) == 0) {
551				u_quad_t res;
552
553				*p = 0;
554				res = (*ccfn)(buf, (char **)NULL, base);
555				if (flags & POINTER)
556					*va_arg(ap, void **) =
557						(void *)(u_long)res;
558				else if (flags & SHORT)
559					*va_arg(ap, short *) = res;
560				else if (flags & LONG)
561					*va_arg(ap, long *) = res;
562				else if (flags & QUAD)
563					*va_arg(ap, quad_t *) = res;
564				else
565					*va_arg(ap, int *) = res;
566				nassigned++;
567			}
568			nread += p - buf;
569			nconversions++;
570			break;
571
572#ifdef FLOATING_POINT
573		case CT_FLOAT:
574			/* scan a floating point number as if by strtod */
575#ifdef hardway
576			if (width == 0 || width > sizeof(buf) - 1)
577				width = sizeof(buf) - 1;
578#else
579			/* size_t is unsigned, hence this optimisation */
580			if (--width > sizeof(buf) - 2)
581				width = sizeof(buf) - 2;
582			width++;
583#endif
584			flags |= SIGNOK | NDIGITS | DPTOK | EXPOK;
585			for (p = buf; width; width--) {
586				c = *fp->_p;
587				/*
588				 * This code mimicks the integer conversion
589				 * code, but is much simpler.
590				 */
591				switch (c) {
592
593				case '0': case '1': case '2': case '3':
594				case '4': case '5': case '6': case '7':
595				case '8': case '9':
596					flags &= ~(SIGNOK | NDIGITS);
597					goto fok;
598
599				case '+': case '-':
600					if (flags & SIGNOK) {
601						flags &= ~SIGNOK;
602						goto fok;
603					}
604					break;
605				case '.':
606					if (flags & DPTOK) {
607						flags &= ~(SIGNOK | DPTOK);
608						goto fok;
609					}
610					break;
611				case 'e': case 'E':
612					/* no exponent without some digits */
613					if ((flags&(NDIGITS|EXPOK)) == EXPOK) {
614						flags =
615						    (flags & ~(EXPOK|DPTOK)) |
616						    SIGNOK | NDIGITS;
617						goto fok;
618					}
619					break;
620				}
621				break;
622		fok:
623				*p++ = c;
624				if (--fp->_r > 0)
625					fp->_p++;
626				else if (__srefill(fp))
627					break;	/* EOF */
628			}
629			/*
630			 * If no digits, might be missing exponent digits
631			 * (just give back the exponent) or might be missing
632			 * regular digits, but had sign and/or decimal point.
633			 */
634			if (flags & NDIGITS) {
635				if (flags & EXPOK) {
636					/* no digits at all */
637					while (p > buf)
638						ungetc(*(u_char *)--p, fp);
639					goto match_failure;
640				}
641				/* just a bad exponent (e and maybe sign) */
642				c = *(u_char *)--p;
643				if (c != 'e' && c != 'E') {
644					(void) ungetc(c, fp);/* sign */
645					c = *(u_char *)--p;
646				}
647				(void) ungetc(c, fp);
648			}
649			if ((flags & SUPPRESS) == 0) {
650				double res;
651
652				*p = 0;
653				/* XXX this loses precision for long doubles. */
654				res = strtod(buf, (char **) NULL);
655				if (flags & LONGDBL)
656					*va_arg(ap, long double *) = res;
657				else if (flags & LONG)
658					*va_arg(ap, double *) = res;
659				else
660					*va_arg(ap, float *) = res;
661				nassigned++;
662			}
663			nread += p - buf;
664			nconversions++;
665			break;
666#endif /* FLOATING_POINT */
667		}
668	}
669input_failure:
670	return (nconversions != 0 ? nassigned : EOF);
671match_failure:
672	return (nassigned);
673}
674
675/*
676 * Fill in the given table from the scanset at the given format
677 * (just after `[').  Return a pointer to the character past the
678 * closing `]'.  The table has a 1 wherever characters should be
679 * considered part of the scanset.
680 */
681static u_char *
682__sccl(tab, fmt)
683	register char *tab;
684	register u_char *fmt;
685{
686	register int c, n, v, i;
687
688	/* first `clear' the whole table */
689	c = *fmt++;		/* first char hat => negated scanset */
690	if (c == '^') {
691		v = 1;		/* default => accept */
692		c = *fmt++;	/* get new first char */
693	} else
694		v = 0;		/* default => reject */
695
696	/* XXX: Will not work if sizeof(tab*) > sizeof(char) */
697	(void) memset(tab, v, 256);
698
699	if (c == 0)
700		return (fmt - 1);/* format ended before closing ] */
701
702	/*
703	 * Now set the entries corresponding to the actual scanset
704	 * to the opposite of the above.
705	 *
706	 * The first character may be ']' (or '-') without being special;
707	 * the last character may be '-'.
708	 */
709	v = 1 - v;
710	for (;;) {
711		tab[c] = v;		/* take character c */
712doswitch:
713		n = *fmt++;		/* and examine the next */
714		switch (n) {
715
716		case 0:			/* format ended too soon */
717			return (fmt - 1);
718
719		case '-':
720			/*
721			 * A scanset of the form
722			 *	[01+-]
723			 * is defined as `the digit 0, the digit 1,
724			 * the character +, the character -', but
725			 * the effect of a scanset such as
726			 *	[a-zA-Z0-9]
727			 * is implementation defined.  The V7 Unix
728			 * scanf treats `a-z' as `the letters a through
729			 * z', but treats `a-a' as `the letter a, the
730			 * character -, and the letter a'.
731			 *
732			 * For compatibility, the `-' is not considerd
733			 * to define a range if the character following
734			 * it is either a close bracket (required by ANSI)
735			 * or is not numerically greater than the character
736			 * we just stored in the table (c).
737			 */
738			n = *fmt;
739			if (n == ']'
740			    || (__collate_load_error ? n < c :
741				__collate_range_cmp (n, c) < 0
742			       )
743			   ) {
744				c = '-';
745				break;	/* resume the for(;;) */
746			}
747			fmt++;
748			/* fill in the range */
749			if (__collate_load_error) {
750				do {
751					tab[++c] = v;
752				} while (c < n);
753			} else {
754				for (i = 0; i < 256; i ++)
755					if (   __collate_range_cmp (c, i) < 0
756					    && __collate_range_cmp (i, n) <= 0
757					   )
758						tab[i] = v;
759			}
760#if 1	/* XXX another disgusting compatibility hack */
761			c = n;
762			/*
763			 * Alas, the V7 Unix scanf also treats formats
764			 * such as [a-c-e] as `the letters a through e'.
765			 * This too is permitted by the standard....
766			 */
767			goto doswitch;
768#else
769			c = *fmt++;
770			if (c == 0)
771				return (fmt - 1);
772			if (c == ']')
773				return (fmt);
774#endif
775			break;
776
777		case ']':		/* end of scanset */
778			return (fmt);
779
780		default:		/* just another character */
781			c = n;
782			break;
783		}
784	}
785	/* NOTREACHED */
786}
787