vfwscanf.c revision 103856
1/*-
2 * Copyright (c) 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Chris Torek.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#include <sys/cdefs.h>
38#if 0
39#if defined(LIBC_SCCS) && !defined(lint)
40static char sccsid[] = "@(#)vfscanf.c	8.1 (Berkeley) 6/4/93";
41#endif /* LIBC_SCCS and not lint */
42__FBSDID("FreeBSD: src/lib/libc/stdio/vfscanf.c,v 1.24 2002/08/13 09:30:41 tjr Exp ");
43#endif
44__FBSDID("$FreeBSD: head/lib/libc/stdio/vfwscanf.c 103856 2002-09-23 12:40:06Z tjr $");
45
46#include "namespace.h"
47#include <ctype.h>
48#include <inttypes.h>
49#include <stdio.h>
50#include <stdlib.h>
51#include <stddef.h>
52#include <stdarg.h>
53#include <string.h>
54#include <wchar.h>
55#include <wctype.h>
56#include "un-namespace.h"
57
58#include "libc_private.h"
59#include "local.h"
60
61#define FLOATING_POINT
62
63#ifdef FLOATING_POINT
64#include <locale.h>
65#include "floatio.h"
66#endif
67
#define	BUF		513	/* Maximum length of numeric string. */

/*
 * Bits kept in `flags' while a single conversion is processed.
 * Length modifiers and general options:
 */
#define	LONG		(1 << 0)	/* l: long or double */
#define	LONGDBL		(1 << 1)	/* L: long double */
#define	SHORT		(1 << 2)	/* h: short */
#define	SUPPRESS	(1 << 3)	/* *: suppress assignment */
#define	POINTER		(1 << 4)	/* p: void * (as hex) */
#define	NOSKIP		(1 << 5)	/* [ or c: do not skip blanks */
#define	LONGLONG	(1 << 10)	/* ll: long long (+ deprecated q: quad) */
#define	INTMAXT		(1 << 11)	/* j: intmax_t */
#define	PTRDIFFT	(1 << 12)	/* t: ptrdiff_t */
#define	SIZET		(1 << 13)	/* z: size_t */
#define	SHORTSHORT	(1 << 14)	/* hh: char */
#define	UNSIGNED	(1 << 15)	/* %[oupxX] conversions */

/*
 * State bits that only matter while a number is being scanned:
 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
 * The floating point and integral pairs share bits 8 and 9; a single
 * conversion is only ever one or the other.
 */
#define	SIGNOK		(1 << 6)	/* +/- is (still) legal */
#define	NDIGITS		(1 << 7)	/* no digits detected */

#define	DPTOK		(1 << 8)	/* (float) decimal point is still legal */
#define	EXPOK		(1 << 9)	/* (float) exponent (e+3, etc) still legal */

#define	PFXOK		(1 << 8)	/* 0x prefix is (still) legal */
#define	NZDIGITS	(1 << 9)	/* no zero digits detected */

/*
 * Conversion classes; the format parser maps each conversion
 * specifier onto one of these before the input is examined.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* %[dioupxX] conversion */
#define	CT_FLOAT	4	/* %[efgEFG] conversion */
107#define	CT_FLOAT	4	/* %[efgEFG] conversion */
108
/*
 * Nonzero when _c belongs to the current %[...] class.  Uses the
 * caller's locals: [ccls, ccle) delimits the listed characters and
 * cclcompl inverts the sense of the test.
 */
#define	INCCL(_c)	\
	((wmemchr(ccls, (_c), ccle - ccls) == NULL) == (cclcompl != 0))
112
/*
 * MT-safe version.
 *
 * Brackets the real scanner, __vfwscanf(), with FLOCKFILE and
 * FUNLOCKFILE on the stream and applies ORIENT(fp, 1) before scanning
 * (presumably marking the stream wide-oriented -- see local.h).
 * The scanner's return value is passed through unchanged.
 */
int
vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap)
{
	int nitems;

	FLOCKFILE(fp);
	ORIENT(fp, 1);
	nitems = __vfwscanf(fp, fmt, ap);
	FUNLOCKFILE(fp);

	return (nitems);
}
127
128/*
129 * Non-MT-safe version.
130 */
131int
132__vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap)
133{
134	wint_t c;		/* character from format, or conversion */
135	size_t width;		/* field width, or 0 */
136	wchar_t *p;		/* points into all kinds of strings */
137	int n;			/* handy integer */
138	int flags;		/* flags as defined above */
139	wchar_t *p0;		/* saves original value of p when necessary */
140	int nassigned;		/* number of fields assigned */
141	int nconversions;	/* number of conversions */
142	int nread;		/* number of characters consumed from fp */
143	int base;		/* base argument to conversion function */
144	wchar_t buf[BUF];	/* buffer for numeric conversions */
145	const wchar_t *ccls;	/* character class start */
146	const wchar_t *ccle;	/* character class end */
147	int cclcompl;		/* ccl is complemented? */
148	wint_t wi;		/* handy wint_t */
149	char *mbp;		/* multibyte string pointer for %c %s %[ */
150	size_t nconv;		/* number of bytes in mb. conversion */
151	mbstate_t mbs;		/* multibyte state */
152
153	/* `basefix' is used to avoid `if' tests in the integer scanner */
154	static short basefix[17] =
155		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
156#ifdef FLOATING_POINT
157	char decimal_point = localeconv()->decimal_point[0];
158#endif
159
160	nassigned = 0;
161	nconversions = 0;
162	nread = 0;
163	ccls = ccle = NULL;
164	for (;;) {
165		c = *fmt++;
166		if (c == 0)
167			return (nassigned);
168		if (iswspace(c)) {
169			while ((c = __fgetwc(fp)) != WEOF &&
170			    iswspace(c))
171				;
172			if (c != WEOF)
173				__ungetwc(c, fp);
174			continue;
175		}
176		if (c != '%')
177			goto literal;
178		width = 0;
179		flags = 0;
180		/*
181		 * switch on the format.  continue if done;
182		 * break once format type is derived.
183		 */
184again:		c = *fmt++;
185		switch (c) {
186		case '%':
187literal:
188			if ((wi = __fgetwc(fp)) == WEOF)
189				goto input_failure;
190			if (wi != c) {
191				__ungetwc(wi, fp);
192				goto input_failure;
193			}
194			nread++;
195			continue;
196
197		case '*':
198			flags |= SUPPRESS;
199			goto again;
200		case 'j':
201			flags |= INTMAXT;
202			goto again;
203		case 'l':
204			if (flags & LONG) {
205				flags &= ~LONG;
206				flags |= LONGLONG;
207			} else
208				flags |= LONG;
209			goto again;
210		case 'q':
211			flags |= LONGLONG;	/* not quite */
212			goto again;
213		case 't':
214			flags |= PTRDIFFT;
215			goto again;
216		case 'z':
217			flags |= SIZET;
218			goto again;
219		case 'L':
220			flags |= LONGDBL;
221			goto again;
222		case 'h':
223			if (flags & SHORT) {
224				flags &= ~SHORT;
225				flags |= SHORTSHORT;
226			} else
227				flags |= SHORT;
228			goto again;
229
230		case '0': case '1': case '2': case '3': case '4':
231		case '5': case '6': case '7': case '8': case '9':
232			width = width * 10 + c - '0';
233			goto again;
234
235		/*
236		 * Conversions.
237		 */
238		case 'd':
239			c = CT_INT;
240			base = 10;
241			break;
242
243		case 'i':
244			c = CT_INT;
245			base = 0;
246			break;
247
248		case 'o':
249			c = CT_INT;
250			flags |= UNSIGNED;
251			base = 8;
252			break;
253
254		case 'u':
255			c = CT_INT;
256			flags |= UNSIGNED;
257			base = 10;
258			break;
259
260		case 'X':
261		case 'x':
262			flags |= PFXOK;	/* enable 0x prefixing */
263			c = CT_INT;
264			flags |= UNSIGNED;
265			base = 16;
266			break;
267
268#ifdef FLOATING_POINT
269		case 'E': case 'F': case 'G':
270		case 'e': case 'f': case 'g':
271			c = CT_FLOAT;
272			break;
273#endif
274
275		case 'S':
276			flags |= LONG;
277			/* FALLTHROUGH */
278		case 's':
279			c = CT_STRING;
280			break;
281
282		case '[':
283			ccls = fmt;
284			if (*fmt == '^') {
285				cclcompl = 1;
286				fmt++;
287			} else
288				cclcompl = 0;
289			if (*fmt == ']')
290				fmt++;
291			while (*fmt != '\0' && *fmt != ']')
292				fmt++;
293			ccle = fmt;
294			fmt++;
295			flags |= NOSKIP;
296			c = CT_CCL;
297			break;
298
299		case 'C':
300			flags |= LONG;
301			/* FALLTHROUGH */
302		case 'c':
303			flags |= NOSKIP;
304			c = CT_CHAR;
305			break;
306
307		case 'p':	/* pointer format is like hex */
308			flags |= POINTER | PFXOK;
309			c = CT_INT;		/* assumes sizeof(uintmax_t) */
310			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
311			base = 16;
312			break;
313
314		case 'n':
315			nconversions++;
316			if (flags & SUPPRESS)	/* ??? */
317				continue;
318			if (flags & SHORTSHORT)
319				*va_arg(ap, char *) = nread;
320			else if (flags & SHORT)
321				*va_arg(ap, short *) = nread;
322			else if (flags & LONG)
323				*va_arg(ap, long *) = nread;
324			else if (flags & LONGLONG)
325				*va_arg(ap, long long *) = nread;
326			else if (flags & INTMAXT)
327				*va_arg(ap, intmax_t *) = nread;
328			else if (flags & SIZET)
329				*va_arg(ap, size_t *) = nread;
330			else if (flags & PTRDIFFT)
331				*va_arg(ap, ptrdiff_t *) = nread;
332			else
333				*va_arg(ap, int *) = nread;
334			continue;
335
336		default:
337			goto match_failure;
338
339		/*
340		 * Disgusting backwards compatibility hack.	XXX
341		 */
342		case '\0':	/* compat */
343			return (EOF);
344		}
345
346		/*
347		 * We have a conversion that requires input.
348		 */
349		if (fp->_r <= 0 && __srefill(fp))
350			goto input_failure;
351
352		/*
353		 * Consume leading white space, except for formats
354		 * that suppress this.
355		 */
356		if ((flags & NOSKIP) == 0) {
357			while ((wi = __fgetwc(fp)) != WEOF && iswspace(wi))
358				nread++;
359			if (wi == WEOF)
360				goto input_failure;
361			__ungetwc(wi, fp);
362		}
363
364		/*
365		 * Do the conversion.
366		 */
367		switch (c) {
368
369		case CT_CHAR:
370			/* scan arbitrary characters (sets NOSKIP) */
371			if (width == 0)
372				width = 1;
373			if (flags & SUPPRESS) {
374				while (width-- != 0 &&
375				    (wi = __fgetwc(fp)) != WEOF)
376					nread++;
377			} else if (flags & LONG) {
378				p = va_arg(ap, wchar_t *);
379				n = 0;
380				while (width-- != 0 &&
381				    (wi = __fgetwc(fp)) != WEOF) {
382					*p++ = (wchar_t)wi;
383					n++;
384				}
385				if (n == 0)
386					goto input_failure;
387				nread += n;
388				nassigned++;
389			} else {
390				mbp = va_arg(ap, char *);
391				n = 0;
392				memset(&mbs, 0, sizeof(mbs));
393				while (width-- != 0 &&
394				    (wi = __fgetwc(fp)) != WEOF) {
395					nconv = wcrtomb(mbp, wi, &mbs);
396					if (nconv == (size_t)-1)
397						goto input_failure;
398					mbp += nconv;
399					n++;
400				}
401				if (n == 0)
402					goto input_failure;
403				nread += n;
404				nassigned++;
405			}
406			nconversions++;
407			break;
408
409		case CT_CCL:
410			/* scan a (nonempty) character class (sets NOSKIP) */
411			if (width == 0)
412				width = (size_t)~0;	/* `infinity' */
413			/* take only those things in the class */
414			if (flags & SUPPRESS) {
415				n = 0;
416				while ((wi = __fgetwc(fp)) != WEOF &&
417				    width-- != 0 && INCCL(wi))
418					n++;
419				if (wi != WEOF)
420					__ungetwc(wi, fp);
421				if (n == 0)
422					goto match_failure;
423			} else if (flags & LONG) {
424				p0 = p = va_arg(ap, wchar_t *);
425				while ((wi = __fgetwc(fp)) != WEOF &&
426				    width-- != 0 && INCCL(wi))
427					*p++ = (wchar_t)wi;
428				if (wi != WEOF)
429					__ungetwc(wi, fp);
430				n = p - p0;
431				if (n == 0)
432					goto match_failure;
433				*p = 0;
434				nassigned++;
435			} else {
436				mbp = va_arg(ap, char *);
437				n = 0;
438				memset(&mbs, 0, sizeof(mbs));
439				while ((wi = __fgetwc(fp)) != WEOF &&
440				    width-- != 0 && INCCL(wi)) {
441					nconv = wcrtomb(mbp, wi, &mbs);
442					if (nconv == (size_t)-1)
443						goto input_failure;
444					mbp += nconv;
445					n++;
446				}
447				if (wi != WEOF)
448					__ungetwc(wi, fp);
449				*mbp = 0;
450				nassigned++;
451			}
452			nread += n;
453			nconversions++;
454			break;
455
456		case CT_STRING:
457			/* like CCL, but zero-length string OK, & no NOSKIP */
458			if (width == 0)
459				width = (size_t)~0;
460			if (flags & SUPPRESS) {
461				while ((wi = __fgetwc(fp)) != WEOF &&
462				    width-- != 0 &&
463				    !iswspace(wi))
464					nread++;
465				if (wi != WEOF)
466					__ungetwc(wi, fp);
467			} else if (flags & LONG) {
468				p0 = p = va_arg(ap, wchar_t *);
469				while ((wi = __fgetwc(fp)) != WEOF &&
470				    width-- != 0 &&
471				    !iswspace(wi)) {
472					*p++ = (wchar_t)wi;
473					nread++;
474				}
475				if (wi != WEOF)
476					__ungetwc(wi, fp);
477				*p = '\0';
478				nassigned++;
479			} else {
480				mbp = va_arg(ap, char *);
481				memset(&mbs, 0, sizeof(mbs));
482				while ((wi = __fgetwc(fp)) != WEOF &&
483				    width-- != 0 &&
484				    !iswspace(wi)) {
485					nconv = wcrtomb(mbp, wi, &mbs);
486					if (nconv == (size_t)-1)
487						goto input_failure;
488					mbp += nconv;
489					nread++;
490				}
491				if (wi != WEOF)
492					__ungetwc(wi, fp);
493				*mbp = 0;
494				nassigned++;
495			}
496			nconversions++;
497			continue;
498
499		case CT_INT:
500			/* scan an integer as if by the conversion function */
501#ifdef hardway
502			if (width == 0 || width > sizeof(buf) - 1)
503				width = sizeof(buf) - 1;
504#else
505			/* size_t is unsigned, hence this optimisation */
506			if (--width > sizeof(buf) - 2)
507				width = sizeof(buf) - 2;
508			width++;
509#endif
510			flags |= SIGNOK | NDIGITS | NZDIGITS;
511			for (p = buf; width; width--) {
512				c = __fgetwc(fp);
513				/*
514				 * Switch on the character; `goto ok'
515				 * if we accept it as a part of number.
516				 */
517				switch (c) {
518
519				/*
520				 * The digit 0 is always legal, but is
521				 * special.  For %i conversions, if no
522				 * digits (zero or nonzero) have been
523				 * scanned (only signs), we will have
524				 * base==0.  In that case, we should set
525				 * it to 8 and enable 0x prefixing.
526				 * Also, if we have not scanned zero digits
527				 * before this, do not turn off prefixing
528				 * (someone else will turn it off if we
529				 * have scanned any nonzero digits).
530				 */
531				case '0':
532					if (base == 0) {
533						base = 8;
534						flags |= PFXOK;
535					}
536					if (flags & NZDIGITS)
537					    flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
538					else
539					    flags &= ~(SIGNOK|PFXOK|NDIGITS);
540					goto ok;
541
542				/* 1 through 7 always legal */
543				case '1': case '2': case '3':
544				case '4': case '5': case '6': case '7':
545					base = basefix[base];
546					flags &= ~(SIGNOK | PFXOK | NDIGITS);
547					goto ok;
548
549				/* digits 8 and 9 ok iff decimal or hex */
550				case '8': case '9':
551					base = basefix[base];
552					if (base <= 8)
553						break;	/* not legal here */
554					flags &= ~(SIGNOK | PFXOK | NDIGITS);
555					goto ok;
556
557				/* letters ok iff hex */
558				case 'A': case 'B': case 'C':
559				case 'D': case 'E': case 'F':
560				case 'a': case 'b': case 'c':
561				case 'd': case 'e': case 'f':
562					/* no need to fix base here */
563					if (base <= 10)
564						break;	/* not legal here */
565					flags &= ~(SIGNOK | PFXOK | NDIGITS);
566					goto ok;
567
568				/* sign ok only as first character */
569				case '+': case '-':
570					if (flags & SIGNOK) {
571						flags &= ~SIGNOK;
572						goto ok;
573					}
574					break;
575
576				/* x ok iff flag still set & 2nd char */
577				case 'x': case 'X':
578					if (flags & PFXOK && p == buf + 1) {
579						base = 16;	/* if %i */
580						flags &= ~PFXOK;
581						goto ok;
582					}
583					break;
584				}
585
586				/*
587				 * If we got here, c is not a legal character
588				 * for a number.  Stop accumulating digits.
589				 */
590				if (c != WEOF)
591					__ungetwc(c, fp);
592				break;
593		ok:
594				/*
595				 * c is legal: store it and look at the next.
596				 */
597				*p++ = (wchar_t)c;
598			}
599			/*
600			 * If we had only a sign, it is no good; push
601			 * back the sign.  If the number ends in `x',
602			 * it was [sign] '0' 'x', so push back the x
603			 * and treat it as [sign] '0'.
604			 */
605			if (flags & NDIGITS) {
606				if (p > buf)
607					__ungetwc(*--p, fp);
608				goto match_failure;
609			}
610			c = p[-1];
611			if (c == 'x' || c == 'X') {
612				--p;
613				__ungetwc(c, fp);
614			}
615			if ((flags & SUPPRESS) == 0) {
616				uintmax_t res;
617
618				*p = 0;
619				if ((flags & UNSIGNED) == 0)
620				    res = wcstoimax(buf, NULL, base);
621				else
622				    res = wcstoumax(buf, NULL, base);
623				if (flags & POINTER)
624					*va_arg(ap, void **) =
625							(void *)(uintptr_t)res;
626				else if (flags & SHORTSHORT)
627					*va_arg(ap, char *) = res;
628				else if (flags & SHORT)
629					*va_arg(ap, short *) = res;
630				else if (flags & LONG)
631					*va_arg(ap, long *) = res;
632				else if (flags & LONGLONG)
633					*va_arg(ap, long long *) = res;
634				else if (flags & INTMAXT)
635					*va_arg(ap, intmax_t *) = res;
636				else if (flags & PTRDIFFT)
637					*va_arg(ap, ptrdiff_t *) = res;
638				else if (flags & SIZET)
639					*va_arg(ap, size_t *) = res;
640				else
641					*va_arg(ap, int *) = res;
642				nassigned++;
643			}
644			nread += p - buf;
645			nconversions++;
646			break;
647
648#ifdef FLOATING_POINT
649		case CT_FLOAT:
650			/* scan a floating point number as if by strtod */
651#ifdef hardway
652			if (width == 0 || width > sizeof(buf) - 1)
653				width = sizeof(buf) - 1;
654#else
655			/* size_t is unsigned, hence this optimisation */
656			if (--width > sizeof(buf) - 2)
657				width = sizeof(buf) - 2;
658			width++;
659#endif
660			flags |= SIGNOK | NDIGITS | DPTOK | EXPOK;
661			for (p = buf; width; width--) {
662				c = __fgetwc(fp);
663				/*
664				 * This code mimicks the integer conversion
665				 * code, but is much simpler.
666				 */
667				switch (c) {
668
669				case '0': case '1': case '2': case '3':
670				case '4': case '5': case '6': case '7':
671				case '8': case '9':
672					flags &= ~(SIGNOK | NDIGITS);
673					goto fok;
674
675				case '+': case '-':
676					if (flags & SIGNOK) {
677						flags &= ~SIGNOK;
678						goto fok;
679					}
680					break;
681				case 'e': case 'E':
682					/* no exponent without some digits */
683					if ((flags&(NDIGITS|EXPOK)) == EXPOK) {
684						flags =
685						    (flags & ~(EXPOK|DPTOK)) |
686						    SIGNOK | NDIGITS;
687						goto fok;
688					}
689					break;
690				default:
691					if (c == (wchar_t)decimal_point &&
692					    (flags & DPTOK)) {
693						flags &= ~(SIGNOK | DPTOK);
694						goto fok;
695					}
696					break;
697				}
698				if (c != WEOF)
699					__ungetwc(c, fp);
700				break;
701		fok:
702				*p++ = c;
703			}
704			/*
705			 * If no digits, might be missing exponent digits
706			 * (just give back the exponent) or might be missing
707			 * regular digits, but had sign and/or decimal point.
708			 */
709			if (flags & NDIGITS) {
710				if (flags & EXPOK) {
711					/* no digits at all */
712					while (p > buf)
713						__ungetwc(*--p, fp);
714					goto match_failure;
715				}
716				/* just a bad exponent (e and maybe sign) */
717				c = *--p;
718				if (c != 'e' && c != 'E') {
719					__ungetwc(c, fp);/* sign */
720					c = *--p;
721				}
722				__ungetwc(c, fp);
723			}
724			if ((flags & SUPPRESS) == 0) {
725				double res;
726
727				*p = 0;
728				/* XXX this loses precision for long doubles. */
729				res = wcstod(buf, NULL);
730				if (flags & LONGDBL)
731					*va_arg(ap, long double *) = res;
732				else if (flags & LONG)
733					*va_arg(ap, double *) = res;
734				else
735					*va_arg(ap, float *) = res;
736				nassigned++;
737			}
738			nread += p - buf;
739			nconversions++;
740			break;
741#endif /* FLOATING_POINT */
742		}
743	}
744input_failure:
745	return (nconversions != 0 ? nassigned : EOF);
746match_failure:
747	return (nassigned);
748}
749