vfwscanf.c revision 103890
1/*-
2 * Copyright (c) 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Chris Torek.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#include <sys/cdefs.h>
38#if 0
39#if defined(LIBC_SCCS) && !defined(lint)
40static char sccsid[] = "@(#)vfscanf.c	8.1 (Berkeley) 6/4/93";
41#endif /* LIBC_SCCS and not lint */
42__FBSDID("FreeBSD: src/lib/libc/stdio/vfscanf.c,v 1.24 2002/08/13 09:30:41 tjr Exp ");
43#endif
44__FBSDID("$FreeBSD: head/lib/libc/stdio/vfwscanf.c 103890 2002-09-24 09:18:32Z tjr $");
45
46#include "namespace.h"
47#include <ctype.h>
48#include <inttypes.h>
49#include <stdio.h>
50#include <stdlib.h>
51#include <stddef.h>
52#include <stdarg.h>
53#include <string.h>
54#include <wchar.h>
55#include <wctype.h>
56#include "un-namespace.h"
57
58#include "libc_private.h"
59#include "local.h"
60
61#define FLOATING_POINT
62
63#ifdef FLOATING_POINT
64#include <locale.h>
65#include "floatio.h"
66#endif
67
/*
 * Number of wide characters (terminator included) in the scratch buffer
 * that numeric conversions collect their text in.
 */
#define	BUF		513

/*
 * Per-directive flags: length modifiers and conversion options.
 */
#define	LONG		0x01	/* l: long or double */
#define	LONGDBL		0x02	/* L: long double */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* *: suppress assignment */
#define	POINTER		0x10	/* p: void * (as hex) */
#define	NOSKIP		0x20	/* [ or c: do not skip blanks */
#define	LONGLONG	0x400	/* ll: long long (+ deprecated q: quad) */
#define	INTMAXT		0x800	/* j: intmax_t */
#define	PTRDIFFT	0x1000	/* t: ptrdiff_t */
#define	SIZET		0x2000	/* z: size_t */
#define	SHORTSHORT	0x4000	/* hh: char */
#define	UNSIGNED	0x8000	/* %[oupxX] conversions */

/*
 * Scanner state bits, meaningful only while a number is being read.
 * Floating point uses SIGNOK, NDIGITS, DPTOK and EXPOK; integers use
 * SIGNOK, NDIGITS, PFXOK and NZDIGITS.  The float-only and integer-only
 * bits deliberately share values (0x100 and 0x200), since a single
 * directive is never both kinds of conversion.
 */
#define	SIGNOK		0x40	/* +/- is (still) legal */
#define	NDIGITS		0x80	/* no digits detected */

#define	DPTOK		0x100	/* (float) decimal point is still legal */
#define	EXPOK		0x200	/* (float) exponent (e+3, etc) still legal */

#define	PFXOK		0x100	/* (integer) 0x prefix is (still) legal */
#define	NZDIGITS	0x200	/* (integer) no zero digits detected */

/*
 * Conversion classes; the directive letter is replaced by one of these
 * once it has been recognised.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* %[dioupxX] conversion */
#define	CT_FLOAT	4	/* %[efgEFG] conversion */

/*
 * Scanset membership test.  Expects `ccls', `ccle' and `cclcompl' to be
 * in scope at the point of use: the set is the wide characters in
 * [ccls, ccle), and a nonzero cclcompl inverts the answer.  In other
 * words, _c is accepted when "found in the set" differs from
 * "set is complemented".
 */
#define	INCCL(_c)	\
	((wmemchr(ccls, (_c), ccle - ccls) != NULL) != (cclcompl != 0))
112
/*
 * MT-safe version.
 *
 * Public entry point: runs __vfwscanf() on `fp' between FLOCKFILE() and
 * FUNLOCKFILE(), after applying ORIENT(fp, 1) (presumably marking the
 * stream wide-oriented; the macro is defined outside this file, so
 * confirm there).  Returns whatever __vfwscanf() returns.
 */
int
vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap)
{
	int nassigned;

	FLOCKFILE(fp);
	ORIENT(fp, 1);
	nassigned = __vfwscanf(fp, fmt, ap);
	FUNLOCKFILE(fp);

	return (nassigned);
}
127
128/*
129 * Non-MT-safe version.
130 */
131int
132__vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap)
133{
134	wint_t c;		/* character from format, or conversion */
135	size_t width;		/* field width, or 0 */
136	wchar_t *p;		/* points into all kinds of strings */
137	int n;			/* handy integer */
138	int flags;		/* flags as defined above */
139	wchar_t *p0;		/* saves original value of p when necessary */
140	int nassigned;		/* number of fields assigned */
141	int nconversions;	/* number of conversions */
142	int nread;		/* number of characters consumed from fp */
143	int base;		/* base argument to conversion function */
144	wchar_t buf[BUF];	/* buffer for numeric conversions */
145	const wchar_t *ccls;	/* character class start */
146	const wchar_t *ccle;	/* character class end */
147	int cclcompl;		/* ccl is complemented? */
148	wint_t wi;		/* handy wint_t */
149	char *mbp;		/* multibyte string pointer for %c %s %[ */
150	size_t nconv;		/* number of bytes in mb. conversion */
151	mbstate_t mbs;		/* multibyte state */
152
153	/* `basefix' is used to avoid `if' tests in the integer scanner */
154	static short basefix[17] =
155		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
156#ifdef FLOATING_POINT
157	char decimal_point = localeconv()->decimal_point[0];
158#endif
159
160	nassigned = 0;
161	nconversions = 0;
162	nread = 0;
163	ccls = ccle = NULL;
164	for (;;) {
165		c = *fmt++;
166		if (c == 0)
167			return (nassigned);
168		if (iswspace(c)) {
169			while ((c = __fgetwc(fp)) != WEOF &&
170			    iswspace(c))
171				;
172			if (c != WEOF)
173				__ungetwc(c, fp);
174			continue;
175		}
176		if (c != '%')
177			goto literal;
178		width = 0;
179		flags = 0;
180		/*
181		 * switch on the format.  continue if done;
182		 * break once format type is derived.
183		 */
184again:		c = *fmt++;
185		switch (c) {
186		case '%':
187literal:
188			if ((wi = __fgetwc(fp)) == WEOF)
189				goto input_failure;
190			if (wi != c) {
191				__ungetwc(wi, fp);
192				goto input_failure;
193			}
194			nread++;
195			continue;
196
197		case '*':
198			flags |= SUPPRESS;
199			goto again;
200		case 'j':
201			flags |= INTMAXT;
202			goto again;
203		case 'l':
204			if (flags & LONG) {
205				flags &= ~LONG;
206				flags |= LONGLONG;
207			} else
208				flags |= LONG;
209			goto again;
210		case 'q':
211			flags |= LONGLONG;	/* not quite */
212			goto again;
213		case 't':
214			flags |= PTRDIFFT;
215			goto again;
216		case 'z':
217			flags |= SIZET;
218			goto again;
219		case 'L':
220			flags |= LONGDBL;
221			goto again;
222		case 'h':
223			if (flags & SHORT) {
224				flags &= ~SHORT;
225				flags |= SHORTSHORT;
226			} else
227				flags |= SHORT;
228			goto again;
229
230		case '0': case '1': case '2': case '3': case '4':
231		case '5': case '6': case '7': case '8': case '9':
232			width = width * 10 + c - '0';
233			goto again;
234
235		/*
236		 * Conversions.
237		 */
238		case 'd':
239			c = CT_INT;
240			base = 10;
241			break;
242
243		case 'i':
244			c = CT_INT;
245			base = 0;
246			break;
247
248		case 'o':
249			c = CT_INT;
250			flags |= UNSIGNED;
251			base = 8;
252			break;
253
254		case 'u':
255			c = CT_INT;
256			flags |= UNSIGNED;
257			base = 10;
258			break;
259
260		case 'X':
261		case 'x':
262			flags |= PFXOK;	/* enable 0x prefixing */
263			c = CT_INT;
264			flags |= UNSIGNED;
265			base = 16;
266			break;
267
268#ifdef FLOATING_POINT
269		case 'E': case 'F': case 'G':
270		case 'e': case 'f': case 'g':
271			c = CT_FLOAT;
272			break;
273#endif
274
275		case 'S':
276			flags |= LONG;
277			/* FALLTHROUGH */
278		case 's':
279			c = CT_STRING;
280			break;
281
282		case '[':
283			ccls = fmt;
284			if (*fmt == '^') {
285				cclcompl = 1;
286				fmt++;
287			} else
288				cclcompl = 0;
289			if (*fmt == ']')
290				fmt++;
291			while (*fmt != '\0' && *fmt != ']')
292				fmt++;
293			ccle = fmt;
294			fmt++;
295			flags |= NOSKIP;
296			c = CT_CCL;
297			break;
298
299		case 'C':
300			flags |= LONG;
301			/* FALLTHROUGH */
302		case 'c':
303			flags |= NOSKIP;
304			c = CT_CHAR;
305			break;
306
307		case 'p':	/* pointer format is like hex */
308			flags |= POINTER | PFXOK;
309			c = CT_INT;		/* assumes sizeof(uintmax_t) */
310			flags |= UNSIGNED;	/*      >= sizeof(uintptr_t) */
311			base = 16;
312			break;
313
314		case 'n':
315			nconversions++;
316			if (flags & SUPPRESS)	/* ??? */
317				continue;
318			if (flags & SHORTSHORT)
319				*va_arg(ap, char *) = nread;
320			else if (flags & SHORT)
321				*va_arg(ap, short *) = nread;
322			else if (flags & LONG)
323				*va_arg(ap, long *) = nread;
324			else if (flags & LONGLONG)
325				*va_arg(ap, long long *) = nread;
326			else if (flags & INTMAXT)
327				*va_arg(ap, intmax_t *) = nread;
328			else if (flags & SIZET)
329				*va_arg(ap, size_t *) = nread;
330			else if (flags & PTRDIFFT)
331				*va_arg(ap, ptrdiff_t *) = nread;
332			else
333				*va_arg(ap, int *) = nread;
334			continue;
335
336		default:
337			goto match_failure;
338
339		/*
340		 * Disgusting backwards compatibility hack.	XXX
341		 */
342		case '\0':	/* compat */
343			return (EOF);
344		}
345
346		/*
347		 * Consume leading white space, except for formats
348		 * that suppress this.
349		 */
350		if ((flags & NOSKIP) == 0) {
351			while ((wi = __fgetwc(fp)) != WEOF && iswspace(wi))
352				nread++;
353			if (wi == WEOF)
354				goto input_failure;
355			__ungetwc(wi, fp);
356		}
357
358		/*
359		 * Do the conversion.
360		 */
361		switch (c) {
362
363		case CT_CHAR:
364			/* scan arbitrary characters (sets NOSKIP) */
365			if (width == 0)
366				width = 1;
367			if (flags & SUPPRESS) {
368				while (width-- != 0 &&
369				    (wi = __fgetwc(fp)) != WEOF)
370					nread++;
371			} else if (flags & LONG) {
372				p = va_arg(ap, wchar_t *);
373				n = 0;
374				while (width-- != 0 &&
375				    (wi = __fgetwc(fp)) != WEOF) {
376					*p++ = (wchar_t)wi;
377					n++;
378				}
379				if (n == 0)
380					goto input_failure;
381				nread += n;
382				nassigned++;
383			} else {
384				mbp = va_arg(ap, char *);
385				n = 0;
386				memset(&mbs, 0, sizeof(mbs));
387				while (width-- != 0 &&
388				    (wi = __fgetwc(fp)) != WEOF) {
389					nconv = wcrtomb(mbp, wi, &mbs);
390					if (nconv == (size_t)-1)
391						goto input_failure;
392					mbp += nconv;
393					n++;
394				}
395				if (n == 0)
396					goto input_failure;
397				nread += n;
398				nassigned++;
399			}
400			nconversions++;
401			break;
402
403		case CT_CCL:
404			/* scan a (nonempty) character class (sets NOSKIP) */
405			if (width == 0)
406				width = (size_t)~0;	/* `infinity' */
407			/* take only those things in the class */
408			if (flags & SUPPRESS) {
409				n = 0;
410				while ((wi = __fgetwc(fp)) != WEOF &&
411				    width-- != 0 && INCCL(wi))
412					n++;
413				if (wi != WEOF)
414					__ungetwc(wi, fp);
415				if (n == 0)
416					goto match_failure;
417			} else if (flags & LONG) {
418				p0 = p = va_arg(ap, wchar_t *);
419				while ((wi = __fgetwc(fp)) != WEOF &&
420				    width-- != 0 && INCCL(wi))
421					*p++ = (wchar_t)wi;
422				if (wi != WEOF)
423					__ungetwc(wi, fp);
424				n = p - p0;
425				if (n == 0)
426					goto match_failure;
427				*p = 0;
428				nassigned++;
429			} else {
430				mbp = va_arg(ap, char *);
431				n = 0;
432				memset(&mbs, 0, sizeof(mbs));
433				while ((wi = __fgetwc(fp)) != WEOF &&
434				    width-- != 0 && INCCL(wi)) {
435					nconv = wcrtomb(mbp, wi, &mbs);
436					if (nconv == (size_t)-1)
437						goto input_failure;
438					mbp += nconv;
439					n++;
440				}
441				if (wi != WEOF)
442					__ungetwc(wi, fp);
443				*mbp = 0;
444				nassigned++;
445			}
446			nread += n;
447			nconversions++;
448			break;
449
450		case CT_STRING:
451			/* like CCL, but zero-length string OK, & no NOSKIP */
452			if (width == 0)
453				width = (size_t)~0;
454			if (flags & SUPPRESS) {
455				while ((wi = __fgetwc(fp)) != WEOF &&
456				    width-- != 0 &&
457				    !iswspace(wi))
458					nread++;
459				if (wi != WEOF)
460					__ungetwc(wi, fp);
461			} else if (flags & LONG) {
462				p0 = p = va_arg(ap, wchar_t *);
463				while ((wi = __fgetwc(fp)) != WEOF &&
464				    width-- != 0 &&
465				    !iswspace(wi)) {
466					*p++ = (wchar_t)wi;
467					nread++;
468				}
469				if (wi != WEOF)
470					__ungetwc(wi, fp);
471				*p = '\0';
472				nassigned++;
473			} else {
474				mbp = va_arg(ap, char *);
475				memset(&mbs, 0, sizeof(mbs));
476				while ((wi = __fgetwc(fp)) != WEOF &&
477				    width-- != 0 &&
478				    !iswspace(wi)) {
479					nconv = wcrtomb(mbp, wi, &mbs);
480					if (nconv == (size_t)-1)
481						goto input_failure;
482					mbp += nconv;
483					nread++;
484				}
485				if (wi != WEOF)
486					__ungetwc(wi, fp);
487				*mbp = 0;
488				nassigned++;
489			}
490			nconversions++;
491			continue;
492
493		case CT_INT:
494			/* scan an integer as if by the conversion function */
495#ifdef hardway
496			if (width == 0 || width > sizeof(buf) - 1)
497				width = sizeof(buf) - 1;
498#else
499			/* size_t is unsigned, hence this optimisation */
500			if (--width > sizeof(buf) - 2)
501				width = sizeof(buf) - 2;
502			width++;
503#endif
504			flags |= SIGNOK | NDIGITS | NZDIGITS;
505			for (p = buf; width; width--) {
506				c = __fgetwc(fp);
507				/*
508				 * Switch on the character; `goto ok'
509				 * if we accept it as a part of number.
510				 */
511				switch (c) {
512
513				/*
514				 * The digit 0 is always legal, but is
515				 * special.  For %i conversions, if no
516				 * digits (zero or nonzero) have been
517				 * scanned (only signs), we will have
518				 * base==0.  In that case, we should set
519				 * it to 8 and enable 0x prefixing.
520				 * Also, if we have not scanned zero digits
521				 * before this, do not turn off prefixing
522				 * (someone else will turn it off if we
523				 * have scanned any nonzero digits).
524				 */
525				case '0':
526					if (base == 0) {
527						base = 8;
528						flags |= PFXOK;
529					}
530					if (flags & NZDIGITS)
531					    flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
532					else
533					    flags &= ~(SIGNOK|PFXOK|NDIGITS);
534					goto ok;
535
536				/* 1 through 7 always legal */
537				case '1': case '2': case '3':
538				case '4': case '5': case '6': case '7':
539					base = basefix[base];
540					flags &= ~(SIGNOK | PFXOK | NDIGITS);
541					goto ok;
542
543				/* digits 8 and 9 ok iff decimal or hex */
544				case '8': case '9':
545					base = basefix[base];
546					if (base <= 8)
547						break;	/* not legal here */
548					flags &= ~(SIGNOK | PFXOK | NDIGITS);
549					goto ok;
550
551				/* letters ok iff hex */
552				case 'A': case 'B': case 'C':
553				case 'D': case 'E': case 'F':
554				case 'a': case 'b': case 'c':
555				case 'd': case 'e': case 'f':
556					/* no need to fix base here */
557					if (base <= 10)
558						break;	/* not legal here */
559					flags &= ~(SIGNOK | PFXOK | NDIGITS);
560					goto ok;
561
562				/* sign ok only as first character */
563				case '+': case '-':
564					if (flags & SIGNOK) {
565						flags &= ~SIGNOK;
566						goto ok;
567					}
568					break;
569
570				/* x ok iff flag still set & 2nd char */
571				case 'x': case 'X':
572					if (flags & PFXOK && p == buf + 1) {
573						base = 16;	/* if %i */
574						flags &= ~PFXOK;
575						goto ok;
576					}
577					break;
578				}
579
580				/*
581				 * If we got here, c is not a legal character
582				 * for a number.  Stop accumulating digits.
583				 */
584				if (c != WEOF)
585					__ungetwc(c, fp);
586				break;
587		ok:
588				/*
589				 * c is legal: store it and look at the next.
590				 */
591				*p++ = (wchar_t)c;
592			}
593			/*
594			 * If we had only a sign, it is no good; push
595			 * back the sign.  If the number ends in `x',
596			 * it was [sign] '0' 'x', so push back the x
597			 * and treat it as [sign] '0'.
598			 */
599			if (flags & NDIGITS) {
600				if (p > buf)
601					__ungetwc(*--p, fp);
602				goto match_failure;
603			}
604			c = p[-1];
605			if (c == 'x' || c == 'X') {
606				--p;
607				__ungetwc(c, fp);
608			}
609			if ((flags & SUPPRESS) == 0) {
610				uintmax_t res;
611
612				*p = 0;
613				if ((flags & UNSIGNED) == 0)
614				    res = wcstoimax(buf, NULL, base);
615				else
616				    res = wcstoumax(buf, NULL, base);
617				if (flags & POINTER)
618					*va_arg(ap, void **) =
619							(void *)(uintptr_t)res;
620				else if (flags & SHORTSHORT)
621					*va_arg(ap, char *) = res;
622				else if (flags & SHORT)
623					*va_arg(ap, short *) = res;
624				else if (flags & LONG)
625					*va_arg(ap, long *) = res;
626				else if (flags & LONGLONG)
627					*va_arg(ap, long long *) = res;
628				else if (flags & INTMAXT)
629					*va_arg(ap, intmax_t *) = res;
630				else if (flags & PTRDIFFT)
631					*va_arg(ap, ptrdiff_t *) = res;
632				else if (flags & SIZET)
633					*va_arg(ap, size_t *) = res;
634				else
635					*va_arg(ap, int *) = res;
636				nassigned++;
637			}
638			nread += p - buf;
639			nconversions++;
640			break;
641
642#ifdef FLOATING_POINT
643		case CT_FLOAT:
644			/* scan a floating point number as if by strtod */
645#ifdef hardway
646			if (width == 0 || width > sizeof(buf) - 1)
647				width = sizeof(buf) - 1;
648#else
649			/* size_t is unsigned, hence this optimisation */
650			if (--width > sizeof(buf) - 2)
651				width = sizeof(buf) - 2;
652			width++;
653#endif
654			flags |= SIGNOK | NDIGITS | DPTOK | EXPOK;
655			for (p = buf; width; width--) {
656				c = __fgetwc(fp);
657				/*
658				 * This code mimicks the integer conversion
659				 * code, but is much simpler.
660				 */
661				switch (c) {
662
663				case '0': case '1': case '2': case '3':
664				case '4': case '5': case '6': case '7':
665				case '8': case '9':
666					flags &= ~(SIGNOK | NDIGITS);
667					goto fok;
668
669				case '+': case '-':
670					if (flags & SIGNOK) {
671						flags &= ~SIGNOK;
672						goto fok;
673					}
674					break;
675				case 'e': case 'E':
676					/* no exponent without some digits */
677					if ((flags&(NDIGITS|EXPOK)) == EXPOK) {
678						flags =
679						    (flags & ~(EXPOK|DPTOK)) |
680						    SIGNOK | NDIGITS;
681						goto fok;
682					}
683					break;
684				default:
685					if (c == (wchar_t)decimal_point &&
686					    (flags & DPTOK)) {
687						flags &= ~(SIGNOK | DPTOK);
688						goto fok;
689					}
690					break;
691				}
692				if (c != WEOF)
693					__ungetwc(c, fp);
694				break;
695		fok:
696				*p++ = c;
697			}
698			/*
699			 * If no digits, might be missing exponent digits
700			 * (just give back the exponent) or might be missing
701			 * regular digits, but had sign and/or decimal point.
702			 */
703			if (flags & NDIGITS) {
704				if (flags & EXPOK) {
705					/* no digits at all */
706					while (p > buf)
707						__ungetwc(*--p, fp);
708					goto match_failure;
709				}
710				/* just a bad exponent (e and maybe sign) */
711				c = *--p;
712				if (c != 'e' && c != 'E') {
713					__ungetwc(c, fp);/* sign */
714					c = *--p;
715				}
716				__ungetwc(c, fp);
717			}
718			if ((flags & SUPPRESS) == 0) {
719				double res;
720
721				*p = 0;
722				/* XXX this loses precision for long doubles. */
723				res = wcstod(buf, NULL);
724				if (flags & LONGDBL)
725					*va_arg(ap, long double *) = res;
726				else if (flags & LONG)
727					*va_arg(ap, double *) = res;
728				else
729					*va_arg(ap, float *) = res;
730				nassigned++;
731			}
732			nread += p - buf;
733			nconversions++;
734			break;
735#endif /* FLOATING_POINT */
736		}
737	}
738input_failure:
739	return (nconversions != 0 ? nassigned : EOF);
740match_failure:
741	return (nassigned);
742}
743