1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*	Copyright (c) 1988 AT&T	*/
28/*	  All Rights Reserved  	*/
29
30#pragma ident	"%Z%%M%	%I%	%E% SMI"
31
32#include "lint.h"
33#include <sys/types.h>
34#include "mtlib.h"
35#include "file64.h"
36#include <stdio.h>
37#include <ctype.h>
38#include <stdarg.h>
39#include <values.h>
40#include <errno.h>
41#include <stdlib.h>
42#include <string.h>
43#include <math.h>
44#include <thread.h>
45#include <synch.h>
46#include <stdlib.h>
47#include <fnmatch.h>
48#include <limits.h>
49#include <wchar.h>
50#include <unistd.h>
51#include "libc.h"
52#include "stdiom.h"
53#include "xpg6.h"
54
/*
 * Number of distinct values a byte can hold; used below to size the
 * per-byte scanset lookup table.
 */
#define	NCHARS	(1 << BITSPERBYTE)

/*
 * Character access macros.  All of them expect a FILE pointer named "iop"
 * to be in scope at the point of use.
 *
 * if the _IOWRT flag is set, this must be a call from sscanf
 *
 * locgetc(cnt):  adds one to cnt, then yields the next input character.
 *	For an _IOWRT (string) stream a NUL byte yields EOF without
 *	advancing; otherwise the byte at _ptr is returned and _ptr is
 *	advanced.  For any other stream GETC(iop) is used.
 *	Note that cnt is incremented even when EOF is returned.
 *
 * locungetc(cnt, x):  subtracts one from cnt.  If x is EOF the result is
 *	EOF and the stream is untouched.  Otherwise _ptr is stepped back by
 *	one (and _cnt is incremented for non-_IOWRT streams) and the byte
 *	now under _ptr is the result.  The value x itself is never stored.
 */
#define	locgetc(cnt)	(cnt += 1, (iop->_flag & _IOWRT) ? \
				((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \
				GETC(iop))
#define	locungetc(cnt, x) (cnt -= 1, (x == EOF) ? EOF : \
				((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \
				    (++iop->_cnt, *(--iop->_ptr))))

/*
 * wlocgetc() / wlocungetc(x):  same as above but without a character
 * count to maintain; wlocungetc() hands x to UNGETC() for non-_IOWRT
 * streams instead of just moving the pointer.
 */
#define	wlocgetc()	((iop->_flag & _IOWRT) ? \
				((*iop->_ptr == '\0') ? EOF : *iop->_ptr++) : \
				GETC(iop))
#define	wlocungetc(x) ((x == EOF) ? EOF : \
				((iop->_flag & _IOWRT) ? *(--iop->_ptr) : \
				    UNGETC(x, iop)))

#define	MAXARGS	30	/* max. number of args for fast positional parameters */
73
/*
 * stva_list is used to subvert C's restriction that a variable with an
 * array type cannot appear on the left hand side of an assignment operator.
 * By putting the array inside a structure, the functionality of assigning to
 * the whole array through a simple assignment is achieved.
 */
typedef struct stva_list {
	va_list	ap;	/* the wrapped argument-list cursor */
} stva_list;
83
/*
 * Forward declarations for the file-local conversion helpers.
 * Several helpers exist in two flavours selected by _WIDE; the
 * byte-oriented bracket matcher brstring() and the _wd_* / _watoi
 * routines are only declared when _WIDE is defined.
 */
static int number(int *, int *, int, int, int, int, FILE *, va_list *);
static int readchar(FILE *, int *);
static int string(int *, int *, int, int, int, char *, FILE *, va_list *);
static int wstring(int *, int *, int, int, int, FILE *, va_list *);
static int	wbrstring(int *, int *, int, int, int, FILE *,
	unsigned char *, va_list *);
#ifdef	_WIDE
static int	brstring(int *, int *, int, int, int, FILE *,
	unsigned char *, va_list *);
#endif
static int _bi_getwc(FILE *);
static int _bi_ungetwc(wint_t, FILE *);

#ifdef	_WIDE
static int _mkarglst(const wchar_t *, stva_list, stva_list[]);
static wint_t	_wd_getwc(int *, FILE *);
static wint_t	_wd_ungetwc(int *, wchar_t, FILE *);
static int	_watoi(wchar_t *);
#else  /* _WIDE */
static int _mkarglst(const char *, stva_list, stva_list[]);
#endif /* _WIDE */
105
106#ifndef	_WIDE
107int
108_doscan(FILE *iop, const char *fmt, va_list va_Alist)
109{
110	int ret;
111	rmutex_t *lk;
112
113	if (iop->_flag & _IOWRT)
114		ret = __doscan_u(iop, fmt, va_Alist, 0);
115	else {
116		FLOCKFILE(lk, iop);
117		ret = __doscan_u(iop, fmt, va_Alist, 0);
118		FUNLOCKFILE(lk);
119	}
120	return (ret);
121}
122#endif  /* _WIDE */
123
124/* ARGSUSED3 */
125#ifdef	_WIDE
126int
127__wdoscan_u(FILE *iop, const wchar_t *fmt, va_list va_Alist, int scflag)
128#else  /* _WIDE */
129int
130__doscan_u(FILE *iop, const char *sfmt, va_list va_Alist, int scflag)
131#endif /* _WIDE */
132{
133#ifdef	_WIDE
134	wchar_t	ch;
135	wchar_t	inchar, size;
136	int	nmatch = 0, len, stow;
137#else  /* _WIDE */
138	int	ch;
139	int		nmatch = 0, len, inchar, stow, size;
140#endif /* _WIDE */
141
142	unsigned char	*bracket_str = NULL;
143	int		chcount, flag_eof;
144	char	tab[NCHARS];
145
146	/* variables for postional parameters */
147#ifdef	_WIDE
148	const wchar_t	*sformat = fmt;	/* save the beginning of the format */
149#else  /* _WIDE */
150	const unsigned char	*fmt = (const unsigned char *)sfmt;
151	const char	*sformat = sfmt; /* save the beginning of the format */
152#endif /* _WIDE */
153	int		fpos = 1;	/* 1 if first postional parameter */
154	stva_list	args;	/* used to step through the argument list */
155	stva_list	sargs;	/* used to save start of the argument list */
156	stva_list	arglst[MAXARGS];
157					/*
158					 * array giving the appropriate values
159					 * for va_arg() to retrieve the
160					 * corresponding argument:
161					 * arglst[0] is the first argument
162					 * arglst[1] is the second argument,etc.
163					 */
164	/* Check if readable stream */
165	if (!(iop->_flag & (_IOREAD | _IORW))) {
166		errno = EBADF;
167		return (EOF);
168	}
169
170	/*
171	 * Initialize args and sargs to the start of the argument list.
172	 * We don't know any portable way to copy an arbitrary C object
173	 * so we use a system-specific routine(probably a macro) from
174	 * stdarg.h.  (Remember that if va_list is an array, in_args will
175	 * be a pointer and &in_args won't be what we would want for
176	 * memcpy.)
177	 */
178	va_copy(args.ap, va_Alist);
179
180	sargs = args;
181
182	chcount = 0; flag_eof = 0;
183
184	/*
185	 * ****************************************************
186	 * Main loop: reads format to determine a pattern,
187	 *		and then goes to read input stream
188	 *		in attempt to match the pattern.
189	 * ****************************************************
190	 */
191	for (; ; ) {
192		if ((ch = *fmt++) == '\0') {
193			return (nmatch); /* end of format */
194		}
195#ifdef	_WIDE
196		if (iswspace(ch)) {
197			if (!flag_eof) {
198				while (iswspace(inchar =
199				    _wd_getwc(&chcount, iop)))
200					;
201				if (_wd_ungetwc(&chcount, inchar, iop) == WEOF)
202					flag_eof = 1;
203			}
204			continue;
205		}
206		if (ch != '%' || (ch = *fmt++) == '%') {
207			if (ch == '%') {
208				if (!flag_eof) {
209					while (iswspace(inchar =
210					    _wd_getwc(&chcount, iop)))
211						;
212					if (_wd_ungetwc(&chcount, inchar, iop)
213					    == WEOF)
214						flag_eof = 1;
215				}
216			}
217			if ((inchar = _wd_getwc(&chcount, iop)) == ch)
218				continue;
219			if (_wd_ungetwc(&chcount, inchar, iop) != WEOF) {
220				return (nmatch); /* failed to match input */
221			}
222			break;
223		}
224#else  /* _WIDE */
225		if (isspace(ch)) {
226			if (!flag_eof) {
227				while (isspace(inchar = locgetc(chcount)))
228					;
229				if (locungetc(chcount, inchar) == EOF)
230					flag_eof = 1;
231
232			}
233			continue;
234		}
235		if (ch != '%' || (ch = *fmt++) == '%') {
236			if (ch == '%') {
237				if (!flag_eof) {
238					while (isspace(inchar =
239					    locgetc(chcount)))
240						;
241					if (locungetc(chcount, inchar) == EOF)
242						flag_eof = 1;
243				}
244			}
245			if ((inchar = locgetc(chcount)) == ch)
246				continue;
247			if (locungetc(chcount, inchar) != EOF) {
248				return (nmatch); /* failed to match input */
249			}
250			break;
251		}
252#endif /* _WIDE */
253
254charswitch:	/* target of a goto 8-( */
255
256		if (ch == '*') {
257			stow = 0;
258			ch = *fmt++;
259		} else
260			stow = 1;
261
262#ifdef	_WIDE
263		for (len = 0; ((ch >= 0) && (ch < 256) && isdigit(ch));
264		    ch = *fmt++)
265			len = len * 10 + ch - '0';
266#else  /* _WIDE */
267		for (len = 0; isdigit(ch); ch = *fmt++)
268			len = len * 10 + ch - '0';
269#endif /* _WIDE */
270
271		if (ch == '$') {
272			/*
273			 * positional parameter handling - the number
274			 * specified in len gives the argument to which
275			 * the next conversion should be applied.
276			 * WARNING: This implementation of positional
277			 * parameters assumes that the sizes of all pointer
278			 * types are the same. (Code similar to that
279			 * in the portable doprnt.c should be used if this
280			 * assumption does not hold for a particular
281			 * port.)
282			 */
283			if (fpos) {
284				if (_mkarglst(sformat, sargs, arglst) != 0) {
285					return (EOF);
286				} else {
287					fpos = 0;
288				}
289			}
290			if (len <= MAXARGS) {
291				args = arglst[len - 1];
292			} else {
293				args = arglst[MAXARGS - 1];
294				for (len -= MAXARGS; len > 0; len--)
295					(void) va_arg(args.ap, void *);
296			}
297			len = 0;
298			ch = *fmt++;
299			goto charswitch;
300		}
301
302		if (len == 0)
303			len = MAXINT;
304#ifdef	_WIDE
305		if ((size = ch) == 'l' || (size == 'h') || (size == 'L') ||
306		    (size == 'j') || (size == 't') || (size == 'z'))
307			ch = *fmt++;
308#else  /* _WIDE */
309		if ((size = ch) == 'l' || (size == 'h') || (size == 'L') ||
310		    (size == 'w') || (size == 'j') || (size == 't') ||
311		    (size == 'z'))
312			ch = *fmt++;
313#endif /* _WIDE */
314		if (size == 'l' && ch == 'l') {
315			size = 'm';		/* size = 'm' if long long */
316			ch = *fmt++;
317		} else if (size == 'h' && ch == 'h') {
318			size = 'b';		/* use size = 'b' if char */
319			ch = *fmt++;
320		} else if ((size == 't') || (size == 'z')) {
321			size = 'l';
322		} else if (size == 'j') {
323#ifndef _LP64
324			/* check scflag for size of u/intmax_t (32-bit libc) */
325			if (!(scflag & _F_INTMAX32)) {
326#endif
327				size = 'm';
328#ifndef _LP64
329			}
330#endif
331		}
332		if (ch == '\0') {
333			return (EOF);		/* unexpected end of format */
334		}
335#ifdef	_WIDE
336		if (ch == '[') {
337			wchar_t	c;
338			size_t	len;
339			int	negflg = 0;
340			wchar_t	*p;
341			wchar_t	*wbracket_str;
342			size_t	wlen, clen;
343
344			/* p points to the address of '[' */
345			p = (wchar_t *)fmt - 1;
346			len = 0;
347			if (*fmt == '^') {
348				len++;
349				fmt++;
350				negflg = 1;
351			}
352			if (((c = *fmt) == ']') || (c == '-')) {
353				len++;
354				fmt++;
355			}
356			while ((c = *fmt) != ']') {
357				if (c == '\0') {
358					return (EOF); /* unexpected EOF */
359				} else {
360					len++;
361					fmt++;
362				}
363			}
364			fmt++;
365			len += 2;
366			wbracket_str = (wchar_t *)
367			    malloc(sizeof (wchar_t) * (len + 1));
368			if (wbracket_str == NULL) {
369				errno = ENOMEM;
370				return (EOF);
371			} else {
372				(void) wmemcpy(wbracket_str,
373				    (const wchar_t *)p, len);
374				*(wbracket_str + len) = L'\0';
375				if (negflg && *(wbracket_str + 1) == '^') {
376					*(wbracket_str + 1) = L'!';
377				}
378			}
379			wlen = wcslen(wbracket_str);
380			clen = wcstombs((char *)NULL, wbracket_str, 0);
381			if (clen == (size_t)-1) {
382				free(wbracket_str);
383				return (EOF);
384			}
385			bracket_str = (unsigned char *)
386			    malloc(sizeof (unsigned char) * (clen + 1));
387			if (bracket_str == NULL) {
388				free(wbracket_str);
389				errno = ENOMEM;
390				return (EOF);
391			}
392			clen = wcstombs((char *)bracket_str, wbracket_str,
393			    wlen + 1);
394			free(wbracket_str);
395			if (clen == (size_t)-1) {
396				free(bracket_str);
397				return (EOF);
398			}
399		}
400#else  /* _WIDE */
401		if (ch == '[') {
402			if (size == 'l') {
403				int	c, len, i;
404				int	negflg = 0;
405				unsigned char 	*p;
406
407				p = (unsigned char *)(fmt - 1);
408				len = 0;
409				if (*fmt == '^') {
410					len++;
411					fmt++;
412					negflg = 1;
413				}
414				if (((c = *fmt) == ']') || (c == '-')) {
415					len++;
416					fmt++;
417				}
418				while ((c = *fmt) != ']') {
419					if (c == '\0') {
420						return (EOF);
421					} else if (isascii(c)) {
422						len++;
423						fmt++;
424					} else {
425						i = mblen((const char *)fmt,
426						    MB_CUR_MAX);
427						if (i <= 0) {
428							return (EOF);
429						} else {
430							len += i;
431							fmt += i;
432						}
433					}
434				}
435				fmt++;
436				len += 2;
437				bracket_str = (unsigned char *)
438				    malloc(sizeof (unsigned char) * (len + 1));
439				if (bracket_str == NULL) {
440					errno = ENOMEM;
441					return (EOF);
442				} else {
443					(void) strncpy((char *)bracket_str,
444					    (const char *)p, len);
445					*(bracket_str + len) = '\0';
446					if (negflg &&
447					    *(bracket_str + 1) == '^') {
448						*(bracket_str + 1) = '!';
449					}
450				}
451			} else {
452				int	t = 0;
453				int	b, c, d;
454
455				if (*fmt == '^') {
456					t++;
457					fmt++;
458				}
459				(void) memset(tab, !t, NCHARS);
460				if ((c = *fmt) == ']' || c == '-') {
461					tab[c] = t;
462					fmt++;
463				}
464
465				while ((c = *fmt) != ']') {
466					if (c == '\0') {
467						return (EOF);
468					}
469					b = *(fmt - 1);
470					d = *(fmt + 1);
471					if ((c == '-') && (d != ']') &&
472					    (b < d)) {
473						(void) memset(&tab[b], t,
474						    d - b + 1);
475						fmt += 2;
476					} else {
477						tab[c] = t;
478						fmt++;
479					}
480				}
481				fmt++;
482			}
483		}
484#endif /* _WIDE */
485
486#ifdef	_WIDE
487		if ((ch >= 0) && (ch < 256) &&
488		    isupper((int)ch)) { /* no longer documented */
489			if (_lib_version == c_issue_4) {
490				if (size != 'm' && size != 'L')
491					size = 'l';
492			}
493			ch = _tolower((int)ch);
494		}
495		if (ch != 'n' && !flag_eof) {
496			if (ch != 'c' && ch != 'C' && ch != '[') {
497				while (iswspace(inchar =
498				    _wd_getwc(&chcount, iop)))
499					;
500				if (_wd_ungetwc(&chcount, inchar, iop) == WEOF)
501					break;
502
503			}
504		}
505#else  /* _WIDE */
506		if (isupper(ch)) { /* no longer documented */
507			if (_lib_version == c_issue_4) {
508				if (size != 'm' && size != 'L')
509					size = 'l';
510			}
511			ch = _tolower(ch);
512		}
513		if (ch != 'n' && !flag_eof) {
514			if (ch != 'c' && ch != 'C' && ch != '[') {
515				while (isspace(inchar = locgetc(chcount)))
516					;
517				if (locungetc(chcount, inchar) == EOF)
518					break;
519			}
520		}
521#endif /* _WIDE */
522
523		switch (ch) {
524		case 'C':
525		case 'S':
526		case 'c':
527		case 's':
528#ifdef	_WIDE
529			if ((size == 'l') || (size == 'C') || (size == 'S'))
530#else  /* _WIDE */
531			if ((size == 'w') || (size == 'l') || (size == 'C') ||
532			    (size == 'S'))
533#endif /* _WIDE */
534			{
535				size = wstring(&chcount, &flag_eof, stow,
536				    (int)ch, len, iop, &args.ap);
537			} else {
538				size = string(&chcount, &flag_eof, stow,
539				    (int)ch, len, tab, iop, &args.ap);
540			}
541			break;
542		case '[':
543			if (size == 'l') {
544				size = wbrstring(&chcount, &flag_eof, stow,
545				    (int)ch, len, iop, bracket_str, &args.ap);
546				free(bracket_str);
547				bracket_str = NULL;
548			} else {
549#ifdef	_WIDE
550				size = brstring(&chcount, &flag_eof, stow,
551				    (int)ch, len, iop, bracket_str, &args.ap);
552				free(bracket_str);
553				bracket_str = NULL;
554#else  /* _WIDE */
555				size = string(&chcount, &flag_eof, stow,
556				    ch, len, tab, iop, &args.ap);
557#endif /* _WIDE */
558			}
559			break;
560
561		case 'n':
562			if (stow == 0)
563				continue;
564			if (size == 'b')	/* char */
565				*va_arg(args.ap, char *) = (char)chcount;
566			else if (size == 'h')
567				*va_arg(args.ap, short *) = (short)chcount;
568			else if (size == 'l')
569				*va_arg(args.ap, long *) = (long)chcount;
570			else if (size == 'm') /* long long */
571				*va_arg(args.ap, long long *) =
572				    (long long) chcount;
573			else
574				*va_arg(args.ap, int *) = (int)chcount;
575			continue;
576
577		case 'i':
578		default:
579			size = number(&chcount, &flag_eof, stow, (int)ch,
580			    len, (int)size, iop, &args.ap);
581			break;
582		}
583		if (size)
584			nmatch += stow;
585		else {
586			return ((flag_eof && !nmatch) ? EOF : nmatch);
587		}
588		continue;
589	}
590	if (bracket_str)
591		free(bracket_str);
592	return (nmatch != 0 ? nmatch : EOF); /* end of input */
593}
594
595/* ****************************************************************** */
596/* Functions to read the input stream in an attempt to match incoming */
597/* data to the current pattern from the main loop of _doscan(). */
598/* ****************************************************************** */
/*
 * Convert one numeric field (integer, pointer or floating point).
 *
 * chcount	running count of characters consumed; updated here.
 * flag_eof	set to 1 when the final push-back reports EOF.
 * stow		non-zero if the result is to be stored through *listp.
 * type		conversion character (already lower-cased by the caller).
 * len		maximum field width.
 * size		size modifier code ('b', 'h', 'l', 'm', 'L' or other).
 * iop		input stream.
 * listp	argument list from which the destination pointer is taken.
 *
 * Returns 0 for an unrecognized conversion character, for a floating
 * point field that is not a valid number, or (with errno = ERANGE) when
 * an integer field grows past the internal 64-byte buffer.  Otherwise the
 * result is non-zero on a match: 1 for floating point, and the number of
 * digits seen for integers (so 0 when no digit was found).
 */
static int
number(int *chcount, int *flag_eof, int stow, int type, int len, int size,
	FILE *iop, va_list *listp)
{
	char	numbuf[64];
	char	*np = numbuf;
	int	c, base, inchar, lookahead;
	int	digitseen = 0, floater = 0, negflg = 0;
	int	lc;
	long long	lcval = 0LL;

	switch (type) {
	case 'e':
	case 'f':
	case 'g':
		/*
		 * lc = 0 corresponds to c90 mode: do not recognize
		 *	hexadecimal fp strings; attempt to push back
		 *	all unused characters read
		 *
		 * lc = -1 corresponds to c99 mode: recognize hexa-
		 *	decimal fp strings; push back at most one
		 *	unused character
		 */
		lc = (__xpg6 & _C99SUSv3_recognize_hexfp)? -1 : 0;
		floater = 1;
		break;

	case 'a':
		lc = -1;
		floater = 1;
		break;

	case 'd':
	case 'u':
	case 'i':
		base = 10;
		break;
	case 'o':
		base = 8;
		break;
	case 'p':
#ifdef	_LP64
		size = 'l'; /* pointers are long in LP64 */
#endif	/*	_LP64	*/
		/* FALLTHROUGH */
	case 'x':
		base = 16;
		break;
	default:
		return (0); /* unrecognized conversion character */
	}

	if (floater != 0) {
		/*
		 * Handle floating point with
		 * file_to_decimal.
		 */
		decimal_mode		dm;
		decimal_record		dr;
		fp_exception_field_type	efs;
		enum decimal_string_form form;
		char			*echar;
		int			nread;
		char			buffer[1024+1];
		char			*nb = buffer;

		/* the field can be no wider than the local buffer */
		if (len > 1024)
			len = 1024;
		file_to_decimal(&nb, len, lc, &dr, &form, &echar, iop, &nread);
		if (lc == -1) {
			/*
			 * In C99 mode, the entire string read has to be
			 * accepted in order to qualify as a match
			 */
			if (nb != buffer + nread)
				form = invalid_form;
		}
		if (stow && (form != invalid_form)) {
			/*
			 * Pick the destination type from the size modifier;
			 * a negative form value selects the hexadecimal
			 * conversion routines.
			 */
#if defined(__sparc)
			dm.rd = _QgetRD();
			if (size == 'L') {		/* long double */
				if ((int)form < 0)
					__hex_to_quadruple(&dr, dm.rd,
					    va_arg(*listp, quadruple *), &efs);
				else
					decimal_to_quadruple(
					    va_arg(*listp, quadruple *),
					    &dm, &dr, &efs);
			}
#elif defined(__i386) || defined(__amd64)
			dm.rd = __xgetRD();
			if (size == 'L') {		/* long double */
				if ((int)form < 0)
					__hex_to_extended(&dr, dm.rd,
					    va_arg(*listp, extended *), &efs);
				else
					decimal_to_extended(
					    va_arg(*listp, extended *),
					    &dm, &dr, &efs);
			}
#else
#error Unknown architecture
#endif
			else if (size == 'l') {		/* double */
				if ((int)form < 0)
					__hex_to_double(&dr, dm.rd,
					    va_arg(*listp, double *), &efs);
				else
					decimal_to_double(
					    va_arg(*listp, double *),
					    &dm, &dr, &efs);
			} else {			/* float */
				if ((int)form < 0)
					__hex_to_single(&dr, dm.rd,
					    va_arg(*listp, single *), &efs);
				else
					decimal_to_single((single *)
					    va_arg(*listp, single *),
					    &dm, &dr, &efs);
			}
			/* overflow and underflow are both reported as ERANGE */
			if ((efs & (1 << fp_overflow)) != 0) {
				errno = ERANGE;
			}
			if ((efs & (1 << fp_underflow)) != 0) {
				errno = ERANGE;
			}
		}
		(*chcount) += nread;	/* Count characters read. */
		/* peek one character ahead solely to detect end of input */
		c = locgetc((*chcount));
		if (locungetc((*chcount), c) == EOF)
			*flag_eof = 1;
		return ((form == invalid_form) ? 0 : 1);
				/* successful match if non-zero */
	}

	/* integer path: optional sign, optional 0 / 0x prefix, digits */
	switch (c = locgetc((*chcount))) {
	case '-':
		negflg++;
		/* FALLTHROUGH */
	case '+':
		if (--len <= 0)
			break;
		if ((c = locgetc((*chcount))) != '0')
			break;
		/* FALLTHROUGH */
	case '0':
		/*
		 * If %i or %x, the characters 0x or 0X may optionally precede
		 * the sequence of letters and digits (base 16).
		 */
		if ((type != 'i' && type != 'x') || (len <= 1))
			break;
		if (((inchar = locgetc((*chcount))) == 'x') ||
		    (inchar == 'X')) {
			/* "0x" only counts as a prefix if a hex digit follows */
			lookahead = readchar(iop, chcount);
			if (isxdigit(lookahead)) {
				base = 16;

				if (len <= 2) {
					(void) locungetc((*chcount), lookahead);
					/* Take into account the 'x' */
					len -= 1;
				} else {
					c = lookahead;
					/* Take into account '0x' */
					len -= 2;
				}
			} else {
				(void) locungetc((*chcount), lookahead);
				(void) locungetc((*chcount), inchar);
			}
		} else {
			/* inchar wasn't 'x'. */
			(void) locungetc((*chcount), inchar); /* Put it back. */
			if (type == 'i') /* Only %i accepts an octal. */
				base = 8;
		}
	}
	/*
	 * Accumulate digits.  numbuf is only used to bound the number of
	 * characters accepted; the value itself is built up in lcval.
	 */
	for (; --len  >= 0; *np++ = (char)c, c = locgetc((*chcount))) {
		if (np > numbuf + 62) {
			errno = ERANGE;
			return (0);
		}
		if (isdigit(c) || base == 16 && isxdigit(c)) {
			int digit = c - (isdigit(c) ? '0' :
			    isupper(c) ? 'A' - 10 : 'a' - 10);
			if (digit >= base)
				break;
			if (stow)
				lcval = base * lcval + digit;
			digitseen++;
			continue;
		}
		break;
	}

	if (stow && digitseen) {
		/* suppress possible overflow on 2's-comp negation */
		if (negflg && lcval != (1ULL << 63))
			lcval = -lcval;
		/* store through a pointer of the width the modifier names */
		switch (size) {
			case 'm':
				*va_arg(*listp, long long *) = lcval;
				break;
			case 'l':
				*va_arg(*listp, long *) = (long)lcval;
				break;
			case 'h':
				*va_arg(*listp, short *) = (short)lcval;
				break;
			case 'b':
				*va_arg(*listp, char *) = (char)lcval;
				break;
			default:
				*va_arg(*listp, int *) = (int)lcval;
				break;
		}
	}
	/* give back the character that ended the field */
	if (locungetc((*chcount), c) == EOF)
		*flag_eof = 1;
	return (digitseen); /* successful match if non-zero */
}
822
823/* Get a character. If not using sscanf and at the buffer's end */
824/* then do a direct read(). Characters read via readchar() */
825/* can be  pushed back on the input stream by locungetc((*chcount),) */
826/* since there is padding allocated at the end of the stream buffer. */
827static int
828readchar(FILE *iop, int *chcount)
829{
830	int	inchar;
831	char	buf[1];
832
833	if ((iop->_flag & _IOWRT) || (iop->_cnt != 0))
834		inchar = locgetc((*chcount));
835	else {
836		if (read(FILENO(iop), buf, 1) != 1)
837			return (EOF);
838		inchar = (int)buf[0];
839		(*chcount) += 1;
840	}
841	return (inchar);
842}
843
844static int
845string(int *chcount, int *flag_eof, int stow, int type, int len, char *tab,
846	FILE *iop, va_list *listp)
847{
848	int	ch;
849	char	*ptr;
850	char	*start;
851
852	start = ptr = stow ? va_arg(*listp, char *) : NULL;
853	if (((type == 'c') || (type == 'C')) && len == MAXINT)
854		len = 1;
855#ifdef	_WIDE
856	while ((ch = locgetc((*chcount))) != EOF &&
857	    !(((type == 's') || (type == 'S')) && isspace(ch))) {
858#else  /* _WIDE */
859	while ((ch = locgetc((*chcount))) != EOF &&
860	    !(((type == 's') || (type == 'S')) &&
861	    isspace(ch) || type == '[' && tab[ch])) {
862#endif /* _WIDE */
863		if (stow)
864			*ptr = (char)ch;
865		ptr++;
866		if (--len <= 0)
867			break;
868	}
869	if (ch == EOF) {
870		(*flag_eof) = 1;
871		(*chcount) -= 1;
872	} else if (len > 0 && locungetc((*chcount), ch) == EOF)
873		(*flag_eof) = 1;
874	if (ptr == start)
875		return (0);	/* no match */
876	if (stow && ((type != 'c') && (type != 'C')))
877		*ptr = '\0';
878	return (1);	/* successful match */
879}
880
881/* This function initializes arglst, to contain the appropriate */
882/* va_list values for the first MAXARGS arguments. */
883/* WARNING: this code assumes that the sizes of all pointer types */
884/* are the same. (Code similar to that in the portable doprnt.c */
885/* should be used if this assumption is not true for a */
886/* particular port.) */
887
888#ifdef	_WIDE
889static int
890_mkarglst(const wchar_t *fmt, stva_list args, stva_list arglst[])
891#else  /* _WIDE */
892static int
893_mkarglst(const char *fmt, stva_list args, stva_list arglst[])
894#endif /* _WIDE */
895{
896#ifdef	_WIDE
897#define	STRCHR	wcschr
898#define	STRSPN	wcsspn
899#define	ATOI(x)	_watoi((wchar_t *)x)
900#define	SPNSTR1	L"01234567890"
901#define	SPNSTR2	L"# +-.0123456789hL$"
902#else  /* _WIDE */
903#define	STRCHR	strchr
904#define	STRSPN	strspn
905#define	ATOI(x)	atoi(x)
906#define	SPNSTR1	"01234567890"
907#define	SPNSTR2	"# +-.0123456789hL$"
908#endif /* _WIDE */
909
910	int maxnum, curargno;
911	size_t n;
912
913	maxnum = -1;
914	curargno = 0;
915
916	while ((fmt = STRCHR(fmt, '%')) != NULL) {
917		fmt++;	/* skip % */
918		if (*fmt == '*' || *fmt == '%')
919			continue;
920		if (fmt[n = STRSPN(fmt, SPNSTR1)] == L'$') {
921			/* convert to zero base */
922			curargno = ATOI(fmt) - 1;
923			fmt += n + 1;
924		}
925
926		if (maxnum < curargno)
927			maxnum = curargno;
928		curargno++;	/* default to next in list */
929
930		fmt += STRSPN(fmt, SPNSTR2);
931		if (*fmt == '[') {
932			int	i;
933			fmt++; /* has to be at least on item in scan list */
934			if (*fmt == ']') {
935				fmt++;
936			}
937			while (*fmt != ']') {
938				if (*fmt == L'\0') {
939					return (-1); /* bad format */
940#ifdef	_WIDE
941				} else {
942					fmt++;
943				}
944#else  /* _WIDE */
945				} else if (isascii(*fmt)) {
946					fmt++;
947				} else {
948					i = mblen((const char *)
949					    fmt, MB_CUR_MAX);
950					if (i <= 0) {
951						return (-1);
952					} else {
953						fmt += i;
954					}
955				}
956#endif /* _WIDE */
957			}
958		}
959	}
960	if (maxnum > MAXARGS)
961		maxnum = MAXARGS;
962	for (n = 0; n <= maxnum; n++) {
963		arglst[n] = args;
964		(void) va_arg(args.ap, void *);
965	}
966	return (0);
967}
968
969
970/*
971 * For wide character handling
972 */
973
974#ifdef	_WIDE
975static int
976wstring(int *chcount, int *flag_eof, int stow, int type,
977	int len, FILE *iop, va_list *listp)
978{
979	wint_t	wch;
980	wchar_t	*ptr;
981	wchar_t	*wstart;
982	int	dummy;
983
984	wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;
985
986	if ((type == 'c') && len == MAXINT)
987		len = 1;
988	while (((wch = _wd_getwc(chcount, iop)) != WEOF) &&
989	    !(type == 's' && iswspace(wch))) {
990		if (stow)
991			*ptr = wch;
992		ptr++;
993		if (--len <= 0)
994			break;
995	}
996	if (wch == WEOF) {
997		*flag_eof = 1;
998		(*chcount) -= 1;
999	} else {
1000		if (len > 0 && _wd_ungetwc(chcount, wch, iop) == WEOF)
1001			*flag_eof = 1;
1002	}
1003	if (ptr == wstart)
1004		return (0); /* no match */
1005	if (stow && (type != 'c'))
1006		*ptr = '\0';
1007	return (1); /* successful match */
1008}
1009
1010#else  /* _WIDE */
1011static int
1012wstring(int *chcount, int *flag_eof, int stow, int type, int len, FILE *iop,
1013	va_list *listp)
1014{
1015	int	wch;
1016	wchar_t	*ptr;
1017	wchar_t	*wstart;
1018
1019	wstart = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;
1020
1021	if ((type == 'c') && len == MAXINT)
1022		len = 1;
1023	while (((wch = _bi_getwc(iop)) != EOF) &&
1024	    !(type == 's' && (isascii(wch) ? isspace(wch) : 0))) {
1025		(*chcount) += _scrwidth((wchar_t)wch);
1026		if (stow)
1027			*ptr = wch;
1028		ptr++;
1029		if (--len <= 0)
1030			break;
1031	}
1032	if (wch == EOF) {
1033		(*flag_eof) = 1;
1034		(*chcount) -= 1;
1035	} else {
1036		if (len > 0 && _bi_ungetwc(wch, iop) == EOF)
1037			(*flag_eof) = 1;
1038	}
1039	if (ptr == wstart)
1040		return (0); /* no match */
1041	if (stow && (type != 'c'))
1042		*ptr = '\0';
1043	return (1); /* successful match */
1044}
1045#endif /* _WIDE */
1046
1047#ifdef	_WIDE
1048static wint_t
1049_wd_getwc(int *chcount, FILE *iop)
1050{
1051	wint_t	wc;
1052	int	len;
1053
1054	if (!(iop->_flag & _IOWRT)) {
1055		/* call from fwscanf, wscanf */
1056		wc = __fgetwc_xpg5(iop);
1057		(*chcount)++;
1058		return (wc);
1059	} else {
1060		/* call from swscanf */
1061		if (*iop->_ptr == '\0')
1062			return (WEOF);
1063		len = mbtowc((wchar_t *)&wc, (const char *)iop->_ptr,
1064		    MB_CUR_MAX);
1065		if (len == -1)
1066			return (WEOF);
1067		iop->_ptr += len;
1068		(*chcount)++;
1069		return (wc);
1070	}
1071}
1072
1073static wint_t
1074_wd_ungetwc(int *chcount, wchar_t wc, FILE *iop)
1075{
1076	wint_t	ret;
1077	int	len;
1078	char	mbs[MB_LEN_MAX];
1079
1080	if (wc == WEOF)
1081		return (WEOF);
1082
1083	if (!(iop->_flag & _IOWRT)) {
1084		/* call from fwscanf, wscanf */
1085		ret = __ungetwc_xpg5((wint_t)wc, iop);
1086		if (ret != (wint_t)wc)
1087			return (WEOF);
1088		(*chcount)--;
1089		return (ret);
1090	} else {
1091		/* call from swscanf */
1092		len = wctomb(mbs, wc);
1093		if (len == -1)
1094			return (WEOF);
1095		iop->_ptr -= len;
1096		(*chcount)--;
1097		return ((wint_t)wc);
1098	}
1099}
1100
/*
 * Minimal wide-character atoi(): converts the run of decimal digits at
 * the start of fmt.  No sign or leading white space is accepted; the
 * result is 0 when fmt does not begin with a digit.  Only characters
 * in the range 0..255 are ever passed to isdigit().
 */
static int
_watoi(wchar_t *fmt)
{
	wchar_t	*wp;
	int	value = 0;

	for (wp = fmt; (*wp >= 0) && (*wp < 256) && isdigit((int)*wp); wp++)
		value = value * 10 + (*wp - '0');

	return (value);
}
1118#endif /* _WIDE */
1119
/*
 * Scanset (%l[ or, in the _WIDE build, %l[) conversion that stores
 * wide characters.
 *
 * Each input character is converted to its multibyte form and matched
 * against the bracket pattern brstr with fnmatch(); scanning stops at
 * the first character that does not match, at end of input, or after
 * len characters.
 *
 * chcount	running character count; advanced only for accepted
 *		characters (by 1 under _WIDE, by _scrwidth() otherwise).
 * flag_eof	set to 1 when end of input is reached.
 * stow		non-zero if characters are to be stored through *listp.
 * type		unused.
 * brstr	multibyte "[...]" pattern prepared by the caller.
 *
 * Returns 1 if at least one character was accepted, 0 otherwise (also
 * 0 if a character cannot be converted with wctomb()).  A stored
 * result is terminated with a null wide character.
 */
/* ARGSUSED3 */
static int
wbrstring(int *chcount, int *flag_eof, int stow, int type,
	int len, FILE *iop, unsigned char *brstr, va_list *listp)
{
	wint_t	wch;
	int	i;
	char	str[MB_LEN_MAX + 1]; /* include null termination */
	wchar_t	*ptr, *start;
#ifdef	_WIDE
	/*
	 * Scratch counter handed to _wd_getwc()/_wd_ungetwc(), which
	 * increment and decrement it.  It must start out initialized;
	 * the real count in *chcount is maintained by hand below.
	 */
	int	dummy = 0;
#endif /* _WIDE */

	start = ptr = stow ? va_arg(*listp, wchar_t *) : NULL;

#ifdef	_WIDE
	while ((wch = _wd_getwc(&dummy, iop)) != WEOF) {
#else  /* _WIDE */
	while ((wch = _bi_getwc(iop)) != WEOF) {
#endif /* _WIDE */
		i = wctomb(str, (wchar_t)wch);
		if (i == -1) {
			return (0);
		}
		str[i] = '\0';
		/* fnmatch() returns non-zero when str is not in the set */
		if (fnmatch((const char *)brstr, (const char *)str,
		    FNM_NOESCAPE)) {
			break;
		} else {
			if (len > 0) {
#ifdef	_WIDE
				(*chcount)++;
#else  /* _WIDE */
				(*chcount) += _scrwidth(wch);
#endif /* _WIDE */
				len--;
				if (stow) {
					*ptr = wch;
				}
				ptr++;
				if (len <= 0)
					break;
			} else {
				break;
			}
		}
	}
	if (wch == WEOF) {
		*flag_eof = 1;
	} else {
		/* return the rejected character unless the width ran out */
#ifdef	_WIDE
		if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF)
#else  /* _WIDE */
		if (len > 0 && _bi_ungetwc(wch, iop) == WEOF)
#endif /* _WIDE */
			*flag_eof = 1;
	}
	if (ptr == start)
		return (0);				/* no match */
	if (stow)
		*ptr = L'\0';
	return (1);					/* successful match */
}
1183
1184#ifdef	_WIDE
/*
 * Scanset (%[) conversion for the _WIDE build that stores multibyte
 * characters into a char buffer.
 *
 * Each wide input character is converted to its multibyte form and
 * matched against the bracket pattern brstr with fnmatch().  Here len
 * is consumed in bytes: a character is accepted only if its whole
 * multibyte form still fits in the remaining width.
 *
 * chcount	running character count; incremented once per accepted
 *		character.
 * flag_eof	set to 1 when end of input is reached.
 * stow		non-zero if bytes are to be stored through *listp.
 * type		unused.
 * brstr	multibyte "[...]" pattern prepared by the caller.
 *
 * Returns 1 if at least one character was accepted, 0 otherwise (also
 * 0 if a character cannot be converted with wctomb()).  A stored
 * result is NUL-terminated.
 */
static int
brstring(int *chcount, int *flag_eof, int stow, int type,
	int len, FILE *iop, unsigned char *brstr, va_list *listp)
{
	wint_t	wch;
	int	i;
	char	str[MB_LEN_MAX + 1]; /* include null termination */
	char	*ptr, *start, *p;
	/*
	 * Scratch counter handed to _wd_getwc()/_wd_ungetwc(), which
	 * increment and decrement it.  It must start out initialized;
	 * the real count in *chcount is maintained by hand below.
	 */
	int	dummy = 0;

	start = ptr = stow ? va_arg(*listp, char *) : NULL;

	while ((wch = _wd_getwc(&dummy, iop)) != WEOF) {
		p = str;
		i = wctomb(str, (wchar_t)wch);
		if (i == -1) {
			return (0);
		}
		str[i] = '\0';
		/* fnmatch() returns non-zero when str is not in the set */
		if (fnmatch((const char *)brstr, (const char *)str,
		    FNM_NOESCAPE)) {
			break;
		} else {
			if (len >= i) {
				(*chcount)++;
				len -= i;
				if (stow) {
					while (i-- > 0) {
						*ptr++ = *p++;
					}
				} else {
					/* keep ptr moving so a match is seen */
					while (i-- > 0) {
						ptr++;
					}
				}
				if (len <= 0)
					break;
			} else {
				break;
			}
		}
	}
	if (wch == WEOF) {
		*flag_eof = 1;
	} else {
		/* return the rejected character unless the width ran out */
		if (len > 0 && _wd_ungetwc(&dummy, wch, iop) == WEOF)
			*flag_eof = 1;
	}
	if (ptr == start)
		return (0);				/* no match */
	if (stow)
		*ptr = '\0';
	return (1);					/* successful match */
}
1239#endif /* _WIDE */
1240
/*
 * Locally define getwc and ungetwc
 */

/*
 * Read one multibyte character from the byte stream iop and return it
 * as a wide character code.
 *
 * An ASCII byte is returned as is.  Otherwise up to MB_CUR_MAX bytes
 * are collected (stopping early at a newline, which is pushed back, or
 * at end of input) and decoded with mbtowc(); bytes collected beyond
 * the decoded character are pushed back.
 *
 * Returns WEOF at end of input, and WEOF with errno = EILSEQ if the
 * bytes do not form a legal character (all but the first byte are then
 * pushed back).
 */
static int
_bi_getwc(FILE *iop)
{
	int c;
	wchar_t intcode;
	int i, nbytes, cur_max;
	char buff[MB_LEN_MAX];

	if ((c = wlocgetc()) == EOF)
		return (WEOF);

	if (isascii(c))	/* ASCII code */
		return ((wint_t)c);

	buff[0] = (char)c;

	cur_max = (int)MB_CUR_MAX;
	/* MB_CUR_MAX doesn't exceed the value of MB_LEN_MAX */
	/* So we use MB_CUR_MAX instead of MB_LEN_MAX for */
	/* improving the performance. */
	for (i = 1; i < cur_max; i++) {
		c = wlocgetc();
		if (c == '\n') {
			(void) wlocungetc(c);
			break;
		}
		if (c == EOF) {
			/* this still may be a valid multibyte character */
			break;
		}
		buff[i] = (char)c;
	}

	if ((nbytes = mbtowc(&intcode, buff, i)) == -1) {
		/*
		 * If mbtowc fails, the input was not a legal character.
		 *	ungetc all but one character.
		 *
		 * Note:  the number of pushback characters that
		 *	ungetc() can handle must be >= (MB_LEN_MAX - 1).
		 *	In Solaris 2.x, the number of pushback
		 *	characters is 4.
		 *
		 * The byte is widened through unsigned char: every entry
		 * of buff[] is a real input byte, and a signed 0xFF would
		 * compare equal to EOF inside wlocungetc() and be dropped.
		 */
		while (i-- > 1) {
			(void) wlocungetc((unsigned char)buff[i]);
		}
		errno = EILSEQ;
		return (WEOF); /* Illegal EUC sequence. */
	}

	while (i-- > nbytes) {
		/*
		 * Note:  the number of pushback characters that
		 *	ungetc() can handle must be >= (MB_LEN_MAX - 1).
		 *	In Solaris 2.x, the number of pushback
		 *	characters is 4.
		 *
		 * Widened through unsigned char for the same reason as
		 * in the error path above.
		 */
		(void) wlocungetc((unsigned char)buff[i]);
	}
	return ((int)intcode);
}
1305
1306static int
1307_bi_ungetwc(wint_t wc, FILE *iop)
1308{
1309	char mbs[MB_LEN_MAX];
1310	unsigned char *p;
1311	int n;
1312
1313	if ((wc == WEOF) || ((iop->_flag & _IOREAD) == 0))
1314		return (WEOF);
1315
1316	n = wctomb(mbs, (wchar_t)wc);
1317	if (n <= 0)
1318		return (WEOF);
1319
1320	if (iop->_ptr <= iop->_base) {
1321		if (iop->_base == NULL) {
1322			return (WEOF);
1323		}
1324		if ((iop->_ptr == iop->_base) && (iop->_cnt == 0)) {
1325			++iop->_ptr;
1326		} else if ((iop->_ptr - n) < (iop->_base - PUSHBACK)) {
1327			return (WEOF);
1328		}
1329	}
1330
1331	p = (unsigned char *)(mbs+n-1); /* p points the last byte */
1332	/* if _IOWRT is set to iop->_flag, it means this is */
1333	/* an invocation from sscanf(), and in that time we */
1334	/* don't touch iop->_cnt.  Otherwise, which means an */
1335	/* invocation from fscanf() or scanf(), we touch iop->_cnt */
1336	if ((iop->_flag & _IOWRT) == 0) {
1337		/* scanf() and fscanf() */
1338		iop->_cnt += n;
1339		while (n--) {
1340			*--iop->_ptr = *(p--);
1341		}
1342	} else {
1343		/* sscanf() */
1344		iop->_ptr -= n;
1345	}
1346	return (wc);
1347}
1348