/* vfscanf.c revision 187422 */
162143Sarchie/*- 262143Sarchie * Copyright (c) 1990, 1993 362143Sarchie * The Regents of the University of California. All rights reserved. 4139823Simp * 5139823Simp * This code is derived from software contributed to Berkeley by 6139823Simp * Chris Torek. 762143Sarchie * 862143Sarchie * Redistribution and use in source and binary forms, with or without 962143Sarchie * modification, are permitted provided that the following conditions 1062143Sarchie * are met: 1162143Sarchie * 1. Redistributions of source code must retain the above copyright 1262143Sarchie * notice, this list of conditions and the following disclaimer. 1362143Sarchie * 2. Redistributions in binary form must reproduce the above copyright 1462143Sarchie * notice, this list of conditions and the following disclaimer in the 1562143Sarchie * documentation and/or other materials provided with the distribution. 1662143Sarchie * 4. Neither the name of the University nor the names of its contributors 1762143Sarchie * may be used to endorse or promote products derived from this software 1862143Sarchie * without specific prior written permission. 1962143Sarchie * 2062143Sarchie * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 2162143Sarchie * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2262143Sarchie * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2362143Sarchie * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 2462143Sarchie * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2562143Sarchie * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2662143Sarchie * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2762143Sarchie * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2862143Sarchie * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2962143Sarchie * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3062143Sarchie * SUCH DAMAGE. 3162143Sarchie */ 3262143Sarchie 3362143Sarchie#if defined(LIBC_SCCS) && !defined(lint) 3462143Sarchiestatic char sccsid[] = "@(#)vfscanf.c 8.1 (Berkeley) 6/4/93"; 3562143Sarchie#endif /* LIBC_SCCS and not lint */ 3662143Sarchie#include <sys/cdefs.h> 3762143Sarchie__FBSDID("$FreeBSD: head/lib/libc/stdio/vfscanf.c 187422 2009-01-19 06:19:51Z das $"); 3862143Sarchie 3962143Sarchie#include "namespace.h" 4062143Sarchie#include <ctype.h> 4162143Sarchie#include <inttypes.h> 4262143Sarchie#include <stdio.h> 4362143Sarchie#include <stdlib.h> 4462143Sarchie#include <stddef.h> 4562143Sarchie#include <stdarg.h> 4662143Sarchie#include <string.h> 4762143Sarchie#include <wchar.h> 4862143Sarchie#include <wctype.h> 4962143Sarchie#include "un-namespace.h" 5062143Sarchie 5162143Sarchie#include "collate.h" 5262143Sarchie#include "libc_private.h" 5362143Sarchie#include "local.h" 5462143Sarchie 55196019Srwatson#ifndef NO_FLOATING_POINT 5662143Sarchie#include <locale.h> 5762143Sarchie#endif 5862143Sarchie 5962143Sarchie#define BUF 513 /* Maximum length of numeric string. */ 60141721Sglebius 6162143Sarchie/* 6262143Sarchie * Flags used during conversion. 
6362143Sarchie */ 6462143Sarchie#define LONG 0x01 /* l: long or double */ 65151305Sthompsa#define LONGDBL 0x02 /* L: long double */ 66185571Sbz#define SHORT 0x04 /* h: short */ 6762143Sarchie#define SUPPRESS 0x08 /* *: suppress assignment */ 6862143Sarchie#define POINTER 0x10 /* p: void * (as hex) */ 6962143Sarchie#define NOSKIP 0x20 /* [ or c: do not skip blanks */ 7062143Sarchie#define LONGLONG 0x400 /* ll: long long (+ deprecated q: quad) */ 7162143Sarchie#define INTMAXT 0x800 /* j: intmax_t */ 7262143Sarchie#define PTRDIFFT 0x1000 /* t: ptrdiff_t */ 73152243Sru#define SIZET 0x2000 /* z: size_t */ 7462143Sarchie#define SHORTSHORT 0x4000 /* hh: char */ 75126035Spjd#define UNSIGNED 0x8000 /* %[oupxX] conversions */ 76126035Spjd 77126035Spjd/* 78126035Spjd * The following are used in integral conversions only: 79129281Sarchie * SIGNOK, NDIGITS, PFXOK, and NZDIGITS 80129281Sarchie */ 81126035Spjd#define SIGNOK 0x40 /* +/- is (still) legal */ 82126035Spjd#define NDIGITS 0x80 /* no digits detected */ 83126035Spjd#define PFXOK 0x100 /* 0x prefix is (still) legal */ 84126035Spjd#define NZDIGITS 0x200 /* no zero digits detected */ 85126035Spjd#define HAVESIGN 0x10000 /* sign detected */ 86126035Spjd 8762143Sarchie/* 88106933Ssam * Conversion types. 89106933Ssam */ 90106933Ssam#define CT_CHAR 0 /* %c conversion */ 91106933Ssam#define CT_CCL 1 /* %[...] 
conversion */ 92106933Ssam#define CT_STRING 2 /* %s conversion */ 93106933Ssam#define CT_INT 3 /* %[dioupxX] conversion */ 94139903Sglebius#define CT_FLOAT 4 /* %[efgEFG] conversion */ 95106933Ssam 9662143Sarchiestatic const u_char *__sccl(char *, const u_char *); 97106933Ssam#ifndef NO_FLOATING_POINT 98106933Ssamstatic int parsefloat(FILE *, char *, char *); 9962143Sarchie#endif 10062143Sarchie 10162143Sarchie__weak_reference(__vfscanf, vfscanf); 102139903Sglebius 10362143Sarchie/* 10462143Sarchie * __vfscanf - MT-safe version 105186488Sjulian */ 106186488Sjulianint 10762143Sarchie__vfscanf(FILE *fp, char const *fmt0, va_list ap) 10862143Sarchie{ 10962143Sarchie int ret; 11062143Sarchie 11170700Sjulian FLOCKFILE(fp); 11262143Sarchie ret = __svfscanf(fp, fmt0, ap); 11362143Sarchie FUNLOCKFILE(fp); 11462143Sarchie return (ret); 11562143Sarchie} 11662143Sarchie 11762143Sarchie/* 11862143Sarchie * __svfscanf - non-MT-safe version of __vfscanf 11962143Sarchie */ 12062143Sarchieint 12162143Sarchie__svfscanf(FILE *fp, const char *fmt0, va_list ap) 12262143Sarchie{ 12362143Sarchie const u_char *fmt = (const u_char *)fmt0; 12462143Sarchie int c; /* character from format, or conversion */ 12562143Sarchie size_t width; /* field width, or 0 */ 12662143Sarchie char *p; /* points into all kinds of strings */ 12762143Sarchie int n; /* handy integer */ 12862143Sarchie int flags; /* flags as defined above */ 12962143Sarchie char *p0; /* saves original value of p when necessary */ 13062143Sarchie int nassigned; /* number of fields assigned */ 13162143Sarchie int nconversions; /* number of conversions */ 13262143Sarchie int nread; /* number of characters consumed from fp */ 13364358Sarchie int base; /* base argument to conversion function */ 13464358Sarchie char ccltab[256]; /* character class table for %[...] 
*/ 13564358Sarchie char buf[BUF]; /* buffer for numeric and mb conversions */ 13664358Sarchie wchar_t *wcp; /* handy wide character pointer */ 13764358Sarchie size_t nconv; /* length of multibyte sequence converted */ 138123600Sru static const mbstate_t initial; 13964358Sarchie mbstate_t mbs; 14064358Sarchie 14164358Sarchie /* `basefix' is used to avoid `if' tests in the integer scanner */ 14264653Sarchie static short basefix[17] = 14364653Sarchie { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 144123600Sru 14564653Sarchie ORIENT(fp, -1); 14664653Sarchie 14764653Sarchie nassigned = 0; 14864653Sarchie nconversions = 0; 14964653Sarchie nread = 0; 15064653Sarchie for (;;) { 15164653Sarchie c = *fmt++; 15264653Sarchie if (c == 0) 15364653Sarchie return (nassigned); 15464653Sarchie if (isspace(c)) { 15564653Sarchie while ((fp->_r > 0 || __srefill(fp) == 0) && isspace(*fp->_p)) 15664358Sarchie nread++, fp->_r--, fp->_p++; 15764358Sarchie continue; 15864358Sarchie } 15964358Sarchie if (c != '%') 16064358Sarchie goto literal; 16164358Sarchie width = 0; 16264358Sarchie flags = 0; 16364653Sarchie /* 16464653Sarchie * switch on the format. continue if done; 16564653Sarchie * break once format type is derived. 
16664653Sarchie */ 16764653Sarchieagain: c = *fmt++; 16864653Sarchie switch (c) { 16964653Sarchie case '%': 17064358Sarchieliteral: 17164358Sarchie if (fp->_r <= 0 && __srefill(fp)) 17264358Sarchie goto input_failure; 17364358Sarchie if (*fp->_p != c) 17464358Sarchie goto match_failure; 175141721Sglebius fp->_r--, fp->_p++; 176141721Sglebius nread++; 177141721Sglebius continue; 178141721Sglebius 179141721Sglebius case '*': 180141721Sglebius flags |= SUPPRESS; 181141721Sglebius goto again; 182141721Sglebius case 'j': 183141721Sglebius flags |= INTMAXT; 184141721Sglebius goto again; 185141721Sglebius case 'l': 186141721Sglebius if (flags & LONG) { 187141721Sglebius flags &= ~LONG; 188141721Sglebius flags |= LONGLONG; 189141910Sglebius } else 190141910Sglebius flags |= LONG; 191141910Sglebius goto again; 192141910Sglebius case 'q': 193141910Sglebius flags |= LONGLONG; /* not quite */ 194141910Sglebius goto again; 195141910Sglebius case 't': 19662143Sarchie flags |= PTRDIFFT; 19762143Sarchie goto again; 19862143Sarchie case 'z': 19962143Sarchie flags |= SIZET; 200129823Sjulian goto again; 201129823Sjulian case 'L': 202129823Sjulian flags |= LONGDBL; 203129823Sjulian goto again; 204129823Sjulian case 'h': 205129823Sjulian if (flags & SHORT) { 206129823Sjulian flags &= ~SHORT; 207129823Sjulian flags |= SHORTSHORT; 208129823Sjulian } else 209129823Sjulian flags |= SHORT; 21062143Sarchie goto again; 21162143Sarchie 21262143Sarchie case '0': case '1': case '2': case '3': case '4': 21362143Sarchie case '5': case '6': case '7': case '8': case '9': 21462143Sarchie width = width * 10 + c - '0'; 21562143Sarchie goto again; 21662143Sarchie 21762143Sarchie /* 21862143Sarchie * Conversions. 
21962143Sarchie */ 22062143Sarchie case 'd': 22162143Sarchie c = CT_INT; 22262143Sarchie base = 10; 22362143Sarchie break; 224106933Ssam 22562143Sarchie case 'i': 22662143Sarchie c = CT_INT; 22770784Sjulian base = 0; 228129281Sarchie break; 22962143Sarchie 23062143Sarchie case 'o': 231129281Sarchie c = CT_INT; 23262143Sarchie flags |= UNSIGNED; 233129281Sarchie base = 8; 23462143Sarchie break; 23562143Sarchie 23662143Sarchie case 'u': 23762143Sarchie c = CT_INT; 23862143Sarchie flags |= UNSIGNED; 23962143Sarchie base = 10; 24062143Sarchie break; 24162143Sarchie 24262143Sarchie case 'X': 243106933Ssam case 'x': 24462143Sarchie flags |= PFXOK; /* enable 0x prefixing */ 24562143Sarchie c = CT_INT; 24670784Sjulian flags |= UNSIGNED; 247129281Sarchie base = 16; 24862143Sarchie break; 249129281Sarchie 250129281Sarchie#ifndef NO_FLOATING_POINT 25162143Sarchie case 'A': case 'E': case 'F': case 'G': 25262143Sarchie case 'a': case 'e': case 'f': case 'g': 25362143Sarchie c = CT_FLOAT; 254129281Sarchie break; 25562143Sarchie#endif 25662143Sarchie 25762143Sarchie case 'S': 25862143Sarchie flags |= LONG; 25962143Sarchie /* FALLTHROUGH */ 26062143Sarchie case 's': 26162143Sarchie c = CT_STRING; 26262143Sarchie break; 26362143Sarchie 26462143Sarchie case '[': 26570784Sjulian fmt = __sccl(ccltab, fmt); 26662143Sarchie flags |= NOSKIP; 26762143Sarchie c = CT_CCL; 26862143Sarchie break; 26962143Sarchie 27062143Sarchie case 'C': 27162143Sarchie flags |= LONG; 27262143Sarchie /* FALLTHROUGH */ 273194012Szec case 'c': 27470700Sjulian flags |= NOSKIP; 275194012Szec c = CT_CHAR; 27662143Sarchie break; 27762143Sarchie 27862143Sarchie case 'p': /* pointer format is like hex */ 27962143Sarchie flags |= POINTER | PFXOK; 28062143Sarchie c = CT_INT; /* assumes sizeof(uintmax_t) */ 28162143Sarchie flags |= UNSIGNED; /* >= sizeof(uintptr_t) */ 28262143Sarchie base = 16; 28362143Sarchie break; 28462143Sarchie 28562143Sarchie case 'n': 28662143Sarchie nconversions++; 28762143Sarchie if (flags & 
SUPPRESS) /* ??? */ 28862143Sarchie continue; 289191510Szec if (flags & SHORTSHORT) 290191510Szec *va_arg(ap, char *) = nread; 291191510Szec else if (flags & SHORT) 292191510Szec *va_arg(ap, short *) = nread; 293191510Szec else if (flags & LONG) 294191510Szec *va_arg(ap, long *) = nread; 295191510Szec else if (flags & LONGLONG) 296191510Szec *va_arg(ap, long long *) = nread; 297191510Szec else if (flags & INTMAXT) 298191510Szec *va_arg(ap, intmax_t *) = nread; 299191510Szec else if (flags & SIZET) 300191510Szec *va_arg(ap, size_t *) = nread; 30162143Sarchie else if (flags & PTRDIFFT) 30287599Sobrien *va_arg(ap, ptrdiff_t *) = nread; 30362143Sarchie else 30462143Sarchie *va_arg(ap, int *) = nread; 305121816Sbrooks continue; 30662143Sarchie 30762143Sarchie default: 30862143Sarchie goto match_failure; 30962143Sarchie 310184205Sdes /* 31162143Sarchie * Disgusting backwards compatibility hack. XXX 31262143Sarchie */ 313121816Sbrooks case '\0': /* compat */ 31470784Sjulian return (EOF); 31562143Sarchie } 31662143Sarchie 31770784Sjulian /* 31862143Sarchie * We have a conversion that requires input. 319152243Sru */ 32090249Sarchie if (fp->_r <= 0 && __srefill(fp)) 32162143Sarchie goto input_failure; 32262143Sarchie 323121816Sbrooks /* 32462143Sarchie * Consume leading white space, except for formats 325121816Sbrooks * that suppress this. 32662143Sarchie */ 32762143Sarchie if ((flags & NOSKIP) == 0) { 32862143Sarchie while (isspace(*fp->_p)) { 32962143Sarchie nread++; 33062143Sarchie if (--fp->_r > 0) 33171849Sjulian fp->_p++; 33262143Sarchie else if (__srefill(fp)) 33362143Sarchie goto input_failure; 33462143Sarchie } 33562143Sarchie /* 33662143Sarchie * Note that there is at least one character in 33771849Sjulian * the buffer, so conversions that do not set NOSKIP 33862143Sarchie * ca no longer result in an input failure. 33971849Sjulian */ 34071849Sjulian } 34171849Sjulian 34271849Sjulian /* 34371849Sjulian * Do the conversion. 
34471849Sjulian */ 345152243Sru switch (c) { 34671849Sjulian 34771849Sjulian case CT_CHAR: 34862143Sarchie /* scan arbitrary characters (sets NOSKIP) */ 34962143Sarchie if (width == 0) 350139903Sglebius width = 1; 351139903Sglebius if (flags & LONG) { 352139903Sglebius if ((flags & SUPPRESS) == 0) 353139903Sglebius wcp = va_arg(ap, wchar_t *); 354139903Sglebius else 355139903Sglebius wcp = NULL; 356139903Sglebius n = 0; 357139903Sglebius while (width != 0) { 358139903Sglebius if (n == MB_CUR_MAX) { 359139903Sglebius fp->_flags |= __SERR; 360139903Sglebius goto input_failure; 361139903Sglebius } 362139903Sglebius buf[n++] = *fp->_p; 363139903Sglebius fp->_p++; 364139903Sglebius fp->_r--; 365139903Sglebius mbs = initial; 366139903Sglebius nconv = mbrtowc(wcp, buf, n, &mbs); 367139903Sglebius if (nconv == (size_t)-1) { 368139903Sglebius fp->_flags |= __SERR; 369201924Sfjoe goto input_failure; 370201924Sfjoe } 371201924Sfjoe if (nconv == 0 && !(flags & SUPPRESS)) 372201924Sfjoe *wcp = L'\0'; 373201924Sfjoe if (nconv != (size_t)-2) { 374201924Sfjoe nread += n; 375201924Sfjoe width--; 376201924Sfjoe if (!(flags & SUPPRESS)) 377201924Sfjoe wcp++; 378201924Sfjoe n = 0; 379139903Sglebius } 380139903Sglebius if (fp->_r <= 0 && __srefill(fp)) { 38162143Sarchie if (n != 0) { 38262143Sarchie fp->_flags |= __SERR; 38362143Sarchie goto input_failure; 38462143Sarchie } 38562143Sarchie break; 38662143Sarchie } 38762143Sarchie } 38862143Sarchie if (!(flags & SUPPRESS)) 38962143Sarchie nassigned++; 39062143Sarchie } else if (flags & SUPPRESS) { 39170700Sjulian size_t sum = 0; 39262143Sarchie for (;;) { 39362143Sarchie if ((n = fp->_r) < width) { 39462143Sarchie sum += n; 39562143Sarchie width -= n; 39662143Sarchie fp->_p += n; 39762143Sarchie if (__srefill(fp)) { 39862143Sarchie if (sum == 0) 39962143Sarchie goto input_failure; 40062143Sarchie break; 40162143Sarchie } 40270784Sjulian } else { 40362143Sarchie sum += width; 40462143Sarchie fp->_r -= width; 40562143Sarchie fp->_p += 
width; 40662143Sarchie break; 40762143Sarchie } 40862143Sarchie } 40962143Sarchie nread += sum; 410186488Sjulian } else { 41162143Sarchie size_t r = __fread((void *)va_arg(ap, char *), 1, 412186488Sjulian width, fp); 413194012Szec 414186488Sjulian if (r == 0) 41562143Sarchie goto input_failure; 416186488Sjulian nread += r; 417186488Sjulian nassigned++; 418129281Sarchie } 419186488Sjulian nconversions++; 420186488Sjulian break; 42162143Sarchie 42262143Sarchie case CT_CCL: 42362143Sarchie /* scan a (nonempty) character class (sets NOSKIP) */ 42462143Sarchie if (width == 0) 42562143Sarchie width = (size_t)~0; /* `infinity' */ 42662143Sarchie /* take only those things in the class */ 42790249Sarchie if (flags & LONG) { 42890249Sarchie wchar_t twc; 42990249Sarchie int nchars; 430194699Smav 43162143Sarchie if ((flags & SUPPRESS) == 0) 43262143Sarchie wcp = va_arg(ap, wchar_t *); 43362143Sarchie else 43462143Sarchie wcp = &twc; 43562143Sarchie n = 0; 43662143Sarchie nchars = 0; 43762143Sarchie while (width != 0) { 43862143Sarchie if (n == MB_CUR_MAX) { 43962143Sarchie fp->_flags |= __SERR; 44070700Sjulian goto input_failure; 44162143Sarchie } 44270784Sjulian buf[n++] = *fp->_p; 44362143Sarchie fp->_p++; 44462143Sarchie fp->_r--; 44570700Sjulian mbs = initial; 44662143Sarchie nconv = mbrtowc(wcp, buf, n, &mbs); 44770700Sjulian if (nconv == (size_t)-1) { 44862143Sarchie fp->_flags |= __SERR; 44962143Sarchie goto input_failure; 45062143Sarchie } 45162143Sarchie if (nconv == 0) 452141195Sru *wcp = L'\0'; 45362143Sarchie if (nconv != (size_t)-2) { 45462143Sarchie if (wctob(*wcp) != EOF && 45562143Sarchie !ccltab[wctob(*wcp)]) { 45662143Sarchie while (n != 0) { 457141195Sru n--; 45862143Sarchie __ungetc(buf[n], 45962143Sarchie fp); 46062143Sarchie } 46162143Sarchie break; 46262143Sarchie } 46362143Sarchie nread += n; 46462143Sarchie width--; 46562143Sarchie if (!(flags & SUPPRESS)) 46662143Sarchie wcp++; 46764358Sarchie nchars++; 46864358Sarchie n = 0; 46964358Sarchie } 
47064358Sarchie if (fp->_r <= 0 && __srefill(fp)) { 47164358Sarchie if (n != 0) { 47264358Sarchie fp->_flags |= __SERR; 473152315Sru goto input_failure; 47464358Sarchie } 47564358Sarchie break; 47664653Sarchie } 47764653Sarchie } 47864653Sarchie if (n != 0) { 47964653Sarchie fp->_flags |= __SERR; 48064653Sarchie goto input_failure; 48164653Sarchie } 48264653Sarchie n = nchars; 48364653Sarchie if (n == 0) 484202588Sthompsa goto match_failure; 48564653Sarchie if (!(flags & SUPPRESS)) { 48664653Sarchie *wcp = L'\0'; 48764653Sarchie nassigned++; 48864653Sarchie } 48964653Sarchie } else if (flags & SUPPRESS) { 49064653Sarchie n = 0; 49164653Sarchie while (ccltab[*fp->_p]) { 49264653Sarchie n++, fp->_r--, fp->_p++; 49364653Sarchie if (--width == 0) 49464653Sarchie break; 49564358Sarchie if (fp->_r <= 0 && __srefill(fp)) { 49664358Sarchie if (n == 0) 49764358Sarchie goto input_failure; 49864358Sarchie break; 49964358Sarchie } 50064358Sarchie } 50164358Sarchie if (n == 0) 50264358Sarchie goto match_failure; 50364358Sarchie } else { 50464358Sarchie p0 = p = va_arg(ap, char *); 50564358Sarchie while (ccltab[*fp->_p]) { 50664358Sarchie fp->_r--; 50764358Sarchie *p++ = *fp->_p++; 50864358Sarchie if (--width == 0) 50964358Sarchie break; 51064358Sarchie if (fp->_r <= 0 && __srefill(fp)) { 51164653Sarchie if (p == p0) 51264653Sarchie goto input_failure; 51364653Sarchie break; 51464653Sarchie } 51564653Sarchie } 51664653Sarchie n = p - p0; 51764653Sarchie if (n == 0) 51864653Sarchie goto match_failure; 51964358Sarchie *p = 0; 52064358Sarchie nassigned++; 52164358Sarchie } 52264358Sarchie nread += n; 52364358Sarchie nconversions++; 52464358Sarchie break; 52564358Sarchie 526141721Sglebius case CT_STRING: 527141721Sglebius /* like CCL, but zero-length string OK, & no NOSKIP */ 528141721Sglebius if (width == 0) 529167729Sbms width = (size_t)~0; 530141721Sglebius if (flags & LONG) { 531141721Sglebius wchar_t twc; 532141721Sglebius 533141721Sglebius if ((flags & SUPPRESS) == 0) 
534141721Sglebius wcp = va_arg(ap, wchar_t *); 535141755Sglebius else 536141721Sglebius wcp = &twc; 537141721Sglebius n = 0; 538141755Sglebius while (!isspace(*fp->_p) && width != 0) { 539141721Sglebius if (n == MB_CUR_MAX) { 540141721Sglebius fp->_flags |= __SERR; 541167729Sbms goto input_failure; 542167729Sbms } 543167729Sbms buf[n++] = *fp->_p; 544167729Sbms fp->_p++; 545167729Sbms fp->_r--; 546167729Sbms mbs = initial; 547167729Sbms nconv = mbrtowc(wcp, buf, n, &mbs); 548195049Srwatson if (nconv == (size_t)-1) { 549167729Sbms fp->_flags |= __SERR; 550167729Sbms goto input_failure; 551195049Srwatson } 552167729Sbms if (nconv == 0) 553167729Sbms *wcp = L'\0'; 554167729Sbms if (nconv != (size_t)-2) { 555167729Sbms if (iswspace(*wcp)) { 556167729Sbms while (n != 0) { 557167729Sbms n--; 558141721Sglebius __ungetc(buf[n], 559141721Sglebius fp); 560141721Sglebius } 561141721Sglebius break; 562141721Sglebius } 563141721Sglebius nread += n; 564141721Sglebius width--; 565141721Sglebius if (!(flags & SUPPRESS)) 566141721Sglebius wcp++; 567141721Sglebius n = 0; 568141755Sglebius } 569141721Sglebius if (fp->_r <= 0 && __srefill(fp)) { 570141721Sglebius if (n != 0) { 571141755Sglebius fp->_flags |= __SERR; 572141721Sglebius goto input_failure; 573141721Sglebius } 574141721Sglebius break; 575141721Sglebius } 576141721Sglebius } 577141721Sglebius if (!(flags & SUPPRESS)) { 578141910Sglebius *wcp = L'\0'; 579141910Sglebius nassigned++; 580141910Sglebius } 58162143Sarchie } else if (flags & SUPPRESS) { 58262143Sarchie n = 0; 58362143Sarchie while (!isspace(*fp->_p)) { 58462143Sarchie n++, fp->_r--, fp->_p++; 58562143Sarchie if (--width == 0) 58662143Sarchie break; 58762143Sarchie if (fp->_r <= 0 && __srefill(fp)) 58862143Sarchie break; 58962143Sarchie } 59070700Sjulian nread += n; 59170700Sjulian } else { 59262143Sarchie p0 = p = va_arg(ap, char *); 59362143Sarchie while (!isspace(*fp->_p)) { 59462143Sarchie fp->_r--; 59562143Sarchie *p++ = *fp->_p++; 59662143Sarchie if (--width 
== 0) 597186488Sjulian break; 59862143Sarchie if (fp->_r <= 0 && __srefill(fp)) 59962143Sarchie break; 60070700Sjulian } 60162143Sarchie *p = 0; 60270700Sjulian nread += p - p0; 603131155Sjulian nassigned++; 60487599Sobrien } 605129281Sarchie nconversions++; 606136312Sdes continue; 60783366Sjulian 60862143Sarchie case CT_INT: 60962143Sarchie /* scan an integer as if by the conversion function */ 61062143Sarchie#ifdef hardway 611129281Sarchie if (width == 0 || width > sizeof(buf) - 1) 61262143Sarchie width = sizeof(buf) - 1; 61362143Sarchie#else 614186488Sjulian /* size_t is unsigned, hence this optimisation */ 61562143Sarchie if (--width > sizeof(buf) - 2) 616186488Sjulian width = sizeof(buf) - 2; 617186488Sjulian width++; 61870784Sjulian#endif 61996265Sarchie flags |= SIGNOK | NDIGITS | NZDIGITS; 62062143Sarchie for (p = buf; width; width--) { 621186488Sjulian c = *fp->_p; 622186488Sjulian /* 623186488Sjulian * Switch on the character; `goto ok' 62496265Sarchie * if we accept it as a part of number. 625186488Sjulian */ 626148887Srwatson switch (c) { 627148887Srwatson 62896265Sarchie /* 62996265Sarchie * The digit 0 is always legal, but is 63096265Sarchie * special. For %i conversions, if no 63196265Sarchie * digits (zero or nonzero) have been 63262143Sarchie * scanned (only signs), we will have 63362143Sarchie * base==0. In that case, we should set 63470700Sjulian * it to 8 and enable 0x prefixing. 63562143Sarchie * Also, if we have not scanned zero digits 63662143Sarchie * before this, do not turn off prefixing 63762143Sarchie * (someone else will turn it off if we 63897896Sarchie * have scanned any nonzero digits). 
63962143Sarchie */ 64062143Sarchie case '0': 64164358Sarchie if (base == 0) { 64264358Sarchie base = 8; 64397896Sarchie flags |= PFXOK; 64497896Sarchie } 64597896Sarchie if (flags & NZDIGITS) 64697896Sarchie flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 64797896Sarchie else 64897896Sarchie flags &= ~(SIGNOK|PFXOK|NDIGITS); 64997896Sarchie goto ok; 650152315Sru 65164358Sarchie /* 1 through 7 always legal */ 65264358Sarchie case '1': case '2': case '3': 65364358Sarchie case '4': case '5': case '6': case '7': 65462678Sjulian base = basefix[base]; 65562143Sarchie flags &= ~(SIGNOK | PFXOK | NDIGITS); 65696265Sarchie goto ok; 65762143Sarchie 65862143Sarchie /* digits 8 and 9 ok iff decimal or hex */ 65962143Sarchie case '8': case '9': 66062143Sarchie base = basefix[base]; 66162143Sarchie if (base <= 8) 66262143Sarchie break; /* not legal here */ 663186488Sjulian flags &= ~(SIGNOK | PFXOK | NDIGITS); 66462143Sarchie goto ok; 665186488Sjulian 666186488Sjulian /* letters ok iff hex */ 66770784Sjulian case 'A': case 'B': case 'C': 668151063Sglebius case 'D': case 'E': case 'F': 66962143Sarchie case 'a': case 'b': case 'c': 670186488Sjulian case 'd': case 'e': case 'f': 671186488Sjulian /* no need to fix base here */ 672186488Sjulian if (base <= 10) 673152001Sru break; /* not legal here */ 674152001Sru flags &= ~(SIGNOK | PFXOK | NDIGITS); 675152001Sru goto ok; 676152001Sru 677152001Sru /* sign ok only as first character */ 678152001Sru case '+': case '-': 679152001Sru if (flags & SIGNOK) { 680152001Sru flags &= ~SIGNOK; 681152001Sru flags |= HAVESIGN; 682151063Sglebius goto ok; 68362143Sarchie } 684151305Sthompsa break; 685151063Sglebius 686151305Sthompsa /* 687151063Sglebius * x ok iff flag still set & 2nd char (or 688151063Sglebius * 3rd char if we have a sign). 
689151063Sglebius */ 690151063Sglebius case 'x': case 'X': 69162143Sarchie if (flags & PFXOK && p == 692151305Sthompsa buf + 1 + !!(flags & HAVESIGN)) { 69362143Sarchie base = 16; /* if %i */ 69462143Sarchie flags &= ~PFXOK; 69562143Sarchie goto ok; 69662143Sarchie } 69771849Sjulian break; 69871849Sjulian } 69962143Sarchie 70062143Sarchie /* 70170700Sjulian * If we got here, c is not a legal character 70262143Sarchie * for a number. Stop accumulating digits. 70370784Sjulian */ 70464358Sarchie break; 705132464Sjulian ok: 70671849Sjulian /* 70771849Sjulian * c is legal: store it and look at the next. 70871849Sjulian */ 70971849Sjulian *p++ = c; 71071849Sjulian if (--fp->_r > 0) 71171849Sjulian fp->_p++; 71271849Sjulian else if (__srefill(fp)) 713184205Sdes break; /* EOF */ 71471849Sjulian } 71571849Sjulian /* 71670700Sjulian * If we had only a sign, it is no good; push 717124269Sgreen * back the sign. If the number ends in `x', 718124269Sgreen * it was [sign] '0' 'x', so push back the x 719124269Sgreen * and treat it as [sign] '0'. 
720124269Sgreen */ 72164358Sarchie if (flags & NDIGITS) { 722132464Sjulian if (p > buf) 723132464Sjulian (void) __ungetc(*(u_char *)--p, fp); 72462143Sarchie goto match_failure; 72562143Sarchie } 72662143Sarchie c = ((u_char *)p)[-1]; 72762143Sarchie if (c == 'x' || c == 'X') { 72862143Sarchie --p; 72962143Sarchie (void) __ungetc(c, fp); 73062143Sarchie } 73162143Sarchie if ((flags & SUPPRESS) == 0) { 73262143Sarchie uintmax_t res; 73370784Sjulian 73462143Sarchie *p = 0; 73590249Sarchie if ((flags & UNSIGNED) == 0) 73662143Sarchie res = strtoimax(buf, (char **)NULL, base); 737124270Sgreen else 738124270Sgreen res = strtoumax(buf, (char **)NULL, base); 739129281Sarchie if (flags & POINTER) 74062143Sarchie *va_arg(ap, void **) = 741129281Sarchie (void *)(uintptr_t)res; 742129281Sarchie else if (flags & SHORTSHORT) 743129281Sarchie *va_arg(ap, char *) = res; 74487599Sobrien else if (flags & SHORT) 74570784Sjulian *va_arg(ap, short *) = res; 74670784Sjulian else if (flags & LONG) 74770784Sjulian *va_arg(ap, long *) = res; 74862143Sarchie else if (flags & LONGLONG) 74962143Sarchie *va_arg(ap, long long *) = res; 75062143Sarchie else if (flags & INTMAXT) 75162143Sarchie *va_arg(ap, intmax_t *) = res; 75262143Sarchie else if (flags & PTRDIFFT) 75362143Sarchie *va_arg(ap, ptrdiff_t *) = res; 75462143Sarchie else if (flags & SIZET) 75562143Sarchie *va_arg(ap, size_t *) = res; 75662143Sarchie else 75762143Sarchie *va_arg(ap, int *) = res; 75862143Sarchie nassigned++; 75962143Sarchie } 76062143Sarchie nread += p - buf; 76162143Sarchie nconversions++; 76262143Sarchie break; 76362143Sarchie 76462143Sarchie#ifndef NO_FLOATING_POINT 76562143Sarchie case CT_FLOAT: 76662143Sarchie /* scan a floating point number as if by strtod */ 76762143Sarchie if (width == 0 || width > sizeof(buf) - 1) 76862143Sarchie width = sizeof(buf) - 1; 76962143Sarchie if ((width = parsefloat(fp, buf, buf + width)) == 0) 77062143Sarchie goto match_failure; 77162143Sarchie if ((flags & SUPPRESS) == 0) { 
77262143Sarchie if (flags & LONGDBL) { 77362143Sarchie long double res = strtold(buf, &p); 77462143Sarchie *va_arg(ap, long double *) = res; 77562143Sarchie } else if (flags & LONG) { 77662143Sarchie double res = strtod(buf, &p); 77762143Sarchie *va_arg(ap, double *) = res; 778139903Sglebius } else { 77962143Sarchie float res = strtof(buf, &p); 78062143Sarchie *va_arg(ap, float *) = res; 78162143Sarchie } 78262143Sarchie nassigned++; 78362143Sarchie } 78462143Sarchie nread += width; 78562143Sarchie nconversions++; 78662143Sarchie break; 78762143Sarchie#endif /* !NO_FLOATING_POINT */ 78862143Sarchie } 78962143Sarchie } 79062143Sarchieinput_failure: 79162143Sarchie return (nconversions != 0 ? nassigned : EOF); 79262143Sarchiematch_failure: 79362143Sarchie return (nassigned); 79462143Sarchie} 79562143Sarchie 79662143Sarchie/* 79762143Sarchie * Fill in the given table from the scanset at the given format 798139903Sglebius * (just after `['). Return a pointer to the character past the 79962143Sarchie * closing `]'. The table has a 1 wherever characters should be 80062143Sarchie * considered part of the scanset. 
80162143Sarchie */ 80262143Sarchiestatic const u_char * 80362143Sarchie__sccl(tab, fmt) 80462143Sarchie char *tab; 80562143Sarchie const u_char *fmt; 80662143Sarchie{ 80762143Sarchie int c, n, v, i; 80862143Sarchie 809195837Srwatson /* first `clear' the whole table */ 810195837Srwatson c = *fmt++; /* first char hat => negated scanset */ 811191510Szec if (c == '^') { 812191510Szec v = 1; /* default => accept */ 813191510Szec c = *fmt++; /* get new first char */ 814195837Srwatson } else 815195837Srwatson v = 0; /* default => reject */ 816195837Srwatson 817195837Srwatson /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ 818191510Szec (void) memset(tab, v, 256); 819191510Szec 820191510Szec if (c == 0) 821191510Szec return (fmt - 1);/* format ended before closing ] */ 822191510Szec 823191510Szec /* 824191510Szec * Now set the entries corresponding to the actual scanset 825191510Szec * to the opposite of the above. 826191510Szec * 827195837Srwatson * The first character may be ']' (or '-') without being special; 828195837Srwatson * the last character may be '-'. 829 */ 830 v = 1 - v; 831 for (;;) { 832 tab[c] = v; /* take character c */ 833doswitch: 834 n = *fmt++; /* and examine the next */ 835 switch (n) { 836 837 case 0: /* format ended too soon */ 838 return (fmt - 1); 839 840 case '-': 841 /* 842 * A scanset of the form 843 * [01+-] 844 * is defined as `the digit 0, the digit 1, 845 * the character +, the character -', but 846 * the effect of a scanset such as 847 * [a-zA-Z0-9] 848 * is implementation defined. The V7 Unix 849 * scanf treats `a-z' as `the letters a through 850 * z', but treats `a-a' as `the letter a, the 851 * character -, and the letter a'. 852 * 853 * For compatibility, the `-' is not considerd 854 * to define a range if the character following 855 * it is either a close bracket (required by ANSI) 856 * or is not numerically greater than the character 857 * we just stored in the table (c). 
858 */ 859 n = *fmt; 860 if (n == ']' 861 || (__collate_load_error ? n < c : 862 __collate_range_cmp (n, c) < 0 863 ) 864 ) { 865 c = '-'; 866 break; /* resume the for(;;) */ 867 } 868 fmt++; 869 /* fill in the range */ 870 if (__collate_load_error) { 871 do { 872 tab[++c] = v; 873 } while (c < n); 874 } else { 875 for (i = 0; i < 256; i ++) 876 if ( __collate_range_cmp (c, i) < 0 877 && __collate_range_cmp (i, n) <= 0 878 ) 879 tab[i] = v; 880 } 881#if 1 /* XXX another disgusting compatibility hack */ 882 c = n; 883 /* 884 * Alas, the V7 Unix scanf also treats formats 885 * such as [a-c-e] as `the letters a through e'. 886 * This too is permitted by the standard.... 887 */ 888 goto doswitch; 889#else 890 c = *fmt++; 891 if (c == 0) 892 return (fmt - 1); 893 if (c == ']') 894 return (fmt); 895#endif 896 break; 897 898 case ']': /* end of scanset */ 899 return (fmt); 900 901 default: /* just another character */ 902 c = n; 903 break; 904 } 905 } 906 /* NOTREACHED */ 907} 908 909#ifndef NO_FLOATING_POINT 910static int 911parsefloat(FILE *fp, char *buf, char *end) 912{ 913 char *commit, *p; 914 int infnanpos = 0, decptpos = 0; 915 enum { 916 S_START, S_GOTSIGN, S_INF, S_NAN, S_DONE, S_MAYBEHEX, 917 S_DIGITS, S_DECPT, S_FRAC, S_EXP, S_EXPDIGITS 918 } state = S_START; 919 unsigned char c; 920 const char *decpt = localeconv()->decimal_point; 921 _Bool gotmantdig = 0, ishex = 0; 922 923 /* 924 * We set commit = p whenever the string we have read so far 925 * constitutes a valid representation of a floating point 926 * number by itself. At some point, the parse will complete 927 * or fail, and we will ungetc() back to the last commit point. 928 * To ensure that the file offset gets updated properly, it is 929 * always necessary to read at least one character that doesn't 930 * match; thus, we can't short-circuit "infinity" or "nan(...)". 
931 */ 932 commit = buf - 1; 933 for (p = buf; p < end; ) { 934 c = *fp->_p; 935reswitch: 936 switch (state) { 937 case S_START: 938 state = S_GOTSIGN; 939 if (c == '-' || c == '+') 940 break; 941 else 942 goto reswitch; 943 case S_GOTSIGN: 944 switch (c) { 945 case '0': 946 state = S_MAYBEHEX; 947 commit = p; 948 break; 949 case 'I': 950 case 'i': 951 state = S_INF; 952 break; 953 case 'N': 954 case 'n': 955 state = S_NAN; 956 break; 957 default: 958 state = S_DIGITS; 959 goto reswitch; 960 } 961 break; 962 case S_INF: 963 if (infnanpos > 6 || 964 (c != "nfinity"[infnanpos] && 965 c != "NFINITY"[infnanpos])) 966 goto parsedone; 967 if (infnanpos == 1 || infnanpos == 6) 968 commit = p; /* inf or infinity */ 969 infnanpos++; 970 break; 971 case S_NAN: 972 switch (infnanpos) { 973 case 0: 974 if (c != 'A' && c != 'a') 975 goto parsedone; 976 break; 977 case 1: 978 if (c != 'N' && c != 'n') 979 goto parsedone; 980 else 981 commit = p; 982 break; 983 case 2: 984 if (c != '(') 985 goto parsedone; 986 break; 987 default: 988 if (c == ')') { 989 commit = p; 990 state = S_DONE; 991 } else if (!isalnum(c) && c != '_') 992 goto parsedone; 993 break; 994 } 995 infnanpos++; 996 break; 997 case S_DONE: 998 goto parsedone; 999 case S_MAYBEHEX: 1000 state = S_DIGITS; 1001 if (c == 'X' || c == 'x') { 1002 ishex = 1; 1003 break; 1004 } else { /* we saw a '0', but no 'x' */ 1005 gotmantdig = 1; 1006 goto reswitch; 1007 } 1008 case S_DIGITS: 1009 if ((ishex && isxdigit(c)) || isdigit(c)) { 1010 gotmantdig = 1; 1011 commit = p; 1012 break; 1013 } else { 1014 state = S_DECPT; 1015 goto reswitch; 1016 } 1017 case S_DECPT: 1018 if (c == decpt[decptpos]) { 1019 if (decpt[++decptpos] == '\0') { 1020 /* We read the complete decpt seq. */ 1021 state = S_FRAC; 1022 if (gotmantdig) 1023 commit = p; 1024 } 1025 break; 1026 } else if (!decptpos) { 1027 /* We didn't read any decpt characters. 
*/ 1028 state = S_FRAC; 1029 goto reswitch; 1030 } else { 1031 /* 1032 * We read part of a multibyte decimal point, 1033 * but the rest is invalid, so bail. 1034 */ 1035 goto parsedone; 1036 } 1037 case S_FRAC: 1038 if (((c == 'E' || c == 'e') && !ishex) || 1039 ((c == 'P' || c == 'p') && ishex)) { 1040 if (!gotmantdig) 1041 goto parsedone; 1042 else 1043 state = S_EXP; 1044 } else if ((ishex && isxdigit(c)) || isdigit(c)) { 1045 commit = p; 1046 gotmantdig = 1; 1047 } else 1048 goto parsedone; 1049 break; 1050 case S_EXP: 1051 state = S_EXPDIGITS; 1052 if (c == '-' || c == '+') 1053 break; 1054 else 1055 goto reswitch; 1056 case S_EXPDIGITS: 1057 if (isdigit(c)) 1058 commit = p; 1059 else 1060 goto parsedone; 1061 break; 1062 default: 1063 abort(); 1064 } 1065 *p++ = c; 1066 if (--fp->_r > 0) 1067 fp->_p++; 1068 else if (__srefill(fp)) 1069 break; /* EOF */ 1070 } 1071 1072parsedone: 1073 while (commit < --p) 1074 __ungetc(*(u_char *)p, fp); 1075 *++commit = '\0'; 1076 return (commit - buf); 1077} 1078#endif 1079