vfwscanf.c revision 103856
1/*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Chris Torek. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37#include <sys/cdefs.h> 38#if 0 39#if defined(LIBC_SCCS) && !defined(lint) 40static char sccsid[] = "@(#)vfscanf.c 8.1 (Berkeley) 6/4/93"; 41#endif /* LIBC_SCCS and not lint */ 42__FBSDID("FreeBSD: src/lib/libc/stdio/vfscanf.c,v 1.24 2002/08/13 09:30:41 tjr Exp "); 43#endif 44__FBSDID("$FreeBSD: head/lib/libc/stdio/vfwscanf.c 103856 2002-09-23 12:40:06Z tjr $"); 45 46#include "namespace.h" 47#include <ctype.h> 48#include <inttypes.h> 49#include <stdio.h> 50#include <stdlib.h> 51#include <stddef.h> 52#include <stdarg.h> 53#include <string.h> 54#include <wchar.h> 55#include <wctype.h> 56#include "un-namespace.h" 57 58#include "libc_private.h" 59#include "local.h" 60 61#define FLOATING_POINT 62 63#ifdef FLOATING_POINT 64#include <locale.h> 65#include "floatio.h" 66#endif 67 68#define BUF 513 /* Maximum length of numeric string. */ 69 70/* 71 * Flags used during conversion. 72 */ 73#define LONG 0x01 /* l: long or double */ 74#define LONGDBL 0x02 /* L: long double */ 75#define SHORT 0x04 /* h: short */ 76#define SUPPRESS 0x08 /* *: suppress assignment */ 77#define POINTER 0x10 /* p: void * (as hex) */ 78#define NOSKIP 0x20 /* [ or c: do not skip blanks */ 79#define LONGLONG 0x400 /* ll: long long (+ deprecated q: quad) */ 80#define INTMAXT 0x800 /* j: intmax_t */ 81#define PTRDIFFT 0x1000 /* t: ptrdiff_t */ 82#define SIZET 0x2000 /* z: size_t */ 83#define SHORTSHORT 0x4000 /* hh: char */ 84#define UNSIGNED 0x8000 /* %[oupxX] conversions */ 85 86/* 87 * The following are used in numeric conversions only: 88 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; 89 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 90 */ 91#define SIGNOK 0x40 /* +/- is (still) legal */ 92#define NDIGITS 0x80 /* no digits detected */ 93 94#define DPTOK 0x100 /* (float) decimal point is still legal */ 95#define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ 96 97#define PFXOK 0x100 /* 0x prefix is (still) legal */ 98#define NZDIGITS 0x200 /* no zero digits detected */ 99 100/* 101 * Conversion types. 102 */ 103#define CT_CHAR 0 /* %c conversion */ 104#define CT_CCL 1 /* %[...] conversion */ 105#define CT_STRING 2 /* %s conversion */ 106#define CT_INT 3 /* %[dioupxX] conversion */ 107#define CT_FLOAT 4 /* %[efgEFG] conversion */ 108 109#define INCCL(_c) \ 110 (cclcompl ? (wmemchr(ccls, (_c), ccle - ccls) == NULL) : \ 111 (wmemchr(ccls, (_c), ccle - ccls) != NULL)) 112 113/* 114 * MT-safe version. 115 */ 116int 117vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap) 118{ 119 int ret; 120 121 FLOCKFILE(fp); 122 ORIENT(fp, 1); 123 ret = __vfwscanf(fp, fmt, ap); 124 FUNLOCKFILE(fp); 125 return (ret); 126} 127 128/* 129 * Non-MT-safe version. 130 */ 131int 132__vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap) 133{ 134 wint_t c; /* character from format, or conversion */ 135 size_t width; /* field width, or 0 */ 136 wchar_t *p; /* points into all kinds of strings */ 137 int n; /* handy integer */ 138 int flags; /* flags as defined above */ 139 wchar_t *p0; /* saves original value of p when necessary */ 140 int nassigned; /* number of fields assigned */ 141 int nconversions; /* number of conversions */ 142 int nread; /* number of characters consumed from fp */ 143 int base; /* base argument to conversion function */ 144 wchar_t buf[BUF]; /* buffer for numeric conversions */ 145 const wchar_t *ccls; /* character class start */ 146 const wchar_t *ccle; /* character class end */ 147 int cclcompl; /* ccl is complemented? */ 148 wint_t wi; /* handy wint_t */ 149 char *mbp; /* multibyte string pointer for %c %s %[ */ 150 size_t nconv; /* number of bytes in mb. conversion */ 151 mbstate_t mbs; /* multibyte state */ 152 153 /* `basefix' is used to avoid `if' tests in the integer scanner */ 154 static short basefix[17] = 155 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 156#ifdef FLOATING_POINT 157 char decimal_point = localeconv()->decimal_point[0]; 158#endif 159 160 nassigned = 0; 161 nconversions = 0; 162 nread = 0; 163 ccls = ccle = NULL; 164 for (;;) { 165 c = *fmt++; 166 if (c == 0) 167 return (nassigned); 168 if (iswspace(c)) { 169 while ((c = __fgetwc(fp)) != WEOF && 170 iswspace(c)) 171 ; 172 if (c != WEOF) 173 __ungetwc(c, fp); 174 continue; 175 } 176 if (c != '%') 177 goto literal; 178 width = 0; 179 flags = 0; 180 /* 181 * switch on the format. continue if done; 182 * break once format type is derived. 183 */ 184again: c = *fmt++; 185 switch (c) { 186 case '%': 187literal: 188 if ((wi = __fgetwc(fp)) == WEOF) 189 goto input_failure; 190 if (wi != c) { 191 __ungetwc(wi, fp); 192 goto input_failure; 193 } 194 nread++; 195 continue; 196 197 case '*': 198 flags |= SUPPRESS; 199 goto again; 200 case 'j': 201 flags |= INTMAXT; 202 goto again; 203 case 'l': 204 if (flags & LONG) { 205 flags &= ~LONG; 206 flags |= LONGLONG; 207 } else 208 flags |= LONG; 209 goto again; 210 case 'q': 211 flags |= LONGLONG; /* not quite */ 212 goto again; 213 case 't': 214 flags |= PTRDIFFT; 215 goto again; 216 case 'z': 217 flags |= SIZET; 218 goto again; 219 case 'L': 220 flags |= LONGDBL; 221 goto again; 222 case 'h': 223 if (flags & SHORT) { 224 flags &= ~SHORT; 225 flags |= SHORTSHORT; 226 } else 227 flags |= SHORT; 228 goto again; 229 230 case '0': case '1': case '2': case '3': case '4': 231 case '5': case '6': case '7': case '8': case '9': 232 width = width * 10 + c - '0'; 233 goto again; 234 235 /* 236 * Conversions. 237 */ 238 case 'd': 239 c = CT_INT; 240 base = 10; 241 break; 242 243 case 'i': 244 c = CT_INT; 245 base = 0; 246 break; 247 248 case 'o': 249 c = CT_INT; 250 flags |= UNSIGNED; 251 base = 8; 252 break; 253 254 case 'u': 255 c = CT_INT; 256 flags |= UNSIGNED; 257 base = 10; 258 break; 259 260 case 'X': 261 case 'x': 262 flags |= PFXOK; /* enable 0x prefixing */ 263 c = CT_INT; 264 flags |= UNSIGNED; 265 base = 16; 266 break; 267 268#ifdef FLOATING_POINT 269 case 'E': case 'F': case 'G': 270 case 'e': case 'f': case 'g': 271 c = CT_FLOAT; 272 break; 273#endif 274 275 case 'S': 276 flags |= LONG; 277 /* FALLTHROUGH */ 278 case 's': 279 c = CT_STRING; 280 break; 281 282 case '[': 283 ccls = fmt; 284 if (*fmt == '^') { 285 cclcompl = 1; 286 fmt++; 287 } else 288 cclcompl = 0; 289 if (*fmt == ']') 290 fmt++; 291 while (*fmt != '\0' && *fmt != ']') 292 fmt++; 293 ccle = fmt; 294 fmt++; 295 flags |= NOSKIP; 296 c = CT_CCL; 297 break; 298 299 case 'C': 300 flags |= LONG; 301 /* FALLTHROUGH */ 302 case 'c': 303 flags |= NOSKIP; 304 c = CT_CHAR; 305 break; 306 307 case 'p': /* pointer format is like hex */ 308 flags |= POINTER | PFXOK; 309 c = CT_INT; /* assumes sizeof(uintmax_t) */ 310 flags |= UNSIGNED; /* >= sizeof(uintptr_t) */ 311 base = 16; 312 break; 313 314 case 'n': 315 nconversions++; 316 if (flags & SUPPRESS) /* ??? */ 317 continue; 318 if (flags & SHORTSHORT) 319 *va_arg(ap, char *) = nread; 320 else if (flags & SHORT) 321 *va_arg(ap, short *) = nread; 322 else if (flags & LONG) 323 *va_arg(ap, long *) = nread; 324 else if (flags & LONGLONG) 325 *va_arg(ap, long long *) = nread; 326 else if (flags & INTMAXT) 327 *va_arg(ap, intmax_t *) = nread; 328 else if (flags & SIZET) 329 *va_arg(ap, size_t *) = nread; 330 else if (flags & PTRDIFFT) 331 *va_arg(ap, ptrdiff_t *) = nread; 332 else 333 *va_arg(ap, int *) = nread; 334 continue; 335 336 default: 337 goto match_failure; 338 339 /* 340 * Disgusting backwards compatibility hack. XXX 341 */ 342 case '\0': /* compat */ 343 return (EOF); 344 } 345 346 /* 347 * We have a conversion that requires input. 348 */ 349 if (fp->_r <= 0 && __srefill(fp)) 350 goto input_failure; 351 352 /* 353 * Consume leading white space, except for formats 354 * that suppress this. 355 */ 356 if ((flags & NOSKIP) == 0) { 357 while ((wi = __fgetwc(fp)) != WEOF && iswspace(wi)) 358 nread++; 359 if (wi == WEOF) 360 goto input_failure; 361 __ungetwc(wi, fp); 362 } 363 364 /* 365 * Do the conversion. 366 */ 367 switch (c) { 368 369 case CT_CHAR: 370 /* scan arbitrary characters (sets NOSKIP) */ 371 if (width == 0) 372 width = 1; 373 if (flags & SUPPRESS) { 374 while (width-- != 0 && 375 (wi = __fgetwc(fp)) != WEOF) 376 nread++; 377 } else if (flags & LONG) { 378 p = va_arg(ap, wchar_t *); 379 n = 0; 380 while (width-- != 0 && 381 (wi = __fgetwc(fp)) != WEOF) { 382 *p++ = (wchar_t)wi; 383 n++; 384 } 385 if (n == 0) 386 goto input_failure; 387 nread += n; 388 nassigned++; 389 } else { 390 mbp = va_arg(ap, char *); 391 n = 0; 392 memset(&mbs, 0, sizeof(mbs)); 393 while (width-- != 0 && 394 (wi = __fgetwc(fp)) != WEOF) { 395 nconv = wcrtomb(mbp, wi, &mbs); 396 if (nconv == (size_t)-1) 397 goto input_failure; 398 mbp += nconv; 399 n++; 400 } 401 if (n == 0) 402 goto input_failure; 403 nread += n; 404 nassigned++; 405 } 406 nconversions++; 407 break; 408 409 case CT_CCL: 410 /* scan a (nonempty) character class (sets NOSKIP) */ 411 if (width == 0) 412 width = (size_t)~0; /* `infinity' */ 413 /* take only those things in the class */ 414 if (flags & SUPPRESS) { 415 n = 0; 416 while ((wi = __fgetwc(fp)) != WEOF && 417 width-- != 0 && INCCL(wi)) 418 n++; 419 if (wi != WEOF) 420 __ungetwc(wi, fp); 421 if (n == 0) 422 goto match_failure; 423 } else if (flags & LONG) { 424 p0 = p = va_arg(ap, wchar_t *); 425 while ((wi = __fgetwc(fp)) != WEOF && 426 width-- != 0 && INCCL(wi)) 427 *p++ = (wchar_t)wi; 428 if (wi != WEOF) 429 __ungetwc(wi, fp); 430 n = p - p0; 431 if (n == 0) 432 goto match_failure; 433 *p = 0; 434 nassigned++; 435 } else { 436 mbp = va_arg(ap, char *); 437 n = 0; 438 memset(&mbs, 0, sizeof(mbs)); 439 while ((wi = __fgetwc(fp)) != WEOF && 440 width-- != 0 && INCCL(wi)) { 441 nconv = wcrtomb(mbp, wi, &mbs); 442 if (nconv == (size_t)-1) 443 goto input_failure; 444 mbp += nconv; 445 n++; 446 } 447 if (wi != WEOF) 448 __ungetwc(wi, fp); 449 *mbp = 0; 450 nassigned++; 451 } 452 nread += n; 453 nconversions++; 454 break; 455 456 case CT_STRING: 457 /* like CCL, but zero-length string OK, & no NOSKIP */ 458 if (width == 0) 459 width = (size_t)~0; 460 if (flags & SUPPRESS) { 461 while ((wi = __fgetwc(fp)) != WEOF && 462 width-- != 0 && 463 !iswspace(wi)) 464 nread++; 465 if (wi != WEOF) 466 __ungetwc(wi, fp); 467 } else if (flags & LONG) { 468 p0 = p = va_arg(ap, wchar_t *); 469 while ((wi = __fgetwc(fp)) != WEOF && 470 width-- != 0 && 471 !iswspace(wi)) { 472 *p++ = (wchar_t)wi; 473 nread++; 474 } 475 if (wi != WEOF) 476 __ungetwc(wi, fp); 477 *p = '\0'; 478 nassigned++; 479 } else { 480 mbp = va_arg(ap, char *); 481 memset(&mbs, 0, sizeof(mbs)); 482 while ((wi = __fgetwc(fp)) != WEOF && 483 width-- != 0 && 484 !iswspace(wi)) { 485 nconv = wcrtomb(mbp, wi, &mbs); 486 if (nconv == (size_t)-1) 487 goto input_failure; 488 mbp += nconv; 489 nread++; 490 } 491 if (wi != WEOF) 492 __ungetwc(wi, fp); 493 *mbp = 0; 494 nassigned++; 495 } 496 nconversions++; 497 continue; 498 499 case CT_INT: 500 /* scan an integer as if by the conversion function */ 501#ifdef hardway 502 if (width == 0 || width > sizeof(buf) - 1) 503 width = sizeof(buf) - 1; 504#else 505 /* size_t is unsigned, hence this optimisation */ 506 if (--width > sizeof(buf) - 2) 507 width = sizeof(buf) - 2; 508 width++; 509#endif 510 flags |= SIGNOK | NDIGITS | NZDIGITS; 511 for (p = buf; width; width--) { 512 c = __fgetwc(fp); 513 /* 514 * Switch on the character; `goto ok' 515 * if we accept it as a part of number. 516 */ 517 switch (c) { 518 519 /* 520 * The digit 0 is always legal, but is 521 * special. For %i conversions, if no 522 * digits (zero or nonzero) have been 523 * scanned (only signs), we will have 524 * base==0. In that case, we should set 525 * it to 8 and enable 0x prefixing. 526 * Also, if we have not scanned zero digits 527 * before this, do not turn off prefixing 528 * (someone else will turn it off if we 529 * have scanned any nonzero digits). 530 */ 531 case '0': 532 if (base == 0) { 533 base = 8; 534 flags |= PFXOK; 535 } 536 if (flags & NZDIGITS) 537 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 538 else 539 flags &= ~(SIGNOK|PFXOK|NDIGITS); 540 goto ok; 541 542 /* 1 through 7 always legal */ 543 case '1': case '2': case '3': 544 case '4': case '5': case '6': case '7': 545 base = basefix[base]; 546 flags &= ~(SIGNOK | PFXOK | NDIGITS); 547 goto ok; 548 549 /* digits 8 and 9 ok iff decimal or hex */ 550 case '8': case '9': 551 base = basefix[base]; 552 if (base <= 8) 553 break; /* not legal here */ 554 flags &= ~(SIGNOK | PFXOK | NDIGITS); 555 goto ok; 556 557 /* letters ok iff hex */ 558 case 'A': case 'B': case 'C': 559 case 'D': case 'E': case 'F': 560 case 'a': case 'b': case 'c': 561 case 'd': case 'e': case 'f': 562 /* no need to fix base here */ 563 if (base <= 10) 564 break; /* not legal here */ 565 flags &= ~(SIGNOK | PFXOK | NDIGITS); 566 goto ok; 567 568 /* sign ok only as first character */ 569 case '+': case '-': 570 if (flags & SIGNOK) { 571 flags &= ~SIGNOK; 572 goto ok; 573 } 574 break; 575 576 /* x ok iff flag still set & 2nd char */ 577 case 'x': case 'X': 578 if (flags & PFXOK && p == buf + 1) { 579 base = 16; /* if %i */ 580 flags &= ~PFXOK; 581 goto ok; 582 } 583 break; 584 } 585 586 /* 587 * If we got here, c is not a legal character 588 * for a number. Stop accumulating digits. 589 */ 590 if (c != WEOF) 591 __ungetwc(c, fp); 592 break; 593 ok: 594 /* 595 * c is legal: store it and look at the next. 596 */ 597 *p++ = (wchar_t)c; 598 } 599 /* 600 * If we had only a sign, it is no good; push 601 * back the sign. If the number ends in `x', 602 * it was [sign] '0' 'x', so push back the x 603 * and treat it as [sign] '0'. 604 */ 605 if (flags & NDIGITS) { 606 if (p > buf) 607 __ungetwc(*--p, fp); 608 goto match_failure; 609 } 610 c = p[-1]; 611 if (c == 'x' || c == 'X') { 612 --p; 613 __ungetwc(c, fp); 614 } 615 if ((flags & SUPPRESS) == 0) { 616 uintmax_t res; 617 618 *p = 0; 619 if ((flags & UNSIGNED) == 0) 620 res = wcstoimax(buf, NULL, base); 621 else 622 res = wcstoumax(buf, NULL, base); 623 if (flags & POINTER) 624 *va_arg(ap, void **) = 625 (void *)(uintptr_t)res; 626 else if (flags & SHORTSHORT) 627 *va_arg(ap, char *) = res; 628 else if (flags & SHORT) 629 *va_arg(ap, short *) = res; 630 else if (flags & LONG) 631 *va_arg(ap, long *) = res; 632 else if (flags & LONGLONG) 633 *va_arg(ap, long long *) = res; 634 else if (flags & INTMAXT) 635 *va_arg(ap, intmax_t *) = res; 636 else if (flags & PTRDIFFT) 637 *va_arg(ap, ptrdiff_t *) = res; 638 else if (flags & SIZET) 639 *va_arg(ap, size_t *) = res; 640 else 641 *va_arg(ap, int *) = res; 642 nassigned++; 643 } 644 nread += p - buf; 645 nconversions++; 646 break; 647 648#ifdef FLOATING_POINT 649 case CT_FLOAT: 650 /* scan a floating point number as if by strtod */ 651#ifdef hardway 652 if (width == 0 || width > sizeof(buf) - 1) 653 width = sizeof(buf) - 1; 654#else 655 /* size_t is unsigned, hence this optimisation */ 656 if (--width > sizeof(buf) - 2) 657 width = sizeof(buf) - 2; 658 width++; 659#endif 660 flags |= SIGNOK | NDIGITS | DPTOK | EXPOK; 661 for (p = buf; width; width--) { 662 c = __fgetwc(fp); 663 /* 664 * This code mimicks the integer conversion 665 * code, but is much simpler. 666 */ 667 switch (c) { 668 669 case '0': case '1': case '2': case '3': 670 case '4': case '5': case '6': case '7': 671 case '8': case '9': 672 flags &= ~(SIGNOK | NDIGITS); 673 goto fok; 674 675 case '+': case '-': 676 if (flags & SIGNOK) { 677 flags &= ~SIGNOK; 678 goto fok; 679 } 680 break; 681 case 'e': case 'E': 682 /* no exponent without some digits */ 683 if ((flags&(NDIGITS|EXPOK)) == EXPOK) { 684 flags = 685 (flags & ~(EXPOK|DPTOK)) | 686 SIGNOK | NDIGITS; 687 goto fok; 688 } 689 break; 690 default: 691 if (c == (wchar_t)decimal_point && 692 (flags & DPTOK)) { 693 flags &= ~(SIGNOK | DPTOK); 694 goto fok; 695 } 696 break; 697 } 698 if (c != WEOF) 699 __ungetwc(c, fp); 700 break; 701 fok: 702 *p++ = c; 703 } 704 /* 705 * If no digits, might be missing exponent digits 706 * (just give back the exponent) or might be missing 707 * regular digits, but had sign and/or decimal point. 708 */ 709 if (flags & NDIGITS) { 710 if (flags & EXPOK) { 711 /* no digits at all */ 712 while (p > buf) 713 __ungetwc(*--p, fp); 714 goto match_failure; 715 } 716 /* just a bad exponent (e and maybe sign) */ 717 c = *--p; 718 if (c != 'e' && c != 'E') { 719 __ungetwc(c, fp);/* sign */ 720 c = *--p; 721 } 722 __ungetwc(c, fp); 723 } 724 if ((flags & SUPPRESS) == 0) { 725 double res; 726 727 *p = 0; 728 /* XXX this loses precision for long doubles. */ 729 res = wcstod(buf, NULL); 730 if (flags & LONGDBL) 731 *va_arg(ap, long double *) = res; 732 else if (flags & LONG) 733 *va_arg(ap, double *) = res; 734 else 735 *va_arg(ap, float *) = res; 736 nassigned++; 737 } 738 nread += p - buf; 739 nconversions++; 740 break; 741#endif /* FLOATING_POINT */ 742 } 743 } 744input_failure: 745 return (nconversions != 0 ? nassigned : EOF); 746match_failure: 747 return (nassigned); 748} 749