vfwscanf.c revision 103890
1/*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Chris Torek. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37#include <sys/cdefs.h> 38#if 0 39#if defined(LIBC_SCCS) && !defined(lint) 40static char sccsid[] = "@(#)vfscanf.c 8.1 (Berkeley) 6/4/93"; 41#endif /* LIBC_SCCS and not lint */ 42__FBSDID("FreeBSD: src/lib/libc/stdio/vfscanf.c,v 1.24 2002/08/13 09:30:41 tjr Exp "); 43#endif 44__FBSDID("$FreeBSD: head/lib/libc/stdio/vfwscanf.c 103890 2002-09-24 09:18:32Z tjr $"); 45 46#include "namespace.h" 47#include <ctype.h> 48#include <inttypes.h> 49#include <stdio.h> 50#include <stdlib.h> 51#include <stddef.h> 52#include <stdarg.h> 53#include <string.h> 54#include <wchar.h> 55#include <wctype.h> 56#include "un-namespace.h" 57 58#include "libc_private.h" 59#include "local.h" 60 61#define FLOATING_POINT 62 63#ifdef FLOATING_POINT 64#include <locale.h> 65#include "floatio.h" 66#endif 67 68#define BUF 513 /* Maximum length of numeric string. */ 69 70/* 71 * Flags used during conversion. 72 */ 73#define LONG 0x01 /* l: long or double */ 74#define LONGDBL 0x02 /* L: long double */ 75#define SHORT 0x04 /* h: short */ 76#define SUPPRESS 0x08 /* *: suppress assignment */ 77#define POINTER 0x10 /* p: void * (as hex) */ 78#define NOSKIP 0x20 /* [ or c: do not skip blanks */ 79#define LONGLONG 0x400 /* ll: long long (+ deprecated q: quad) */ 80#define INTMAXT 0x800 /* j: intmax_t */ 81#define PTRDIFFT 0x1000 /* t: ptrdiff_t */ 82#define SIZET 0x2000 /* z: size_t */ 83#define SHORTSHORT 0x4000 /* hh: char */ 84#define UNSIGNED 0x8000 /* %[oupxX] conversions */ 85 86/* 87 * The following are used in numeric conversions only: 88 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; 89 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 90 */ 91#define SIGNOK 0x40 /* +/- is (still) legal */ 92#define NDIGITS 0x80 /* no digits detected */ 93 94#define DPTOK 0x100 /* (float) decimal point is still legal */ 95#define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ 96 97#define PFXOK 0x100 /* 0x prefix is (still) legal */ 98#define NZDIGITS 0x200 /* no zero digits detected */ 99 100/* 101 * Conversion types. 102 */ 103#define CT_CHAR 0 /* %c conversion */ 104#define CT_CCL 1 /* %[...] conversion */ 105#define CT_STRING 2 /* %s conversion */ 106#define CT_INT 3 /* %[dioupxX] conversion */ 107#define CT_FLOAT 4 /* %[efgEFG] conversion */ 108 109#define INCCL(_c) \ 110 (cclcompl ? (wmemchr(ccls, (_c), ccle - ccls) == NULL) : \ 111 (wmemchr(ccls, (_c), ccle - ccls) != NULL)) 112 113/* 114 * MT-safe version. 115 */ 116int 117vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap) 118{ 119 int ret; 120 121 FLOCKFILE(fp); 122 ORIENT(fp, 1); 123 ret = __vfwscanf(fp, fmt, ap); 124 FUNLOCKFILE(fp); 125 return (ret); 126} 127 128/* 129 * Non-MT-safe version. 130 */ 131int 132__vfwscanf(FILE * __restrict fp, const wchar_t * __restrict fmt, va_list ap) 133{ 134 wint_t c; /* character from format, or conversion */ 135 size_t width; /* field width, or 0 */ 136 wchar_t *p; /* points into all kinds of strings */ 137 int n; /* handy integer */ 138 int flags; /* flags as defined above */ 139 wchar_t *p0; /* saves original value of p when necessary */ 140 int nassigned; /* number of fields assigned */ 141 int nconversions; /* number of conversions */ 142 int nread; /* number of characters consumed from fp */ 143 int base; /* base argument to conversion function */ 144 wchar_t buf[BUF]; /* buffer for numeric conversions */ 145 const wchar_t *ccls; /* character class start */ 146 const wchar_t *ccle; /* character class end */ 147 int cclcompl; /* ccl is complemented? */ 148 wint_t wi; /* handy wint_t */ 149 char *mbp; /* multibyte string pointer for %c %s %[ */ 150 size_t nconv; /* number of bytes in mb. conversion */ 151 mbstate_t mbs; /* multibyte state */ 152 153 /* `basefix' is used to avoid `if' tests in the integer scanner */ 154 static short basefix[17] = 155 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 156#ifdef FLOATING_POINT 157 char decimal_point = localeconv()->decimal_point[0]; 158#endif 159 160 nassigned = 0; 161 nconversions = 0; 162 nread = 0; 163 ccls = ccle = NULL; 164 for (;;) { 165 c = *fmt++; 166 if (c == 0) 167 return (nassigned); 168 if (iswspace(c)) { 169 while ((c = __fgetwc(fp)) != WEOF && 170 iswspace(c)) 171 ; 172 if (c != WEOF) 173 __ungetwc(c, fp); 174 continue; 175 } 176 if (c != '%') 177 goto literal; 178 width = 0; 179 flags = 0; 180 /* 181 * switch on the format. continue if done; 182 * break once format type is derived. 183 */ 184again: c = *fmt++; 185 switch (c) { 186 case '%': 187literal: 188 if ((wi = __fgetwc(fp)) == WEOF) 189 goto input_failure; 190 if (wi != c) { 191 __ungetwc(wi, fp); 192 goto input_failure; 193 } 194 nread++; 195 continue; 196 197 case '*': 198 flags |= SUPPRESS; 199 goto again; 200 case 'j': 201 flags |= INTMAXT; 202 goto again; 203 case 'l': 204 if (flags & LONG) { 205 flags &= ~LONG; 206 flags |= LONGLONG; 207 } else 208 flags |= LONG; 209 goto again; 210 case 'q': 211 flags |= LONGLONG; /* not quite */ 212 goto again; 213 case 't': 214 flags |= PTRDIFFT; 215 goto again; 216 case 'z': 217 flags |= SIZET; 218 goto again; 219 case 'L': 220 flags |= LONGDBL; 221 goto again; 222 case 'h': 223 if (flags & SHORT) { 224 flags &= ~SHORT; 225 flags |= SHORTSHORT; 226 } else 227 flags |= SHORT; 228 goto again; 229 230 case '0': case '1': case '2': case '3': case '4': 231 case '5': case '6': case '7': case '8': case '9': 232 width = width * 10 + c - '0'; 233 goto again; 234 235 /* 236 * Conversions. 237 */ 238 case 'd': 239 c = CT_INT; 240 base = 10; 241 break; 242 243 case 'i': 244 c = CT_INT; 245 base = 0; 246 break; 247 248 case 'o': 249 c = CT_INT; 250 flags |= UNSIGNED; 251 base = 8; 252 break; 253 254 case 'u': 255 c = CT_INT; 256 flags |= UNSIGNED; 257 base = 10; 258 break; 259 260 case 'X': 261 case 'x': 262 flags |= PFXOK; /* enable 0x prefixing */ 263 c = CT_INT; 264 flags |= UNSIGNED; 265 base = 16; 266 break; 267 268#ifdef FLOATING_POINT 269 case 'E': case 'F': case 'G': 270 case 'e': case 'f': case 'g': 271 c = CT_FLOAT; 272 break; 273#endif 274 275 case 'S': 276 flags |= LONG; 277 /* FALLTHROUGH */ 278 case 's': 279 c = CT_STRING; 280 break; 281 282 case '[': 283 ccls = fmt; 284 if (*fmt == '^') { 285 cclcompl = 1; 286 fmt++; 287 } else 288 cclcompl = 0; 289 if (*fmt == ']') 290 fmt++; 291 while (*fmt != '\0' && *fmt != ']') 292 fmt++; 293 ccle = fmt; 294 fmt++; 295 flags |= NOSKIP; 296 c = CT_CCL; 297 break; 298 299 case 'C': 300 flags |= LONG; 301 /* FALLTHROUGH */ 302 case 'c': 303 flags |= NOSKIP; 304 c = CT_CHAR; 305 break; 306 307 case 'p': /* pointer format is like hex */ 308 flags |= POINTER | PFXOK; 309 c = CT_INT; /* assumes sizeof(uintmax_t) */ 310 flags |= UNSIGNED; /* >= sizeof(uintptr_t) */ 311 base = 16; 312 break; 313 314 case 'n': 315 nconversions++; 316 if (flags & SUPPRESS) /* ??? */ 317 continue; 318 if (flags & SHORTSHORT) 319 *va_arg(ap, char *) = nread; 320 else if (flags & SHORT) 321 *va_arg(ap, short *) = nread; 322 else if (flags & LONG) 323 *va_arg(ap, long *) = nread; 324 else if (flags & LONGLONG) 325 *va_arg(ap, long long *) = nread; 326 else if (flags & INTMAXT) 327 *va_arg(ap, intmax_t *) = nread; 328 else if (flags & SIZET) 329 *va_arg(ap, size_t *) = nread; 330 else if (flags & PTRDIFFT) 331 *va_arg(ap, ptrdiff_t *) = nread; 332 else 333 *va_arg(ap, int *) = nread; 334 continue; 335 336 default: 337 goto match_failure; 338 339 /* 340 * Disgusting backwards compatibility hack. XXX 341 */ 342 case '\0': /* compat */ 343 return (EOF); 344 } 345 346 /* 347 * Consume leading white space, except for formats 348 * that suppress this. 349 */ 350 if ((flags & NOSKIP) == 0) { 351 while ((wi = __fgetwc(fp)) != WEOF && iswspace(wi)) 352 nread++; 353 if (wi == WEOF) 354 goto input_failure; 355 __ungetwc(wi, fp); 356 } 357 358 /* 359 * Do the conversion. 360 */ 361 switch (c) { 362 363 case CT_CHAR: 364 /* scan arbitrary characters (sets NOSKIP) */ 365 if (width == 0) 366 width = 1; 367 if (flags & SUPPRESS) { 368 while (width-- != 0 && 369 (wi = __fgetwc(fp)) != WEOF) 370 nread++; 371 } else if (flags & LONG) { 372 p = va_arg(ap, wchar_t *); 373 n = 0; 374 while (width-- != 0 && 375 (wi = __fgetwc(fp)) != WEOF) { 376 *p++ = (wchar_t)wi; 377 n++; 378 } 379 if (n == 0) 380 goto input_failure; 381 nread += n; 382 nassigned++; 383 } else { 384 mbp = va_arg(ap, char *); 385 n = 0; 386 memset(&mbs, 0, sizeof(mbs)); 387 while (width-- != 0 && 388 (wi = __fgetwc(fp)) != WEOF) { 389 nconv = wcrtomb(mbp, wi, &mbs); 390 if (nconv == (size_t)-1) 391 goto input_failure; 392 mbp += nconv; 393 n++; 394 } 395 if (n == 0) 396 goto input_failure; 397 nread += n; 398 nassigned++; 399 } 400 nconversions++; 401 break; 402 403 case CT_CCL: 404 /* scan a (nonempty) character class (sets NOSKIP) */ 405 if (width == 0) 406 width = (size_t)~0; /* `infinity' */ 407 /* take only those things in the class */ 408 if (flags & SUPPRESS) { 409 n = 0; 410 while ((wi = __fgetwc(fp)) != WEOF && 411 width-- != 0 && INCCL(wi)) 412 n++; 413 if (wi != WEOF) 414 __ungetwc(wi, fp); 415 if (n == 0) 416 goto match_failure; 417 } else if (flags & LONG) { 418 p0 = p = va_arg(ap, wchar_t *); 419 while ((wi = __fgetwc(fp)) != WEOF && 420 width-- != 0 && INCCL(wi)) 421 *p++ = (wchar_t)wi; 422 if (wi != WEOF) 423 __ungetwc(wi, fp); 424 n = p - p0; 425 if (n == 0) 426 goto match_failure; 427 *p = 0; 428 nassigned++; 429 } else { 430 mbp = va_arg(ap, char *); 431 n = 0; 432 memset(&mbs, 0, sizeof(mbs)); 433 while ((wi = __fgetwc(fp)) != WEOF && 434 width-- != 0 && INCCL(wi)) { 435 nconv = wcrtomb(mbp, wi, &mbs); 436 if (nconv == (size_t)-1) 437 goto input_failure; 438 mbp += nconv; 439 n++; 440 } 441 if (wi != WEOF) 442 __ungetwc(wi, fp); 443 *mbp = 0; 444 nassigned++; 445 } 446 nread += n; 447 nconversions++; 448 break; 449 450 case CT_STRING: 451 /* like CCL, but zero-length string OK, & no NOSKIP */ 452 if (width == 0) 453 width = (size_t)~0; 454 if (flags & SUPPRESS) { 455 while ((wi = __fgetwc(fp)) != WEOF && 456 width-- != 0 && 457 !iswspace(wi)) 458 nread++; 459 if (wi != WEOF) 460 __ungetwc(wi, fp); 461 } else if (flags & LONG) { 462 p0 = p = va_arg(ap, wchar_t *); 463 while ((wi = __fgetwc(fp)) != WEOF && 464 width-- != 0 && 465 !iswspace(wi)) { 466 *p++ = (wchar_t)wi; 467 nread++; 468 } 469 if (wi != WEOF) 470 __ungetwc(wi, fp); 471 *p = '\0'; 472 nassigned++; 473 } else { 474 mbp = va_arg(ap, char *); 475 memset(&mbs, 0, sizeof(mbs)); 476 while ((wi = __fgetwc(fp)) != WEOF && 477 width-- != 0 && 478 !iswspace(wi)) { 479 nconv = wcrtomb(mbp, wi, &mbs); 480 if (nconv == (size_t)-1) 481 goto input_failure; 482 mbp += nconv; 483 nread++; 484 } 485 if (wi != WEOF) 486 __ungetwc(wi, fp); 487 *mbp = 0; 488 nassigned++; 489 } 490 nconversions++; 491 continue; 492 493 case CT_INT: 494 /* scan an integer as if by the conversion function */ 495#ifdef hardway 496 if (width == 0 || width > sizeof(buf) - 1) 497 width = sizeof(buf) - 1; 498#else 499 /* size_t is unsigned, hence this optimisation */ 500 if (--width > sizeof(buf) - 2) 501 width = sizeof(buf) - 2; 502 width++; 503#endif 504 flags |= SIGNOK | NDIGITS | NZDIGITS; 505 for (p = buf; width; width--) { 506 c = __fgetwc(fp); 507 /* 508 * Switch on the character; `goto ok' 509 * if we accept it as a part of number. 510 */ 511 switch (c) { 512 513 /* 514 * The digit 0 is always legal, but is 515 * special. For %i conversions, if no 516 * digits (zero or nonzero) have been 517 * scanned (only signs), we will have 518 * base==0. In that case, we should set 519 * it to 8 and enable 0x prefixing. 520 * Also, if we have not scanned zero digits 521 * before this, do not turn off prefixing 522 * (someone else will turn it off if we 523 * have scanned any nonzero digits). 524 */ 525 case '0': 526 if (base == 0) { 527 base = 8; 528 flags |= PFXOK; 529 } 530 if (flags & NZDIGITS) 531 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 532 else 533 flags &= ~(SIGNOK|PFXOK|NDIGITS); 534 goto ok; 535 536 /* 1 through 7 always legal */ 537 case '1': case '2': case '3': 538 case '4': case '5': case '6': case '7': 539 base = basefix[base]; 540 flags &= ~(SIGNOK | PFXOK | NDIGITS); 541 goto ok; 542 543 /* digits 8 and 9 ok iff decimal or hex */ 544 case '8': case '9': 545 base = basefix[base]; 546 if (base <= 8) 547 break; /* not legal here */ 548 flags &= ~(SIGNOK | PFXOK | NDIGITS); 549 goto ok; 550 551 /* letters ok iff hex */ 552 case 'A': case 'B': case 'C': 553 case 'D': case 'E': case 'F': 554 case 'a': case 'b': case 'c': 555 case 'd': case 'e': case 'f': 556 /* no need to fix base here */ 557 if (base <= 10) 558 break; /* not legal here */ 559 flags &= ~(SIGNOK | PFXOK | NDIGITS); 560 goto ok; 561 562 /* sign ok only as first character */ 563 case '+': case '-': 564 if (flags & SIGNOK) { 565 flags &= ~SIGNOK; 566 goto ok; 567 } 568 break; 569 570 /* x ok iff flag still set & 2nd char */ 571 case 'x': case 'X': 572 if (flags & PFXOK && p == buf + 1) { 573 base = 16; /* if %i */ 574 flags &= ~PFXOK; 575 goto ok; 576 } 577 break; 578 } 579 580 /* 581 * If we got here, c is not a legal character 582 * for a number. Stop accumulating digits. 583 */ 584 if (c != WEOF) 585 __ungetwc(c, fp); 586 break; 587 ok: 588 /* 589 * c is legal: store it and look at the next. 590 */ 591 *p++ = (wchar_t)c; 592 } 593 /* 594 * If we had only a sign, it is no good; push 595 * back the sign. If the number ends in `x', 596 * it was [sign] '0' 'x', so push back the x 597 * and treat it as [sign] '0'. 598 */ 599 if (flags & NDIGITS) { 600 if (p > buf) 601 __ungetwc(*--p, fp); 602 goto match_failure; 603 } 604 c = p[-1]; 605 if (c == 'x' || c == 'X') { 606 --p; 607 __ungetwc(c, fp); 608 } 609 if ((flags & SUPPRESS) == 0) { 610 uintmax_t res; 611 612 *p = 0; 613 if ((flags & UNSIGNED) == 0) 614 res = wcstoimax(buf, NULL, base); 615 else 616 res = wcstoumax(buf, NULL, base); 617 if (flags & POINTER) 618 *va_arg(ap, void **) = 619 (void *)(uintptr_t)res; 620 else if (flags & SHORTSHORT) 621 *va_arg(ap, char *) = res; 622 else if (flags & SHORT) 623 *va_arg(ap, short *) = res; 624 else if (flags & LONG) 625 *va_arg(ap, long *) = res; 626 else if (flags & LONGLONG) 627 *va_arg(ap, long long *) = res; 628 else if (flags & INTMAXT) 629 *va_arg(ap, intmax_t *) = res; 630 else if (flags & PTRDIFFT) 631 *va_arg(ap, ptrdiff_t *) = res; 632 else if (flags & SIZET) 633 *va_arg(ap, size_t *) = res; 634 else 635 *va_arg(ap, int *) = res; 636 nassigned++; 637 } 638 nread += p - buf; 639 nconversions++; 640 break; 641 642#ifdef FLOATING_POINT 643 case CT_FLOAT: 644 /* scan a floating point number as if by strtod */ 645#ifdef hardway 646 if (width == 0 || width > sizeof(buf) - 1) 647 width = sizeof(buf) - 1; 648#else 649 /* size_t is unsigned, hence this optimisation */ 650 if (--width > sizeof(buf) - 2) 651 width = sizeof(buf) - 2; 652 width++; 653#endif 654 flags |= SIGNOK | NDIGITS | DPTOK | EXPOK; 655 for (p = buf; width; width--) { 656 c = __fgetwc(fp); 657 /* 658 * This code mimicks the integer conversion 659 * code, but is much simpler. 660 */ 661 switch (c) { 662 663 case '0': case '1': case '2': case '3': 664 case '4': case '5': case '6': case '7': 665 case '8': case '9': 666 flags &= ~(SIGNOK | NDIGITS); 667 goto fok; 668 669 case '+': case '-': 670 if (flags & SIGNOK) { 671 flags &= ~SIGNOK; 672 goto fok; 673 } 674 break; 675 case 'e': case 'E': 676 /* no exponent without some digits */ 677 if ((flags&(NDIGITS|EXPOK)) == EXPOK) { 678 flags = 679 (flags & ~(EXPOK|DPTOK)) | 680 SIGNOK | NDIGITS; 681 goto fok; 682 } 683 break; 684 default: 685 if (c == (wchar_t)decimal_point && 686 (flags & DPTOK)) { 687 flags &= ~(SIGNOK | DPTOK); 688 goto fok; 689 } 690 break; 691 } 692 if (c != WEOF) 693 __ungetwc(c, fp); 694 break; 695 fok: 696 *p++ = c; 697 } 698 /* 699 * If no digits, might be missing exponent digits 700 * (just give back the exponent) or might be missing 701 * regular digits, but had sign and/or decimal point. 702 */ 703 if (flags & NDIGITS) { 704 if (flags & EXPOK) { 705 /* no digits at all */ 706 while (p > buf) 707 __ungetwc(*--p, fp); 708 goto match_failure; 709 } 710 /* just a bad exponent (e and maybe sign) */ 711 c = *--p; 712 if (c != 'e' && c != 'E') { 713 __ungetwc(c, fp);/* sign */ 714 c = *--p; 715 } 716 __ungetwc(c, fp); 717 } 718 if ((flags & SUPPRESS) == 0) { 719 double res; 720 721 *p = 0; 722 /* XXX this loses precision for long doubles. */ 723 res = wcstod(buf, NULL); 724 if (flags & LONGDBL) 725 *va_arg(ap, long double *) = res; 726 else if (flags & LONG) 727 *va_arg(ap, double *) = res; 728 else 729 *va_arg(ap, float *) = res; 730 nassigned++; 731 } 732 nread += p - buf; 733 nconversions++; 734 break; 735#endif /* FLOATING_POINT */ 736 } 737 } 738input_failure: 739 return (nconversions != 0 ? nassigned : EOF); 740match_failure: 741 return (nassigned); 742} 743