subr_scanf.c revision 44016
1/*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Chris Torek. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * $Id: subr_scanf.c,v 1.4 1999/01/29 08:09:32 dillon Exp $ 37 * From: Id: vfscanf.c,v 1.13 1998/09/25 12:20:27 obrien Exp 38 */ 39 40#include <sys/param.h> 41#include <sys/systm.h> 42#include <sys/kernel.h> 43#include <machine/limits.h> 44 45/* 46 * Note that stdarg.h and the ANSI style va_start macro is used for both 47 * ANSI and traditional C compilers. 48 */ 49#include <machine/stdarg.h> 50 51#define BUF 32 /* Maximum length of numeric string. */ 52 53/* 54 * Flags used during conversion. 55 */ 56#define LONG 0x01 /* l: long or double */ 57#define SHORT 0x04 /* h: short */ 58#define SUPPRESS 0x08 /* suppress assignment */ 59#define POINTER 0x10 /* weird %p pointer (`fake hex') */ 60#define NOSKIP 0x20 /* do not skip blanks */ 61#define QUAD 0x400 62 63/* 64 * The following are used in numeric conversions only: 65 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; 66 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 67 */ 68#define SIGNOK 0x40 /* +/- is (still) legal */ 69#define NDIGITS 0x80 /* no digits detected */ 70 71#define DPTOK 0x100 /* (float) decimal point is still legal */ 72#define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ 73 74#define PFXOK 0x100 /* 0x prefix is (still) legal */ 75#define NZDIGITS 0x200 /* no zero digits detected */ 76 77/* 78 * Conversion types. 79 */ 80#define CT_CHAR 0 /* %c conversion */ 81#define CT_CCL 1 /* %[...] conversion */ 82#define CT_STRING 2 /* %s conversion */ 83#define CT_INT 3 /* integer, i.e., strtoq or strtouq */ 84typedef u_quad_t (*ccfntype)(const char *, const char **, int); 85 86#define isspace(c) ((c) == ' ' || (c) == '\t' || \ 87 (c) == '\r' || (c) == '\n') 88#define isascii(c) (((c) & ~0x7f) == 0) 89#define isupper(c) ((c) >= 'A' && (c) <= 'Z') 90#define islower(c) ((c) >= 'a' && (c) <= 'z') 91#define isalpha(c) (isupper(c) || (islower(c))) 92#define isdigit(c) ((c) >= '0' && (c) <= '9') 93 94static const u_char *__sccl(char *, const u_char *); 95 96int 97sscanf(const char *ibuf, const char *fmt, ...) 98{ 99 va_list ap; 100 int ret; 101 102 va_start(ap, fmt); 103 ret = vsscanf(ibuf, fmt, ap); 104 va_end(ap); 105 return(ret); 106} 107 108int 109vsscanf(const char *inp, char const *fmt0, va_list ap) 110{ 111 int inr; 112 const u_char *fmt = (const u_char *)fmt0; 113 int c; /* character from format, or conversion */ 114 size_t width; /* field width, or 0 */ 115 char *p; /* points into all kinds of strings */ 116 int n; /* handy integer */ 117 int flags; /* flags as defined above */ 118 char *p0; /* saves original value of p when necessary */ 119 int nassigned; /* number of fields assigned */ 120 int nconversions; /* number of conversions */ 121 int nread; /* number of characters consumed from fp */ 122 int base; /* base argument to strtoq/strtouq */ 123 ccfntype ccfn; /* conversion function (strtoq/strtouq) */ 124 char ccltab[256]; /* character class table for %[...] */ 125 char buf[BUF]; /* buffer for numeric conversions */ 126 127 /* `basefix' is used to avoid `if' tests in the integer scanner */ 128 static short basefix[17] = 129 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 130 131 inr = strlen(inp); 132 133 nassigned = 0; 134 nconversions = 0; 135 nread = 0; 136 base = 0; /* XXX just to keep gcc happy */ 137 ccfn = NULL; /* XXX just to keep gcc happy */ 138 for (;;) { 139 c = *fmt++; 140 if (c == 0) 141 return (nassigned); 142 if (isspace(c)) { 143 while (inr > 0 && isspace(*inp)) 144 nread++, inr--, inp++; 145 continue; 146 } 147 if (c != '%') 148 goto literal; 149 width = 0; 150 flags = 0; 151 /* 152 * switch on the format. continue if done; 153 * break once format type is derived. 154 */ 155again: c = *fmt++; 156 switch (c) { 157 case '%': 158literal: 159 if (inr <= 0) 160 goto input_failure; 161 if (*inp != c) 162 goto match_failure; 163 inr--, inp++; 164 nread++; 165 continue; 166 167 case '*': 168 flags |= SUPPRESS; 169 goto again; 170 case 'l': 171 flags |= LONG; 172 goto again; 173 case 'q': 174 flags |= QUAD; 175 goto again; 176 case 'h': 177 flags |= SHORT; 178 goto again; 179 180 case '0': case '1': case '2': case '3': case '4': 181 case '5': case '6': case '7': case '8': case '9': 182 width = width * 10 + c - '0'; 183 goto again; 184 185 /* 186 * Conversions. 187 * 188 */ 189 case 'd': 190 c = CT_INT; 191 ccfn = (ccfntype)strtoq; 192 base = 10; 193 break; 194 195 case 'i': 196 c = CT_INT; 197 ccfn = (ccfntype)strtoq; 198 base = 0; 199 break; 200 201 case 'o': 202 c = CT_INT; 203 ccfn = strtouq; 204 base = 8; 205 break; 206 207 case 'u': 208 c = CT_INT; 209 ccfn = strtouq; 210 base = 10; 211 break; 212 213 case 'x': 214 flags |= PFXOK; /* enable 0x prefixing */ 215 c = CT_INT; 216 ccfn = strtouq; 217 base = 16; 218 break; 219 220 case 's': 221 c = CT_STRING; 222 break; 223 224 case '[': 225 fmt = __sccl(ccltab, fmt); 226 flags |= NOSKIP; 227 c = CT_CCL; 228 break; 229 230 case 'c': 231 flags |= NOSKIP; 232 c = CT_CHAR; 233 break; 234 235 case 'p': /* pointer format is like hex */ 236 flags |= POINTER | PFXOK; 237 c = CT_INT; 238 ccfn = strtouq; 239 base = 16; 240 break; 241 242 case 'n': 243 nconversions++; 244 if (flags & SUPPRESS) /* ??? */ 245 continue; 246 if (flags & SHORT) 247 *va_arg(ap, short *) = nread; 248 else if (flags & LONG) 249 *va_arg(ap, long *) = nread; 250 else if (flags & QUAD) 251 *va_arg(ap, quad_t *) = nread; 252 else 253 *va_arg(ap, int *) = nread; 254 continue; 255 } 256 257 /* 258 * We have a conversion that requires input. 259 */ 260 if (inr <= 0) 261 goto input_failure; 262 263 /* 264 * Consume leading white space, except for formats 265 * that suppress this. 266 */ 267 if ((flags & NOSKIP) == 0) { 268 while (isspace(*inp)) { 269 nread++; 270 if (--inr > 0) 271 inp++; 272 else 273 goto input_failure; 274 } 275 /* 276 * Note that there is at least one character in 277 * the buffer, so conversions that do not set NOSKIP 278 * can no longer result in an input failure. 279 */ 280 } 281 282 /* 283 * Do the conversion. 284 */ 285 switch (c) { 286 287 case CT_CHAR: 288 /* scan arbitrary characters (sets NOSKIP) */ 289 if (width == 0) 290 width = 1; 291 if (flags & SUPPRESS) { 292 size_t sum = 0; 293 for (;;) { 294 if ((n = inr) < width) { 295 sum += n; 296 width -= n; 297 inp += n; 298 if (sum == 0) 299 goto input_failure; 300 break; 301 } else { 302 sum += width; 303 inr -= width; 304 inp += width; 305 break; 306 } 307 } 308 nread += sum; 309 } else { 310 bcopy(inp, va_arg(ap, char *), width); 311 inr -= width; 312 inp += width; 313 nread += width; 314 nassigned++; 315 } 316 nconversions++; 317 break; 318 319 case CT_CCL: 320 /* scan a (nonempty) character class (sets NOSKIP) */ 321 if (width == 0) 322 width = (size_t)~0; /* `infinity' */ 323 /* take only those things in the class */ 324 if (flags & SUPPRESS) { 325 n = 0; 326 while (ccltab[(unsigned char)*inp]) { 327 n++, inr--, inp++; 328 if (--width == 0) 329 break; 330 if (inr <= 0) { 331 if (n == 0) 332 goto input_failure; 333 break; 334 } 335 } 336 if (n == 0) 337 goto match_failure; 338 } else { 339 p0 = p = va_arg(ap, char *); 340 while (ccltab[(unsigned char)*inp]) { 341 inr--; 342 *p++ = *inp++; 343 if (--width == 0) 344 break; 345 if (inr <= 0) { 346 if (p == p0) 347 goto input_failure; 348 break; 349 } 350 } 351 n = p - p0; 352 if (n == 0) 353 goto match_failure; 354 *p = 0; 355 nassigned++; 356 } 357 nread += n; 358 nconversions++; 359 break; 360 361 case CT_STRING: 362 /* like CCL, but zero-length string OK, & no NOSKIP */ 363 if (width == 0) 364 width = (size_t)~0; 365 if (flags & SUPPRESS) { 366 n = 0; 367 while (!isspace(*inp)) { 368 n++, inr--, inp++; 369 if (--width == 0) 370 break; 371 if (inr <= 0) 372 break; 373 } 374 nread += n; 375 } else { 376 p0 = p = va_arg(ap, char *); 377 while (!isspace(*inp)) { 378 inr--; 379 *p++ = *inp++; 380 if (--width == 0) 381 break; 382 if (inr <= 0) 383 break; 384 } 385 *p = 0; 386 nread += p - p0; 387 nassigned++; 388 } 389 nconversions++; 390 continue; 391 392 case CT_INT: 393 /* scan an integer as if by strtoq/strtouq */ 394#ifdef hardway 395 if (width == 0 || width > sizeof(buf) - 1) 396 width = sizeof(buf) - 1; 397#else 398 /* size_t is unsigned, hence this optimisation */ 399 if (--width > sizeof(buf) - 2) 400 width = sizeof(buf) - 2; 401 width++; 402#endif 403 flags |= SIGNOK | NDIGITS | NZDIGITS; 404 for (p = buf; width; width--) { 405 c = *inp; 406 /* 407 * Switch on the character; `goto ok' 408 * if we accept it as a part of number. 409 */ 410 switch (c) { 411 412 /* 413 * The digit 0 is always legal, but is 414 * special. For %i conversions, if no 415 * digits (zero or nonzero) have been 416 * scanned (only signs), we will have 417 * base==0. In that case, we should set 418 * it to 8 and enable 0x prefixing. 419 * Also, if we have not scanned zero digits 420 * before this, do not turn off prefixing 421 * (someone else will turn it off if we 422 * have scanned any nonzero digits). 423 */ 424 case '0': 425 if (base == 0) { 426 base = 8; 427 flags |= PFXOK; 428 } 429 if (flags & NZDIGITS) 430 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 431 else 432 flags &= ~(SIGNOK|PFXOK|NDIGITS); 433 goto ok; 434 435 /* 1 through 7 always legal */ 436 case '1': case '2': case '3': 437 case '4': case '5': case '6': case '7': 438 base = basefix[base]; 439 flags &= ~(SIGNOK | PFXOK | NDIGITS); 440 goto ok; 441 442 /* digits 8 and 9 ok iff decimal or hex */ 443 case '8': case '9': 444 base = basefix[base]; 445 if (base <= 8) 446 break; /* not legal here */ 447 flags &= ~(SIGNOK | PFXOK | NDIGITS); 448 goto ok; 449 450 /* letters ok iff hex */ 451 case 'A': case 'B': case 'C': 452 case 'D': case 'E': case 'F': 453 case 'a': case 'b': case 'c': 454 case 'd': case 'e': case 'f': 455 /* no need to fix base here */ 456 if (base <= 10) 457 break; /* not legal here */ 458 flags &= ~(SIGNOK | PFXOK | NDIGITS); 459 goto ok; 460 461 /* sign ok only as first character */ 462 case '+': case '-': 463 if (flags & SIGNOK) { 464 flags &= ~SIGNOK; 465 goto ok; 466 } 467 break; 468 469 /* x ok iff flag still set & 2nd char */ 470 case 'x': case 'X': 471 if (flags & PFXOK && p == buf + 1) { 472 base = 16; /* if %i */ 473 flags &= ~PFXOK; 474 goto ok; 475 } 476 break; 477 } 478 479 /* 480 * If we got here, c is not a legal character 481 * for a number. Stop accumulating digits. 482 */ 483 break; 484 ok: 485 /* 486 * c is legal: store it and look at the next. 487 */ 488 *p++ = c; 489 if (--inr > 0) 490 inp++; 491 else 492 break; /* end of input */ 493 } 494 /* 495 * If we had only a sign, it is no good; push 496 * back the sign. If the number ends in `x', 497 * it was [sign] '0' 'x', so push back the x 498 * and treat it as [sign] '0'. 499 */ 500 if (flags & NDIGITS) { 501 if (p > buf) { 502 inp--; 503 inr++; 504 } 505 goto match_failure; 506 } 507 c = ((u_char *)p)[-1]; 508 if (c == 'x' || c == 'X') { 509 --p; 510 inp--; 511 inr++; 512 } 513 if ((flags & SUPPRESS) == 0) { 514 u_quad_t res; 515 516 *p = 0; 517 res = (*ccfn)(buf, (const char **)NULL, base); 518 if (flags & POINTER) 519 *va_arg(ap, void **) = 520 (void *)(u_long)res; 521 else if (flags & SHORT) 522 *va_arg(ap, short *) = res; 523 else if (flags & LONG) 524 *va_arg(ap, long *) = res; 525 else if (flags & QUAD) 526 *va_arg(ap, quad_t *) = res; 527 else 528 *va_arg(ap, int *) = res; 529 nassigned++; 530 } 531 nread += p - buf; 532 nconversions++; 533 break; 534 535 } 536 } 537input_failure: 538 return (nconversions != 0 ? nassigned : -1); 539match_failure: 540 return (nassigned); 541} 542 543/* 544 * Fill in the given table from the scanset at the given format 545 * (just after `['). Return a pointer to the character past the 546 * closing `]'. The table has a 1 wherever characters should be 547 * considered part of the scanset. 548 */ 549static const u_char * 550__sccl(char *tab, const u_char *fmt) 551{ 552 int c, n, v; 553 554 /* first `clear' the whole table */ 555 c = *fmt++; /* first char hat => negated scanset */ 556 if (c == '^') { 557 v = 1; /* default => accept */ 558 c = *fmt++; /* get new first char */ 559 } else 560 v = 0; /* default => reject */ 561 562 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ 563 for (n = 0; n < 256; n++) 564 tab[n] = v; /* memset(tab, v, 256) */ 565 566 if (c == 0) 567 return (fmt - 1);/* format ended before closing ] */ 568 569 /* 570 * Now set the entries corresponding to the actual scanset 571 * to the opposite of the above. 572 * 573 * The first character may be ']' (or '-') without being special; 574 * the last character may be '-'. 575 */ 576 v = 1 - v; 577 for (;;) { 578 tab[c] = v; /* take character c */ 579doswitch: 580 n = *fmt++; /* and examine the next */ 581 switch (n) { 582 583 case 0: /* format ended too soon */ 584 return (fmt - 1); 585 586 case '-': 587 /* 588 * A scanset of the form 589 * [01+-] 590 * is defined as `the digit 0, the digit 1, 591 * the character +, the character -', but 592 * the effect of a scanset such as 593 * [a-zA-Z0-9] 594 * is implementation defined. The V7 Unix 595 * scanf treats `a-z' as `the letters a through 596 * z', but treats `a-a' as `the letter a, the 597 * character -, and the letter a'. 598 * 599 * For compatibility, the `-' is not considerd 600 * to define a range if the character following 601 * it is either a close bracket (required by ANSI) 602 * or is not numerically greater than the character 603 * we just stored in the table (c). 604 */ 605 n = *fmt; 606 if (n == ']' || n < c) { 607 c = '-'; 608 break; /* resume the for(;;) */ 609 } 610 fmt++; 611 /* fill in the range */ 612 do { 613 tab[++c] = v; 614 } while (c < n); 615 c = n; 616 /* 617 * Alas, the V7 Unix scanf also treats formats 618 * such as [a-c-e] as `the letters a through e'. 619 * This too is permitted by the standard.... 620 */ 621 goto doswitch; 622 break; 623 624 case ']': /* end of scanset */ 625 return (fmt); 626 627 default: /* just another character */ 628 c = n; 629 break; 630 } 631 } 632 /* NOTREACHED */ 633} 634 635/* 636 * Convert a string to an unsigned quad integer. 637 * 638 * Ignores `locale' stuff. Assumes that the upper and lower case 639 * alphabets and digits are each contiguous. 640 */ 641u_quad_t 642strtouq(const char *nptr, const char **endptr, int base) 643{ 644 const char *s = nptr; 645 u_quad_t acc; 646 unsigned char c; 647 u_quad_t qbase, cutoff; 648 int neg, any, cutlim; 649 650 /* 651 * See strtoq for comments as to the logic used. 652 */ 653 s = nptr; 654 do { 655 c = *s++; 656 } while (isspace(c)); 657 if (c == '-') { 658 neg = 1; 659 c = *s++; 660 } else { 661 neg = 0; 662 if (c == '+') 663 c = *s++; 664 } 665 if ((base == 0 || base == 16) && 666 c == '0' && (*s == 'x' || *s == 'X')) { 667 c = s[1]; 668 s += 2; 669 base = 16; 670 } 671 if (base == 0) 672 base = c == '0' ? 8 : 10; 673 qbase = (unsigned)base; 674 cutoff = (u_quad_t)UQUAD_MAX / qbase; 675 cutlim = (u_quad_t)UQUAD_MAX % qbase; 676 for (acc = 0, any = 0;; c = *s++) { 677 if (!isascii(c)) 678 break; 679 if (isdigit(c)) 680 c -= '0'; 681 else if (isalpha(c)) 682 c -= isupper(c) ? 'A' - 10 : 'a' - 10; 683 else 684 break; 685 if (c >= base) 686 break; 687 if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) 688 any = -1; 689 else { 690 any = 1; 691 acc *= qbase; 692 acc += c; 693 } 694 } 695 if (any < 0) { 696 acc = UQUAD_MAX; 697 } else if (neg) 698 acc = -acc; 699 if (endptr != 0) 700 *endptr = (const char *)(any ? s - 1 : nptr); 701 return (acc); 702} 703 704/* 705 * Convert a string to a quad integer. 706 * 707 * Ignores `locale' stuff. Assumes that the upper and lower case 708 * alphabets and digits are each contiguous. 709 */ 710quad_t 711strtoq(const char *nptr, const char **endptr, int base) 712{ 713 const char *s; 714 u_quad_t acc; 715 unsigned char c; 716 u_quad_t qbase, cutoff; 717 int neg, any, cutlim; 718 719 /* 720 * Skip white space and pick up leading +/- sign if any. 721 * If base is 0, allow 0x for hex and 0 for octal, else 722 * assume decimal; if base is already 16, allow 0x. 723 */ 724 s = nptr; 725 do { 726 c = *s++; 727 } while (isspace(c)); 728 if (c == '-') { 729 neg = 1; 730 c = *s++; 731 } else { 732 neg = 0; 733 if (c == '+') 734 c = *s++; 735 } 736 if ((base == 0 || base == 16) && 737 c == '0' && (*s == 'x' || *s == 'X')) { 738 c = s[1]; 739 s += 2; 740 base = 16; 741 } 742 if (base == 0) 743 base = c == '0' ? 8 : 10; 744 745 /* 746 * Compute the cutoff value between legal numbers and illegal 747 * numbers. That is the largest legal value, divided by the 748 * base. An input number that is greater than this value, if 749 * followed by a legal input character, is too big. One that 750 * is equal to this value may be valid or not; the limit 751 * between valid and invalid numbers is then based on the last 752 * digit. For instance, if the range for quads is 753 * [-9223372036854775808..9223372036854775807] and the input base 754 * is 10, cutoff will be set to 922337203685477580 and cutlim to 755 * either 7 (neg==0) or 8 (neg==1), meaning that if we have 756 * accumulated a value > 922337203685477580, or equal but the 757 * next digit is > 7 (or 8), the number is too big, and we will 758 * return a range error. 759 * 760 * Set any if any `digits' consumed; make it negative to indicate 761 * overflow. 762 */ 763 qbase = (unsigned)base; 764 cutoff = neg ? (u_quad_t)-(QUAD_MIN + QUAD_MAX) + QUAD_MAX : QUAD_MAX; 765 cutlim = cutoff % qbase; 766 cutoff /= qbase; 767 for (acc = 0, any = 0;; c = *s++) { 768 if (!isascii(c)) 769 break; 770 if (isdigit(c)) 771 c -= '0'; 772 else if (isalpha(c)) 773 c -= isupper(c) ? 'A' - 10 : 'a' - 10; 774 else 775 break; 776 if (c >= base) 777 break; 778 if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) 779 any = -1; 780 else { 781 any = 1; 782 acc *= qbase; 783 acc += c; 784 } 785 } 786 if (any < 0) { 787 acc = neg ? QUAD_MIN : QUAD_MAX; 788 } else if (neg) 789 acc = -acc; 790 if (endptr != 0) 791 *endptr = (const char *)(any ? s - 1 : nptr); 792 return (acc); 793} 794