subr_scanf.c revision 52757
1/*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Chris Torek. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * $FreeBSD: head/sys/kern/subr_scanf.c 52757 1999-11-01 15:04:04Z phk $ 37 * From: Id: vfscanf.c,v 1.13 1998/09/25 12:20:27 obrien Exp 38 * From: static char sccsid[] = "@(#)strtol.c 8.1 (Berkeley) 6/4/93"; 39 * From: static char sccsid[] = "@(#)strtoul.c 8.1 (Berkeley) 6/4/93"; 40 */ 41 42#include <sys/param.h> 43#include <sys/systm.h> 44#include <machine/limits.h> 45 46/* 47 * Note that stdarg.h and the ANSI style va_start macro is used for both 48 * ANSI and traditional C compilers. 49 */ 50#include <machine/stdarg.h> 51 52#define BUF 32 /* Maximum length of numeric string. */ 53 54/* 55 * Flags used during conversion. 56 */ 57#define LONG 0x01 /* l: long or double */ 58#define SHORT 0x04 /* h: short */ 59#define SUPPRESS 0x08 /* suppress assignment */ 60#define POINTER 0x10 /* weird %p pointer (`fake hex') */ 61#define NOSKIP 0x20 /* do not skip blanks */ 62#define QUAD 0x400 63 64/* 65 * The following are used in numeric conversions only: 66 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; 67 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 68 */ 69#define SIGNOK 0x40 /* +/- is (still) legal */ 70#define NDIGITS 0x80 /* no digits detected */ 71 72#define DPTOK 0x100 /* (float) decimal point is still legal */ 73#define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ 74 75#define PFXOK 0x100 /* 0x prefix is (still) legal */ 76#define NZDIGITS 0x200 /* no zero digits detected */ 77 78/* 79 * Conversion types. 80 */ 81#define CT_CHAR 0 /* %c conversion */ 82#define CT_CCL 1 /* %[...] conversion */ 83#define CT_STRING 2 /* %s conversion */ 84#define CT_INT 3 /* integer, i.e., strtoq or strtouq */ 85typedef u_quad_t (*ccfntype)(const char *, const char **, int); 86 87#define isspace(c) ((c) == ' ' || (c) == '\t' || \ 88 (c) == '\r' || (c) == '\n') 89#define isascii(c) (((c) & ~0x7f) == 0) 90#define isupper(c) ((c) >= 'A' && (c) <= 'Z') 91#define islower(c) ((c) >= 'a' && (c) <= 'z') 92#define isalpha(c) (isupper(c) || (islower(c))) 93#define isdigit(c) ((c) >= '0' && (c) <= '9') 94 95static const u_char *__sccl(char *, const u_char *); 96 97int 98sscanf(const char *ibuf, const char *fmt, ...) 99{ 100 va_list ap; 101 int ret; 102 103 va_start(ap, fmt); 104 ret = vsscanf(ibuf, fmt, ap); 105 va_end(ap); 106 return(ret); 107} 108 109int 110vsscanf(const char *inp, char const *fmt0, va_list ap) 111{ 112 int inr; 113 const u_char *fmt = (const u_char *)fmt0; 114 int c; /* character from format, or conversion */ 115 size_t width; /* field width, or 0 */ 116 char *p; /* points into all kinds of strings */ 117 int n; /* handy integer */ 118 int flags; /* flags as defined above */ 119 char *p0; /* saves original value of p when necessary */ 120 int nassigned; /* number of fields assigned */ 121 int nconversions; /* number of conversions */ 122 int nread; /* number of characters consumed from fp */ 123 int base; /* base argument to strtoq/strtouq */ 124 ccfntype ccfn; /* conversion function (strtoq/strtouq) */ 125 char ccltab[256]; /* character class table for %[...] */ 126 char buf[BUF]; /* buffer for numeric conversions */ 127 128 /* `basefix' is used to avoid `if' tests in the integer scanner */ 129 static short basefix[17] = 130 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 131 132 inr = strlen(inp); 133 134 nassigned = 0; 135 nconversions = 0; 136 nread = 0; 137 base = 0; /* XXX just to keep gcc happy */ 138 ccfn = NULL; /* XXX just to keep gcc happy */ 139 for (;;) { 140 c = *fmt++; 141 if (c == 0) 142 return (nassigned); 143 if (isspace(c)) { 144 while (inr > 0 && isspace(*inp)) 145 nread++, inr--, inp++; 146 continue; 147 } 148 if (c != '%') 149 goto literal; 150 width = 0; 151 flags = 0; 152 /* 153 * switch on the format. continue if done; 154 * break once format type is derived. 155 */ 156again: c = *fmt++; 157 switch (c) { 158 case '%': 159literal: 160 if (inr <= 0) 161 goto input_failure; 162 if (*inp != c) 163 goto match_failure; 164 inr--, inp++; 165 nread++; 166 continue; 167 168 case '*': 169 flags |= SUPPRESS; 170 goto again; 171 case 'l': 172 flags |= LONG; 173 goto again; 174 case 'q': 175 flags |= QUAD; 176 goto again; 177 case 'h': 178 flags |= SHORT; 179 goto again; 180 181 case '0': case '1': case '2': case '3': case '4': 182 case '5': case '6': case '7': case '8': case '9': 183 width = width * 10 + c - '0'; 184 goto again; 185 186 /* 187 * Conversions. 188 * 189 */ 190 case 'd': 191 c = CT_INT; 192 ccfn = (ccfntype)strtoq; 193 base = 10; 194 break; 195 196 case 'i': 197 c = CT_INT; 198 ccfn = (ccfntype)strtoq; 199 base = 0; 200 break; 201 202 case 'o': 203 c = CT_INT; 204 ccfn = strtouq; 205 base = 8; 206 break; 207 208 case 'u': 209 c = CT_INT; 210 ccfn = strtouq; 211 base = 10; 212 break; 213 214 case 'x': 215 flags |= PFXOK; /* enable 0x prefixing */ 216 c = CT_INT; 217 ccfn = strtouq; 218 base = 16; 219 break; 220 221 case 's': 222 c = CT_STRING; 223 break; 224 225 case '[': 226 fmt = __sccl(ccltab, fmt); 227 flags |= NOSKIP; 228 c = CT_CCL; 229 break; 230 231 case 'c': 232 flags |= NOSKIP; 233 c = CT_CHAR; 234 break; 235 236 case 'p': /* pointer format is like hex */ 237 flags |= POINTER | PFXOK; 238 c = CT_INT; 239 ccfn = strtouq; 240 base = 16; 241 break; 242 243 case 'n': 244 nconversions++; 245 if (flags & SUPPRESS) /* ??? */ 246 continue; 247 if (flags & SHORT) 248 *va_arg(ap, short *) = nread; 249 else if (flags & LONG) 250 *va_arg(ap, long *) = nread; 251 else if (flags & QUAD) 252 *va_arg(ap, quad_t *) = nread; 253 else 254 *va_arg(ap, int *) = nread; 255 continue; 256 } 257 258 /* 259 * We have a conversion that requires input. 260 */ 261 if (inr <= 0) 262 goto input_failure; 263 264 /* 265 * Consume leading white space, except for formats 266 * that suppress this. 267 */ 268 if ((flags & NOSKIP) == 0) { 269 while (isspace(*inp)) { 270 nread++; 271 if (--inr > 0) 272 inp++; 273 else 274 goto input_failure; 275 } 276 /* 277 * Note that there is at least one character in 278 * the buffer, so conversions that do not set NOSKIP 279 * can no longer result in an input failure. 280 */ 281 } 282 283 /* 284 * Do the conversion. 285 */ 286 switch (c) { 287 288 case CT_CHAR: 289 /* scan arbitrary characters (sets NOSKIP) */ 290 if (width == 0) 291 width = 1; 292 if (flags & SUPPRESS) { 293 size_t sum = 0; 294 for (;;) { 295 if ((n = inr) < width) { 296 sum += n; 297 width -= n; 298 inp += n; 299 if (sum == 0) 300 goto input_failure; 301 break; 302 } else { 303 sum += width; 304 inr -= width; 305 inp += width; 306 break; 307 } 308 } 309 nread += sum; 310 } else { 311 bcopy(inp, va_arg(ap, char *), width); 312 inr -= width; 313 inp += width; 314 nread += width; 315 nassigned++; 316 } 317 nconversions++; 318 break; 319 320 case CT_CCL: 321 /* scan a (nonempty) character class (sets NOSKIP) */ 322 if (width == 0) 323 width = (size_t)~0; /* `infinity' */ 324 /* take only those things in the class */ 325 if (flags & SUPPRESS) { 326 n = 0; 327 while (ccltab[(unsigned char)*inp]) { 328 n++, inr--, inp++; 329 if (--width == 0) 330 break; 331 if (inr <= 0) { 332 if (n == 0) 333 goto input_failure; 334 break; 335 } 336 } 337 if (n == 0) 338 goto match_failure; 339 } else { 340 p0 = p = va_arg(ap, char *); 341 while (ccltab[(unsigned char)*inp]) { 342 inr--; 343 *p++ = *inp++; 344 if (--width == 0) 345 break; 346 if (inr <= 0) { 347 if (p == p0) 348 goto input_failure; 349 break; 350 } 351 } 352 n = p - p0; 353 if (n == 0) 354 goto match_failure; 355 *p = 0; 356 nassigned++; 357 } 358 nread += n; 359 nconversions++; 360 break; 361 362 case CT_STRING: 363 /* like CCL, but zero-length string OK, & no NOSKIP */ 364 if (width == 0) 365 width = (size_t)~0; 366 if (flags & SUPPRESS) { 367 n = 0; 368 while (!isspace(*inp)) { 369 n++, inr--, inp++; 370 if (--width == 0) 371 break; 372 if (inr <= 0) 373 break; 374 } 375 nread += n; 376 } else { 377 p0 = p = va_arg(ap, char *); 378 while (!isspace(*inp)) { 379 inr--; 380 *p++ = *inp++; 381 if (--width == 0) 382 break; 383 if (inr <= 0) 384 break; 385 } 386 *p = 0; 387 nread += p - p0; 388 nassigned++; 389 } 390 nconversions++; 391 continue; 392 393 case CT_INT: 394 /* scan an integer as if by strtoq/strtouq */ 395#ifdef hardway 396 if (width == 0 || width > sizeof(buf) - 1) 397 width = sizeof(buf) - 1; 398#else 399 /* size_t is unsigned, hence this optimisation */ 400 if (--width > sizeof(buf) - 2) 401 width = sizeof(buf) - 2; 402 width++; 403#endif 404 flags |= SIGNOK | NDIGITS | NZDIGITS; 405 for (p = buf; width; width--) { 406 c = *inp; 407 /* 408 * Switch on the character; `goto ok' 409 * if we accept it as a part of number. 410 */ 411 switch (c) { 412 413 /* 414 * The digit 0 is always legal, but is 415 * special. For %i conversions, if no 416 * digits (zero or nonzero) have been 417 * scanned (only signs), we will have 418 * base==0. In that case, we should set 419 * it to 8 and enable 0x prefixing. 420 * Also, if we have not scanned zero digits 421 * before this, do not turn off prefixing 422 * (someone else will turn it off if we 423 * have scanned any nonzero digits). 424 */ 425 case '0': 426 if (base == 0) { 427 base = 8; 428 flags |= PFXOK; 429 } 430 if (flags & NZDIGITS) 431 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 432 else 433 flags &= ~(SIGNOK|PFXOK|NDIGITS); 434 goto ok; 435 436 /* 1 through 7 always legal */ 437 case '1': case '2': case '3': 438 case '4': case '5': case '6': case '7': 439 base = basefix[base]; 440 flags &= ~(SIGNOK | PFXOK | NDIGITS); 441 goto ok; 442 443 /* digits 8 and 9 ok iff decimal or hex */ 444 case '8': case '9': 445 base = basefix[base]; 446 if (base <= 8) 447 break; /* not legal here */ 448 flags &= ~(SIGNOK | PFXOK | NDIGITS); 449 goto ok; 450 451 /* letters ok iff hex */ 452 case 'A': case 'B': case 'C': 453 case 'D': case 'E': case 'F': 454 case 'a': case 'b': case 'c': 455 case 'd': case 'e': case 'f': 456 /* no need to fix base here */ 457 if (base <= 10) 458 break; /* not legal here */ 459 flags &= ~(SIGNOK | PFXOK | NDIGITS); 460 goto ok; 461 462 /* sign ok only as first character */ 463 case '+': case '-': 464 if (flags & SIGNOK) { 465 flags &= ~SIGNOK; 466 goto ok; 467 } 468 break; 469 470 /* x ok iff flag still set & 2nd char */ 471 case 'x': case 'X': 472 if (flags & PFXOK && p == buf + 1) { 473 base = 16; /* if %i */ 474 flags &= ~PFXOK; 475 goto ok; 476 } 477 break; 478 } 479 480 /* 481 * If we got here, c is not a legal character 482 * for a number. Stop accumulating digits. 483 */ 484 break; 485 ok: 486 /* 487 * c is legal: store it and look at the next. 488 */ 489 *p++ = c; 490 if (--inr > 0) 491 inp++; 492 else 493 break; /* end of input */ 494 } 495 /* 496 * If we had only a sign, it is no good; push 497 * back the sign. If the number ends in `x', 498 * it was [sign] '0' 'x', so push back the x 499 * and treat it as [sign] '0'. 500 */ 501 if (flags & NDIGITS) { 502 if (p > buf) { 503 inp--; 504 inr++; 505 } 506 goto match_failure; 507 } 508 c = ((u_char *)p)[-1]; 509 if (c == 'x' || c == 'X') { 510 --p; 511 inp--; 512 inr++; 513 } 514 if ((flags & SUPPRESS) == 0) { 515 u_quad_t res; 516 517 *p = 0; 518 res = (*ccfn)(buf, (const char **)NULL, base); 519 if (flags & POINTER) 520 *va_arg(ap, void **) = 521 (void *)(uintptr_t)res; 522 else if (flags & SHORT) 523 *va_arg(ap, short *) = res; 524 else if (flags & LONG) 525 *va_arg(ap, long *) = res; 526 else if (flags & QUAD) 527 *va_arg(ap, quad_t *) = res; 528 else 529 *va_arg(ap, int *) = res; 530 nassigned++; 531 } 532 nread += p - buf; 533 nconversions++; 534 break; 535 536 } 537 } 538input_failure: 539 return (nconversions != 0 ? nassigned : -1); 540match_failure: 541 return (nassigned); 542} 543 544/* 545 * Fill in the given table from the scanset at the given format 546 * (just after `['). Return a pointer to the character past the 547 * closing `]'. The table has a 1 wherever characters should be 548 * considered part of the scanset. 549 */ 550static const u_char * 551__sccl(char *tab, const u_char *fmt) 552{ 553 int c, n, v; 554 555 /* first `clear' the whole table */ 556 c = *fmt++; /* first char hat => negated scanset */ 557 if (c == '^') { 558 v = 1; /* default => accept */ 559 c = *fmt++; /* get new first char */ 560 } else 561 v = 0; /* default => reject */ 562 563 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ 564 for (n = 0; n < 256; n++) 565 tab[n] = v; /* memset(tab, v, 256) */ 566 567 if (c == 0) 568 return (fmt - 1);/* format ended before closing ] */ 569 570 /* 571 * Now set the entries corresponding to the actual scanset 572 * to the opposite of the above. 573 * 574 * The first character may be ']' (or '-') without being special; 575 * the last character may be '-'. 576 */ 577 v = 1 - v; 578 for (;;) { 579 tab[c] = v; /* take character c */ 580doswitch: 581 n = *fmt++; /* and examine the next */ 582 switch (n) { 583 584 case 0: /* format ended too soon */ 585 return (fmt - 1); 586 587 case '-': 588 /* 589 * A scanset of the form 590 * [01+-] 591 * is defined as `the digit 0, the digit 1, 592 * the character +, the character -', but 593 * the effect of a scanset such as 594 * [a-zA-Z0-9] 595 * is implementation defined. The V7 Unix 596 * scanf treats `a-z' as `the letters a through 597 * z', but treats `a-a' as `the letter a, the 598 * character -, and the letter a'. 599 * 600 * For compatibility, the `-' is not considerd 601 * to define a range if the character following 602 * it is either a close bracket (required by ANSI) 603 * or is not numerically greater than the character 604 * we just stored in the table (c). 605 */ 606 n = *fmt; 607 if (n == ']' || n < c) { 608 c = '-'; 609 break; /* resume the for(;;) */ 610 } 611 fmt++; 612 /* fill in the range */ 613 do { 614 tab[++c] = v; 615 } while (c < n); 616 c = n; 617 /* 618 * Alas, the V7 Unix scanf also treats formats 619 * such as [a-c-e] as `the letters a through e'. 620 * This too is permitted by the standard.... 621 */ 622 goto doswitch; 623 break; 624 625 case ']': /* end of scanset */ 626 return (fmt); 627 628 default: /* just another character */ 629 c = n; 630 break; 631 } 632 } 633 /* NOTREACHED */ 634} 635 636/* 637 * Convert a string to an unsigned quad integer. 638 * 639 * Ignores `locale' stuff. Assumes that the upper and lower case 640 * alphabets and digits are each contiguous. 641 */ 642u_quad_t 643strtouq(const char *nptr, const char **endptr, int base) 644{ 645 const char *s = nptr; 646 u_quad_t acc; 647 unsigned char c; 648 u_quad_t qbase, cutoff; 649 int neg, any, cutlim; 650 651 /* 652 * See strtoq for comments as to the logic used. 653 */ 654 s = nptr; 655 do { 656 c = *s++; 657 } while (isspace(c)); 658 if (c == '-') { 659 neg = 1; 660 c = *s++; 661 } else { 662 neg = 0; 663 if (c == '+') 664 c = *s++; 665 } 666 if ((base == 0 || base == 16) && 667 c == '0' && (*s == 'x' || *s == 'X')) { 668 c = s[1]; 669 s += 2; 670 base = 16; 671 } 672 if (base == 0) 673 base = c == '0' ? 8 : 10; 674 qbase = (unsigned)base; 675 cutoff = (u_quad_t)UQUAD_MAX / qbase; 676 cutlim = (u_quad_t)UQUAD_MAX % qbase; 677 for (acc = 0, any = 0;; c = *s++) { 678 if (!isascii(c)) 679 break; 680 if (isdigit(c)) 681 c -= '0'; 682 else if (isalpha(c)) 683 c -= isupper(c) ? 'A' - 10 : 'a' - 10; 684 else 685 break; 686 if (c >= base) 687 break; 688 if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) 689 any = -1; 690 else { 691 any = 1; 692 acc *= qbase; 693 acc += c; 694 } 695 } 696 if (any < 0) { 697 acc = UQUAD_MAX; 698 } else if (neg) 699 acc = -acc; 700 if (endptr != 0) 701 *endptr = (const char *)(any ? s - 1 : nptr); 702 return (acc); 703} 704 705/* 706 * Convert a string to a quad integer. 707 * 708 * Ignores `locale' stuff. Assumes that the upper and lower case 709 * alphabets and digits are each contiguous. 710 */ 711quad_t 712strtoq(const char *nptr, const char **endptr, int base) 713{ 714 const char *s; 715 u_quad_t acc; 716 unsigned char c; 717 u_quad_t qbase, cutoff; 718 int neg, any, cutlim; 719 720 /* 721 * Skip white space and pick up leading +/- sign if any. 722 * If base is 0, allow 0x for hex and 0 for octal, else 723 * assume decimal; if base is already 16, allow 0x. 724 */ 725 s = nptr; 726 do { 727 c = *s++; 728 } while (isspace(c)); 729 if (c == '-') { 730 neg = 1; 731 c = *s++; 732 } else { 733 neg = 0; 734 if (c == '+') 735 c = *s++; 736 } 737 if ((base == 0 || base == 16) && 738 c == '0' && (*s == 'x' || *s == 'X')) { 739 c = s[1]; 740 s += 2; 741 base = 16; 742 } 743 if (base == 0) 744 base = c == '0' ? 8 : 10; 745 746 /* 747 * Compute the cutoff value between legal numbers and illegal 748 * numbers. That is the largest legal value, divided by the 749 * base. An input number that is greater than this value, if 750 * followed by a legal input character, is too big. One that 751 * is equal to this value may be valid or not; the limit 752 * between valid and invalid numbers is then based on the last 753 * digit. For instance, if the range for quads is 754 * [-9223372036854775808..9223372036854775807] and the input base 755 * is 10, cutoff will be set to 922337203685477580 and cutlim to 756 * either 7 (neg==0) or 8 (neg==1), meaning that if we have 757 * accumulated a value > 922337203685477580, or equal but the 758 * next digit is > 7 (or 8), the number is too big, and we will 759 * return a range error. 760 * 761 * Set any if any `digits' consumed; make it negative to indicate 762 * overflow. 763 */ 764 qbase = (unsigned)base; 765 cutoff = neg ? (u_quad_t)-(QUAD_MIN + QUAD_MAX) + QUAD_MAX : QUAD_MAX; 766 cutlim = cutoff % qbase; 767 cutoff /= qbase; 768 for (acc = 0, any = 0;; c = *s++) { 769 if (!isascii(c)) 770 break; 771 if (isdigit(c)) 772 c -= '0'; 773 else if (isalpha(c)) 774 c -= isupper(c) ? 'A' - 10 : 'a' - 10; 775 else 776 break; 777 if (c >= base) 778 break; 779 if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) 780 any = -1; 781 else { 782 any = 1; 783 acc *= qbase; 784 acc += c; 785 } 786 } 787 if (any < 0) { 788 acc = neg ? QUAD_MIN : QUAD_MAX; 789 } else if (neg) 790 acc = -acc; 791 if (endptr != 0) 792 *endptr = (const char *)(any ? s - 1 : nptr); 793 return (acc); 794} 795 796/* 797 * Convert a string to a long integer. 798 * 799 * Ignores `locale' stuff. Assumes that the upper and lower case 800 * alphabets and digits are each contiguous. 801 */ 802long 803strtol(nptr, endptr, base) 804 const char *nptr; 805 const char **endptr; 806 int base; 807{ 808 const char *s = nptr; 809 unsigned long acc; 810 unsigned char c; 811 unsigned long cutoff; 812 int neg = 0, any, cutlim; 813 814 /* 815 * Skip white space and pick up leading +/- sign if any. 816 * If base is 0, allow 0x for hex and 0 for octal, else 817 * assume decimal; if base is already 16, allow 0x. 818 */ 819 do { 820 c = *s++; 821 } while (isspace(c)); 822 if (c == '-') { 823 neg = 1; 824 c = *s++; 825 } else if (c == '+') 826 c = *s++; 827 if ((base == 0 || base == 16) && 828 c == '0' && (*s == 'x' || *s == 'X')) { 829 c = s[1]; 830 s += 2; 831 base = 16; 832 } 833 if (base == 0) 834 base = c == '0' ? 8 : 10; 835 836 /* 837 * Compute the cutoff value between legal numbers and illegal 838 * numbers. That is the largest legal value, divided by the 839 * base. An input number that is greater than this value, if 840 * followed by a legal input character, is too big. One that 841 * is equal to this value may be valid or not; the limit 842 * between valid and invalid numbers is then based on the last 843 * digit. For instance, if the range for longs is 844 * [-2147483648..2147483647] and the input base is 10, 845 * cutoff will be set to 214748364 and cutlim to either 846 * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated 847 * a value > 214748364, or equal but the next digit is > 7 (or 8), 848 * the number is too big, and we will return a range error. 849 * 850 * Set any if any `digits' consumed; make it negative to indicate 851 * overflow. 852 */ 853 cutoff = neg ? -(unsigned long)LONG_MIN : LONG_MAX; 854 cutlim = cutoff % (unsigned long)base; 855 cutoff /= (unsigned long)base; 856 for (acc = 0, any = 0;; c = *s++) { 857 if (!isascii(c)) 858 break; 859 if (isdigit(c)) 860 c -= '0'; 861 else if (isalpha(c)) 862 c -= isupper(c) ? 'A' - 10 : 'a' - 10; 863 else 864 break; 865 if (c >= base) 866 break; 867 if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) 868 any = -1; 869 else { 870 any = 1; 871 acc *= base; 872 acc += c; 873 } 874 } 875 if (any < 0) { 876 acc = neg ? LONG_MIN : LONG_MAX; 877 } else if (neg) 878 acc = -acc; 879 if (endptr != 0) 880 *endptr = (const char *)(any ? s - 1 : nptr); 881 return (acc); 882} 883 884/* 885 * Convert a string to an unsigned long integer. 886 * 887 * Ignores `locale' stuff. Assumes that the upper and lower case 888 * alphabets and digits are each contiguous. 889 */ 890unsigned long 891strtoul(nptr, endptr, base) 892 const char *nptr; 893 const char **endptr; 894 int base; 895{ 896 const char *s = nptr; 897 unsigned long acc; 898 unsigned char c; 899 unsigned long cutoff; 900 int neg = 0, any, cutlim; 901 902 /* 903 * See strtol for comments as to the logic used. 904 */ 905 do { 906 c = *s++; 907 } while (isspace(c)); 908 if (c == '-') { 909 neg = 1; 910 c = *s++; 911 } else if (c == '+') 912 c = *s++; 913 if ((base == 0 || base == 16) && 914 c == '0' && (*s == 'x' || *s == 'X')) { 915 c = s[1]; 916 s += 2; 917 base = 16; 918 } 919 if (base == 0) 920 base = c == '0' ? 8 : 10; 921 cutoff = (unsigned long)ULONG_MAX / (unsigned long)base; 922 cutlim = (unsigned long)ULONG_MAX % (unsigned long)base; 923 for (acc = 0, any = 0;; c = *s++) { 924 if (!isascii(c)) 925 break; 926 if (isdigit(c)) 927 c -= '0'; 928 else if (isalpha(c)) 929 c -= isupper(c) ? 'A' - 10 : 'a' - 10; 930 else 931 break; 932 if (c >= base) 933 break; 934 if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) 935 any = -1; 936 else { 937 any = 1; 938 acc *= base; 939 acc += c; 940 } 941 } 942 if (any < 0) { 943 acc = ULONG_MAX; 944 } else if (neg) 945 acc = -acc; 946 if (endptr != 0) 947 *endptr = (const char *)(any ? s - 1 : nptr); 948 return (acc); 949} 950