subr_scanf.c revision 43383
1185377Ssam/*- 2187831Ssam * Copyright (c) 1990, 1993 3185377Ssam * The Regents of the University of California. All rights reserved. 4185377Ssam * 5185377Ssam * This code is derived from software contributed to Berkeley by 6185377Ssam * Chris Torek. 7185377Ssam * 8185377Ssam * Redistribution and use in source and binary forms, with or without 9185377Ssam * modification, are permitted provided that the following conditions 10185377Ssam * are met: 11185377Ssam * 1. Redistributions of source code must retain the above copyright 12185377Ssam * notice, this list of conditions and the following disclaimer. 13185377Ssam * 2. Redistributions in binary form must reproduce the above copyright 14185377Ssam * notice, this list of conditions and the following disclaimer in the 15185377Ssam * documentation and/or other materials provided with the distribution. 16185377Ssam * 3. All advertising materials mentioning features or use of this software 17187510Ssam * must display the following acknowledgement: 18185377Ssam * This product includes software developed by the University of 19185377Ssam * California, Berkeley and its contributors. 20185377Ssam * 4. Neither the name of the University nor the names of its contributors 21185377Ssam * may be used to endorse or promote products derived from this software 22185377Ssam * without specific prior written permission. 23185377Ssam * 24185377Ssam * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25185377Ssam * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26185377Ssam * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27185377Ssam * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28185377Ssam * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29185377Ssam * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30185377Ssam * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31185377Ssam * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32185377Ssam * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33185377Ssam * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34185377Ssam * SUCH DAMAGE. 35185377Ssam * 36185377Ssam * $Id: subr_scanf.c,v 1.3 1999/01/28 00:57:47 dillon Exp $ 37185377Ssam * From: Id: vfscanf.c,v 1.13 1998/09/25 12:20:27 obrien Exp 38185377Ssam */ 39185377Ssam 40185377Ssam#include <sys/param.h> 41185377Ssam#include <sys/systm.h> 42185377Ssam#include <sys/kernel.h> 43185377Ssam#include <machine/limits.h> 44185377Ssam 45185377Ssam/* 46185377Ssam * Note that stdarg.h and the ANSI style va_start macro is used for both 47185377Ssam * ANSI and traditional C compilers. 48185377Ssam */ 49185377Ssam#include <machine/stdarg.h> 50185377Ssam 51185377Ssam#define BUF 32 /* Maximum length of numeric string. */ 52185377Ssam 53185377Ssam/* 54185377Ssam * Flags used during conversion. 55185377Ssam */ 56185377Ssam#define LONG 0x01 /* l: long or double */ 57185377Ssam#define SHORT 0x04 /* h: short */ 58185377Ssam#define SUPPRESS 0x08 /* suppress assignment */ 59185377Ssam#define POINTER 0x10 /* weird %p pointer (`fake hex') */ 60185377Ssam#define NOSKIP 0x20 /* do not skip blanks */ 61185377Ssam#define QUAD 0x400 62185377Ssam 63185377Ssam/* 64185377Ssam * The following are used in numeric conversions only: 65185377Ssam * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; 66185377Ssam * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 67185377Ssam */ 68185377Ssam#define SIGNOK 0x40 /* +/- is (still) legal */ 69185377Ssam#define NDIGITS 0x80 /* no digits detected */ 70185377Ssam 71185377Ssam#define DPTOK 0x100 /* (float) decimal point is still legal */ 72185377Ssam#define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ 73185377Ssam 74185377Ssam#define PFXOK 0x100 /* 0x prefix is (still) legal */ 75185377Ssam#define NZDIGITS 0x200 /* no zero digits detected */ 76185377Ssam 77185377Ssam/* 78185377Ssam * Conversion types. 79185377Ssam */ 80185377Ssam#define CT_CHAR 0 /* %c conversion */ 81185377Ssam#define CT_CCL 1 /* %[...] conversion */ 82185377Ssam#define CT_STRING 2 /* %s conversion */ 83185377Ssam#define CT_INT 3 /* integer, i.e., strtoq or strtouq */ 84185380Ssamtypedef u_quad_t (*ccfntype)(const char *, const char **, int); 85185377Ssam 86185380Ssam#define isspace(c) ((c) == ' ' || (c) == '\t' || \ 87185380Ssam (c) == '\r' || (c) == '\n') 88185380Ssam#define isascii(c) (((c) & ~0x7f) == 0) 89185380Ssam#define isupper(c) ((c) >= 'A' && (c) <= 'Z') 90185380Ssam#define islower(c) ((c) >= 'a' && (c) <= 'z') 91185380Ssam#define isalpha(c) (isupper(c) || (islower(c))) 92185380Ssam#define isdigit(c) ((c) >= '0' && (c) <= '9') 93185380Ssam 94185380Ssamstatic const u_char *__sccl(char *, const u_char *); 95185380Ssam 96185380Ssamint 97185380Ssamsscanf(const char *ibuf, const char *fmt, ...) 98185380Ssam{ 99185380Ssam va_list ap; 100185380Ssam int ret; 101185380Ssam 102185380Ssam va_start(ap, fmt); 103185380Ssam ret = vsscanf(ibuf, fmt, ap); 104185380Ssam va_end(ap); 105185380Ssam return(ret); 106185380Ssam} 107185380Ssam 108185380Ssamint 109185380Ssamvsscanf(const char *inp, char const *fmt0, va_list ap) 110185380Ssam{ 111185380Ssam int inr; 112185380Ssam const u_char *fmt = (const u_char *)fmt0; 113185380Ssam int c; /* character from format, or conversion */ 114185380Ssam size_t width; /* field width, or 0 */ 115185380Ssam char *p; /* points into all kinds of strings */ 116185380Ssam int n; /* handy integer */ 117185380Ssam int flags; /* flags as defined above */ 118185380Ssam char *p0; /* saves original value of p when necessary */ 119185380Ssam int nassigned; /* number of fields assigned */ 120185380Ssam int nconversions; /* number of conversions */ 121185380Ssam int nread; /* number of characters consumed from fp */ 122185380Ssam int base; /* base argument to strtoq/strtouq */ 123185380Ssam ccfntype ccfn; /* conversion function (strtoq/strtouq) */ 124185380Ssam char ccltab[256]; /* character class table for %[...] */ 125185380Ssam char buf[BUF]; /* buffer for numeric conversions */ 126185380Ssam 127185380Ssam /* `basefix' is used to avoid `if' tests in the integer scanner */ 128185380Ssam static short basefix[17] = 129185380Ssam { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 130185380Ssam 131185380Ssam inr = strlen(inp); 132185380Ssam 133185380Ssam nassigned = 0; 134185377Ssam nconversions = 0; 135185377Ssam nread = 0; 136185377Ssam base = 0; /* XXX just to keep gcc happy */ 137185377Ssam ccfn = NULL; /* XXX just to keep gcc happy */ 138185377Ssam for (;;) { 139185377Ssam c = *fmt++; 140185377Ssam if (c == 0) 141185377Ssam return (nassigned); 142185377Ssam if (isspace(c)) { 143185377Ssam while (inr > 0 && isspace(*inp)) 144185377Ssam nread++, inr--, inp++; 145185377Ssam continue; 146185377Ssam } 147185377Ssam if (c != '%') 148185377Ssam goto literal; 149185377Ssam width = 0; 150185377Ssam flags = 0; 151185377Ssam /* 152185377Ssam * switch on the format. continue if done; 153185377Ssam * break once format type is derived. 154185377Ssam */ 155185377Ssamagain: c = *fmt++; 156185377Ssam switch (c) { 157185377Ssam case '%': 158185377Ssamliteral: 159185377Ssam if (inr <= 0) 160185377Ssam goto input_failure; 161185377Ssam if (*inp != c) 162185377Ssam goto match_failure; 163185377Ssam inr--, inp++; 164185377Ssam nread++; 165185377Ssam continue; 166185377Ssam 167185377Ssam case '*': 168185377Ssam flags |= SUPPRESS; 169185377Ssam goto again; 170185377Ssam case 'l': 171185377Ssam flags |= LONG; 172185377Ssam goto again; 173185377Ssam case 'q': 174185377Ssam flags |= QUAD; 175185377Ssam goto again; 176185377Ssam case 'h': 177185377Ssam flags |= SHORT; 178185377Ssam goto again; 179185377Ssam 180185377Ssam case '0': case '1': case '2': case '3': case '4': 181185377Ssam case '5': case '6': case '7': case '8': case '9': 182185377Ssam width = width * 10 + c - '0'; 183185377Ssam goto again; 184185377Ssam 185185377Ssam /* 186185377Ssam * Conversions. 187185377Ssam * 188185377Ssam */ 189185377Ssam case 'd': 190185377Ssam c = CT_INT; 191185377Ssam ccfn = (ccfntype)strtoq; 192185377Ssam base = 10; 193185377Ssam break; 194185377Ssam 195185377Ssam case 'i': 196185377Ssam c = CT_INT; 197185377Ssam ccfn = (ccfntype)strtoq; 198185377Ssam base = 0; 199185377Ssam break; 200185377Ssam 201185377Ssam case 'o': 202185377Ssam c = CT_INT; 203185377Ssam ccfn = strtouq; 204185377Ssam base = 8; 205185377Ssam break; 206185377Ssam 207185377Ssam case 'u': 208185377Ssam c = CT_INT; 209185377Ssam ccfn = strtouq; 210185377Ssam base = 10; 211185377Ssam break; 212185377Ssam 213185377Ssam case 'x': 214185377Ssam flags |= PFXOK; /* enable 0x prefixing */ 215185377Ssam c = CT_INT; 216185377Ssam ccfn = strtouq; 217185377Ssam base = 16; 218185377Ssam break; 219185377Ssam 220185377Ssam case 's': 221185377Ssam c = CT_STRING; 222185377Ssam break; 223185377Ssam 224185377Ssam case '[': 225185377Ssam fmt = __sccl(ccltab, fmt); 226185377Ssam flags |= NOSKIP; 227185377Ssam c = CT_CCL; 228185377Ssam break; 229185377Ssam 230185377Ssam case 'c': 231185377Ssam flags |= NOSKIP; 232185377Ssam c = CT_CHAR; 233185377Ssam break; 234185377Ssam 235185377Ssam case 'p': /* pointer format is like hex */ 236185377Ssam flags |= POINTER | PFXOK; 237185377Ssam c = CT_INT; 238185377Ssam ccfn = strtouq; 239185377Ssam base = 16; 240185377Ssam break; 241185377Ssam 242185377Ssam case 'n': 243185377Ssam nconversions++; 244185377Ssam if (flags & SUPPRESS) /* ??? */ 245185377Ssam continue; 246185377Ssam if (flags & SHORT) 247185377Ssam *va_arg(ap, short *) = nread; 248185377Ssam else if (flags & LONG) 249185377Ssam *va_arg(ap, long *) = nread; 250185377Ssam else if (flags & QUAD) 251185377Ssam *va_arg(ap, quad_t *) = nread; 252185377Ssam else 253185377Ssam *va_arg(ap, int *) = nread; 254185377Ssam continue; 255185377Ssam } 256185377Ssam 257185377Ssam /* 258185377Ssam * We have a conversion that requires input. 259185377Ssam */ 260185377Ssam if (inr <= 0) 261185377Ssam goto input_failure; 262185377Ssam 263185377Ssam /* 264185377Ssam * Consume leading white space, except for formats 265185377Ssam * that suppress this. 266185377Ssam */ 267185377Ssam if ((flags & NOSKIP) == 0) { 268185377Ssam while (isspace(*inp)) { 269185377Ssam nread++; 270185377Ssam if (--inr > 0) 271185377Ssam inp++; 272185377Ssam else 273185377Ssam goto input_failure; 274185377Ssam } 275185377Ssam /* 276185377Ssam * Note that there is at least one character in 277185377Ssam * the buffer, so conversions that do not set NOSKIP 278185377Ssam * can no longer result in an input failure. 279185377Ssam */ 280185377Ssam } 281185377Ssam 282185377Ssam /* 283185377Ssam * Do the conversion. 284185377Ssam */ 285185377Ssam switch (c) { 286185377Ssam 287185377Ssam case CT_CHAR: 288185377Ssam /* scan arbitrary characters (sets NOSKIP) */ 289185377Ssam if (width == 0) 290185377Ssam width = 1; 291185377Ssam if (flags & SUPPRESS) { 292185377Ssam size_t sum = 0; 293185377Ssam for (;;) { 294185377Ssam if ((n = inr) < width) { 295185377Ssam sum += n; 296185377Ssam width -= n; 297185377Ssam inp += n; 298185377Ssam if (sum == 0) 299185377Ssam goto input_failure; 300185377Ssam break; 301185377Ssam } else { 302185377Ssam sum += width; 303185377Ssam inr -= width; 304185377Ssam inp += width; 305185377Ssam break; 306185377Ssam } 307185377Ssam } 308185377Ssam nread += sum; 309185377Ssam } else { 310185377Ssam bcopy(inp, va_arg(ap, char *), width); 311185377Ssam inr -= width; 312185377Ssam inp += width; 313185377Ssam nread += width; 314185377Ssam nassigned++; 315185377Ssam } 316185377Ssam nconversions++; 317185377Ssam break; 318185377Ssam 319185377Ssam case CT_CCL: 320185377Ssam /* scan a (nonempty) character class (sets NOSKIP) */ 321185377Ssam if (width == 0) 322185377Ssam width = (size_t)~0; /* `infinity' */ 323185377Ssam /* take only those things in the class */ 324185377Ssam if (flags & SUPPRESS) { 325185377Ssam n = 0; 326185377Ssam while (ccltab[(int)(unsigned char)*inp]) { 327185377Ssam n++, inr--, inp++; 328185377Ssam if (--width == 0) 329185377Ssam break; 330185377Ssam if (inr <= 0) { 331185377Ssam if (n == 0) 332185377Ssam goto input_failure; 333185377Ssam break; 334185377Ssam } 335185377Ssam } 336185377Ssam if (n == 0) 337185377Ssam goto match_failure; 338185377Ssam } else { 339185377Ssam p0 = p = va_arg(ap, char *); 340185377Ssam while (ccltab[(int)(unsigned char)*inp]) { 341185377Ssam inr--; 342185380Ssam *p++ = *inp++; 343185380Ssam if (--width == 0) 344185380Ssam break; 345185380Ssam if (inr <= 0) { 346185377Ssam if (p == p0) 347185377Ssam goto input_failure; 348185377Ssam break; 349185377Ssam } 350185377Ssam } 351185377Ssam n = p - p0; 352185377Ssam if (n == 0) 353185377Ssam goto match_failure; 354185377Ssam *p = 0; 355185377Ssam nassigned++; 356185377Ssam } 357185377Ssam nread += n; 358185377Ssam nconversions++; 359185377Ssam break; 360185377Ssam 361185377Ssam case CT_STRING: 362185377Ssam /* like CCL, but zero-length string OK, & no NOSKIP */ 363185377Ssam if (width == 0) 364185377Ssam width = (size_t)~0; 365185377Ssam if (flags & SUPPRESS) { 366185377Ssam n = 0; 367185377Ssam while (!isspace(*inp)) { 368185377Ssam n++, inr--, inp++; 369185377Ssam if (--width == 0) 370185377Ssam break; 371185377Ssam if (inr <= 0) 372185377Ssam break; 373185377Ssam } 374185377Ssam nread += n; 375185377Ssam } else { 376185377Ssam p0 = p = va_arg(ap, char *); 377185377Ssam while (!isspace(*inp)) { 378185377Ssam inr--; 379185377Ssam *p++ = *inp++; 380185377Ssam if (--width == 0) 381185377Ssam break; 382185377Ssam if (inr <= 0) 383185377Ssam break; 384185377Ssam } 385185377Ssam *p = 0; 386185377Ssam nread += p - p0; 387185377Ssam nassigned++; 388185377Ssam } 389185377Ssam nconversions++; 390185377Ssam continue; 391185377Ssam 392185377Ssam case CT_INT: 393185377Ssam /* scan an integer as if by strtoq/strtouq */ 394185377Ssam#ifdef hardway 395185377Ssam if (width == 0 || width > sizeof(buf) - 1) 396185377Ssam width = sizeof(buf) - 1; 397185377Ssam#else 398185377Ssam /* size_t is unsigned, hence this optimisation */ 399185377Ssam if (--width > sizeof(buf) - 2) 400185377Ssam width = sizeof(buf) - 2; 401185377Ssam width++; 402185377Ssam#endif 403185377Ssam flags |= SIGNOK | NDIGITS | NZDIGITS; 404185377Ssam for (p = buf; width; width--) { 405185377Ssam c = *inp; 406185377Ssam /* 407185377Ssam * Switch on the character; `goto ok' 408185377Ssam * if we accept it as a part of number. 409185377Ssam */ 410185377Ssam switch (c) { 411185377Ssam 412185377Ssam /* 413185377Ssam * The digit 0 is always legal, but is 414185377Ssam * special. For %i conversions, if no 415185377Ssam * digits (zero or nonzero) have been 416185377Ssam * scanned (only signs), we will have 417185377Ssam * base==0. In that case, we should set 418185377Ssam * it to 8 and enable 0x prefixing. 419185377Ssam * Also, if we have not scanned zero digits 420185377Ssam * before this, do not turn off prefixing 421185377Ssam * (someone else will turn it off if we 422185377Ssam * have scanned any nonzero digits). 423185377Ssam */ 424185377Ssam case '0': 425185377Ssam if (base == 0) { 426185377Ssam base = 8; 427185377Ssam flags |= PFXOK; 428185377Ssam } 429185377Ssam if (flags & NZDIGITS) 430185377Ssam flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 431185377Ssam else 432185377Ssam flags &= ~(SIGNOK|PFXOK|NDIGITS); 433185377Ssam goto ok; 434185377Ssam 435187831Ssam /* 1 through 7 always legal */ 436185377Ssam case '1': case '2': case '3': 437185377Ssam case '4': case '5': case '6': case '7': 438185377Ssam base = basefix[base]; 439185377Ssam flags &= ~(SIGNOK | PFXOK | NDIGITS); 440185377Ssam goto ok; 441185377Ssam 442185377Ssam /* digits 8 and 9 ok iff decimal or hex */ 443185377Ssam case '8': case '9': 444185377Ssam base = basefix[base]; 445185377Ssam if (base <= 8) 446185377Ssam break; /* not legal here */ 447185377Ssam flags &= ~(SIGNOK | PFXOK | NDIGITS); 448185377Ssam goto ok; 449185377Ssam 450185377Ssam /* letters ok iff hex */ 451185377Ssam case 'A': case 'B': case 'C': 452185377Ssam case 'D': case 'E': case 'F': 453185377Ssam case 'a': case 'b': case 'c': 454185377Ssam case 'd': case 'e': case 'f': 455185377Ssam /* no need to fix base here */ 456185377Ssam if (base <= 10) 457185377Ssam break; /* not legal here */ 458185377Ssam flags &= ~(SIGNOK | PFXOK | NDIGITS); 459185377Ssam goto ok; 460185377Ssam 461185377Ssam /* sign ok only as first character */ 462185377Ssam case '+': case '-': 463185377Ssam if (flags & SIGNOK) { 464185377Ssam flags &= ~SIGNOK; 465185377Ssam goto ok; 466185377Ssam } 467185377Ssam break; 468185377Ssam 469185377Ssam /* x ok iff flag still set & 2nd char */ 470185377Ssam case 'x': case 'X': 471185377Ssam if (flags & PFXOK && p == buf + 1) { 472185377Ssam base = 16; /* if %i */ 473185377Ssam flags &= ~PFXOK; 474185377Ssam goto ok; 475185377Ssam } 476185377Ssam break; 477185377Ssam } 478185377Ssam 479185377Ssam /* 480185377Ssam * If we got here, c is not a legal character 481185377Ssam * for a number. Stop accumulating digits. 482185377Ssam */ 483185377Ssam break; 484185377Ssam ok: 485185377Ssam /* 486185377Ssam * c is legal: store it and look at the next. 487185377Ssam */ 488185377Ssam *p++ = c; 489185377Ssam if (--inr > 0) 490185377Ssam inp++; 491185377Ssam else 492185377Ssam break; /* end of input */ 493185377Ssam } 494185377Ssam /* 495185377Ssam * If we had only a sign, it is no good; push 496185377Ssam * back the sign. If the number ends in `x', 497185377Ssam * it was [sign] '0' 'x', so push back the x 498185377Ssam * and treat it as [sign] '0'. 499185377Ssam */ 500185377Ssam if (flags & NDIGITS) { 501185377Ssam if (p > buf) { 502185377Ssam inp--; 503185377Ssam inr++; 504185377Ssam } 505185377Ssam goto match_failure; 506185377Ssam } 507185377Ssam c = ((u_char *)p)[-1]; 508185377Ssam if (c == 'x' || c == 'X') { 509185377Ssam --p; 510185377Ssam inp--; 511185377Ssam inr++; 512185377Ssam } 513185377Ssam if ((flags & SUPPRESS) == 0) { 514185377Ssam u_quad_t res; 515185377Ssam 516185377Ssam *p = 0; 517185377Ssam res = (*ccfn)(buf, (const char **)NULL, base); 518185377Ssam if (flags & POINTER) 519185377Ssam *va_arg(ap, void **) = 520185377Ssam (void *)(u_long)res; 521185377Ssam else if (flags & SHORT) 522185377Ssam *va_arg(ap, short *) = res; 523185377Ssam else if (flags & LONG) 524185377Ssam *va_arg(ap, long *) = res; 525185377Ssam else if (flags & QUAD) 526185377Ssam *va_arg(ap, quad_t *) = res; 527185377Ssam else 528185377Ssam *va_arg(ap, int *) = res; 529185377Ssam nassigned++; 530185377Ssam } 531185377Ssam nread += p - buf; 532185377Ssam nconversions++; 533185377Ssam break; 534185377Ssam 535185377Ssam } 536185377Ssam } 537185377Ssaminput_failure: 538185377Ssam return (nconversions != 0 ? nassigned : -1); 539185377Ssammatch_failure: 540185377Ssam return (nassigned); 541185377Ssam} 542185377Ssam 543185377Ssam/* 544185377Ssam * Fill in the given table from the scanset at the given format 545185377Ssam * (just after `['). Return a pointer to the character past the 546185377Ssam * closing `]'. The table has a 1 wherever characters should be 547185377Ssam * considered part of the scanset. 548185377Ssam */ 549185377Ssamstatic const u_char * 550185377Ssam__sccl(char *tab, const u_char *fmt) 551185377Ssam{ 552185377Ssam int c, n, v; 553185377Ssam 554185377Ssam /* first `clear' the whole table */ 555185377Ssam c = *fmt++; /* first char hat => negated scanset */ 556185377Ssam if (c == '^') { 557185377Ssam v = 1; /* default => accept */ 558185377Ssam c = *fmt++; /* get new first char */ 559185377Ssam } else 560185377Ssam v = 0; /* default => reject */ 561185377Ssam 562185377Ssam /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ 563185377Ssam for (n = 0; n < 256; n++) 564185377Ssam tab[n] = v; /* memset(tab, v, 256) */ 565185377Ssam 566185377Ssam if (c == 0) 567187831Ssam return (fmt - 1);/* format ended before closing ] */ 568187831Ssam 569185377Ssam /* 570185377Ssam * Now set the entries corresponding to the actual scanset 571185377Ssam * to the opposite of the above. 572185377Ssam * 573185377Ssam * The first character may be ']' (or '-') without being special; 574185377Ssam * the last character may be '-'. 575185377Ssam */ 576185377Ssam v = 1 - v; 577185377Ssam for (;;) { 578185377Ssam tab[c] = v; /* take character c */ 579185377Ssamdoswitch: 580185377Ssam n = *fmt++; /* and examine the next */ 581185377Ssam switch (n) { 582185377Ssam 583185377Ssam case 0: /* format ended too soon */ 584185377Ssam return (fmt - 1); 585185377Ssam 586185377Ssam case '-': 587185377Ssam /* 588185377Ssam * A scanset of the form 589185377Ssam * [01+-] 590185377Ssam * is defined as `the digit 0, the digit 1, 591185377Ssam * the character +, the character -', but 592185377Ssam * the effect of a scanset such as 593185377Ssam * [a-zA-Z0-9] 594185377Ssam * is implementation defined. The V7 Unix 595185377Ssam * scanf treats `a-z' as `the letters a through 596185377Ssam * z', but treats `a-a' as `the letter a, the 597185377Ssam * character -, and the letter a'. 598185377Ssam * 599185377Ssam * For compatibility, the `-' is not considerd 600185377Ssam * to define a range if the character following 601185377Ssam * it is either a close bracket (required by ANSI) 602185377Ssam * or is not numerically greater than the character 603185377Ssam * we just stored in the table (c). 604185377Ssam */ 605185377Ssam n = *fmt; 606185377Ssam if (n == ']' || n < c) { 607187831Ssam c = '-'; 608185377Ssam break; /* resume the for(;;) */ 609185377Ssam } 610185377Ssam fmt++; 611185377Ssam /* fill in the range */ 612185377Ssam do { 613185377Ssam tab[++c] = v; 614185377Ssam } while (c < n); 615185377Ssam c = n; 616185377Ssam /* 617185377Ssam * Alas, the V7 Unix scanf also treats formats 618185377Ssam * such as [a-c-e] as `the letters a through e'. 619185377Ssam * This too is permitted by the standard.... 620185377Ssam */ 621185377Ssam goto doswitch; 622185377Ssam break; 623185377Ssam 624185377Ssam case ']': /* end of scanset */ 625185377Ssam return (fmt); 626185377Ssam 627185377Ssam default: /* just another character */ 628185377Ssam c = n; 629185377Ssam break; 630185377Ssam } 631185377Ssam } 632185377Ssam /* NOTREACHED */ 633185377Ssam} 634185377Ssam 635185377Ssam/* 636185377Ssam * Convert a string to an unsigned quad integer. 637185377Ssam * 638185377Ssam * Ignores `locale' stuff. Assumes that the upper and lower case 639185377Ssam * alphabets and digits are each contiguous. 640185377Ssam */ 641185377Ssamu_quad_t 642185377Ssamstrtouq(const char *nptr, const char **endptr, int base) 643185377Ssam{ 644185377Ssam const char *s = nptr; 645185377Ssam u_quad_t acc; 646185377Ssam unsigned char c; 647185377Ssam u_quad_t qbase, cutoff; 648185377Ssam int neg, any, cutlim; 649185377Ssam 650185377Ssam /* 651185377Ssam * See strtoq for comments as to the logic used. 652185377Ssam */ 653185377Ssam s = nptr; 654185377Ssam do { 655185377Ssam c = *s++; 656185377Ssam } while (isspace(c)); 657185377Ssam if (c == '-') { 658185377Ssam neg = 1; 659185377Ssam c = *s++; 660185377Ssam } else { 661185377Ssam neg = 0; 662185377Ssam if (c == '+') 663185377Ssam c = *s++; 664185377Ssam } 665185377Ssam if ((base == 0 || base == 16) && 666185377Ssam c == '0' && (*s == 'x' || *s == 'X')) { 667185377Ssam c = s[1]; 668185377Ssam s += 2; 669185377Ssam base = 16; 670185377Ssam } 671185377Ssam if (base == 0) 672185377Ssam base = c == '0' ? 8 : 10; 673185377Ssam qbase = (unsigned)base; 674185377Ssam cutoff = (u_quad_t)UQUAD_MAX / qbase; 675185377Ssam cutlim = (u_quad_t)UQUAD_MAX % qbase; 676185377Ssam for (acc = 0, any = 0;; c = *s++) { 677185377Ssam if (!isascii(c)) 678185377Ssam break; 679185377Ssam if (isdigit(c)) 680185377Ssam c -= '0'; 681185377Ssam else if (isalpha(c)) 682185377Ssam c -= isupper(c) ? 'A' - 10 : 'a' - 10; 683185377Ssam else 684185377Ssam break; 685185377Ssam if (c >= base) 686 break; 687 if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) 688 any = -1; 689 else { 690 any = 1; 691 acc *= qbase; 692 acc += c; 693 } 694 } 695 if (any < 0) { 696 acc = UQUAD_MAX; 697 } else if (neg) 698 acc = -acc; 699 if (endptr != 0) 700 *endptr = (const char *)(any ? s - 1 : nptr); 701 return (acc); 702} 703 704/* 705 * Convert a string to a quad integer. 706 * 707 * Ignores `locale' stuff. Assumes that the upper and lower case 708 * alphabets and digits are each contiguous. 709 */ 710quad_t 711strtoq(const char *nptr, const char **endptr, int base) 712{ 713 const char *s; 714 u_quad_t acc; 715 unsigned char c; 716 u_quad_t qbase, cutoff; 717 int neg, any, cutlim; 718 719 /* 720 * Skip white space and pick up leading +/- sign if any. 721 * If base is 0, allow 0x for hex and 0 for octal, else 722 * assume decimal; if base is already 16, allow 0x. 723 */ 724 s = nptr; 725 do { 726 c = *s++; 727 } while (isspace(c)); 728 if (c == '-') { 729 neg = 1; 730 c = *s++; 731 } else { 732 neg = 0; 733 if (c == '+') 734 c = *s++; 735 } 736 if ((base == 0 || base == 16) && 737 c == '0' && (*s == 'x' || *s == 'X')) { 738 c = s[1]; 739 s += 2; 740 base = 16; 741 } 742 if (base == 0) 743 base = c == '0' ? 8 : 10; 744 745 /* 746 * Compute the cutoff value between legal numbers and illegal 747 * numbers. That is the largest legal value, divided by the 748 * base. An input number that is greater than this value, if 749 * followed by a legal input character, is too big. One that 750 * is equal to this value may be valid or not; the limit 751 * between valid and invalid numbers is then based on the last 752 * digit. For instance, if the range for quads is 753 * [-9223372036854775808..9223372036854775807] and the input base 754 * is 10, cutoff will be set to 922337203685477580 and cutlim to 755 * either 7 (neg==0) or 8 (neg==1), meaning that if we have 756 * accumulated a value > 922337203685477580, or equal but the 757 * next digit is > 7 (or 8), the number is too big, and we will 758 * return a range error. 759 * 760 * Set any if any `digits' consumed; make it negative to indicate 761 * overflow. 762 */ 763 qbase = (unsigned)base; 764 cutoff = neg ? (u_quad_t)-(QUAD_MIN + QUAD_MAX) + QUAD_MAX : QUAD_MAX; 765 cutlim = cutoff % qbase; 766 cutoff /= qbase; 767 for (acc = 0, any = 0;; c = *s++) { 768 if (!isascii(c)) 769 break; 770 if (isdigit(c)) 771 c -= '0'; 772 else if (isalpha(c)) 773 c -= isupper(c) ? 'A' - 10 : 'a' - 10; 774 else 775 break; 776 if (c >= base) 777 break; 778 if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) 779 any = -1; 780 else { 781 any = 1; 782 acc *= qbase; 783 acc += c; 784 } 785 } 786 if (any < 0) { 787 acc = neg ? QUAD_MIN : QUAD_MAX; 788 } else if (neg) 789 acc = -acc; 790 if (endptr != 0) 791 *endptr = (const char *)(any ? s - 1 : nptr); 792 return (acc); 793} 794