1/* 2 * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/*- 29 * Copyright (c) 1990, 1993 30 * The Regents of the University of California. All rights reserved. 31 * 32 * This code is derived from software contributed to Berkeley by 33 * Chris Torek. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgement: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * 4. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64#include <sys/cdefs.h> 65 66#if 0 /* XXX coming soon */ 67#include <ctype.h> 68#else 69static inline int 70isspace(char c) 71{ 72 return (c == ' ' || c == '\t' || c == '\n' || c == '\12'); 73} 74#endif 75#include <stdarg.h> 76#include <string.h> 77#include <sys/param.h> 78#include <sys/systm.h> 79 80#define BUF 32 /* Maximum length of numeric string. */ 81 82/* 83 * Flags used during conversion. 84 */ 85#define LONG 0x01 /* l: long or double */ 86#define SHORT 0x04 /* h: short */ 87#define SUPPRESS 0x08 /* *: suppress assignment */ 88#define POINTER 0x10 /* p: void * (as hex) */ 89#define NOSKIP 0x20 /* [ or c: do not skip blanks */ 90#define LONGLONG 0x400 /* ll: long long (+ deprecated q: quad) */ 91#define SHORTSHORT 0x4000 /* hh: char */ 92#define UNSIGNED 0x8000 /* %[oupxX] conversions */ 93 94/* 95 * The following are used in numeric conversions only: 96 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point; 97 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral. 98 */ 99#define SIGNOK 0x40 /* +/- is (still) legal */ 100#define NDIGITS 0x80 /* no digits detected */ 101 102#define DPTOK 0x100 /* (float) decimal point is still legal */ 103#define EXPOK 0x200 /* (float) exponent (e+3, etc) still legal */ 104 105#define PFXOK 0x100 /* 0x prefix is (still) legal */ 106#define NZDIGITS 0x200 /* no zero digits detected */ 107 108/* 109 * Conversion types. 110 */ 111#define CT_CHAR 0 /* %c conversion */ 112#define CT_CCL 1 /* %[...] conversion */ 113#define CT_STRING 2 /* %s conversion */ 114#define CT_INT 3 /* %[dioupxX] conversion */ 115 116static const u_char *__sccl(char *, const u_char *); 117 118int 119sscanf(const char *ibuf, const char *fmt, ...) 120{ 121 va_list ap; 122 int ret; 123 124 va_start(ap, fmt); 125 ret = vsscanf(ibuf, fmt, ap); 126 va_end(ap); 127 return(ret); 128} 129 130int 131vsscanf(const char *inp, char const *fmt0, va_list ap) 132{ 133 int inr; 134 const u_char *fmt = (const u_char *)fmt0; 135 int c; /* character from format, or conversion */ 136 size_t width; /* field width, or 0 */ 137 char *p; /* points into all kinds of strings */ 138 int n; /* handy integer */ 139 int flags; /* flags as defined above */ 140 char *p0; /* saves original value of p when necessary */ 141 int nassigned; /* number of fields assigned */ 142 int nconversions; /* number of conversions */ 143 int nread; /* number of characters consumed from fp */ 144 int base; /* base argument to conversion function */ 145 char ccltab[256]; /* character class table for %[...] */ 146 char buf[BUF]; /* buffer for numeric conversions */ 147 148 /* `basefix' is used to avoid `if' tests in the integer scanner */ 149 static short basefix[17] = 150 { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; 151 152 inr = strlen(inp); 153 154 nassigned = 0; 155 nconversions = 0; 156 nread = 0; 157 base = 0; /* XXX just to keep gcc happy */ 158 for (;;) { 159 c = *fmt++; 160 if (c == 0) 161 return (nassigned); 162 if (isspace(c)) { 163 while (inr > 0 && isspace(*inp)) 164 nread++, inr--, inp++; 165 continue; 166 } 167 if (c != '%') 168 goto literal; 169 width = 0; 170 flags = 0; 171 /* 172 * switch on the format. continue if done; 173 * break once format type is derived. 174 */ 175again: c = *fmt++; 176 switch (c) { 177 case '%': 178literal: 179 if (inr <= 0) 180 goto input_failure; 181 if (*inp != c) 182 goto match_failure; 183 inr--, inp++; 184 nread++; 185 continue; 186 187 case '*': 188 flags |= SUPPRESS; 189 goto again; 190 case 'l': 191 if (flags & LONG) { 192 flags &= ~LONG; 193 flags |= LONGLONG; 194 } else 195 flags |= LONG; 196 goto again; 197 case 'q': 198 flags |= LONGLONG; /* not quite */ 199 goto again; 200 case 'h': 201 if (flags & SHORT) { 202 flags &= ~SHORT; 203 flags |= SHORTSHORT; 204 } else 205 flags |= SHORT; 206 goto again; 207 208 case '0': case '1': case '2': case '3': case '4': 209 case '5': case '6': case '7': case '8': case '9': 210 width = width * 10 + c - '0'; 211 goto again; 212 213 /* 214 * Conversions. 215 */ 216 case 'd': 217 c = CT_INT; 218 base = 10; 219 break; 220 221 case 'i': 222 c = CT_INT; 223 base = 0; 224 break; 225 226 case 'o': 227 c = CT_INT; 228 flags |= UNSIGNED; 229 base = 8; 230 break; 231 232 case 'u': 233 c = CT_INT; 234 flags |= UNSIGNED; 235 base = 10; 236 break; 237 238 case 'X': 239 case 'x': 240 flags |= PFXOK; /* enable 0x prefixing */ 241 c = CT_INT; 242 flags |= UNSIGNED; 243 base = 16; 244 break; 245 246 case 's': 247 c = CT_STRING; 248 break; 249 250 case '[': 251 fmt = __sccl(ccltab, fmt); 252 flags |= NOSKIP; 253 c = CT_CCL; 254 break; 255 256 case 'c': 257 flags |= NOSKIP; 258 c = CT_CHAR; 259 break; 260 261 case 'p': /* pointer format is like hex */ 262 flags |= POINTER | PFXOK; 263 c = CT_INT; 264 flags |= UNSIGNED; 265 base = 16; 266 break; 267 268 case 'n': 269 nconversions++; 270 if (flags & SUPPRESS) /* ??? */ 271 continue; 272 if (flags & SHORTSHORT) 273 *va_arg(ap, char *) = nread; 274 else if (flags & SHORT) 275 *va_arg(ap, short *) = nread; 276 else if (flags & LONG) 277 *va_arg(ap, long *) = nread; 278 else if (flags & LONGLONG) 279 *va_arg(ap, long long *) = nread; 280 else 281 *va_arg(ap, int *) = nread; 282 continue; 283 } 284 285 /* 286 * We have a conversion that requires input. 287 */ 288 if (inr <= 0) 289 goto input_failure; 290 291 /* 292 * Consume leading white space, except for formats 293 * that suppress this. 294 */ 295 if ((flags & NOSKIP) == 0) { 296 while (isspace(*inp)) { 297 nread++; 298 if (--inr > 0) 299 inp++; 300 else 301 goto input_failure; 302 } 303 /* 304 * Note that there is at least one character in 305 * the buffer, so conversions that do not set NOSKIP 306 * can no longer result in an input failure. 307 */ 308 } 309 310 /* 311 * Do the conversion. 312 */ 313 switch (c) { 314 315 case CT_CHAR: 316 /* scan arbitrary characters (sets NOSKIP) */ 317 if (width == 0) 318 width = 1; 319 if (flags & SUPPRESS) { 320 size_t sum = 0; 321 for (;;) { 322 if ((n = inr) < (int)width) { 323 sum += n; 324 width -= n; 325 inp += n; 326 if (sum == 0) 327 goto input_failure; 328 break; 329 } else { 330 sum += width; 331 inr -= width; 332 inp += width; 333 break; 334 } 335 } 336 nread += sum; 337 } else { 338 bcopy(inp, va_arg(ap, char *), width); 339 inr -= width; 340 inp += width; 341 nread += width; 342 nassigned++; 343 } 344 nconversions++; 345 break; 346 347 case CT_CCL: 348 /* scan a (nonempty) character class (sets NOSKIP) */ 349 if (width == 0) 350 width = (size_t)~0; /* `infinity' */ 351 /* take only those things in the class */ 352 if (flags & SUPPRESS) { 353 n = 0; 354 while (ccltab[(unsigned char)*inp]) { 355 n++, inr--, inp++; 356 if (--width == 0) 357 break; 358 if (inr <= 0) { 359 if (n == 0) 360 goto input_failure; 361 break; 362 } 363 } 364 if (n == 0) 365 goto match_failure; 366 } else { 367 p0 = p = va_arg(ap, char *); 368 while (ccltab[(unsigned char)*inp]) { 369 inr--; 370 *p++ = *inp++; 371 if (--width == 0) 372 break; 373 if (inr <= 0) { 374 if (p == p0) 375 goto input_failure; 376 break; 377 } 378 } 379 n = p - p0; 380 if (n == 0) 381 goto match_failure; 382 *p = 0; 383 nassigned++; 384 } 385 nread += n; 386 nconversions++; 387 break; 388 389 case CT_STRING: 390 /* like CCL, but zero-length string OK, & no NOSKIP */ 391 if (width == 0) 392 width = (size_t)~0; 393 if (flags & SUPPRESS) { 394 n = 0; 395 while (!isspace(*inp)) { 396 n++, inr--, inp++; 397 if (--width == 0) 398 break; 399 if (inr <= 0) 400 break; 401 } 402 nread += n; 403 } else { 404 p0 = p = va_arg(ap, char *); 405 while (!isspace(*inp)) { 406 inr--; 407 *p++ = *inp++; 408 if (--width == 0) 409 break; 410 if (inr <= 0) 411 break; 412 } 413 *p = 0; 414 nread += p - p0; 415 nassigned++; 416 } 417 nconversions++; 418 continue; 419 420 case CT_INT: 421 /* scan an integer as if by the conversion function */ 422#ifdef hardway 423 if (width == 0 || width > sizeof(buf) - 1) 424 width = sizeof(buf) - 1; 425#else 426 /* size_t is unsigned, hence this optimisation */ 427 if (--width > sizeof(buf) - 2) 428 width = sizeof(buf) - 2; 429 width++; 430#endif 431 flags |= SIGNOK | NDIGITS | NZDIGITS; 432 for (p = buf; width; width--) { 433 c = *inp; 434 /* 435 * Switch on the character; `goto ok' 436 * if we accept it as a part of number. 437 */ 438 switch (c) { 439 440 /* 441 * The digit 0 is always legal, but is 442 * special. For %i conversions, if no 443 * digits (zero or nonzero) have been 444 * scanned (only signs), we will have 445 * base==0. In that case, we should set 446 * it to 8 and enable 0x prefixing. 447 * Also, if we have not scanned zero digits 448 * before this, do not turn off prefixing 449 * (someone else will turn it off if we 450 * have scanned any nonzero digits). 451 */ 452 case '0': 453 if (base == 0) { 454 base = 8; 455 flags |= PFXOK; 456 } 457 if (flags & NZDIGITS) 458 flags &= ~(SIGNOK|NZDIGITS|NDIGITS); 459 else 460 flags &= ~(SIGNOK|PFXOK|NDIGITS); 461 goto ok; 462 463 /* 1 through 7 always legal */ 464 case '1': case '2': case '3': 465 case '4': case '5': case '6': case '7': 466 base = basefix[base]; 467 flags &= ~(SIGNOK | PFXOK | NDIGITS); 468 goto ok; 469 470 /* digits 8 and 9 ok iff decimal or hex */ 471 case '8': case '9': 472 base = basefix[base]; 473 if (base <= 8) 474 break; /* not legal here */ 475 flags &= ~(SIGNOK | PFXOK | NDIGITS); 476 goto ok; 477 478 /* letters ok iff hex */ 479 case 'A': case 'B': case 'C': 480 case 'D': case 'E': case 'F': 481 case 'a': case 'b': case 'c': 482 case 'd': case 'e': case 'f': 483 /* no need to fix base here */ 484 if (base <= 10) 485 break; /* not legal here */ 486 flags &= ~(SIGNOK | PFXOK | NDIGITS); 487 goto ok; 488 489 /* sign ok only as first character */ 490 case '+': case '-': 491 if (flags & SIGNOK) { 492 flags &= ~SIGNOK; 493 goto ok; 494 } 495 break; 496 497 /* x ok iff flag still set & 2nd char */ 498 case 'x': case 'X': 499 if (flags & PFXOK && p == buf + 1) { 500 base = 16; /* if %i */ 501 flags &= ~PFXOK; 502 goto ok; 503 } 504 break; 505 } 506 507 /* 508 * If we got here, c is not a legal character 509 * for a number. Stop accumulating digits. 510 */ 511 break; 512 ok: 513 /* 514 * c is legal: store it and look at the next. 515 */ 516 *p++ = c; 517 if (--inr > 0) 518 inp++; 519 else 520 break; /* end of input */ 521 } 522 /* 523 * If we had only a sign, it is no good; push 524 * back the sign. If the number ends in `x', 525 * it was [sign] '0' 'x', so push back the x 526 * and treat it as [sign] '0'. 527 */ 528 if (flags & NDIGITS) { 529 if (p > buf) { 530 inp--; 531 inr++; 532 } 533 goto match_failure; 534 } 535 c = ((u_char *)p)[-1]; 536 if (c == 'x' || c == 'X') { 537 --p; 538 inp--; 539 inr++; 540 } 541 if ((flags & SUPPRESS) == 0) { 542 u_quad_t res; 543 544 *p = 0; 545 if ((flags & UNSIGNED) == 0) 546 res = strtoq(buf, (char **)NULL, base); 547 else 548 res = strtouq(buf, (char **)NULL, base); 549 if (flags & POINTER) 550 *va_arg(ap, void **) = 551 (void *)(uintptr_t)res; 552 else if (flags & SHORTSHORT) 553 *va_arg(ap, char *) = res; 554 else if (flags & SHORT) 555 *va_arg(ap, short *) = res; 556 else if (flags & LONG) 557 *va_arg(ap, long *) = res; 558 else if (flags & LONGLONG) 559 *va_arg(ap, long long *) = res; 560 else 561 *va_arg(ap, int *) = res; 562 nassigned++; 563 } 564 nread += p - buf; 565 nconversions++; 566 break; 567 568 } 569 } 570input_failure: 571 return (nconversions != 0 ? nassigned : -1); 572match_failure: 573 return (nassigned); 574} 575 576/* 577 * Fill in the given table from the scanset at the given format 578 * (just after `['). Return a pointer to the character past the 579 * closing `]'. The table has a 1 wherever characters should be 580 * considered part of the scanset. 581 */ 582static const u_char * 583__sccl(char *tab, const u_char *fmt) 584{ 585 int c, n, v; 586 587 /* first `clear' the whole table */ 588 c = *fmt++; /* first char hat => negated scanset */ 589 if (c == '^') { 590 v = 1; /* default => accept */ 591 c = *fmt++; /* get new first char */ 592 } else 593 v = 0; /* default => reject */ 594 595 /* XXX: Will not work if sizeof(tab*) > sizeof(char) */ 596 (void) memset(tab, v, 256); 597 598 if (c == 0) 599 return (fmt - 1);/* format ended before closing ] */ 600 601 /* 602 * Now set the entries corresponding to the actual scanset 603 * to the opposite of the above. 604 * 605 * The first character may be ']' (or '-') without being special; 606 * the last character may be '-'. 607 */ 608 v = 1 - v; 609 for (;;) { 610 tab[c] = v; /* take character c */ 611doswitch: 612 n = *fmt++; /* and examine the next */ 613 switch (n) { 614 615 case 0: /* format ended too soon */ 616 return (fmt - 1); 617 618 case '-': 619 /* 620 * A scanset of the form 621 * [01+-] 622 * is defined as `the digit 0, the digit 1, 623 * the character +, the character -', but 624 * the effect of a scanset such as 625 * [a-zA-Z0-9] 626 * is implementation defined. The V7 Unix 627 * scanf treats `a-z' as `the letters a through 628 * z', but treats `a-a' as `the letter a, the 629 * character -, and the letter a'. 630 * 631 * For compatibility, the `-' is not considerd 632 * to define a range if the character following 633 * it is either a close bracket (required by ANSI) 634 * or is not numerically greater than the character 635 * we just stored in the table (c). 636 */ 637 n = *fmt; 638 if (n == ']' || n < c) { 639 c = '-'; 640 break; /* resume the for(;;) */ 641 } 642 fmt++; 643 /* fill in the range */ 644 do { 645 tab[++c] = v; 646 } while (c < n); 647 c = n; 648 /* 649 * Alas, the V7 Unix scanf also treats formats 650 * such as [a-c-e] as `the letters a through e'. 651 * This too is permitted by the standard.... 652 */ 653 goto doswitch; 654 break; 655 656 case ']': /* end of scanset */ 657 return (fmt); 658 659 default: /* just another character */ 660 c = n; 661 break; 662 } 663 } 664 /* NOTREACHED */ 665} 666