1/*
2 * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*-
29 * Copyright (c) 1990, 1993
30 *	The Regents of the University of California.  All rights reserved.
31 *
32 * This code is derived from software contributed to Berkeley by
33 * Chris Torek.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 *    notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 *    notice, this list of conditions and the following disclaimer in the
42 *    documentation and/or other materials provided with the distribution.
43 * 3. All advertising materials mentioning features or use of this software
44 *    must display the following acknowledgement:
45 *	This product includes software developed by the University of
46 *	California, Berkeley and its contributors.
47 * 4. Neither the name of the University nor the names of its contributors
48 *    may be used to endorse or promote products derived from this software
49 *    without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 */
63
64#include <sys/cdefs.h>
65
66#if 0 /* XXX coming soon */
67#include <ctype.h>
68#else
/*
 * Minimal stand-in for <ctype.h> isspace(): returns non-zero when c is
 * one of the white-space characters of the "C" locale (space, horizontal
 * tab, newline, vertical tab, form feed, carriage return), zero otherwise.
 */
static inline int
isspace(char c)
{
	return (c == ' ' || c == '\t' || c == '\n' ||
	    c == '\v' || c == '\f' || c == '\r');
}
74#endif
75#include <stdarg.h>
76#include <string.h>
77#include <sys/param.h>
78#include <sys/systm.h>
79
/*
 * Size of the scratch buffer vsscanf() collects the text of an integer
 * field into before converting it.
 */
#define	BUF		32 	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 * These are OR-ed together in vsscanf()'s `flags' variable while a single
 * conversion specification is being parsed and executed.
 */
#define	LONG		0x01	/* l: long or double */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* *: suppress assignment */
#define	POINTER		0x10	/* p: void * (as hex) */
#define	NOSKIP		0x20	/* [ or c: do not skip blanks */
#define	LONGLONG	0x400	/* ll: long long (+ deprecated q: quad) */
#define	SHORTSHORT	0x4000	/* hh: char */
#define	UNSIGNED	0x8000	/* %[oupxX] conversions */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
 *
 * The floating-point pair (DPTOK, EXPOK) and the integral pair
 * (PFXOK, NZDIGITS) deliberately share the same bit values.
 */
#define	SIGNOK		0x40	/* +/- is (still) legal */
#define	NDIGITS		0x80	/* no digits detected */

#define	DPTOK		0x100	/* (float) decimal point is still legal */
#define	EXPOK		0x200	/* (float) exponent (e+3, etc) still legal */

#define	PFXOK		0x100	/* 0x prefix is (still) legal */
#define	NZDIGITS	0x200	/* no zero digits detected */

/*
 * Conversion types.
 * After the format switch in vsscanf(), `c' holds one of these values and
 * selects the conversion routine.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* %[dioupxX] conversion */

/* Builds the 256-entry membership table for a %[...] scanset. */
static const u_char *__sccl(char *, const u_char *);
117
/*
 * Variadic front end for vsscanf(): packages the trailing arguments into
 * a va_list, lets vsscanf() scan `ibuf' according to `fmt', and hands
 * back whatever vsscanf() returned.
 */
int
sscanf(const char *ibuf, const char *fmt, ...)
{
	va_list args;
	int result;

	va_start(args, fmt);
	result = vsscanf(ibuf, fmt, args);
	va_end(args);

	return result;
}
129
130int
131vsscanf(const char *inp, char const *fmt0, va_list ap)
132{
133	int inr;
134	const u_char *fmt = (const u_char *)fmt0;
135	int c;			/* character from format, or conversion */
136	size_t width;		/* field width, or 0 */
137	char *p;		/* points into all kinds of strings */
138	int n;			/* handy integer */
139	int flags;		/* flags as defined above */
140	char *p0;		/* saves original value of p when necessary */
141	int nassigned;		/* number of fields assigned */
142	int nconversions;	/* number of conversions */
143	int nread;		/* number of characters consumed from fp */
144	int base;		/* base argument to conversion function */
145	char ccltab[256];	/* character class table for %[...] */
146	char buf[BUF];		/* buffer for numeric conversions */
147
148	/* `basefix' is used to avoid `if' tests in the integer scanner */
149	static short basefix[17] =
150		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
151
152	inr = strlen(inp);
153
154	nassigned = 0;
155	nconversions = 0;
156	nread = 0;
157	base = 0;		/* XXX just to keep gcc happy */
158	for (;;) {
159		c = *fmt++;
160		if (c == 0)
161			return (nassigned);
162		if (isspace(c)) {
163			while (inr > 0 && isspace(*inp))
164				nread++, inr--, inp++;
165			continue;
166		}
167		if (c != '%')
168			goto literal;
169		width = 0;
170		flags = 0;
171		/*
172		 * switch on the format.  continue if done;
173		 * break once format type is derived.
174		 */
175again:		c = *fmt++;
176		switch (c) {
177		case '%':
178literal:
179			if (inr <= 0)
180				goto input_failure;
181			if (*inp != c)
182				goto match_failure;
183			inr--, inp++;
184			nread++;
185			continue;
186
187		case '*':
188			flags |= SUPPRESS;
189			goto again;
190		case 'l':
191			if (flags & LONG) {
192				flags &= ~LONG;
193				flags |= LONGLONG;
194			} else
195				flags |= LONG;
196			goto again;
197		case 'q':
198			flags |= LONGLONG;	/* not quite */
199			goto again;
200		case 'h':
201			if (flags & SHORT) {
202				flags &= ~SHORT;
203				flags |= SHORTSHORT;
204			} else
205				flags |= SHORT;
206			goto again;
207
208		case '0': case '1': case '2': case '3': case '4':
209		case '5': case '6': case '7': case '8': case '9':
210			width = width * 10 + c - '0';
211			goto again;
212
213		/*
214		 * Conversions.
215		 */
216		case 'd':
217			c = CT_INT;
218			base = 10;
219			break;
220
221		case 'i':
222			c = CT_INT;
223			base = 0;
224			break;
225
226		case 'o':
227			c = CT_INT;
228			flags |= UNSIGNED;
229			base = 8;
230			break;
231
232		case 'u':
233			c = CT_INT;
234			flags |= UNSIGNED;
235			base = 10;
236			break;
237
238		case 'X':
239		case 'x':
240			flags |= PFXOK;	/* enable 0x prefixing */
241			c = CT_INT;
242			flags |= UNSIGNED;
243			base = 16;
244			break;
245
246		case 's':
247			c = CT_STRING;
248			break;
249
250		case '[':
251			fmt = __sccl(ccltab, fmt);
252			flags |= NOSKIP;
253			c = CT_CCL;
254			break;
255
256		case 'c':
257			flags |= NOSKIP;
258			c = CT_CHAR;
259			break;
260
261		case 'p':	/* pointer format is like hex */
262			flags |= POINTER | PFXOK;
263			c = CT_INT;
264			flags |= UNSIGNED;
265			base = 16;
266			break;
267
268		case 'n':
269			nconversions++;
270			if (flags & SUPPRESS)	/* ??? */
271				continue;
272			if (flags & SHORTSHORT)
273				*va_arg(ap, char *) = nread;
274			else if (flags & SHORT)
275				*va_arg(ap, short *) = nread;
276			else if (flags & LONG)
277				*va_arg(ap, long *) = nread;
278			else if (flags & LONGLONG)
279				*va_arg(ap, long long *) = nread;
280			else
281				*va_arg(ap, int *) = nread;
282			continue;
283		}
284
285		/*
286		 * We have a conversion that requires input.
287		 */
288		if (inr <= 0)
289			goto input_failure;
290
291		/*
292		 * Consume leading white space, except for formats
293		 * that suppress this.
294		 */
295		if ((flags & NOSKIP) == 0) {
296			while (isspace(*inp)) {
297				nread++;
298				if (--inr > 0)
299					inp++;
300				else
301					goto input_failure;
302			}
303			/*
304			 * Note that there is at least one character in
305			 * the buffer, so conversions that do not set NOSKIP
306			 * can no longer result in an input failure.
307			 */
308		}
309
310		/*
311		 * Do the conversion.
312		 */
313		switch (c) {
314
315		case CT_CHAR:
316			/* scan arbitrary characters (sets NOSKIP) */
317			if (width == 0)
318				width = 1;
319			if (flags & SUPPRESS) {
320				size_t sum = 0;
321				for (;;) {
322					if ((n = inr) < (int)width) {
323						sum += n;
324						width -= n;
325						inp += n;
326						if (sum == 0)
327							goto input_failure;
328						break;
329					} else {
330						sum += width;
331						inr -= width;
332						inp += width;
333						break;
334					}
335				}
336				nread += sum;
337			} else {
338				bcopy(inp, va_arg(ap, char *), width);
339				inr -= width;
340				inp += width;
341				nread += width;
342				nassigned++;
343			}
344			nconversions++;
345			break;
346
347		case CT_CCL:
348			/* scan a (nonempty) character class (sets NOSKIP) */
349			if (width == 0)
350				width = (size_t)~0;	/* `infinity' */
351			/* take only those things in the class */
352			if (flags & SUPPRESS) {
353				n = 0;
354				while (ccltab[(unsigned char)*inp]) {
355					n++, inr--, inp++;
356					if (--width == 0)
357						break;
358					if (inr <= 0) {
359						if (n == 0)
360							goto input_failure;
361						break;
362					}
363				}
364				if (n == 0)
365					goto match_failure;
366			} else {
367				p0 = p = va_arg(ap, char *);
368				while (ccltab[(unsigned char)*inp]) {
369					inr--;
370					*p++ = *inp++;
371					if (--width == 0)
372						break;
373					if (inr <= 0) {
374						if (p == p0)
375							goto input_failure;
376						break;
377					}
378				}
379				n = p - p0;
380				if (n == 0)
381					goto match_failure;
382				*p = 0;
383				nassigned++;
384			}
385			nread += n;
386			nconversions++;
387			break;
388
389		case CT_STRING:
390			/* like CCL, but zero-length string OK, & no NOSKIP */
391			if (width == 0)
392				width = (size_t)~0;
393			if (flags & SUPPRESS) {
394				n = 0;
395				while (!isspace(*inp)) {
396					n++, inr--, inp++;
397					if (--width == 0)
398						break;
399					if (inr <= 0)
400						break;
401				}
402				nread += n;
403			} else {
404				p0 = p = va_arg(ap, char *);
405				while (!isspace(*inp)) {
406					inr--;
407					*p++ = *inp++;
408					if (--width == 0)
409						break;
410					if (inr <= 0)
411						break;
412				}
413				*p = 0;
414				nread += p - p0;
415				nassigned++;
416			}
417			nconversions++;
418			continue;
419
420		case CT_INT:
421			/* scan an integer as if by the conversion function */
422#ifdef hardway
423			if (width == 0 || width > sizeof(buf) - 1)
424				width = sizeof(buf) - 1;
425#else
426			/* size_t is unsigned, hence this optimisation */
427			if (--width > sizeof(buf) - 2)
428				width = sizeof(buf) - 2;
429			width++;
430#endif
431			flags |= SIGNOK | NDIGITS | NZDIGITS;
432			for (p = buf; width; width--) {
433				c = *inp;
434				/*
435				 * Switch on the character; `goto ok'
436				 * if we accept it as a part of number.
437				 */
438				switch (c) {
439
440				/*
441				 * The digit 0 is always legal, but is
442				 * special.  For %i conversions, if no
443				 * digits (zero or nonzero) have been
444				 * scanned (only signs), we will have
445				 * base==0.  In that case, we should set
446				 * it to 8 and enable 0x prefixing.
447				 * Also, if we have not scanned zero digits
448				 * before this, do not turn off prefixing
449				 * (someone else will turn it off if we
450				 * have scanned any nonzero digits).
451				 */
452				case '0':
453					if (base == 0) {
454						base = 8;
455						flags |= PFXOK;
456					}
457					if (flags & NZDIGITS)
458					    flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
459					else
460					    flags &= ~(SIGNOK|PFXOK|NDIGITS);
461					goto ok;
462
463				/* 1 through 7 always legal */
464				case '1': case '2': case '3':
465				case '4': case '5': case '6': case '7':
466					base = basefix[base];
467					flags &= ~(SIGNOK | PFXOK | NDIGITS);
468					goto ok;
469
470				/* digits 8 and 9 ok iff decimal or hex */
471				case '8': case '9':
472					base = basefix[base];
473					if (base <= 8)
474						break;	/* not legal here */
475					flags &= ~(SIGNOK | PFXOK | NDIGITS);
476					goto ok;
477
478				/* letters ok iff hex */
479				case 'A': case 'B': case 'C':
480				case 'D': case 'E': case 'F':
481				case 'a': case 'b': case 'c':
482				case 'd': case 'e': case 'f':
483					/* no need to fix base here */
484					if (base <= 10)
485						break;	/* not legal here */
486					flags &= ~(SIGNOK | PFXOK | NDIGITS);
487					goto ok;
488
489				/* sign ok only as first character */
490				case '+': case '-':
491					if (flags & SIGNOK) {
492						flags &= ~SIGNOK;
493						goto ok;
494					}
495					break;
496
497				/* x ok iff flag still set & 2nd char */
498				case 'x': case 'X':
499					if (flags & PFXOK && p == buf + 1) {
500						base = 16;	/* if %i */
501						flags &= ~PFXOK;
502						goto ok;
503					}
504					break;
505				}
506
507				/*
508				 * If we got here, c is not a legal character
509				 * for a number.  Stop accumulating digits.
510				 */
511				break;
512		ok:
513				/*
514				 * c is legal: store it and look at the next.
515				 */
516				*p++ = c;
517				if (--inr > 0)
518					inp++;
519				else
520					break;		/* end of input */
521			}
522			/*
523			 * If we had only a sign, it is no good; push
524			 * back the sign.  If the number ends in `x',
525			 * it was [sign] '0' 'x', so push back the x
526			 * and treat it as [sign] '0'.
527			 */
528			if (flags & NDIGITS) {
529				if (p > buf) {
530					inp--;
531					inr++;
532				}
533				goto match_failure;
534			}
535			c = ((u_char *)p)[-1];
536			if (c == 'x' || c == 'X') {
537				--p;
538				inp--;
539				inr++;
540			}
541			if ((flags & SUPPRESS) == 0) {
542				u_quad_t res;
543
544				*p = 0;
545				if ((flags & UNSIGNED) == 0)
546				    res = strtoq(buf, (char **)NULL, base);
547				else
548				    res = strtouq(buf, (char **)NULL, base);
549				if (flags & POINTER)
550					*va_arg(ap, void **) =
551						(void *)(uintptr_t)res;
552				else if (flags & SHORTSHORT)
553					*va_arg(ap, char *) = res;
554				else if (flags & SHORT)
555					*va_arg(ap, short *) = res;
556				else if (flags & LONG)
557					*va_arg(ap, long *) = res;
558				else if (flags & LONGLONG)
559					*va_arg(ap, long long *) = res;
560				else
561					*va_arg(ap, int *) = res;
562				nassigned++;
563			}
564			nread += p - buf;
565			nconversions++;
566			break;
567
568		}
569	}
570input_failure:
571	return (nconversions != 0 ? nassigned : -1);
572match_failure:
573	return (nassigned);
574}
575
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]', or to the terminating NUL if the format ends before
 * the scanset is closed.  The table has a 1 wherever characters
 * should be considered part of the scanset and a 0 elsewhere.
 *
 * `tab' must have room for 256 entries, one per possible u_char value.
 */
static const u_char *
__sccl(char *tab, const u_char *fmt)
{
	int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */

	/* XXX: Will not work if sizeof(*tab) > sizeof(char) */
	(void) memset(tab, v, 256);

	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The comparison must be `<=': with an equal end
			 * point the fill loop below would pre-increment
			 * past n and mark a character outside the range.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			/* fill in the range (c, n]; c itself is already set */
			do {
			    tab[++c] = v;
			} while (c < n);
			c = n;
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
666