/* subr_scanf.c revision 43383 */
/*-
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Chris Torek.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Id: subr_scanf.c,v 1.3 1999/01/28 00:57:47 dillon Exp $
 * From: Id: vfscanf.c,v 1.13 1998/09/25 12:20:27 obrien Exp
 */
39185377Ssam
40185377Ssam#include <sys/param.h>
41185377Ssam#include <sys/systm.h>
42185377Ssam#include <sys/kernel.h>
43185377Ssam#include <machine/limits.h>
44185377Ssam
45185377Ssam/*
46185377Ssam * Note that stdarg.h and the ANSI style va_start macro is used for both
47185377Ssam * ANSI and traditional C compilers.
48185377Ssam */
49185377Ssam#include <machine/stdarg.h>
50185377Ssam
#define	BUF		32 	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 */
#define	LONG		0x01	/* l: long or double */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* suppress assignment */
#define	POINTER		0x10	/* weird %p pointer (`fake hex') */
#define	NOSKIP		0x20	/* do not skip blanks */
#define	QUAD		0x400	/* q: quad_t */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral conversions.
 * DPTOK and EXPOK are inherited from the floating point scanner of the
 * libc vfscanf this file derives from; nothing in this file tests them
 * and they share their bit values with PFXOK and NZDIGITS.
 */
#define	SIGNOK		0x40	/* +/- is (still) legal */
#define	NDIGITS		0x80	/* no digits detected */

#define	DPTOK		0x100	/* (float) decimal point is still legal */
#define	EXPOK		0x200	/* (float) exponent (e+3, etc) still legal */

#define	PFXOK		0x100	/* 0x prefix is (still) legal */
#define	NZDIGITS	0x200	/* no zero digits detected */

/*
 * Conversion types.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* integer, i.e., strtoq or strtouq */

/* Common signature of the integer conversion routines (strtoq/strtouq). */
typedef u_quad_t (*ccfntype)(const char *, const char **, int);

/*
 * Minimal, locale-free character classification.  These are macros and
 * evaluate their argument more than once: do not pass expressions with
 * side effects.
 *
 * isspace() accepts the six ISO C white-space characters: space and the
 * contiguous ASCII range '\t' '\n' '\v' '\f' '\r'.
 */
#define isspace(c)	((c) == ' ' || ((c) >= '\t' && (c) <= '\r'))
#define isascii(c)	(((c) & ~0x7f) == 0)
#define isupper(c)	((c) >= 'A' && (c) <= 'Z')
#define islower(c)	((c) >= 'a' && (c) <= 'z')
#define isalpha(c)	(isupper(c) || (islower(c)))
#define isdigit(c)	((c) >= '0' && (c) <= '9')

static const u_char *__sccl(char *, const u_char *);
95185380Ssam
/*
 * sscanf --
 *	Variadic front end: bundle the trailing arguments into a va_list,
 *	hand the work to vsscanf() and pass its result straight back.
 */
int
sscanf(const char *ibuf, const char *fmt, ...)
{
	va_list args;
	int nassigned;

	va_start(args, fmt);
	nassigned = vsscanf(ibuf, fmt, args);
	va_end(args);

	return (nassigned);
}
107185380Ssam
108185380Ssamint
109185380Ssamvsscanf(const char *inp, char const *fmt0, va_list ap)
110185380Ssam{
111185380Ssam	int inr;
112185380Ssam	const u_char *fmt = (const u_char *)fmt0;
113185380Ssam	int c;			/* character from format, or conversion */
114185380Ssam	size_t width;		/* field width, or 0 */
115185380Ssam	char *p;		/* points into all kinds of strings */
116185380Ssam	int n;			/* handy integer */
117185380Ssam	int flags;		/* flags as defined above */
118185380Ssam	char *p0;		/* saves original value of p when necessary */
119185380Ssam	int nassigned;		/* number of fields assigned */
120185380Ssam	int nconversions;	/* number of conversions */
121185380Ssam	int nread;		/* number of characters consumed from fp */
122185380Ssam	int base;		/* base argument to strtoq/strtouq */
123185380Ssam	ccfntype ccfn;		/* conversion function (strtoq/strtouq) */
124185380Ssam	char ccltab[256];	/* character class table for %[...] */
125185380Ssam	char buf[BUF];		/* buffer for numeric conversions */
126185380Ssam
127185380Ssam	/* `basefix' is used to avoid `if' tests in the integer scanner */
128185380Ssam	static short basefix[17] =
129185380Ssam		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
130185380Ssam
131185380Ssam	inr = strlen(inp);
132185380Ssam
133185380Ssam	nassigned = 0;
134185377Ssam	nconversions = 0;
135185377Ssam	nread = 0;
136185377Ssam	base = 0;		/* XXX just to keep gcc happy */
137185377Ssam	ccfn = NULL;		/* XXX just to keep gcc happy */
138185377Ssam	for (;;) {
139185377Ssam		c = *fmt++;
140185377Ssam		if (c == 0)
141185377Ssam			return (nassigned);
142185377Ssam		if (isspace(c)) {
143185377Ssam			while (inr > 0 && isspace(*inp))
144185377Ssam				nread++, inr--, inp++;
145185377Ssam			continue;
146185377Ssam		}
147185377Ssam		if (c != '%')
148185377Ssam			goto literal;
149185377Ssam		width = 0;
150185377Ssam		flags = 0;
151185377Ssam		/*
152185377Ssam		 * switch on the format.  continue if done;
153185377Ssam		 * break once format type is derived.
154185377Ssam		 */
155185377Ssamagain:		c = *fmt++;
156185377Ssam		switch (c) {
157185377Ssam		case '%':
158185377Ssamliteral:
159185377Ssam			if (inr <= 0)
160185377Ssam				goto input_failure;
161185377Ssam			if (*inp != c)
162185377Ssam				goto match_failure;
163185377Ssam			inr--, inp++;
164185377Ssam			nread++;
165185377Ssam			continue;
166185377Ssam
167185377Ssam		case '*':
168185377Ssam			flags |= SUPPRESS;
169185377Ssam			goto again;
170185377Ssam		case 'l':
171185377Ssam			flags |= LONG;
172185377Ssam			goto again;
173185377Ssam		case 'q':
174185377Ssam			flags |= QUAD;
175185377Ssam			goto again;
176185377Ssam		case 'h':
177185377Ssam			flags |= SHORT;
178185377Ssam			goto again;
179185377Ssam
180185377Ssam		case '0': case '1': case '2': case '3': case '4':
181185377Ssam		case '5': case '6': case '7': case '8': case '9':
182185377Ssam			width = width * 10 + c - '0';
183185377Ssam			goto again;
184185377Ssam
185185377Ssam		/*
186185377Ssam		 * Conversions.
187185377Ssam		 *
188185377Ssam		 */
189185377Ssam		case 'd':
190185377Ssam			c = CT_INT;
191185377Ssam			ccfn = (ccfntype)strtoq;
192185377Ssam			base = 10;
193185377Ssam			break;
194185377Ssam
195185377Ssam		case 'i':
196185377Ssam			c = CT_INT;
197185377Ssam			ccfn = (ccfntype)strtoq;
198185377Ssam			base = 0;
199185377Ssam			break;
200185377Ssam
201185377Ssam		case 'o':
202185377Ssam			c = CT_INT;
203185377Ssam			ccfn = strtouq;
204185377Ssam			base = 8;
205185377Ssam			break;
206185377Ssam
207185377Ssam		case 'u':
208185377Ssam			c = CT_INT;
209185377Ssam			ccfn = strtouq;
210185377Ssam			base = 10;
211185377Ssam			break;
212185377Ssam
213185377Ssam		case 'x':
214185377Ssam			flags |= PFXOK;	/* enable 0x prefixing */
215185377Ssam			c = CT_INT;
216185377Ssam			ccfn = strtouq;
217185377Ssam			base = 16;
218185377Ssam			break;
219185377Ssam
220185377Ssam		case 's':
221185377Ssam			c = CT_STRING;
222185377Ssam			break;
223185377Ssam
224185377Ssam		case '[':
225185377Ssam			fmt = __sccl(ccltab, fmt);
226185377Ssam			flags |= NOSKIP;
227185377Ssam			c = CT_CCL;
228185377Ssam			break;
229185377Ssam
230185377Ssam		case 'c':
231185377Ssam			flags |= NOSKIP;
232185377Ssam			c = CT_CHAR;
233185377Ssam			break;
234185377Ssam
235185377Ssam		case 'p':	/* pointer format is like hex */
236185377Ssam			flags |= POINTER | PFXOK;
237185377Ssam			c = CT_INT;
238185377Ssam			ccfn = strtouq;
239185377Ssam			base = 16;
240185377Ssam			break;
241185377Ssam
242185377Ssam		case 'n':
243185377Ssam			nconversions++;
244185377Ssam			if (flags & SUPPRESS)	/* ??? */
245185377Ssam				continue;
246185377Ssam			if (flags & SHORT)
247185377Ssam				*va_arg(ap, short *) = nread;
248185377Ssam			else if (flags & LONG)
249185377Ssam				*va_arg(ap, long *) = nread;
250185377Ssam			else if (flags & QUAD)
251185377Ssam				*va_arg(ap, quad_t *) = nread;
252185377Ssam			else
253185377Ssam				*va_arg(ap, int *) = nread;
254185377Ssam			continue;
255185377Ssam		}
256185377Ssam
257185377Ssam		/*
258185377Ssam		 * We have a conversion that requires input.
259185377Ssam		 */
260185377Ssam		if (inr <= 0)
261185377Ssam			goto input_failure;
262185377Ssam
263185377Ssam		/*
264185377Ssam		 * Consume leading white space, except for formats
265185377Ssam		 * that suppress this.
266185377Ssam		 */
267185377Ssam		if ((flags & NOSKIP) == 0) {
268185377Ssam			while (isspace(*inp)) {
269185377Ssam				nread++;
270185377Ssam				if (--inr > 0)
271185377Ssam					inp++;
272185377Ssam				else
273185377Ssam					goto input_failure;
274185377Ssam			}
275185377Ssam			/*
276185377Ssam			 * Note that there is at least one character in
277185377Ssam			 * the buffer, so conversions that do not set NOSKIP
278185377Ssam			 * can no longer result in an input failure.
279185377Ssam			 */
280185377Ssam		}
281185377Ssam
282185377Ssam		/*
283185377Ssam		 * Do the conversion.
284185377Ssam		 */
285185377Ssam		switch (c) {
286185377Ssam
287185377Ssam		case CT_CHAR:
288185377Ssam			/* scan arbitrary characters (sets NOSKIP) */
289185377Ssam			if (width == 0)
290185377Ssam				width = 1;
291185377Ssam			if (flags & SUPPRESS) {
292185377Ssam				size_t sum = 0;
293185377Ssam				for (;;) {
294185377Ssam					if ((n = inr) < width) {
295185377Ssam						sum += n;
296185377Ssam						width -= n;
297185377Ssam						inp += n;
298185377Ssam						if (sum == 0)
299185377Ssam							goto input_failure;
300185377Ssam							break;
301185377Ssam					} else {
302185377Ssam						sum += width;
303185377Ssam						inr -= width;
304185377Ssam						inp += width;
305185377Ssam						break;
306185377Ssam					}
307185377Ssam				}
308185377Ssam				nread += sum;
309185377Ssam			} else {
310185377Ssam				bcopy(inp, va_arg(ap, char *), width);
311185377Ssam				inr -= width;
312185377Ssam				inp += width;
313185377Ssam				nread += width;
314185377Ssam				nassigned++;
315185377Ssam			}
316185377Ssam			nconversions++;
317185377Ssam			break;
318185377Ssam
319185377Ssam		case CT_CCL:
320185377Ssam			/* scan a (nonempty) character class (sets NOSKIP) */
321185377Ssam			if (width == 0)
322185377Ssam				width = (size_t)~0;	/* `infinity' */
323185377Ssam			/* take only those things in the class */
324185377Ssam			if (flags & SUPPRESS) {
325185377Ssam				n = 0;
326185377Ssam				while (ccltab[(int)(unsigned char)*inp]) {
327185377Ssam					n++, inr--, inp++;
328185377Ssam					if (--width == 0)
329185377Ssam						break;
330185377Ssam					if (inr <= 0) {
331185377Ssam						if (n == 0)
332185377Ssam							goto input_failure;
333185377Ssam						break;
334185377Ssam					}
335185377Ssam				}
336185377Ssam				if (n == 0)
337185377Ssam					goto match_failure;
338185377Ssam			} else {
339185377Ssam				p0 = p = va_arg(ap, char *);
340185377Ssam				while (ccltab[(int)(unsigned char)*inp]) {
341185377Ssam					inr--;
342185380Ssam					*p++ = *inp++;
343185380Ssam					if (--width == 0)
344185380Ssam						break;
345185380Ssam					if (inr <= 0) {
346185377Ssam						if (p == p0)
347185377Ssam							goto input_failure;
348185377Ssam						break;
349185377Ssam					}
350185377Ssam				}
351185377Ssam				n = p - p0;
352185377Ssam				if (n == 0)
353185377Ssam					goto match_failure;
354185377Ssam				*p = 0;
355185377Ssam				nassigned++;
356185377Ssam			}
357185377Ssam			nread += n;
358185377Ssam			nconversions++;
359185377Ssam			break;
360185377Ssam
361185377Ssam		case CT_STRING:
362185377Ssam			/* like CCL, but zero-length string OK, & no NOSKIP */
363185377Ssam			if (width == 0)
364185377Ssam				width = (size_t)~0;
365185377Ssam			if (flags & SUPPRESS) {
366185377Ssam				n = 0;
367185377Ssam				while (!isspace(*inp)) {
368185377Ssam					n++, inr--, inp++;
369185377Ssam					if (--width == 0)
370185377Ssam						break;
371185377Ssam					if (inr <= 0)
372185377Ssam						break;
373185377Ssam				}
374185377Ssam				nread += n;
375185377Ssam			} else {
376185377Ssam				p0 = p = va_arg(ap, char *);
377185377Ssam				while (!isspace(*inp)) {
378185377Ssam					inr--;
379185377Ssam					*p++ = *inp++;
380185377Ssam					if (--width == 0)
381185377Ssam						break;
382185377Ssam					if (inr <= 0)
383185377Ssam						break;
384185377Ssam				}
385185377Ssam				*p = 0;
386185377Ssam				nread += p - p0;
387185377Ssam				nassigned++;
388185377Ssam			}
389185377Ssam			nconversions++;
390185377Ssam			continue;
391185377Ssam
392185377Ssam		case CT_INT:
393185377Ssam			/* scan an integer as if by strtoq/strtouq */
394185377Ssam#ifdef hardway
395185377Ssam			if (width == 0 || width > sizeof(buf) - 1)
396185377Ssam				width = sizeof(buf) - 1;
397185377Ssam#else
398185377Ssam			/* size_t is unsigned, hence this optimisation */
399185377Ssam			if (--width > sizeof(buf) - 2)
400185377Ssam				width = sizeof(buf) - 2;
401185377Ssam			width++;
402185377Ssam#endif
403185377Ssam			flags |= SIGNOK | NDIGITS | NZDIGITS;
404185377Ssam			for (p = buf; width; width--) {
405185377Ssam				c = *inp;
406185377Ssam				/*
407185377Ssam				 * Switch on the character; `goto ok'
408185377Ssam				 * if we accept it as a part of number.
409185377Ssam				 */
410185377Ssam				switch (c) {
411185377Ssam
412185377Ssam				/*
413185377Ssam				 * The digit 0 is always legal, but is
414185377Ssam				 * special.  For %i conversions, if no
415185377Ssam				 * digits (zero or nonzero) have been
416185377Ssam				 * scanned (only signs), we will have
417185377Ssam				 * base==0.  In that case, we should set
418185377Ssam				 * it to 8 and enable 0x prefixing.
419185377Ssam				 * Also, if we have not scanned zero digits
420185377Ssam				 * before this, do not turn off prefixing
421185377Ssam				 * (someone else will turn it off if we
422185377Ssam				 * have scanned any nonzero digits).
423185377Ssam				 */
424185377Ssam				case '0':
425185377Ssam					if (base == 0) {
426185377Ssam						base = 8;
427185377Ssam						flags |= PFXOK;
428185377Ssam					}
429185377Ssam					if (flags & NZDIGITS)
430185377Ssam					    flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
431185377Ssam					else
432185377Ssam					    flags &= ~(SIGNOK|PFXOK|NDIGITS);
433185377Ssam					goto ok;
434185377Ssam
435187831Ssam				/* 1 through 7 always legal */
436185377Ssam				case '1': case '2': case '3':
437185377Ssam				case '4': case '5': case '6': case '7':
438185377Ssam					base = basefix[base];
439185377Ssam					flags &= ~(SIGNOK | PFXOK | NDIGITS);
440185377Ssam					goto ok;
441185377Ssam
442185377Ssam				/* digits 8 and 9 ok iff decimal or hex */
443185377Ssam				case '8': case '9':
444185377Ssam					base = basefix[base];
445185377Ssam					if (base <= 8)
446185377Ssam						break;	/* not legal here */
447185377Ssam					flags &= ~(SIGNOK | PFXOK | NDIGITS);
448185377Ssam					goto ok;
449185377Ssam
450185377Ssam				/* letters ok iff hex */
451185377Ssam				case 'A': case 'B': case 'C':
452185377Ssam				case 'D': case 'E': case 'F':
453185377Ssam				case 'a': case 'b': case 'c':
454185377Ssam				case 'd': case 'e': case 'f':
455185377Ssam					/* no need to fix base here */
456185377Ssam					if (base <= 10)
457185377Ssam						break;	/* not legal here */
458185377Ssam					flags &= ~(SIGNOK | PFXOK | NDIGITS);
459185377Ssam					goto ok;
460185377Ssam
461185377Ssam				/* sign ok only as first character */
462185377Ssam				case '+': case '-':
463185377Ssam					if (flags & SIGNOK) {
464185377Ssam						flags &= ~SIGNOK;
465185377Ssam						goto ok;
466185377Ssam					}
467185377Ssam					break;
468185377Ssam
469185377Ssam				/* x ok iff flag still set & 2nd char */
470185377Ssam				case 'x': case 'X':
471185377Ssam					if (flags & PFXOK && p == buf + 1) {
472185377Ssam						base = 16;	/* if %i */
473185377Ssam						flags &= ~PFXOK;
474185377Ssam						goto ok;
475185377Ssam					}
476185377Ssam					break;
477185377Ssam				}
478185377Ssam
479185377Ssam				/*
480185377Ssam				 * If we got here, c is not a legal character
481185377Ssam				 * for a number.  Stop accumulating digits.
482185377Ssam				 */
483185377Ssam				break;
484185377Ssam		ok:
485185377Ssam				/*
486185377Ssam				 * c is legal: store it and look at the next.
487185377Ssam				 */
488185377Ssam				*p++ = c;
489185377Ssam				if (--inr > 0)
490185377Ssam					inp++;
491185377Ssam				else
492185377Ssam					break;		/* end of input */
493185377Ssam			}
494185377Ssam			/*
495185377Ssam			 * If we had only a sign, it is no good; push
496185377Ssam			 * back the sign.  If the number ends in `x',
497185377Ssam			 * it was [sign] '0' 'x', so push back the x
498185377Ssam			 * and treat it as [sign] '0'.
499185377Ssam			 */
500185377Ssam			if (flags & NDIGITS) {
501185377Ssam				if (p > buf) {
502185377Ssam					inp--;
503185377Ssam					inr++;
504185377Ssam				}
505185377Ssam				goto match_failure;
506185377Ssam			}
507185377Ssam			c = ((u_char *)p)[-1];
508185377Ssam			if (c == 'x' || c == 'X') {
509185377Ssam				--p;
510185377Ssam				inp--;
511185377Ssam				inr++;
512185377Ssam			}
513185377Ssam			if ((flags & SUPPRESS) == 0) {
514185377Ssam				u_quad_t res;
515185377Ssam
516185377Ssam				*p = 0;
517185377Ssam				res = (*ccfn)(buf, (const char **)NULL, base);
518185377Ssam				if (flags & POINTER)
519185377Ssam					*va_arg(ap, void **) =
520185377Ssam						(void *)(u_long)res;
521185377Ssam				else if (flags & SHORT)
522185377Ssam					*va_arg(ap, short *) = res;
523185377Ssam				else if (flags & LONG)
524185377Ssam					*va_arg(ap, long *) = res;
525185377Ssam				else if (flags & QUAD)
526185377Ssam					*va_arg(ap, quad_t *) = res;
527185377Ssam				else
528185377Ssam					*va_arg(ap, int *) = res;
529185377Ssam				nassigned++;
530185377Ssam			}
531185377Ssam			nread += p - buf;
532185377Ssam			nconversions++;
533185377Ssam			break;
534185377Ssam
535185377Ssam		}
536185377Ssam	}
537185377Ssaminput_failure:
538185377Ssam	return (nconversions != 0 ? nassigned : -1);
539185377Ssammatch_failure:
540185377Ssam	return (nassigned);
541185377Ssam}
542185377Ssam
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]', or to the terminating NUL if the format ends before
 * the scanset is closed.  The table (256 entries, indexed by unsigned
 * character value) has a 1 wherever characters should be considered
 * part of the scanset and a 0 elsewhere.
 */
static const u_char *
__sccl(char *tab, const u_char *fmt)
{
	int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */

	/* XXX: Will not work if sizeof(tab*) > sizeof(char) */
	for (n = 0; n < 256; n++)
		tab[n] = v;	/* memset(tab, v, 256) */

	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The test must be `<=': the fill loop below
			 * pre-increments c, so letting n == c through
			 * would mark c + 1 and, for c == 255, write
			 * past the end of the table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			/* fill in the range (c < n here, so ++c <= n) */
			do {
				tab[++c] = v;
			} while (c < n);
			c = n;
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
634185377Ssam
/*
 * Convert a string to an unsigned quad integer.
 *
 * Leading white space is skipped and an optional sign is accepted; a
 * `-' negates the result in unsigned arithmetic.  If base is 0 the
 * base is taken from the text: `0x'/`0X' means 16, a leading `0'
 * means 8, anything else 10.  A `0x' prefix is also accepted when
 * base is 16.  On overflow UQUAD_MAX is returned.
 *
 * If endptr is not NULL, *endptr is set to the first character that
 * was not converted, or to nptr if no digits were converted or the
 * base is not 0 or in the range 2..36.
 *
 * Ignores `locale' stuff.  Assumes that the upper and lower case
 * alphabets and digits are each contiguous.
 */
u_quad_t
strtouq(const char *nptr, const char **endptr, int base)
{
	const char *s;
	u_quad_t acc;
	unsigned char c;
	u_quad_t qbase, cutoff;
	int neg, any, cutlim;

	/*
	 * See strtoq for comments as to the logic used.
	 */
	s = nptr;
	do {
		c = *s++;
	} while (isspace(c));
	if (c == '-') {
		neg = 1;
		c = *s++;
	} else {
		neg = 0;
		if (c == '+')
			c = *s++;
	}
	/*
	 * Only swallow `0x' when a hex digit follows; otherwise the
	 * number is the `0' alone and scanning must stop at the `x'.
	 */
	if ((base == 0 || base == 16) &&
	    c == '0' && (*s == 'x' || *s == 'X') &&
	    ((s[1] >= '0' && s[1] <= '9') ||
	     (s[1] >= 'A' && s[1] <= 'F') ||
	     (s[1] >= 'a' && s[1] <= 'f'))) {
		c = s[1];
		s += 2;
		base = 16;
	}
	if (base == 0)
		base = c == '0' ? 8 : 10;
	if (base < 2 || base > 36) {
		/* not a usable base: nothing is converted */
		if (endptr != 0)
			*endptr = nptr;
		return (0);
	}
	qbase = (unsigned)base;
	cutoff = (u_quad_t)UQUAD_MAX / qbase;
	cutlim = (u_quad_t)UQUAD_MAX % qbase;
	for (acc = 0, any = 0;; c = *s++) {
		/* map c to its digit value; stop at any non-digit */
		if (c >= '0' && c <= '9')
			c -= '0';
		else if (c >= 'A' && c <= 'Z')
			c -= 'A' - 10;
		else if (c >= 'a' && c <= 'z')
			c -= 'a' - 10;
		else
			break;
		if (c >= base)
			break;
		if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
			any = -1;	/* overflow: keep eating digits */
		else {
			any = 1;
			acc *= qbase;
			acc += c;
		}
	}
	if (any < 0) {
		acc = UQUAD_MAX;
	} else if (neg)
		acc = -acc;
	if (endptr != 0)
		*endptr = (const char *)(any ? s - 1 : nptr);
	return (acc);
}
703
/*
 * Convert a string to a quad integer.
 *
 * Leading white space is skipped and an optional sign is accepted.
 * If base is 0 the base is taken from the text: `0x'/`0X' means 16,
 * a leading `0' means 8, anything else 10.  A `0x' prefix is also
 * accepted when base is 16.  Values out of range are clamped to
 * QUAD_MIN or QUAD_MAX.
 *
 * If endptr is not NULL, *endptr is set to the first character that
 * was not converted, or to nptr if no digits were converted or the
 * base is not 0 or in the range 2..36.
 *
 * Ignores `locale' stuff.  Assumes that the upper and lower case
 * alphabets and digits are each contiguous.
 */
quad_t
strtoq(const char *nptr, const char **endptr, int base)
{
	const char *s;
	u_quad_t acc;
	unsigned char c;
	u_quad_t qbase, cutoff;
	int neg, any, cutlim;

	/*
	 * Skip white space and pick up leading +/- sign if any.
	 * If base is 0, allow 0x for hex and 0 for octal, else
	 * assume decimal; if base is already 16, allow 0x.
	 */
	s = nptr;
	do {
		c = *s++;
	} while (isspace(c));
	if (c == '-') {
		neg = 1;
		c = *s++;
	} else {
		neg = 0;
		if (c == '+')
			c = *s++;
	}
	/*
	 * Only swallow `0x' when a hex digit follows; otherwise the
	 * number is the `0' alone and scanning must stop at the `x'.
	 */
	if ((base == 0 || base == 16) &&
	    c == '0' && (*s == 'x' || *s == 'X') &&
	    ((s[1] >= '0' && s[1] <= '9') ||
	     (s[1] >= 'A' && s[1] <= 'F') ||
	     (s[1] >= 'a' && s[1] <= 'f'))) {
		c = s[1];
		s += 2;
		base = 16;
	}
	if (base == 0)
		base = c == '0' ? 8 : 10;
	if (base < 2 || base > 36) {
		/* not a usable base: nothing is converted */
		if (endptr != 0)
			*endptr = nptr;
		return (0);
	}

	/*
	 * Compute the cutoff value between legal numbers and illegal
	 * numbers.  That is the largest legal value, divided by the
	 * base.  An input number that is greater than this value, if
	 * followed by a legal input character, is too big.  One that
	 * is equal to this value may be valid or not; the limit
	 * between valid and invalid numbers is then based on the last
	 * digit.  For instance, if the range for quads is
	 * [-9223372036854775808..9223372036854775807] and the input base
	 * is 10, cutoff will be set to 922337203685477580 and cutlim to
	 * either 7 (neg==0) or 8 (neg==1), meaning that if we have
	 * accumulated a value > 922337203685477580, or equal but the
	 * next digit is > 7 (or 8), the number is too big, and we will
	 * return a range error.
	 *
	 * Set any if any `digits' consumed; make it negative to indicate
	 * overflow.
	 */
	qbase = (unsigned)base;
	cutoff = neg ? (u_quad_t)-(QUAD_MIN + QUAD_MAX) + QUAD_MAX : QUAD_MAX;
	cutlim = cutoff % qbase;
	cutoff /= qbase;
	for (acc = 0, any = 0;; c = *s++) {
		/* map c to its digit value; stop at any non-digit */
		if (c >= '0' && c <= '9')
			c -= '0';
		else if (c >= 'A' && c <= 'Z')
			c -= 'A' - 10;
		else if (c >= 'a' && c <= 'z')
			c -= 'a' - 10;
		else
			break;
		if (c >= base)
			break;
		if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
			any = -1;	/* overflow: keep eating digits */
		else {
			any = 1;
			acc *= qbase;
			acc += c;
		}
	}
	if (any < 0) {
		acc = neg ? QUAD_MIN : QUAD_MAX;
	} else if (neg)
		acc = -acc;
	if (endptr != 0)
		*endptr = (const char *)(any ? s - 1 : nptr);
	return (acc);
}
794