subr_scanf.c revision 52757
1/*-
2 * Copyright (c) 1990, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Chris Torek.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * $FreeBSD: head/sys/kern/subr_scanf.c 52757 1999-11-01 15:04:04Z phk $
37 * From: Id: vfscanf.c,v 1.13 1998/09/25 12:20:27 obrien Exp
38 * From: static char sccsid[] = "@(#)strtol.c	8.1 (Berkeley) 6/4/93";
39 * From: static char sccsid[] = "@(#)strtoul.c	8.1 (Berkeley) 6/4/93";
40 */
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <machine/limits.h>
45
/*
 * Note that stdarg.h and the ANSI style va_start macro are used for both
 * ANSI and traditional C compilers.
 */
50#include <machine/stdarg.h>
51
/*
 * Size of the scratch buffer used to collect the text of a number before
 * it is converted; numeric field widths are clamped so the text plus its
 * terminating NUL fits.
 */
#define	BUF		32 	/* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 */
#define	LONG		0x01	/* l: long or double */
#define	SHORT		0x04	/* h: short */
#define	SUPPRESS	0x08	/* suppress assignment */
#define	POINTER		0x10	/* weird %p pointer (`fake hex') */
#define	NOSKIP		0x20	/* do not skip blanks */
#define	QUAD		0x400	/* q: quad_t */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
 *
 * The floating point pair (DPTOK, EXPOK) and the integral pair
 * (PFXOK, NZDIGITS) deliberately share the same bit values.
 */
#define	SIGNOK		0x40	/* +/- is (still) legal */
#define	NDIGITS		0x80	/* no digits detected */

#define	DPTOK		0x100	/* (float) decimal point is still legal */
#define	EXPOK		0x200	/* (float) exponent (e+3, etc) still legal */

#define	PFXOK		0x100	/* 0x prefix is (still) legal */
#define	NZDIGITS	0x200	/* no zero digits detected */

/*
 * Conversion types.
 */
#define	CT_CHAR		0	/* %c conversion */
#define	CT_CCL		1	/* %[...] conversion */
#define	CT_STRING	2	/* %s conversion */
#define	CT_INT		3	/* integer, i.e., strtoq or strtouq */

/*
 * Common signature of the integer conversion routines; vsscanf() stores
 * strtouq directly and strtoq through a cast to this type.
 */
typedef u_quad_t (*ccfntype)(const char *, const char **, int);

/*
 * Minimal character classification macros based on plain character
 * comparisons.  Note that isspace() recognises only space, tab,
 * carriage return and newline, and that the arguments may be
 * evaluated more than once.
 */
#define isspace(c)	((c) == ' ' || (c) == '\t' || \
			 (c) == '\r' || (c) == '\n')
#define isascii(c)	(((c) & ~0x7f) == 0)
#define isupper(c)	((c) >= 'A' && (c) <= 'Z')
#define islower(c)	((c) >= 'a' && (c) <= 'z')
#define isalpha(c)	(isupper(c) || (islower(c)))
#define isdigit(c)	((c) >= '0' && (c) <= '9')

/* Builds the %[...] scanset table; defined below. */
static const u_char *__sccl(char *, const u_char *);
96
/*
 * Scan the NUL-terminated string ibuf under control of fmt, storing the
 * converted values through the pointer arguments that follow fmt.
 *
 * This is only the variadic front end: the argument list is packaged
 * into a va_list and handed to vsscanf(), whose result is returned
 * unchanged.
 */
int
sscanf(const char *ibuf, const char *fmt, ...)
{
	va_list args;
	int nfields;

	va_start(args, fmt);
	nfields = vsscanf(ibuf, fmt, args);
	va_end(args);

	return (nfields);
}
108
109int
110vsscanf(const char *inp, char const *fmt0, va_list ap)
111{
112	int inr;
113	const u_char *fmt = (const u_char *)fmt0;
114	int c;			/* character from format, or conversion */
115	size_t width;		/* field width, or 0 */
116	char *p;		/* points into all kinds of strings */
117	int n;			/* handy integer */
118	int flags;		/* flags as defined above */
119	char *p0;		/* saves original value of p when necessary */
120	int nassigned;		/* number of fields assigned */
121	int nconversions;	/* number of conversions */
122	int nread;		/* number of characters consumed from fp */
123	int base;		/* base argument to strtoq/strtouq */
124	ccfntype ccfn;		/* conversion function (strtoq/strtouq) */
125	char ccltab[256];	/* character class table for %[...] */
126	char buf[BUF];		/* buffer for numeric conversions */
127
128	/* `basefix' is used to avoid `if' tests in the integer scanner */
129	static short basefix[17] =
130		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
131
132	inr = strlen(inp);
133
134	nassigned = 0;
135	nconversions = 0;
136	nread = 0;
137	base = 0;		/* XXX just to keep gcc happy */
138	ccfn = NULL;		/* XXX just to keep gcc happy */
139	for (;;) {
140		c = *fmt++;
141		if (c == 0)
142			return (nassigned);
143		if (isspace(c)) {
144			while (inr > 0 && isspace(*inp))
145				nread++, inr--, inp++;
146			continue;
147		}
148		if (c != '%')
149			goto literal;
150		width = 0;
151		flags = 0;
152		/*
153		 * switch on the format.  continue if done;
154		 * break once format type is derived.
155		 */
156again:		c = *fmt++;
157		switch (c) {
158		case '%':
159literal:
160			if (inr <= 0)
161				goto input_failure;
162			if (*inp != c)
163				goto match_failure;
164			inr--, inp++;
165			nread++;
166			continue;
167
168		case '*':
169			flags |= SUPPRESS;
170			goto again;
171		case 'l':
172			flags |= LONG;
173			goto again;
174		case 'q':
175			flags |= QUAD;
176			goto again;
177		case 'h':
178			flags |= SHORT;
179			goto again;
180
181		case '0': case '1': case '2': case '3': case '4':
182		case '5': case '6': case '7': case '8': case '9':
183			width = width * 10 + c - '0';
184			goto again;
185
186		/*
187		 * Conversions.
188		 *
189		 */
190		case 'd':
191			c = CT_INT;
192			ccfn = (ccfntype)strtoq;
193			base = 10;
194			break;
195
196		case 'i':
197			c = CT_INT;
198			ccfn = (ccfntype)strtoq;
199			base = 0;
200			break;
201
202		case 'o':
203			c = CT_INT;
204			ccfn = strtouq;
205			base = 8;
206			break;
207
208		case 'u':
209			c = CT_INT;
210			ccfn = strtouq;
211			base = 10;
212			break;
213
214		case 'x':
215			flags |= PFXOK;	/* enable 0x prefixing */
216			c = CT_INT;
217			ccfn = strtouq;
218			base = 16;
219			break;
220
221		case 's':
222			c = CT_STRING;
223			break;
224
225		case '[':
226			fmt = __sccl(ccltab, fmt);
227			flags |= NOSKIP;
228			c = CT_CCL;
229			break;
230
231		case 'c':
232			flags |= NOSKIP;
233			c = CT_CHAR;
234			break;
235
236		case 'p':	/* pointer format is like hex */
237			flags |= POINTER | PFXOK;
238			c = CT_INT;
239			ccfn = strtouq;
240			base = 16;
241			break;
242
243		case 'n':
244			nconversions++;
245			if (flags & SUPPRESS)	/* ??? */
246				continue;
247			if (flags & SHORT)
248				*va_arg(ap, short *) = nread;
249			else if (flags & LONG)
250				*va_arg(ap, long *) = nread;
251			else if (flags & QUAD)
252				*va_arg(ap, quad_t *) = nread;
253			else
254				*va_arg(ap, int *) = nread;
255			continue;
256		}
257
258		/*
259		 * We have a conversion that requires input.
260		 */
261		if (inr <= 0)
262			goto input_failure;
263
264		/*
265		 * Consume leading white space, except for formats
266		 * that suppress this.
267		 */
268		if ((flags & NOSKIP) == 0) {
269			while (isspace(*inp)) {
270				nread++;
271				if (--inr > 0)
272					inp++;
273				else
274					goto input_failure;
275			}
276			/*
277			 * Note that there is at least one character in
278			 * the buffer, so conversions that do not set NOSKIP
279			 * can no longer result in an input failure.
280			 */
281		}
282
283		/*
284		 * Do the conversion.
285		 */
286		switch (c) {
287
288		case CT_CHAR:
289			/* scan arbitrary characters (sets NOSKIP) */
290			if (width == 0)
291				width = 1;
292			if (flags & SUPPRESS) {
293				size_t sum = 0;
294				for (;;) {
295					if ((n = inr) < width) {
296						sum += n;
297						width -= n;
298						inp += n;
299						if (sum == 0)
300							goto input_failure;
301							break;
302					} else {
303						sum += width;
304						inr -= width;
305						inp += width;
306						break;
307					}
308				}
309				nread += sum;
310			} else {
311				bcopy(inp, va_arg(ap, char *), width);
312				inr -= width;
313				inp += width;
314				nread += width;
315				nassigned++;
316			}
317			nconversions++;
318			break;
319
320		case CT_CCL:
321			/* scan a (nonempty) character class (sets NOSKIP) */
322			if (width == 0)
323				width = (size_t)~0;	/* `infinity' */
324			/* take only those things in the class */
325			if (flags & SUPPRESS) {
326				n = 0;
327				while (ccltab[(unsigned char)*inp]) {
328					n++, inr--, inp++;
329					if (--width == 0)
330						break;
331					if (inr <= 0) {
332						if (n == 0)
333							goto input_failure;
334						break;
335					}
336				}
337				if (n == 0)
338					goto match_failure;
339			} else {
340				p0 = p = va_arg(ap, char *);
341				while (ccltab[(unsigned char)*inp]) {
342					inr--;
343					*p++ = *inp++;
344					if (--width == 0)
345						break;
346					if (inr <= 0) {
347						if (p == p0)
348							goto input_failure;
349						break;
350					}
351				}
352				n = p - p0;
353				if (n == 0)
354					goto match_failure;
355				*p = 0;
356				nassigned++;
357			}
358			nread += n;
359			nconversions++;
360			break;
361
362		case CT_STRING:
363			/* like CCL, but zero-length string OK, & no NOSKIP */
364			if (width == 0)
365				width = (size_t)~0;
366			if (flags & SUPPRESS) {
367				n = 0;
368				while (!isspace(*inp)) {
369					n++, inr--, inp++;
370					if (--width == 0)
371						break;
372					if (inr <= 0)
373						break;
374				}
375				nread += n;
376			} else {
377				p0 = p = va_arg(ap, char *);
378				while (!isspace(*inp)) {
379					inr--;
380					*p++ = *inp++;
381					if (--width == 0)
382						break;
383					if (inr <= 0)
384						break;
385				}
386				*p = 0;
387				nread += p - p0;
388				nassigned++;
389			}
390			nconversions++;
391			continue;
392
393		case CT_INT:
394			/* scan an integer as if by strtoq/strtouq */
395#ifdef hardway
396			if (width == 0 || width > sizeof(buf) - 1)
397				width = sizeof(buf) - 1;
398#else
399			/* size_t is unsigned, hence this optimisation */
400			if (--width > sizeof(buf) - 2)
401				width = sizeof(buf) - 2;
402			width++;
403#endif
404			flags |= SIGNOK | NDIGITS | NZDIGITS;
405			for (p = buf; width; width--) {
406				c = *inp;
407				/*
408				 * Switch on the character; `goto ok'
409				 * if we accept it as a part of number.
410				 */
411				switch (c) {
412
413				/*
414				 * The digit 0 is always legal, but is
415				 * special.  For %i conversions, if no
416				 * digits (zero or nonzero) have been
417				 * scanned (only signs), we will have
418				 * base==0.  In that case, we should set
419				 * it to 8 and enable 0x prefixing.
420				 * Also, if we have not scanned zero digits
421				 * before this, do not turn off prefixing
422				 * (someone else will turn it off if we
423				 * have scanned any nonzero digits).
424				 */
425				case '0':
426					if (base == 0) {
427						base = 8;
428						flags |= PFXOK;
429					}
430					if (flags & NZDIGITS)
431					    flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
432					else
433					    flags &= ~(SIGNOK|PFXOK|NDIGITS);
434					goto ok;
435
436				/* 1 through 7 always legal */
437				case '1': case '2': case '3':
438				case '4': case '5': case '6': case '7':
439					base = basefix[base];
440					flags &= ~(SIGNOK | PFXOK | NDIGITS);
441					goto ok;
442
443				/* digits 8 and 9 ok iff decimal or hex */
444				case '8': case '9':
445					base = basefix[base];
446					if (base <= 8)
447						break;	/* not legal here */
448					flags &= ~(SIGNOK | PFXOK | NDIGITS);
449					goto ok;
450
451				/* letters ok iff hex */
452				case 'A': case 'B': case 'C':
453				case 'D': case 'E': case 'F':
454				case 'a': case 'b': case 'c':
455				case 'd': case 'e': case 'f':
456					/* no need to fix base here */
457					if (base <= 10)
458						break;	/* not legal here */
459					flags &= ~(SIGNOK | PFXOK | NDIGITS);
460					goto ok;
461
462				/* sign ok only as first character */
463				case '+': case '-':
464					if (flags & SIGNOK) {
465						flags &= ~SIGNOK;
466						goto ok;
467					}
468					break;
469
470				/* x ok iff flag still set & 2nd char */
471				case 'x': case 'X':
472					if (flags & PFXOK && p == buf + 1) {
473						base = 16;	/* if %i */
474						flags &= ~PFXOK;
475						goto ok;
476					}
477					break;
478				}
479
480				/*
481				 * If we got here, c is not a legal character
482				 * for a number.  Stop accumulating digits.
483				 */
484				break;
485		ok:
486				/*
487				 * c is legal: store it and look at the next.
488				 */
489				*p++ = c;
490				if (--inr > 0)
491					inp++;
492				else
493					break;		/* end of input */
494			}
495			/*
496			 * If we had only a sign, it is no good; push
497			 * back the sign.  If the number ends in `x',
498			 * it was [sign] '0' 'x', so push back the x
499			 * and treat it as [sign] '0'.
500			 */
501			if (flags & NDIGITS) {
502				if (p > buf) {
503					inp--;
504					inr++;
505				}
506				goto match_failure;
507			}
508			c = ((u_char *)p)[-1];
509			if (c == 'x' || c == 'X') {
510				--p;
511				inp--;
512				inr++;
513			}
514			if ((flags & SUPPRESS) == 0) {
515				u_quad_t res;
516
517				*p = 0;
518				res = (*ccfn)(buf, (const char **)NULL, base);
519				if (flags & POINTER)
520					*va_arg(ap, void **) =
521						(void *)(uintptr_t)res;
522				else if (flags & SHORT)
523					*va_arg(ap, short *) = res;
524				else if (flags & LONG)
525					*va_arg(ap, long *) = res;
526				else if (flags & QUAD)
527					*va_arg(ap, quad_t *) = res;
528				else
529					*va_arg(ap, int *) = res;
530				nassigned++;
531			}
532			nread += p - buf;
533			nconversions++;
534			break;
535
536		}
537	}
538input_failure:
539	return (nconversions != 0 ? nassigned : -1);
540match_failure:
541	return (nassigned);
542}
543
/*
 * Fill in the given table from the scanset at the given format
 * (just after `[').  Return a pointer to the character past the
 * closing `]', or to the terminating NUL if the format ends before
 * the scanset is closed.  The table (256 entries, indexed by character
 * value) has a 1 wherever characters should be considered part of the
 * scanset and a 0 elsewhere.
 */
static const u_char *
__sccl(char *tab, const u_char *fmt)
{
	int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */

	for (n = 0; n < 256; n++)
		tab[n] = v;

	if (c == 0)
		return (fmt - 1);/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The comparison must be `<=': with n == c the
			 * range fill below would mark c + 1, which for
			 * c == 255 lies outside the table.
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			/* fill in the range (c, n]; c ends up equal to n */
			while (c < n)
				tab[++c] = v;
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard....
			 */
			goto doswitch;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
635
/*
 * Convert a string to an unsigned quad integer.
 *
 * Leading white space is skipped and an optional `+' or `-' sign is
 * accepted; a `-' negates the result in unsigned arithmetic.  If base
 * is 0 the base is taken from the text: `0x'/`0X' followed by a hex
 * digit selects 16, a leading `0' selects 8, anything else 10.  With
 * base 16 a `0x'/`0X' prefix followed by a hex digit is skipped.
 *
 * If endptr is not NULL, *endptr is set to the first character that
 * was not converted, or to nptr if no digits were found.  On overflow
 * UQUAD_MAX is returned.
 *
 * Ignores `locale' stuff.  Assumes that the upper and lower case
 * alphabets and digits are each contiguous.
 */
u_quad_t
strtouq(const char *nptr, const char **endptr, int base)
{
	const char *s;
	u_quad_t acc;
	unsigned char c;
	u_quad_t qbase, cutoff;
	int neg, any, cutlim;

	/*
	 * See strtoq for comments as to the logic used.
	 */
	s = nptr;
	do {
		c = *s++;
	} while (isspace(c));
	if (c == '-') {
		neg = 1;
		c = *s++;
	} else {
		neg = 0;
		if (c == '+')
			c = *s++;
	}
	/*
	 * Only swallow the `0x' prefix when a hex digit follows it;
	 * otherwise the text is just the number 0 followed by an `x'
	 * and must be reported that way through endptr.
	 */
	if ((base == 0 || base == 16) &&
	    c == '0' && (*s == 'x' || *s == 'X') &&
	    ((s[1] >= '0' && s[1] <= '9') ||
	    (s[1] >= 'A' && s[1] <= 'F') ||
	    (s[1] >= 'a' && s[1] <= 'f'))) {
		c = s[1];
		s += 2;
		base = 16;
	}
	if (base == 0)
		base = c == '0' ? 8 : 10;
	qbase = (unsigned)base;
	cutoff = (u_quad_t)UQUAD_MAX / qbase;
	cutlim = (u_quad_t)UQUAD_MAX % qbase;
	for (acc = 0, any = 0;; c = *s++) {
		if (!isascii(c))
			break;
		if (isdigit(c))
			c -= '0';
		else if (isalpha(c))
			c -= isupper(c) ? 'A' - 10 : 'a' - 10;
		else
			break;
		if (c >= base)
			break;
		/* any < 0 latches overflow; keep consuming digits anyway */
		if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
			any = -1;
		else {
			any = 1;
			acc *= qbase;
			acc += c;
		}
	}
	if (any < 0) {
		acc = UQUAD_MAX;
	} else if (neg)
		acc = -acc;
	if (endptr != 0)
		*endptr = (const char *)(any ? s - 1 : nptr);
	return (acc);
}
704
/*
 * Convert a string to a quad integer.
 *
 * Leading white space is skipped and an optional `+' or `-' sign is
 * accepted.  If base is 0 the base is taken from the text: `0x'/`0X'
 * followed by a hex digit selects 16, a leading `0' selects 8, anything
 * else 10.  With base 16 a `0x'/`0X' prefix followed by a hex digit is
 * skipped.
 *
 * If endptr is not NULL, *endptr is set to the first character that
 * was not converted, or to nptr if no digits were found.  On overflow
 * the result is clamped to QUAD_MAX, or to QUAD_MIN for negative input.
 *
 * Ignores `locale' stuff.  Assumes that the upper and lower case
 * alphabets and digits are each contiguous.
 */
quad_t
strtoq(const char *nptr, const char **endptr, int base)
{
	const char *s;
	u_quad_t acc;
	unsigned char c;
	u_quad_t qbase, cutoff;
	int neg, any, cutlim;

	/*
	 * Skip white space and pick up leading +/- sign if any.
	 * If base is 0, allow 0x for hex and 0 for octal, else
	 * assume decimal; if base is already 16, allow 0x.
	 */
	s = nptr;
	do {
		c = *s++;
	} while (isspace(c));
	if (c == '-') {
		neg = 1;
		c = *s++;
	} else {
		neg = 0;
		if (c == '+')
			c = *s++;
	}
	/*
	 * Only swallow the `0x' prefix when a hex digit follows it;
	 * otherwise the text is just the number 0 followed by an `x'
	 * and must be reported that way through endptr.
	 */
	if ((base == 0 || base == 16) &&
	    c == '0' && (*s == 'x' || *s == 'X') &&
	    ((s[1] >= '0' && s[1] <= '9') ||
	    (s[1] >= 'A' && s[1] <= 'F') ||
	    (s[1] >= 'a' && s[1] <= 'f'))) {
		c = s[1];
		s += 2;
		base = 16;
	}
	if (base == 0)
		base = c == '0' ? 8 : 10;

	/*
	 * Compute the cutoff value between legal numbers and illegal
	 * numbers.  That is the largest legal value, divided by the
	 * base.  An input number that is greater than this value, if
	 * followed by a legal input character, is too big.  One that
	 * is equal to this value may be valid or not; the limit
	 * between valid and invalid numbers is then based on the last
	 * digit.  For instance, if the range for quads is
	 * [-9223372036854775808..9223372036854775807] and the input base
	 * is 10, cutoff will be set to 922337203685477580 and cutlim to
	 * either 7 (neg==0) or 8 (neg==1), meaning that if we have
	 * accumulated a value > 922337203685477580, or equal but the
	 * next digit is > 7 (or 8), the number is too big, and we will
	 * return a range error.
	 *
	 * Set any if any `digits' consumed; make it negative to indicate
	 * overflow.
	 */
	qbase = (unsigned)base;
	/* the magnitude of QUAD_MIN, formed without overflowing quad_t */
	cutoff = neg ? (u_quad_t)-(QUAD_MIN + QUAD_MAX) + QUAD_MAX : QUAD_MAX;
	cutlim = cutoff % qbase;
	cutoff /= qbase;
	for (acc = 0, any = 0;; c = *s++) {
		if (!isascii(c))
			break;
		if (isdigit(c))
			c -= '0';
		else if (isalpha(c))
			c -= isupper(c) ? 'A' - 10 : 'a' - 10;
		else
			break;
		if (c >= base)
			break;
		if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
			any = -1;
		else {
			any = 1;
			acc *= qbase;
			acc += c;
		}
	}
	if (any < 0) {
		acc = neg ? QUAD_MIN : QUAD_MAX;
	} else if (neg)
		acc = -acc;
	if (endptr != 0)
		*endptr = (const char *)(any ? s - 1 : nptr);
	return (acc);
}
795
/*
 * Convert a string to a long integer.
 *
 * Leading white space is skipped and an optional `+' or `-' sign is
 * accepted.  If base is 0 the base is taken from the text: `0x'/`0X'
 * followed by a hex digit selects 16, a leading `0' selects 8, anything
 * else 10.  With base 16 a `0x'/`0X' prefix followed by a hex digit is
 * skipped.
 *
 * If endptr is not NULL, *endptr is set to the first character that
 * was not converted, or to nptr if no digits were found.  On overflow
 * the result is clamped to LONG_MAX, or to LONG_MIN for negative input.
 *
 * Ignores `locale' stuff.  Assumes that the upper and lower case
 * alphabets and digits are each contiguous.
 */
long
strtol(const char *nptr, const char **endptr, int base)
{
	const char *s = nptr;
	unsigned long acc;
	unsigned char c;
	unsigned long cutoff;
	int neg = 0, any, cutlim;

	/*
	 * Skip white space and pick up leading +/- sign if any.
	 * If base is 0, allow 0x for hex and 0 for octal, else
	 * assume decimal; if base is already 16, allow 0x.
	 */
	do {
		c = *s++;
	} while (isspace(c));
	if (c == '-') {
		neg = 1;
		c = *s++;
	} else if (c == '+')
		c = *s++;
	/*
	 * Only swallow the `0x' prefix when a hex digit follows it;
	 * otherwise the text is just the number 0 followed by an `x'
	 * and must be reported that way through endptr.
	 */
	if ((base == 0 || base == 16) &&
	    c == '0' && (*s == 'x' || *s == 'X') &&
	    ((s[1] >= '0' && s[1] <= '9') ||
	    (s[1] >= 'A' && s[1] <= 'F') ||
	    (s[1] >= 'a' && s[1] <= 'f'))) {
		c = s[1];
		s += 2;
		base = 16;
	}
	if (base == 0)
		base = c == '0' ? 8 : 10;

	/*
	 * Compute the cutoff value between legal numbers and illegal
	 * numbers.  That is the largest legal value, divided by the
	 * base.  An input number that is greater than this value, if
	 * followed by a legal input character, is too big.  One that
	 * is equal to this value may be valid or not; the limit
	 * between valid and invalid numbers is then based on the last
	 * digit.  For instance, if the range for longs is
	 * [-2147483648..2147483647] and the input base is 10,
	 * cutoff will be set to 214748364 and cutlim to either
	 * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated
	 * a value > 214748364, or equal but the next digit is > 7 (or 8),
	 * the number is too big, and we will return a range error.
	 *
	 * Set any if any `digits' consumed; make it negative to indicate
	 * overflow.
	 */
	cutoff = neg ? -(unsigned long)LONG_MIN : LONG_MAX;
	cutlim = cutoff % (unsigned long)base;
	cutoff /= (unsigned long)base;
	for (acc = 0, any = 0;; c = *s++) {
		if (!isascii(c))
			break;
		if (isdigit(c))
			c -= '0';
		else if (isalpha(c))
			c -= isupper(c) ? 'A' - 10 : 'a' - 10;
		else
			break;
		if (c >= base)
			break;
		if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
			any = -1;
		else {
			any = 1;
			acc *= base;
			acc += c;
		}
	}
	if (any < 0) {
		acc = neg ? LONG_MIN : LONG_MAX;
	} else if (neg)
		acc = -acc;
	if (endptr != 0)
		*endptr = (const char *)(any ? s - 1 : nptr);
	return (acc);
}
883
/*
 * Convert a string to an unsigned long integer.
 *
 * Leading white space is skipped and an optional `+' or `-' sign is
 * accepted; a `-' negates the result in unsigned arithmetic.  If base
 * is 0 the base is taken from the text: `0x'/`0X' followed by a hex
 * digit selects 16, a leading `0' selects 8, anything else 10.  With
 * base 16 a `0x'/`0X' prefix followed by a hex digit is skipped.
 *
 * If endptr is not NULL, *endptr is set to the first character that
 * was not converted, or to nptr if no digits were found.  On overflow
 * ULONG_MAX is returned.
 *
 * Ignores `locale' stuff.  Assumes that the upper and lower case
 * alphabets and digits are each contiguous.
 */
unsigned long
strtoul(const char *nptr, const char **endptr, int base)
{
	const char *s = nptr;
	unsigned long acc;
	unsigned char c;
	unsigned long cutoff;
	int neg = 0, any, cutlim;

	/*
	 * See strtol for comments as to the logic used.
	 */
	do {
		c = *s++;
	} while (isspace(c));
	if (c == '-') {
		neg = 1;
		c = *s++;
	} else if (c == '+')
		c = *s++;
	/*
	 * Only swallow the `0x' prefix when a hex digit follows it;
	 * otherwise the text is just the number 0 followed by an `x'
	 * and must be reported that way through endptr.
	 */
	if ((base == 0 || base == 16) &&
	    c == '0' && (*s == 'x' || *s == 'X') &&
	    ((s[1] >= '0' && s[1] <= '9') ||
	    (s[1] >= 'A' && s[1] <= 'F') ||
	    (s[1] >= 'a' && s[1] <= 'f'))) {
		c = s[1];
		s += 2;
		base = 16;
	}
	if (base == 0)
		base = c == '0' ? 8 : 10;
	cutoff = (unsigned long)ULONG_MAX / (unsigned long)base;
	cutlim = (unsigned long)ULONG_MAX % (unsigned long)base;
	for (acc = 0, any = 0;; c = *s++) {
		if (!isascii(c))
			break;
		if (isdigit(c))
			c -= '0';
		else if (isalpha(c))
			c -= isupper(c) ? 'A' - 10 : 'a' - 10;
		else
			break;
		if (c >= base)
			break;
		/* any < 0 latches overflow; keep consuming digits anyway */
		if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
			any = -1;
		else {
			any = 1;
			acc *= base;
			acc += c;
		}
	}
	if (any < 0) {
		acc = ULONG_MAX;
	} else if (neg)
		acc = -acc;
	if (endptr != 0)
		*endptr = (const char *)(any ? s - 1 : nptr);
	return (acc);
}
950