1/*
2 * Copyright (c) 2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24/*	CFStringScanner.c
25	Copyright (c) 1999-2013, Apple Inc. All rights reserved.
26	Responsibility: Ali Ozer
27*/
28
29#include "CFInternal.h"
30#include <CoreFoundation/CFString.h>
31#include <sys/types.h>
32#include <limits.h>
33#include <stdlib.h>
34#include <string.h>
35
36CF_INLINE Boolean __CFCharacterIsADigit(UniChar ch) {
37    return (ch >= '0' && ch <= '9') ? true : false;
38}
39
40/* Returns -1 on illegal value */
41CF_INLINE SInt32 __CFCharacterNumericOrHexValue (UniChar ch) {
42    if (ch >= '0' && ch <= '9') {
43        return ch - '0';
44    } else if (ch >= 'A' && ch <= 'F') {
45        return ch + 10 - 'A';
46    } else if (ch >= 'a' && ch <= 'f') {
47        return ch + 10 - 'a';
48    } else {
49        return -1;
50    }
51}
52
53/* Returns -1 on illegal value */
54CF_INLINE SInt32 __CFCharacterNumericValue(UniChar ch) {
55    return (ch >= '0' && ch <= '9') ? (ch - '0') : -1;
56}
57
58CF_INLINE UniChar __CFStringGetFirstNonSpaceCharacterFromInlineBuffer(CFStringInlineBuffer *buf, SInt32 *indexPtr) {
59    UniChar ch;
60    while (__CFIsWhitespace(ch = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr))) (*indexPtr)++;
61    return ch;
62}
63
64/* result is int64_t or int, depending on doLonglong
65*/
66CF_PRIVATE Boolean __CFStringScanInteger(CFStringInlineBuffer *buf, CFTypeRef locale, SInt32 *indexPtr, Boolean doLonglong, void *result) {
67    Boolean doingLonglong = false;	/* Set to true if doLonglong, and we overflow an int... */
68    Boolean neg = false;
69    int intResult = 0;
70    register int64_t longlongResult = 0;	/* ??? int64_t is slow when not in regs; I hope this does the right thing. */
71    UniChar ch;
72
73    ch = __CFStringGetFirstNonSpaceCharacterFromInlineBuffer(buf, indexPtr);
74
75    if (ch == '-' || ch == '+') {
76	neg = (ch == '-');
77	(*indexPtr)++;
78    	ch = __CFStringGetFirstNonSpaceCharacterFromInlineBuffer(buf, indexPtr);
79    }
80
81    if (! __CFCharacterIsADigit(ch)) return false;	/* No digits, bail out... */
82    do {
83	if (doingLonglong) {
84            if ((longlongResult >= LLONG_MAX / 10) && ((longlongResult > LLONG_MAX / 10) || (__CFCharacterNumericValue(ch) - (neg ? 1 : 0) >= LLONG_MAX - longlongResult * 10))) {
85                /* ??? This might not handle LLONG_MIN correctly... */
86                longlongResult = neg ? LLONG_MIN : LLONG_MAX;
87                neg = false;
88                while (__CFCharacterIsADigit(ch = __CFStringGetCharacterFromInlineBufferAux(buf, ++(*indexPtr))));	/* Skip remaining digits */
89            } else {
90                longlongResult = longlongResult * 10 + __CFCharacterNumericValue(ch);
91                ch = __CFStringGetCharacterFromInlineBufferAux(buf, ++(*indexPtr));
92            }
93	} else {
94            if ((intResult >= INT_MAX / 10) && ((intResult > INT_MAX / 10) || (__CFCharacterNumericValue(ch) - (neg ? 1 : 0) >= INT_MAX - intResult * 10))) {
95                // Overflow, check for int64_t...
96                if (doLonglong) {
97                    longlongResult = intResult;
98                    doingLonglong = true;
99                } else {
100                    /* ??? This might not handle INT_MIN correctly... */
101                    intResult = neg ? INT_MIN : INT_MAX;
102                    neg = false;
103                    while (__CFCharacterIsADigit(ch = __CFStringGetCharacterFromInlineBufferAux(buf, ++(*indexPtr))));	/* Skip remaining digits */
104                }
105            } else {
106                intResult = intResult * 10 + __CFCharacterNumericValue(ch);
107                ch = __CFStringGetCharacterFromInlineBufferAux(buf, ++(*indexPtr));
108            }
109	}
110    } while (__CFCharacterIsADigit(ch));
111
112    if (result) {
113        if (doLonglong) {
114	    if (!doingLonglong) longlongResult = intResult;
115	    *(int64_t *)result = neg ? -longlongResult : longlongResult;
116	} else {
117	    *(int *)result = neg ? -intResult : intResult;
118	}
119    }
120
121    return true;
122}
123
124CF_PRIVATE Boolean __CFStringScanHex(CFStringInlineBuffer *buf, SInt32 *indexPtr, unsigned *result) {
125    UInt32 value = 0;
126    SInt32 curDigit;
127    UniChar ch;
128
129    ch = __CFStringGetFirstNonSpaceCharacterFromInlineBuffer(buf, indexPtr);
130    /* Ignore the optional "0x" or "0X"; if it's followed by a non-hex, just parse the "0" and leave pointer at "x" */
131    if (ch == '0') {
132	ch = __CFStringGetCharacterFromInlineBufferAux(buf, ++(*indexPtr));
133        if (ch == 'x' || ch == 'X') ch = __CFStringGetCharacterFromInlineBufferAux(buf, ++(*indexPtr));
134	curDigit = __CFCharacterNumericOrHexValue(ch);
135        if (curDigit == -1) {
136	    (*indexPtr)--;	/* Go back over the "x" or "X" */
137	    if (result) *result = 0;
138            return true;	/* We just saw "0" */
139        }
140    } else {
141	curDigit = __CFCharacterNumericOrHexValue(ch);
142        if (curDigit == -1) return false;
143    }
144
145    do {
146        if (value > (UINT_MAX >> 4)) {
147	    value = UINT_MAX;	/* We do this over and over again, but it's an error case anyway */
148        } else {
149            value = (value << 4) + curDigit;
150        }
151	curDigit = __CFCharacterNumericOrHexValue(__CFStringGetCharacterFromInlineBufferAux(buf, ++(*indexPtr)));
152    } while (curDigit != -1);
153
154    if (result) *result = value;
155    return true;
156}
157
// Bitmap over the 128 ASCII code points marking the characters that may appear in a
// floating-point literal: the digits, '+', '-', '.', 'E' and 'e'.
// Character c is a member when (__CFNumberSet[c >> 3] & (1 << (c & 7))) is non-zero, so
// within each byte the least significant bit belongs to the leftmost character listed.
static const unsigned char __CFNumberSet[16] = {
    0x00, // 0x00-0x07:  nul soh stx etx eot enq ack bel
    0x00, // 0x08-0x0F:  bs  ht  nl  vt  np  cr  so  si
    0x00, // 0x10-0x17:  dle dc1 dc2 dc3 dc4 nak syn etb
    0x00, // 0x18-0x1F:  can em  sub esc fs  gs  rs  us
    0x00, // 0x20-0x27:  sp   !   "   #   $   %   &   '
    0x68, // 0x28-0x2F:  (   )   *  [+]  ,  [-] [.]  /
    0xFF, // 0x30-0x37: [0] [1] [2] [3] [4] [5] [6] [7]
    0x03, // 0x38-0x3F: [8] [9]  :   ;   <   =   >   ?
    0x20, // 0x40-0x47:  @   A   B   C   D  [E]  F   G
    0x00, // 0x48-0x4F:  H   I   J   K   L   M   N   O
    0x00, // 0x50-0x57:  P   Q   R   S   T   U   V   W
    0x00, // 0x58-0x5F:  X   Y   Z   [   \   ]   ^   _
    0x20, // 0x60-0x67:  `   a   b   c   d  [e]  f   g
    0x00, // 0x68-0x6F:  h   i   j   k   l   m   n   o
    0x00, // 0x70-0x77:  p   q   r   s   t   u   v   w
    0x00  // 0x78-0x7F:  x   y   z   {   |   }   ~  del
};
177
178CF_PRIVATE Boolean __CFStringScanDouble(CFStringInlineBuffer *buf, CFTypeRef locale, SInt32 *indexPtr, double *resultPtr) {
179    #define STACK_BUFFER_SIZE 256
180    #define ALLOC_CHUNK_SIZE 256 // first and subsequent malloc size.  Should be greater than STACK_BUFFER_SIZE
181    char localCharBuffer[STACK_BUFFER_SIZE];
182    char *charPtr = localCharBuffer;
183    char *endCharPtr;
184    SInt32 numChars = 0;
185    SInt32 capacity = STACK_BUFFER_SIZE;	// in chars
186    double result;
187    UniChar ch;
188    CFAllocatorRef tmpAlloc = NULL;
189
190    ch = __CFStringGetFirstNonSpaceCharacterFromInlineBuffer(buf, indexPtr);
191    // At this point indexPtr points at the first non-space char
192#if 0
193#warning need to allow, case insensitively, all of: "nan", "inf", "-inf", "+inf", "-infinity", "+infinity", "infinity";
194#warning -- strtod() will actually do most or all of that for us
195#define BITSFORDOUBLENAN	((uint64_t)0x7ff8000000000000ULL)
196#define BITSFORDOUBLEPOSINF	((uint64_t)0x7ff0000000000000ULL)
197#define BITSFORDOUBLENEGINF	((uint64_t)0xfff0000000000000ULL)
198    if ('N' == ch || 'n' == ch) {	// check for "NaN", case insensitively
199        UniChar next1 = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + 1);
200        UniChar next2 = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + 2);
201        if (('a' == next1 || 'A' == next1) &&
202            ('N' == next2 || 'n' == next2)) {
203            *indexPtr += 3;
204            if (resultPtr) *(uint64_t *)resultPtr = BITSFORDOUBLENAN;
205            return true;
206        }
207    }
208    if ('I' == ch || 'i' == ch) {	// check for "Inf", case insensitively
209        UniChar next1 = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + 1);
210        UniChar next2 = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + 2);
211        if (('n' == next1 || 'N' == next1) &&
212            ('f' == next2 || 'F' == next2)) {
213            *indexPtr += 3;
214            if (resultPtr) *(uint64_t *)resultPtr = BITSFORDOUBLEPOSINF;
215            return true;
216        }
217    }
218    if ('+' == ch || '-' == ch) {	// check for "+/-Inf", case insensitively
219        UniChar next1 = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + 1);
220        UniChar next2 = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + 2);
221        UniChar next3 = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + 3);
222        if (('I' == next1 || 'i' == next1) &&
223            ('n' == next2 || 'N' == next2) &&
224            ('f' == next3 || 'F' == next3)) {
225            *indexPtr += 4;
226            if (resultPtr) *(uint64_t *)resultPtr = ('-' == ch) ? BITSFORDOUBLENEGINF : BITSFORDOUBLEPOSINF;
227            return true;
228        }
229    }
230#endif // 0
231    // Get characters until one not in __CFNumberSet[] is encountered
232    while ((ch < 128) && (__CFNumberSet[ch >> 3] & (1 << (ch & 7)))) {
233        if (numChars >= capacity - 1) {
234	    capacity += ALLOC_CHUNK_SIZE;
235	    if (tmpAlloc == NULL) tmpAlloc = __CFGetDefaultAllocator();
236	    if (charPtr == localCharBuffer) {
237		charPtr = (char *)CFAllocatorAllocate(tmpAlloc, capacity * sizeof(char), 0);
238		memmove(charPtr, localCharBuffer, numChars * sizeof(char));
239 	    } else {
240		charPtr = (char *)CFAllocatorReallocate(tmpAlloc, charPtr, capacity * sizeof(char), 0);
241	    }
242        }
243	charPtr[numChars++] = (char)ch;
244	ch = __CFStringGetCharacterFromInlineBufferAux(buf, *indexPtr + numChars);
245    };
246    charPtr[numChars] = 0;	// Null byte for strtod
247
248    result = strtod_l(charPtr, &endCharPtr, NULL);
249
250    if (tmpAlloc) CFAllocatorDeallocate(tmpAlloc, charPtr);
251    if (charPtr == endCharPtr) return false;
252    *indexPtr += (endCharPtr - charPtr);
253    if (resultPtr) *resultPtr = result; // only store result if we succeed
254
255    return true;
256}
257
258
259#undef STACK_BUFFER_SIZE
260#undef ALLOC_CHUNK_SIZE
261
262
263