1/*
2 * Copyright (c) 2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24/*	CFXMLParser.c
25	Copyright (c) 1999-2013, Apple Inc. All rights reserved.
26	Responsibility: David Smith
27*/
28
29#include <CoreFoundation/CFXMLParser.h>
30#include <CoreFoundation/CFNumber.h>
31#include "CFXMLInputStream.h"
32#include "CFUniChar.h"
33#include "CFInternal.h"
34
35#pragma GCC diagnostic push
36#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
37
38struct __CFXMLParser {
39    CFRuntimeBase _cfBase;
40
41    _CFXMLInputStream input;
42
43    void **stack;
44    void **top;
45    UInt32 capacity;
46
47    struct __CFXMLNode *node;  // Our private node; we use it to report back information
48    CFMutableDictionaryRef argDict;
49    CFMutableArrayRef argArray;
50
51    UInt32 options;
52    CFXMLParserCallBacks callBacks;
53    CFXMLParserContext context;
54
55    CFXMLParserStatusCode status;
56    CFStringRef errorString;
57};
58
59static CFStringRef __CFXMLParserCopyDescription(CFTypeRef cf) {
60    const struct __CFXMLParser *parser = (const struct __CFXMLParser *)cf;
61    return CFStringCreateWithFormat(CFGetAllocator(cf), NULL, CFSTR("<CFXMLParser %p>"), parser);
62}
63
64static void __CFXMLParserDeallocate(CFTypeRef cf) {
65    struct __CFXMLParser *parser = (struct __CFXMLParser *)cf;
66    CFAllocatorRef alloc = CFGetAllocator(parser);
67    _freeInputStream(&(parser->input));
68    if (parser->argDict) CFRelease(parser->argDict);
69    if (parser->argArray) CFRelease(parser->argArray);
70    if (parser->errorString) CFRelease(parser->errorString);
71    if (parser->node) CFRelease(parser->node);
72    CFAllocatorDeallocate(alloc, parser->stack);
73    if (parser->context.info && parser->context.release) {
74        parser->context.release(parser->context.info);
75    }
76}
77
78static CFTypeID __kCFXMLParserTypeID = _kCFRuntimeNotATypeID;
79
80static const CFRuntimeClass __CFXMLParserClass = {
81    0,
82    "CFXMLParser",
83    NULL,      // init
84    NULL,      // copy
85    __CFXMLParserDeallocate,
86    NULL,
87    NULL,
88    NULL,      //
89    __CFXMLParserCopyDescription
90};
91
92static void __CFXMLParserInitialize(void) {
93    __kCFXMLParserTypeID = _CFRuntimeRegisterClass(&__CFXMLParserClass);
94}
95
96CFTypeID CFXMLParserGetTypeID(void) {
97    if (_kCFRuntimeNotATypeID == __kCFXMLParserTypeID) __CFXMLParserInitialize();
98    return __kCFXMLParserTypeID;
99}
100
101void CFXMLParserGetContext(CFXMLParserRef parser, CFXMLParserContext *context) {
102    CFAssert1(parser != NULL, __kCFLogAssertion, "%s(): NULL parser not permitted", __PRETTY_FUNCTION__);
103    __CFGenericValidateType(parser, CFXMLParserGetTypeID());
104    if (context) {
105	context->version = parser->context.version;
106	context->info = parser->context.info;
107	context->retain = parser->context.retain;
108	context->release = parser->context.release;
109	context->copyDescription = parser->context.copyDescription;
110	UNFAULT_CALLBACK(context->retain);
111	UNFAULT_CALLBACK(context->release);
112	UNFAULT_CALLBACK(context->copyDescription);
113    }
114}
115
116void CFXMLParserGetCallBacks(CFXMLParserRef parser, CFXMLParserCallBacks *callBacks) {
117    __CFGenericValidateType(parser, CFXMLParserGetTypeID());
118    if (callBacks) {
119        callBacks->version = parser->callBacks.version;
120        callBacks->createXMLStructure = parser->callBacks.createXMLStructure;
121        callBacks->addChild = parser->callBacks.addChild;
122        callBacks->endXMLStructure = parser->callBacks.endXMLStructure;
123        callBacks->resolveExternalEntity = parser->callBacks.resolveExternalEntity;
124        callBacks->handleError = parser->callBacks.handleError;
125	UNFAULT_CALLBACK(callBacks->createXMLStructure);
126	UNFAULT_CALLBACK(callBacks->addChild);
127	UNFAULT_CALLBACK(callBacks->endXMLStructure);
128	UNFAULT_CALLBACK(callBacks->resolveExternalEntity);
129	UNFAULT_CALLBACK(callBacks->handleError);
130    }
131}
132
133CFURLRef CFXMLParserGetSourceURL(CFXMLParserRef parser) {
134    __CFGenericValidateType(parser, CFXMLParserGetTypeID());
135    return parser->input.url;
136}
137
138/* Returns the character index or line number of the current parse location */
139CFIndex CFXMLParserGetLocation(CFXMLParserRef parser) {
140    __CFGenericValidateType(parser, CFXMLParserGetTypeID());
141    return _inputStreamCurrentLocation(&parser->input);
142}
143
144CFIndex CFXMLParserGetLineNumber(CFXMLParserRef parser) {
145    __CFGenericValidateType(parser, CFXMLParserGetTypeID());
146    return _inputStreamCurrentLine(&parser->input);
147}
148
149/* Returns the top-most object returned by the createXMLStructure callback */
150void *CFXMLParserGetDocument(CFXMLParserRef parser) {
151    __CFGenericValidateType(parser, CFXMLParserGetTypeID());
152    if (parser->capacity > 0)
153        return parser->stack[0];
154    else
155        return NULL;
156}
157
158CFXMLParserStatusCode CFXMLParserGetStatusCode(CFXMLParserRef parser) {
159    __CFGenericValidateType(parser, CFXMLParserGetTypeID());
160    return parser->status;
161}
162
163CFStringRef CFXMLParserCopyErrorDescription(CFXMLParserRef parser) {
164    __CFGenericValidateType(parser, CFXMLParserGetTypeID());
165    return (CFStringRef)CFRetain(parser->errorString);
166}
167
168void CFXMLParserAbort(CFXMLParserRef parser, CFXMLParserStatusCode errorCode, CFStringRef errorDescription) {
169    __CFGenericValidateType(parser, CFXMLParserGetTypeID());
170    CFAssert1(errorCode > 0, __kCFLogAssertion, "%s(): errorCode must be greater than zero", __PRETTY_FUNCTION__);
171    CFAssert1(errorDescription != NULL, __kCFLogAssertion, "%s(): errorDescription may not be NULL", __PRETTY_FUNCTION__);
172    __CFGenericValidateType(errorDescription, CFStringGetTypeID());
173
174    parser->status = errorCode;
175    if (parser->errorString) CFRelease(parser->errorString);
176    parser->errorString = (CFStringRef)CFStringCreateCopy(kCFAllocatorSystemDefault, errorDescription);
177}
178
179
180static Boolean parseXML(CFXMLParserRef parser);
181static Boolean parseComment(CFXMLParserRef parser, Boolean report);
182static Boolean parseProcessingInstruction(CFXMLParserRef parser, Boolean report);
183static Boolean parseInlineDTD(CFXMLParserRef parser);
184static Boolean parseDTD(CFXMLParserRef parser);
185static Boolean parsePhysicalEntityReference(CFXMLParserRef parser);
186static Boolean parseCDSect(CFXMLParserRef parser);
187static Boolean parseEntityReference(CFXMLParserRef parser, Boolean report);
188static Boolean parsePCData(CFXMLParserRef parser);
189static Boolean parseWhitespace(CFXMLParserRef parser);
190static Boolean parseAttributeListDeclaration(CFXMLParserRef parser);
191static Boolean parseNotationDeclaration(CFXMLParserRef parser);
192static Boolean parseElementDeclaration(CFXMLParserRef parser);
193static Boolean parseEntityDeclaration(CFXMLParserRef parser);
194static Boolean parseExternalID(CFXMLParserRef parser, Boolean alsoAcceptPublicID, CFXMLExternalID *extID);
195static Boolean parseCloseTag(CFXMLParserRef parser, CFStringRef tag);
196static Boolean parseTagContent(CFXMLParserRef parser);
197static Boolean parseTag(CFXMLParserRef parser);
198static Boolean parseAttributes(CFXMLParserRef parser);
199static Boolean parseAttributeValue(CFXMLParserRef parser, CFMutableStringRef str);
200
201// Utilities; may need to make these accessible to the property list parser to avoid code duplication
202static void _CFReportError(CFXMLParserRef parser, CFXMLParserStatusCode errNum, const char *str);
203static Boolean reportNewLeaf(CFXMLParserRef parser); // Assumes parser->node has been set and is ready to go
204static void pushXMLNode(CFXMLParserRef parser, void *node);
205
206static CFXMLParserRef __CFXMLParserInit(CFAllocatorRef alloc, CFURLRef dataSource, CFOptionFlags options, CFDataRef xmlData, CFIndex version, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
207    struct __CFXMLParser *parser = (struct __CFXMLParser *)_CFRuntimeCreateInstance(alloc, CFXMLParserGetTypeID(), sizeof(struct __CFXMLParser) - sizeof(CFRuntimeBase), NULL);
208    struct __CFXMLNode *node = (struct __CFXMLNode *)_CFRuntimeCreateInstance(alloc, CFXMLNodeGetTypeID(), sizeof(struct __CFXMLNode) - sizeof(CFRuntimeBase), NULL);
209    UniChar *buf;
210    if (parser && node) {
211        alloc = CFGetAllocator(parser);
212        _initializeInputStream(&(parser->input), alloc, dataSource, xmlData);
213        parser->top = parser->stack;
214        parser->stack = NULL;
215        parser->capacity = 0;
216
217        buf = (UniChar *)CFAllocatorAllocate(alloc, 128*sizeof(UniChar), 0);
218        parser->node = node;
219        parser->node->dataString = CFStringCreateMutableWithExternalCharactersNoCopy(alloc, buf, 0, 128, alloc);
220        parser->node->additionalData = NULL;
221        parser->node->version = version;
222        parser->argDict = NULL; // don't create these until necessary
223        parser->argArray = NULL;
224
225        parser->options = options;
226        parser->callBacks = *callBacks;
227
228        FAULT_CALLBACK((void **)&(parser->callBacks.createXMLStructure));
229        FAULT_CALLBACK((void **)&(parser->callBacks.addChild));
230        FAULT_CALLBACK((void **)&(parser->callBacks.endXMLStructure));
231        FAULT_CALLBACK((void **)&(parser->callBacks.resolveExternalEntity));
232        FAULT_CALLBACK((void **)&(parser->callBacks.handleError));
233
234        if (context) {
235            parser->context = *context;
236            if (parser->context.info && parser->context.retain) {
237                parser->context.retain(parser->context.info);
238            }
239        } else {
240            parser->context.version = 0;
241            parser->context.info = NULL;
242            parser->context.retain = NULL;
243            parser->context.release = NULL;
244            parser->context.copyDescription = NULL;
245        }
246        parser->status = kCFXMLStatusParseNotBegun;
247        parser->errorString = NULL;
248    } else {
249        if (parser) CFRelease(parser);
250        if (node) CFRelease(node);
251        parser = NULL;
252    }
253    return parser;
254}
255
256CFXMLParserRef CFXMLParserCreate(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
257    CFAssert1(xmlData != NULL, __kCFLogAssertion, "%s(): NULL data not permitted", __PRETTY_FUNCTION__);
258    __CFGenericValidateType(xmlData, CFDataGetTypeID());
259    CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
260    CFAssert1(callBacks != NULL && callBacks->createXMLStructure != NULL && callBacks->addChild != NULL && callBacks->endXMLStructure != NULL, __kCFLogAssertion, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__);
261    CFAssert2(versionOfNodes <= 1, __kCFLogAssertion, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__, versionOfNodes);
262    CFAssert1(versionOfNodes != 0, __kCFLogAssertion, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__);
263    return __CFXMLParserInit(allocator, dataSource, parseOptions, xmlData, versionOfNodes, callBacks, context);
264}
265
266CFXMLParserRef CFXMLParserCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
267    CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
268    CFAssert1(callBacks != NULL && callBacks->createXMLStructure != NULL && callBacks->addChild != NULL && callBacks->endXMLStructure != NULL, __kCFLogAssertion, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__);
269    CFAssert2(versionOfNodes <= 1, __kCFLogAssertion, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__, versionOfNodes);
270    CFAssert1(versionOfNodes != 0, __kCFLogAssertion, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__);
271
272    return __CFXMLParserInit(allocator, dataSource, parseOptions, NULL, versionOfNodes, callBacks, context);
273}
274
275Boolean CFXMLParserParse(CFXMLParserRef parser) {
276    CFXMLDocumentInfo docData;
277    __CFGenericValidateType(parser, CFXMLParserGetTypeID());
278    if (parser->status != kCFXMLStatusParseNotBegun) return false;
279    parser->status = kCFXMLStatusParseInProgress;
280
281    if (!_openInputStream(&parser->input)) {
282        if (!parser->input.data) {
283            // couldn't load URL
284            parser->status = kCFXMLErrorNoData;
285            parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("No data found at %@"), CFURLGetString(parser->input.url));
286        } else {
287            // couldn't figure out the encoding
288            CFAssert(parser->input.encoding == kCFStringEncodingInvalidId, __kCFLogAssertion, "CFXMLParser internal error: input stream could not be opened");
289            parser->status = kCFXMLErrorUnknownEncoding;
290            parser->errorString = CFStringCreateWithCString(CFGetAllocator(parser), "Encountered unknown encoding", kCFStringEncodingASCII);
291        }
292        if (parser->callBacks.handleError) {
293            INVOKE_CALLBACK3(parser->callBacks.handleError, parser, parser->status, parser->context.info);
294        }
295        return false;
296    }
297
298    // Create the document
299    parser->stack = (void **)CFAllocatorAllocate(CFGetAllocator(parser), 16 * sizeof(void *), 0);
300    parser->capacity = 16;
301    parser->node->dataTypeID = kCFXMLNodeTypeDocument;
302    docData.encoding = _inputStreamGetEncoding(&parser->input);
303    docData.sourceURL = parser->input.url;
304    parser->node->additionalData = &docData;
305    parser->stack[0] = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
306    parser->top = parser->stack;
307    parser->node->additionalData = NULL;
308
309    // Client may have called CFXMLParserAbort() during any callback, so we must always check to see if we have an error status after a callback
310    if (parser->status != kCFXMLStatusParseInProgress) {
311        _CFReportError(parser, parser->status, NULL);
312        return false;
313    }
314    return parseXML(parser);
315}
316
/* The next several functions are each intended to parse past one particular XML structure.  They expect the input stream (parser->input) to be positioned at the first content character of their structure (e.g. parseComment expects the stream to be positioned just past "<!--").  They parse to the end of their structure, calling any necessary callbacks along the way, and advancing the input stream as they go.  They either return void (not possible for the parse to fail) or they return a Boolean (success/failure).  The calling routines are expected to catch returned Booleans and fail immediately if false is returned. */
318
319// [3]  S ::= (#x20 | #x9 | #xD | #xA)+
320static Boolean parseWhitespace(CFXMLParserRef parser) {
321    CFIndex len;
322    Boolean report = !(parser->options & kCFXMLParserSkipWhitespace);
323    len = _inputStreamSkipWhitespace(&parser->input, report ? (CFMutableStringRef)(parser->node->dataString) : NULL);
324    if (report && len) {
325        parser->node->dataTypeID = kCFXMLNodeTypeWhitespace;
326        parser->node->additionalData = NULL;
327        return reportNewLeaf(parser);
328    } else {
329        return true;
330    }
331}
332
333// parser should be just past "<!--"
334static Boolean parseComment(CFXMLParserRef parser, Boolean report) {
335    const UniChar dashes[2] = {'-', '-'};
336    UniChar ch;
337    report = report && (!(parser->options & kCFXMLParserSkipMetaData));
338    if (!_inputStreamScanToCharacters(&parser->input, dashes, 2, report ? (CFMutableStringRef)(parser->node->dataString) : NULL) || !_inputStreamGetCharacter(&parser->input, &ch)) {
339        _CFReportError(parser, kCFXMLErrorUnexpectedEOF,"Found unexpected EOF while parsing comment");
340        return false;
341    } else if (ch != '>') {
342        _CFReportError(parser, kCFXMLErrorMalformedComment, "Found \"--\" within a comment");
343        return false;
344    } else if (report) {
345        parser->node->dataTypeID = kCFXMLNodeTypeComment;
346        parser->node->additionalData = NULL;
347        return reportNewLeaf(parser);
348    } else {
349        return true;
350    }
351}
352
353/*
354[16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
355[17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
356 */
357// parser should be set to the first character after "<?"
358static Boolean parseProcessingInstruction(CFXMLParserRef parser, Boolean report) {
359    const UniChar piTermination[2] = {'?', '>'};
360    CFMutableStringRef str;
361    CFStringRef name;
362
363    if (!_inputStreamScanXMLName(&parser->input, false, &name)) {
364        _CFReportError(parser, kCFXMLErrorMalformedProcessingInstruction, "Found malformed processing instruction");
365        return false;
366    }
367    _inputStreamSkipWhitespace(&parser->input, NULL);
368    str = (report && *parser->top) ? CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)) : NULL;
369    if (!_inputStreamScanToCharacters(&parser->input, piTermination, 2, str)) {
370        _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing processing instruction");
371        if (str) CFRelease(str);
372        return false;
373    }
374
375    if (str) {
376        CFXMLProcessingInstructionInfo data;
377        Boolean result;
378        CFStringRef tmp = parser->node->dataString;
379        parser->node->dataTypeID = kCFXMLNodeTypeProcessingInstruction;
380        parser->node->dataString = name;
381        data.dataString = str;
382        parser->node->additionalData = &data;
383        result = reportNewLeaf(parser);
384        parser->node->additionalData = NULL;
385        parser->node->dataString = tmp;
386        CFRelease(str);
387        return result;
388    } else {
389        return true;
390    }
391}
392
393/*
394 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
395*/
396static const UniChar _DoctypeOpening[7] = {'D', 'O', 'C', 'T', 'Y', 'P', 'E'};
397// first character should be immediately after the "<!"
398static Boolean parseDTD(CFXMLParserRef parser) {
399    UniChar ch;
400    Boolean success, hasExtID = false;
401    CFXMLDocumentTypeInfo docData = {{NULL, NULL}};
402    void *dtdStructure = NULL;
403    CFStringRef name;
404
405    // First pass "DOCTYPE"
406    success = _inputStreamMatchString(&parser->input, _DoctypeOpening, 7);
407    success = success && _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
408    success = success && _inputStreamScanXMLName(&parser->input, false, &name);
409    if (success) {
410        _inputStreamSkipWhitespace(&parser->input, NULL);
411        success = _inputStreamPeekCharacter(&parser->input, &ch);
412    } else {
413        // didn't make it past "DOCTYPE" successfully.
414        _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found malformed DTD");
415        return false;
416    }
417    if (success && ch != '[' && ch != '>') {
418        // ExternalID
419        hasExtID = true;
420        success = parseExternalID(parser, false, &(docData.externalID));
421        if (success)  {
422            _inputStreamSkipWhitespace(&parser->input, NULL);
423            success = _inputStreamPeekCharacter(&parser->input, &ch);
424        }
425    }
426
427    if (!(parser->options & kCFXMLParserSkipMetaData) && *(parser->top)) {
428        CFStringRef tmp = parser->node->dataString;
429        parser->node->dataTypeID = kCFXMLNodeTypeDocumentType;
430        parser->node->dataString = name;
431        parser->node->additionalData = &docData;
432        dtdStructure = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
433        if (dtdStructure && parser->status == kCFXMLStatusParseInProgress) {
434            INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *parser->top, dtdStructure, parser->context.info);
435        }
436        parser->node->additionalData = NULL;
437        parser->node->dataString = tmp;
438        if (parser->status != kCFXMLStatusParseInProgress) {
439            // callback called CFXMLParserAbort()
440            _CFReportError(parser, parser->status, NULL);
441            return false;
442        }
443    } else {
444        dtdStructure = NULL;
445    }
446    if (docData.externalID.publicID) CFRelease(docData.externalID.publicID);
447    if (docData.externalID.systemID) CFRelease(docData.externalID.systemID);
448    pushXMLNode(parser, dtdStructure);
449
450    if (success && ch == '[')  {
451        // inline DTD
452        _inputStreamGetCharacter(&parser->input, &ch);
453        if (!parseInlineDTD(parser)) return false;
454        _inputStreamSkipWhitespace(&parser->input, NULL);
455        success = _inputStreamGetCharacter(&parser->input, &ch) && ch == '>';
456    } else if (success && ch == '>') {
457        // End of the DTD
458        _inputStreamGetCharacter(&parser->input, &ch);
459    }
460    if (!success) {
461        if (_inputStreamAtEOF(&parser->input)) {
462            _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing DTD");
463        } else {
464            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found malformed DTD");
465        }
466        return false;
467    }
468
469    parser->top --; // Remove dtdStructure from the stack
470
471    if (success && dtdStructure) {
472        INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, dtdStructure, parser->context.info);
473        if (parser->status != kCFXMLStatusParseInProgress) {
474            _CFReportError(parser, parser->status, NULL);
475            return false;
476        }
477    }
478    return true;
479}
480
481/*
482 [69] PEReference ::= '%' Name ';'
483*/
484static Boolean parsePhysicalEntityReference(CFXMLParserRef parser) {
485    UniChar ch;
486    CFStringRef name;
487    if (!_inputStreamScanXMLName(&parser->input, false, &name)) {
488        _CFReportError(parser, kCFXMLErrorMalformedName, "Found malformed name while parsing physical entity reference");
489        return false;
490    } else if (!_inputStreamGetCharacter(&parser->input, &ch)) {
491        _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing physical entity reference");
492        return false;
493    } else if (ch != ';') {
494        _CFReportError(parser, kCFXMLErrorMalformedName, "Found malformed name while parsing physical entity reference");
495        return false;
496    } else if (!(parser->options & kCFXMLParserSkipMetaData) && *(parser->top)) {
497        CFXMLEntityReferenceInfo myData;
498        Boolean result;
499        CFStringRef tmp = parser->node->dataString;
500        parser->node->dataTypeID = kCFXMLNodeTypeEntityReference;
501        parser->node->dataString = name;
502        myData.entityType = kCFXMLEntityTypeParameter;
503        parser->node->additionalData = &myData;
504        result = reportNewLeaf(parser);
505        parser->node->additionalData = NULL;
506        parser->node->dataString = tmp;
507        return result;
508    } else {
509        return true;
510    }
511}
512
513/*
514 [54] AttType ::= StringType | TokenizedType | EnumeratedType
515 [55] StringType ::= 'CDATA'
516 [56] TokenizedType ::= 'ID' | 'IDREF'| 'IDREFS'| 'ENTITY'| 'ENTITIES'| 'NMTOKEN'| 'NMTOKENS'
517 [57] EnumeratedType ::= NotationType | Enumeration
518 [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
519 [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
520*/
521static Boolean parseEnumeration(CFXMLParserRef parser, Boolean useNMTokens) {
522    UniChar ch;
523    Boolean done = false;
524    if (!_inputStreamGetCharacter(&parser->input, &ch)) {
525        _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
526        return false;
527    } else if (ch != '(') {
528        _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
529        return false;
530    }
531    _inputStreamSkipWhitespace(&parser->input, NULL);
532    if (!_inputStreamScanXMLName(&parser->input, useNMTokens, NULL)) {
533        _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
534        return false;
535    }
536    while (!done) {
537        _inputStreamSkipWhitespace(&parser->input, NULL);
538        if (!_inputStreamGetCharacter(&parser->input, &ch)) {
539            _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
540            return false;
541        } else if (ch == ')') {
542            done = true;
543        } else if (ch == '|') {
544            _inputStreamSkipWhitespace(&parser->input, NULL);
545            if (!_inputStreamScanXMLName(&parser->input, useNMTokens, NULL)) {
546                _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
547                return false;
548            }
549        } else {
550            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
551            return false;
552        }
553    }
554    return true;
555}
556
557static Boolean parseAttributeType(CFXMLParserRef parser, CFMutableStringRef str) {
558    Boolean success = false;
559    static const UniChar attTypeStrings[6][8] = {
560    {'C', 'D', 'A', 'T', 'A', '\0', '\0', '\0'},
561    {'I', 'D', 'R', 'E', 'F', 'S', '\0', '\0'},
562    {'E', 'N', 'T', 'I', 'T', 'Y', '\0', '\0'},
563    {'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S'},
564    {'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S'},
565    {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'} };
566    if (str) _inputStreamSetMark(&parser->input);
567    if (_inputStreamMatchString(&parser->input, attTypeStrings[0], 5) ||
568        _inputStreamMatchString(&parser->input, attTypeStrings[1], 6) ||
569        _inputStreamMatchString(&parser->input, attTypeStrings[1], 5) ||
570        _inputStreamMatchString(&parser->input, attTypeStrings[1], 2) ||
571        _inputStreamMatchString(&parser->input, attTypeStrings[2], 6) ||
572        _inputStreamMatchString(&parser->input, attTypeStrings[3], 8) ||
573        _inputStreamMatchString(&parser->input, attTypeStrings[4], 8) ||
574        _inputStreamMatchString(&parser->input, attTypeStrings[4], 7)) {
575        success = true;
576    } else if (_inputStreamMatchString(&parser->input, attTypeStrings[5], 8)) {
577        // Notation
578        if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
579            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
580            success = false;
581        } else  {
582            success = parseEnumeration(parser, false);
583        }
584    } else {
585        success = parseEnumeration(parser, true);
586    }
587    if (str) {
588        if (success) {
589            _inputStreamGetCharactersFromMark(&parser->input, str);
590        }
591        _inputStreamClearMark(&parser->input);
592    }
593    return success;
594}
595
596/*  [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) */
597static Boolean parseAttributeDefaultDeclaration(CFXMLParserRef parser, CFMutableStringRef str) {
598    const UniChar strings[3][8] = {
599    {'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D'},
600    {'I', 'M', 'P', 'L', 'I', 'E', 'D', '\0'},
601    {'F', 'I', 'X', 'E', 'D', '\0', '\0', '\0'}};
602    UniChar ch;
603    Boolean success;
604    if (str) _inputStreamSetMark(&parser->input);
605    if (!_inputStreamGetCharacter(&parser->input, &ch)) {
606        _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
607        success = false;
608    } else if (ch == '#') {
609        if (_inputStreamMatchString(&parser->input, strings[0], 8) ||
610            _inputStreamMatchString(&parser->input, strings[1], 7)) {
611            success = true;
612        } else if (!_inputStreamMatchString(&parser->input, strings[2], 5) || _inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
613            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
614            success = false;
615        } else {
616            // we fall through if "#FIXED" was matched, and at least one whitespace character was stripped.
617            success = parseAttributeValue(parser, NULL);
618        }
619    } else {
620        _inputStreamReturnCharacter(&parser->input, ch);
621        success = parseAttributeValue(parser, NULL);
622    }
623    if (str) {
624        if (success) {
625            _inputStreamGetCharactersFromMark(&parser->input, str);
626        }
627        _inputStreamClearMark(&parser->input);
628    }
629    return success;
630}
631
632/*
633 [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
634 [53] AttDef ::= S Name S AttType S DefaultDecl
635*/
636static Boolean parseAttributeListDeclaration(CFXMLParserRef parser) {
637    const UniChar attList[7] = {'A', 'T', 'T', 'L', 'I', 'S', 'T'};
638    CFXMLAttributeListDeclarationInfo attListData;
639    CFXMLAttributeDeclarationInfo attributeArray[8], *attributes=attributeArray;
640    CFIndex capacity = 8;
641    UniChar ch;
642    Boolean success = true;
643    CFStringRef name;
644    if (!_inputStreamMatchString(&parser->input, attList, 7) ||
645        _inputStreamSkipWhitespace(&parser->input, NULL) == 0 ||
646        !_inputStreamScanXMLName(&parser->input, false, &name)) {
647        _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
648        return false;
649    }
650    attListData.numberOfAttributes = 0;
651    if (!(*parser->top) || (parser->options & kCFXMLParserSkipMetaData)) {
652        // Use this to mark that we don't need to collect attribute information to report to the client.  Ultimately, we may want to collect this for our own use (for validation, for instance), but for now, the only reason we would create it would be for the client.  -- REW, 2/9/2000
653        attributes = NULL;
654    }
655    while (_inputStreamPeekCharacter(&parser->input, &ch) && ch != '>' && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) {
656        CFXMLAttributeDeclarationInfo *attribute = NULL;
657        if (_inputStreamPeekCharacter(&parser->input, &ch) && ch == '>')
658            break;
659        if (attributes) {
660            if (capacity == attListData.numberOfAttributes) {
661                capacity = 2*capacity;
662                if (attributes != attributeArray) {
663                    attributes = (CFXMLAttributeDeclarationInfo *)CFAllocatorReallocate(CFGetAllocator(parser), attributes, capacity * sizeof(CFXMLAttributeDeclarationInfo), 0);
664                } else {
665                    attributes = (CFXMLAttributeDeclarationInfo *)CFAllocatorAllocate(CFGetAllocator(parser), capacity * sizeof(CFXMLAttributeDeclarationInfo), 0);
666                }
667            }
668            attribute = &(attributes[attListData.numberOfAttributes]);
669            // Much better if we can somehow create these strings immutable - then if the client (or we ourselves) has to copy them, they will end up multiply-retained, rather than having a new alloc and data copy performed.  -- REW, 2/9/2000
670            attribute->typeString = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
671            attribute->defaultString = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
672        }
673        if (!_inputStreamScanXMLName(&parser->input, false, &(attribute->attributeName)) || (_inputStreamSkipWhitespace(&parser->input, NULL) == 0)) {
674            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
675            success = false;
676            break;
677        }
678        if (!parseAttributeType(parser, attribute ? (CFMutableStringRef)attribute->typeString : NULL)) {
679            success = false;
680            break;
681        }
682        if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
683            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
684            success = false;
685            break;
686        }
687        if (!parseAttributeDefaultDeclaration(parser, attribute ? (CFMutableStringRef)attribute->defaultString : NULL)) {
688            success = false;
689            break;
690        }
691        attListData.numberOfAttributes ++;
692    }
693    if (success) {
694        if (!_inputStreamGetCharacter(&parser->input, &ch)) {
695            _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
696            success = false;
697        } else if (ch != '>') {
698            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
699            success = false;
700        } else if (attributes) {
701            CFStringRef tmp = parser->node->dataString;
702            parser->node->dataTypeID = kCFXMLNodeTypeAttributeListDeclaration;
703            parser->node->dataString = name;
704            attListData.attributes = attributes;
705            parser->node->additionalData = (void *)&attListData;
706            success = reportNewLeaf(parser);
707            parser->node->additionalData = NULL;
708            parser->node->dataString = tmp;
709        }
710    }
711    if (attributes) {
712        // Free up all that memory
713        CFIndex idx;
714        for (idx = 0; idx < attListData.numberOfAttributes; idx ++) {
715            // Do not release attributeName here; it's a uniqued string from scanXMLName
716            CFRelease(attributes[idx].typeString);
717            CFRelease(attributes[idx].defaultString);
718        }
719        if (attributes != attributeArray) {
720            CFAllocatorDeallocate(CFGetAllocator(parser), attributes);
721        }
722    }
723    return success;
724}
725
726CF_INLINE Boolean parseSystemLiteral(CFXMLParserRef parser, CFXMLExternalID *extID) {
727    Boolean success;
728    if (extID) {
729        CFMutableStringRef urlStr = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
730        if (_inputStreamScanQuotedString(&parser->input, urlStr)) {
731            success = true;
732            extID->systemID = CFURLCreateWithString(CFGetAllocator(parser), urlStr, parser->input.url);
733        } else {
734            extID->systemID = NULL;
735            success = false;
736        }
737        CFRelease(urlStr);
738    } else {
739        success = _inputStreamScanQuotedString(&parser->input, NULL);
740    }
741    return success;
742}
743
744/*
745 [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
746 [83] PublicID ::= 'PUBLIC' S PubidLiteral
747 [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
748 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
749 [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
750*/
751// This does NOT report errors itself; caller can check to see if parser->input is at EOF to determine whether the formatting failed or unexpected EOF occurred.  -- REW, 2/2/2000
752static Boolean parseExternalID(CFXMLParserRef parser, Boolean alsoAcceptPublicID, CFXMLExternalID *extID) {
753    const UniChar publicString[6] = {'P', 'U', 'B', 'L', 'I', 'C'};
754    const UniChar systemString[6] = {'S', 'Y', 'S', 'T', 'E', 'M'};
755    Boolean success;
756    if (extID) {
757        extID->systemID = NULL;
758        extID->publicID = NULL;
759    }
760    if (_inputStreamMatchString(&parser->input, publicString, 6)) {
761        success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
762        if (extID) {
763            extID->publicID = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
764            success = success && _inputStreamScanQuotedString(&parser->input, (CFMutableStringRef)extID->publicID);
765        } else {
766            success = success && _inputStreamScanQuotedString(&parser->input, NULL);
767        }
768        if (success) {
769            UniChar ch;
770            if (alsoAcceptPublicID) {
771                _inputStreamSetMark(&parser->input); // In case we need to roll back the parser
772            }
773            if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0
774                || !_inputStreamPeekCharacter(&parser->input, &ch)
775                || (ch != '\'' && ch != '\"')
776                || !parseSystemLiteral(parser, extID)) {
777                success = alsoAcceptPublicID;
778                if (alsoAcceptPublicID) {
779                    _inputStreamBackUpToMark(&parser->input);
780                }
781            } else {
782                success = true;
783            }
784            if (alsoAcceptPublicID) {
785                _inputStreamClearMark(&parser->input);
786            }
787        }
788    } else if (_inputStreamMatchString(&parser->input, systemString, 6)) {
789        success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0 && parseSystemLiteral(parser, extID);
790    } else {
791        success = false;
792    }
793    return success;
794}
795
796/*
797 [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
798*/
799static Boolean parseNotationDeclaration(CFXMLParserRef parser) {
800    static UniChar notationString[8] = {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'};
801    Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
802    CFXMLNotationInfo notationData = {{NULL, NULL}};
803    CFStringRef name;
804    Boolean success =
805        _inputStreamMatchString(&parser->input, notationString, 8) &&
806        _inputStreamSkipWhitespace(&parser->input, NULL) != 0 &&
807        _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) &&
808        _inputStreamSkipWhitespace(&parser->input, NULL) != 0 &&
809        parseExternalID(parser, true, report ? &(notationData.externalID) : NULL);
810
811    if (success) {
812        UniChar ch;
813        _inputStreamSkipWhitespace(&parser->input, NULL);
814        success = (_inputStreamGetCharacter(&parser->input, &ch) && ch == '>');
815    }
816    if (!success) {
817        if (_inputStreamAtEOF(&parser->input)) {
818            _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
819        } else {
820            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
821        }
822    } else if (report) {
823        CFStringRef tmp = parser->node->dataString;
824        parser->node->dataTypeID = kCFXMLNodeTypeNotation;
825        parser->node->dataString = name;
826        parser->node->additionalData = &notationData;
827        success = reportNewLeaf(parser);
828        parser->node->additionalData = NULL;
829        parser->node->dataString = tmp;
830    }
831    if (notationData.externalID.systemID) CFRelease(notationData.externalID.systemID);
832    if (notationData.externalID.publicID) CFRelease(notationData.externalID.publicID);
833    return success;
834}
835
836/*
837 [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
838 [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
839 [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
840*/
841static Boolean parseChoiceOrSequence(CFXMLParserRef parser, Boolean pastParen) {
842    UniChar ch, separator;
843    if (!pastParen) {
844        if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '(') return false;
845        _inputStreamSkipWhitespace(&parser->input, NULL);
846    }
847    if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
848
849    /* Now scanning cp, production [48] */
850    if (ch == '(') {
851        if (!parseChoiceOrSequence(parser, false)) return false;
852    } else {
853        if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
854    }
855    if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
856    if (ch == '?' || ch == '*' || ch == '+') _inputStreamGetCharacter(&parser->input, &ch);
857
858    /* Now past cp */
859    _inputStreamSkipWhitespace(&parser->input, NULL);
860    if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
861    if (ch == ')') return true;
862    if (ch != '|' && ch != ',') return false;
863    separator = ch;
864    while (ch == separator) {
865        _inputStreamSkipWhitespace(&parser->input, NULL);
866        if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
867        if (ch != '(') {
868            if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
869        } else if (!parseChoiceOrSequence(parser, false)) {
870            return false;
871        }
872        _inputStreamSkipWhitespace(&parser->input, NULL);
873        if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
874    }
875    return ch == ')';
876}
877
878/*
879 [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
880*/
881static Boolean parseMixedElementContent(CFXMLParserRef parser) {
882    static const UniChar pcdataString[7] = {'#', 'P', 'C', 'D', 'A', 'T', 'A'};
883    UniChar ch;
884    if (!_inputStreamMatchString(&parser->input, pcdataString, 7)) return false;
885    _inputStreamSkipWhitespace(&parser->input, NULL);
886    if (!_inputStreamGetCharacter(&parser->input, &ch) && (ch == ')' || ch == '|')) return false;
887    if (ch == ')') return true;
888
889    while (ch == '|') {
890        _inputStreamSkipWhitespace(&parser->input, NULL);
891        if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
892        _inputStreamSkipWhitespace(&parser->input, NULL);
893        if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
894    }
895    if (ch != ')') return false;
896    if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '*') return false;
897    return true;
898}
899
900/*
901 [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
902 [47] children ::= (choice | seq) ('?' | '*' | '+')?
903 */
904static Boolean parseElementContentSpec(CFXMLParserRef parser) {
905    static const UniChar eltContentEmpty[5] = {'E', 'M', 'P', 'T', 'Y'};
906    static const UniChar eltContentAny[3] = {'A', 'N', 'Y'};
907    UniChar ch;
908    if (_inputStreamMatchString(&parser->input, eltContentEmpty, 5) || _inputStreamMatchString(&parser->input, eltContentAny, 3)) {
909        return true;
910    } else if (!_inputStreamPeekCharacter(&parser->input, &ch) || ch != '(') {
911        return false;
912    } else {
913        // We want to know if we have a Mixed per production [51].  If we don't, we will need to back up and call the parseChoiceOrSequence function.  So we set the mark now.  -- REW, 2/10/2000
914        _inputStreamGetCharacter(&parser->input, &ch);
915        _inputStreamSkipWhitespace(&parser->input, NULL);
916        if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
917        if (ch == '#') {
918            // Mixed
919            return parseMixedElementContent(parser);
920        } else {
921            if (parseChoiceOrSequence(parser, true)) {
922                if (_inputStreamPeekCharacter(&parser->input, &ch) && (ch == '*' || ch == '?' || ch == '+')) {
923                    _inputStreamGetCharacter(&parser->input, &ch);
924                }
925                return true;
926            } else {
927                return false;
928            }
929        }
930    }
931}
932
933/*
934 [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
935 */
936static Boolean parseElementDeclaration(CFXMLParserRef parser) {
937    Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
938    Boolean success;
939    static const UniChar eltChars[7] = {'E', 'L', 'E', 'M', 'E', 'N', 'T'};
940    UniChar ch = '>';
941    CFMutableStringRef contentDesc = NULL;
942    CFStringRef name;
943    success = _inputStreamMatchString(&parser->input, eltChars, 7)
944        && _inputStreamSkipWhitespace(&parser->input, NULL) != 0
945        && _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL)
946        && _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
947    if (success) {
948        if (report) _inputStreamSetMark(&parser->input);
949        success = parseElementContentSpec(parser);
950        if (success && report) {
951            contentDesc = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
952            _inputStreamGetCharactersFromMark(&parser->input, contentDesc);
953        }
954        if (report) _inputStreamClearMark(&parser->input);
955        if (success) _inputStreamSkipWhitespace(&parser->input, NULL);
956        success = success && _inputStreamMatchString(&parser->input, &ch, 1);
957    }
958    if (!success) {
959        if (_inputStreamAtEOF(&parser->input)) {
960            _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
961        } else {
962            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
963        }
964    } else if (report) {
965        CFXMLElementTypeDeclarationInfo eltData;
966        CFStringRef tmp = parser->node->dataString;
967        parser->node->dataTypeID = kCFXMLNodeTypeElementTypeDeclaration;
968        parser->node->dataString = name;
969        eltData.contentDescription = contentDesc;
970        parser->node->additionalData = &eltData;
971        success = reportNewLeaf(parser);
972        parser->node->additionalData = NULL;
973        parser->node->dataString = tmp;
974    }
975    if (contentDesc) CFRelease(contentDesc);
976    return success;
977}
978
979/*
980 [70] EntityDecl ::= GEDecl | PEDecl
981 [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
982 [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
983 [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
984 [74] PEDef ::= EntityValue | ExternalID
985 [76] NDataDecl ::= S 'NDATA' S Name
986 [9]  EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |  "'" ([^%&'] | PEReference | Reference)* "'"
987*/
988static Boolean parseEntityDeclaration(CFXMLParserRef parser) {
989    const UniChar entityStr[6] = {'E', 'N', 'T', 'I', 'T', 'Y'};
990    UniChar ch;
991    Boolean isPEDecl = false;
992    CFXMLEntityInfo entityData;
993    CFStringRef name;
994    Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
995    Boolean success =
996        _inputStreamMatchString(&parser->input, entityStr, 6) &&
997        (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) &&
998        _inputStreamPeekCharacter(&parser->input, &ch);
999
1000    entityData.replacementText = NULL;
1001    entityData.entityID.publicID = NULL;
1002    entityData.entityID.systemID = NULL;
1003    entityData.notationName = NULL;
1004    // We will set entityType immediately before reporting
1005
1006    if (success && ch == '%') {
1007        _inputStreamGetCharacter(&parser->input, &ch);
1008        success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
1009        isPEDecl = true;
1010    }
1011    success = success && _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) && (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && _inputStreamPeekCharacter(&parser->input, &ch);
1012    if (success && (ch == '\"' || ch == '\'')) {
1013        // EntityValue
1014        // This is not quite correct - the string scanned cannot contain '%' or '&' unless it's as part of a valid entity reference -- REW, 2/2/2000
1015        if (report) {
1016            entityData.replacementText = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
1017            success = _inputStreamScanQuotedString(&parser->input, (CFMutableStringRef)entityData.replacementText);
1018        } else {
1019            success = _inputStreamScanQuotedString(&parser->input, NULL);
1020        }
1021    } else if (success) {
1022        // ExternalID
1023        success = parseExternalID(parser, false, report ? &(entityData.entityID) : NULL);
1024        if (success && !isPEDecl && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) {
1025            // There could be an option NDataDecl
1026            // Don't we need to set entityData.notationName? -- REW, 3/6/2000
1027            const UniChar nDataStr[5] = {'N', 'D', 'A', 'T', 'A'};
1028            if (_inputStreamMatchString(&parser->input, nDataStr, 5)) {
1029                success = (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && _inputStreamScanXMLName(&parser->input, false, NULL);
1030            }
1031        }
1032    }
1033    if (success) {
1034        _inputStreamSkipWhitespace(&parser->input, NULL);
1035        success = _inputStreamGetCharacter(&parser->input, &ch) && ch == '>';
1036    }
1037    if (!success) {
1038        if (_inputStreamAtEOF(&parser->input)) {
1039            _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1040        } else {
1041            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1042        }
1043    } else {
1044        CFStringRef tmp = parser->node->dataString;
1045        if (isPEDecl) entityData.entityType = kCFXMLEntityTypeParameter;
1046        else if (entityData.replacementText) entityData.entityType = kCFXMLEntityTypeParsedInternal;
1047        else if (!entityData.notationName) entityData.entityType = kCFXMLEntityTypeParsedExternal;
1048        else entityData.entityType = kCFXMLEntityTypeUnparsed;
1049        parser->node->dataTypeID = kCFXMLNodeTypeEntity;
1050        parser->node->dataString = name;
1051        parser->node->additionalData = &entityData;
1052        success = reportNewLeaf(parser);
1053        parser->node->additionalData = NULL;
1054        parser->node->dataString = tmp;
1055        if (entityData.replacementText) CFRelease(entityData.replacementText);
1056    }
1057    if (entityData.entityID.publicID) CFRelease(entityData.entityID.publicID);
1058    if (entityData.entityID.systemID) CFRelease(entityData.entityID.systemID);
1059    return success;
1060}
1061
1062/*
1063 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1064 [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
1065*/
1066// First character should be just past '['
1067static Boolean parseInlineDTD(CFXMLParserRef parser) {
1068    Boolean success = true;
1069    while (success && !_inputStreamAtEOF(&parser->input)) {
1070        UniChar ch;
1071
1072        parseWhitespace(parser);
1073        if (!_inputStreamGetCharacter(&parser->input, &ch)) break;
1074        if (ch == '%') {
1075            // PEReference
1076            success = parsePhysicalEntityReference(parser);
1077        } else if (ch == '<') {
1078            // markupdecl
1079            if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1080                _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1081                return false;
1082            }
1083            if (ch == '?') {
1084                // Processing Instruction
1085                success = parseProcessingInstruction(parser, true); // We can safely pass true here, because *parser->top will be NULL if kCFXMLParserSkipMetaData is true
1086            } else if (ch == '!') {
1087                UniChar dashes[2] = {'-', '-'};
1088                if (_inputStreamMatchString(&parser->input, dashes, 2)) {
1089                    // Comment
1090                    success = parseComment(parser, true);
1091                } else {
1092                    // elementdecl | AttListDecl | EntityDecl | NotationDecl
1093                    if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
1094                        _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1095                        return false;
1096                    } else if (ch == 'A') {
1097                        // AttListDecl
1098                        success = parseAttributeListDeclaration(parser);
1099                    } else if (ch == 'N') {
1100                        success = parseNotationDeclaration(parser);
1101                    } else if (ch == 'E') {
1102                        // elementdecl | EntityDecl
1103                        _inputStreamGetCharacter(&parser->input, &ch);
1104                        if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
1105                            _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1106                            return false;
1107                        }
1108                        _inputStreamReturnCharacter(&parser->input, 'E');
1109                        if (ch == 'L') {
1110                            success = parseElementDeclaration(parser);
1111                        } else if (ch == 'N') {
1112                            success = parseEntityDeclaration(parser);
1113                        } else {
1114                            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1115                            return false;
1116                        }
1117                    } else {
1118                        _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1119                        return false;
1120                    }
1121                }
1122            } else {
1123                _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1124                return false;
1125            }
1126        } else if (ch == ']') {
1127            return true;
1128        } else {
1129            _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1130            return false;
1131        }
1132    }
1133    if (success) {
1134        _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1135    }
1136    return false;
1137}
1138
1139/*
1140[43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1141 */
1142static Boolean parseTagContent(CFXMLParserRef parser) {
1143    while (!_inputStreamAtEOF(&parser->input)) {
1144        UniChar ch;
1145        CFIndex numWhitespaceCharacters;
1146
1147        _inputStreamSetMark(&parser->input);
1148        numWhitespaceCharacters = _inputStreamSkipWhitespace(&parser->input, NULL);
1149        // Don't report the whitespace yet; if the first thing we see is character data, we put the whitespace back and report it as part of the character data.
1150        if (!_inputStreamGetCharacter(&parser->input, &ch)) break;  // break == report unexpected EOF
1151
1152        if (ch != '<' && ch != '&') { // CharData
1153            // Back off the whitespace; we'll report it with the PCData
1154            _inputStreamBackUpToMark(&parser->input);
1155            _inputStreamClearMark(&parser->input);
1156             if (!parsePCData(parser)) return false;
1157             if(_inputStreamComposingErrorOccurred(&parser->input)) {
1158                 _CFReportError(parser, kCFXMLErrorEncodingConversionFailure, "Encountered string encoding error");
1159                 return false;
1160             }
1161             continue;
1162        }
1163
1164        // element | Reference | CDSect | PI | Comment
1165        // We can safely report any whitespace now
1166        if (!(parser->options & kCFXMLParserSkipWhitespace) && numWhitespaceCharacters != 0 && *(parser->top)) {
1167            _inputStreamReturnCharacter(&parser->input, ch);
1168            _inputStreamGetCharactersFromMark(&parser->input, (CFMutableStringRef)(parser->node->dataString));
1169            parser->node->dataTypeID = kCFXMLNodeTypeWhitespace;
1170            parser->node->additionalData = NULL;
1171            if (!reportNewLeaf(parser)) return false;
1172            _inputStreamGetCharacter(&parser->input, &ch);
1173        }
1174        _inputStreamClearMark(&parser->input);
1175
1176        if (ch == '&') {
1177            // Reference; for the time being, we don't worry about processing these; just report them as Entity references
1178            if (!parseEntityReference(parser, true)) return false;
1179            continue;
1180        }
1181
1182        // ch == '<'; element | CDSect | PI | Comment
1183        if (!_inputStreamPeekCharacter(&parser->input, &ch)) break;
1184        if (ch == '?') { // PI
1185            _inputStreamGetCharacter(&parser->input, &ch);
1186            if (!parseProcessingInstruction(parser, true))
1187                return false;
1188        } else if (ch == '/') { // end tag; we're passing outside of content's production
1189            _inputStreamReturnCharacter(&parser->input, '<'); // Back off to the '<'
1190            return true;
1191        } else if (ch != '!') { // element
1192            if (!parseTag(parser))  return false;
1193        } else {
1194            // Comment | CDSect
1195            UniChar dashes[3] = {'!', '-', '-'};
1196            if (_inputStreamMatchString(&parser->input, dashes, 3)) {
1197                // Comment
1198                if (!parseComment(parser, true)) return false;
1199            } else {
1200                // Should have a CDSect; back off the "<!" and call parseCDSect
1201                _inputStreamReturnCharacter(&parser->input, '<');
1202                if (!parseCDSect(parser)) return false;
1203            }
1204        }
1205    }
1206
1207    if(_inputStreamComposingErrorOccurred(&parser->input)) {
1208        _CFReportError(parser, kCFXMLErrorEncodingConversionFailure, "Encountered string encoding error");
1209        return false;
1210    }
1211    // Only way to get here is if premature EOF was found
1212//#warning CF:Include the tag name here
1213    _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing tag content");
1214    return false;
1215}
1216
1217static Boolean parseCDSect(CFXMLParserRef parser) {
1218    const UniChar _CDSectOpening[9] = {'<', '!', '[', 'C', 'D', 'A', 'T', 'A', '['};
1219    const UniChar _CDSectClose[3] = {']', ']', '>'};
1220    if (!_inputStreamMatchString(&parser->input, _CDSectOpening, 9)) {
1221        _CFReportError(parser, kCFXMLErrorMalformedCDSect, "Encountered bad prefix to a presumed CDATA section");
1222        return false;
1223    }
1224    if (!_inputStreamScanToCharacters(&parser->input, _CDSectClose, 3, (CFMutableStringRef)(parser->node->dataString))) {
1225        _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing CDATA section");
1226        return false;
1227    }
1228
1229    parser->node->dataTypeID = kCFXMLNodeTypeCDATASection;
1230    parser->node->additionalData = NULL;
1231    return reportNewLeaf(parser);
1232}
1233
1234/*
1235 [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1236*/
1237static Boolean validateCharacterReference(CFStringRef str) {
1238    Boolean isHex;
1239    CFIndex idx, len = CFStringGetLength(str);
1240    if (len < 2) return false;
1241    if (CFStringGetCharacterAtIndex(str, 0) != '#') return false;
1242    if (CFStringGetCharacterAtIndex(str, 1) == 'x') {
1243        isHex = true;
1244        idx = 2;
1245        if (len == 2) return false;
1246    } else {
1247        isHex = false;
1248        idx = 1;
1249    }
1250
1251    while (idx < len) {
1252        UniChar ch;
1253        ch = CFStringGetCharacterAtIndex(str, idx);
1254        idx ++;
1255        if (!(ch <= '9' && ch >= '0') &&
1256            !(isHex && ((ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')))) {
1257            break;
1258        }
1259    }
1260    return (idx == len);
1261}
1262
1263/*
1264 [67] Reference ::= EntityRef | CharRef
1265 [68] EntityRef ::= '&' Name ';'
1266*/
1267static Boolean parseEntityReference(CFXMLParserRef parser, Boolean report) {
1268    UniChar ch;
1269    CFXMLEntityReferenceInfo entData;
1270    CFStringRef name = NULL;
1271    if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
1272        _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
1273        return false;
1274    }
1275    if (ch == '#') {
1276        ch = ';';
1277        if (!_inputStreamScanToCharacters(&parser->input, &ch, 1, (CFMutableStringRef)parser->node->dataString)) {
1278            _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
1279            return false;
1280        } else if (!validateCharacterReference(parser->node->dataString)) {
1281            _CFReportError(parser, kCFXMLErrorMalformedCharacterReference, "Encountered illegal character while parsing character reference");
1282            return false;
1283        }
1284        entData.entityType = kCFXMLEntityTypeCharacter;
1285        name = parser->node->dataString;
1286    } else if (!_inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) || !_inputStreamGetCharacter(&parser->input, &ch) || ch != ';') {
1287        if (_inputStreamAtEOF(&parser->input)) {
1288            _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
1289            return false;
1290        } else {
1291            _CFReportError(parser, kCFXMLErrorMalformedName, "Encountered malformed name while parsing EntityReference");
1292            return false;
1293        }
1294    } else {
1295        entData.entityType = kCFXMLEntityTypeParsedInternal;
1296    }
1297    if (report) {
1298        CFStringRef tmp = parser->node->dataString;
1299        Boolean success;
1300        parser->node->dataTypeID = kCFXMLNodeTypeEntityReference;
1301        parser->node->dataString = name;
1302        parser->node->additionalData = &entData;
1303        success = reportNewLeaf(parser);
1304        parser->node->additionalData = NULL;
1305        parser->node->dataString = tmp;
1306        return success;
1307    } else {
1308        return true;
1309    }
1310}
1311
1312#if 0
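// Disabled legacy code, kept for reference only (never compiled): the old hand-rolled
// expansion of the predefined entities and of numeric character references.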
1314{
1315    switch (*(parser->curr)) {
1316        case 'l':  // "lt"
1317            if (len >= 3 && *(parser->curr+1) == 't' && *(parser->curr+2) == ';') {
1318                ch = '<';
1319                parser->curr += 3;
1320                break;
1321            }
1322            parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1323            return;
1324        case 'g': // "gt"
1325            if (len >= 3 && *(parser->curr+1) == 't' && *(parser->curr+2) == ';') {
1326                ch = '>';
1327                parser->curr += 3;
1328                break;
1329            }
1330            parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1331            return;
1332        case 'a': // "apos" or "amp"
1333            if (len < 4) {   // Not enough characters for either conversion
1334                parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
1335                return;
1336            }
1337            if (*(parser->curr+1) == 'm') {
1338                // "amp"
1339                if (*(parser->curr+2) == 'p' && *(parser->curr+3) == ';') {
1340                    ch = '&';
1341                    parser->curr += 4;
1342                    break;
1343                }
1344            } else if (*(parser->curr+1) == 'p') {
1345                // "apos"
1346                if (len > 4 && *(parser->curr+2) == 'o' && *(parser->curr+3) == 's' && *(parser->curr+4) == ';') {
1347                    ch = '\'';
1348                    parser->curr += 5;
1349                    break;
1350                }
1351            }
1352            parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1353            return;
1354        case 'q':  // "quote"
1355            if (len >= 6 && *(parser->curr+1) == 'u' && *(parser->curr+2) == 'o' && *(parser->curr+3) == 't' && *(parser->curr+4) == 'e' && *(parser->curr+5) == ';') {
1356                ch = '\"';
1357                parser->curr += 6;
1358                break;
1359            }
1360            parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1361            return;
1362        case '#':
1363        {
1364            UniChar num = 0;
1365            Boolean isHex = false;
1366            if ( len < 4) {  // Not enough characters to make it all fit!  Need at least "&#d;"
1367                parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
1368                return;
1369            }
1370            parser->curr ++;
1371            if (*(parser->curr) == 'x') {
1372                isHex = true;
1373                parser->curr ++;
1374            }
1375            while (parser->curr < parser->end) {
1376                ch = *(parser->curr);
1377                if (ch == ';') {
1378                    CFStringAppendCharacters(string, &num, 1);
1379                    parser->curr ++;
1380                    return;
1381                }
1382                if (!isHex) num = num*10;
1383                else num = num << 4;
1384                if (ch <= '9' && ch >= '0') {
1385                    num += (ch - '0');
1386                } else if (!isHex) {
1387                    parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unexpected character %c at line %d"), ch, lineNumber(parser));
1388                    return;
1389                } else if (ch >= 'a' && ch <= 'f') {
1390                    num += 10 + (ch - 'a');
1391                } else if (ch >= 'A' && ch <= 'F') {
1392                    num += 10 + (ch - 'A');
1393                } else {
1394                    parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unexpected character %c at line %d"), ch, lineNumber(parser));
1395                    return;
1396                }
1397            }
1398            parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
1399            return;
1400        }
1401        default:
1402            parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1403            return;
1404    }
1405    CFStringAppendCharacters(string, &ch, 1);
1406}
1407#endif
1408
1409/*
1410[14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1411*/
1412static Boolean parsePCData(CFXMLParserRef parser) {
1413    UniChar ch;
1414    Boolean done = false;
1415    _inputStreamSetMark(&parser->input);
1416    while (!done && _inputStreamGetCharacter(&parser->input, &ch)) {
1417        switch (ch) {
1418            case '<':
1419            case '&':
1420                _inputStreamReturnCharacter(&parser->input, ch);
1421                done = true;
1422                break;
1423            case ']':
1424            {
1425                const UniChar endSequence[2] = {']', '>'};
1426                if (_inputStreamMatchString(&parser->input, endSequence, 2)) {
1427                    _CFReportError(parser, kCFXMLErrorMalformedParsedCharacterData, "Encountered \"]]>\" in parsed character data");
1428                    _inputStreamClearMark(&parser->input);
1429                    return false;
1430                }
1431                break;
1432            }
1433            default:
1434                ;
1435        }
1436    }
1437    _inputStreamGetCharactersFromMark(&parser->input, (CFMutableStringRef)(parser->node->dataString));
1438    _inputStreamClearMark(&parser->input);
1439    parser->node->dataTypeID = kCFXMLNodeTypeText;
1440    parser->node->additionalData = NULL;
1441    return reportNewLeaf(parser);
1442}
1443
1444/*
1445[42] ETag ::= '</' Name S? '>'
1446 */
1447static Boolean parseCloseTag(CFXMLParserRef parser, CFStringRef tag) {
1448    const UniChar beginEndTag[2] = {'<', '/'};
1449    Boolean unexpectedEOF = false, mismatch = false;
1450    CFStringRef closeTag;
1451
1452    // We can get away with testing pointer equality between tag & closeTag because scanXMLName guarantees the strings it returns are unique.
1453    if (_inputStreamMatchString(&parser->input, beginEndTag, 2) && _inputStreamScanXMLName(&parser->input, false, &closeTag) && closeTag == tag) {
1454
1455        UniChar ch;
1456        _inputStreamSkipWhitespace(&parser->input, NULL);
1457        if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1458            unexpectedEOF = true;
1459        } else if (ch != '>') {
1460            mismatch = true;
1461        }
1462    } else if (_inputStreamAtEOF(&parser->input)) {
1463        unexpectedEOF = true;
1464    } else {
1465        mismatch = true;
1466    }
1467
1468    if (unexpectedEOF || mismatch) {
1469        if (unexpectedEOF) {
1470            parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected EOF while parsing close tag for <%@>"), tag);
1471            parser->status = kCFXMLErrorUnexpectedEOF;
1472            if(parser->callBacks.handleError) INVOKE_CALLBACK3(parser->callBacks.handleError, parser, kCFXMLErrorUnexpectedEOF, parser->context.info);
1473        } else {
1474            parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered malformed close tag for <%@>"), tag);
1475            parser->status = kCFXMLErrorMalformedCloseTag;
1476            if(parser->callBacks.handleError) INVOKE_CALLBACK3(parser->callBacks.handleError, parser, kCFXMLErrorMalformedCloseTag, parser->context.info);
1477        }
1478        return false;
1479    }
1480    return true;
1481}
1482
1483/*
1484 [39] element ::= EmptyElementTag | STag content ETag
1485 [40] STag ::= '<' Name (S Attribute)* S? '>'
1486 [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1487*/
1488static Boolean parseTag(CFXMLParserRef parser) {
1489    UniChar ch;
1490    void *tag;
1491    CFXMLElementInfo data;
1492    Boolean success = true;
1493    CFStringRef tagName;
1494
1495    if (!_inputStreamScanXMLName(&parser->input, false, &tagName)) {
1496        _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Encountered malformed start tag");
1497        return false;
1498    }
1499
1500    _inputStreamSkipWhitespace(&parser->input, NULL);
1501
1502    if (!parseAttributes(parser)) return false; // parsed directly into parser->argDict ; parseAttributes consumes any trailing whitespace
1503    data.attributes = parser->argDict;
1504    data.attributeOrder = parser->argArray;
1505    if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1506        _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF");
1507        return false;
1508    }
1509    if (ch == '/') {
1510        data.isEmpty = true;
1511        if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1512            _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF");
1513            return false;
1514        }
1515    } else {
1516        data.isEmpty = false;
1517    }
1518    if (ch != '>') {
1519        _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Encountered malformed start tag");
1520        return false;
1521    }
1522
1523    if (*parser->top || parser->top == parser->stack) {
1524        CFStringRef oldStr = parser->node->dataString;
1525	parser->node->dataTypeID = kCFXMLNodeTypeElement;
1526        parser->node->dataString = tagName;
1527	parser->node->additionalData = &data;
1528        tag = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
1529        if (tag && parser->status == kCFXMLStatusParseInProgress) {
1530            INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *parser->top, tag, parser->context.info);
1531        }
1532	parser->node->additionalData = NULL;
1533        parser->node->dataString = oldStr;
1534        if (parser->status != kCFXMLStatusParseInProgress) {
1535            // callback called CFXMLParserAbort()
1536            _CFReportError(parser, parser->status, NULL);
1537            return false;
1538        }
1539    } else {
1540        tag = NULL;
1541    }
1542
1543    pushXMLNode(parser, tag);
1544    if (!data.isEmpty) {
1545        success =  parseTagContent(parser);
1546        if (success) {
1547            success = parseCloseTag(parser, tagName);
1548        }
1549    }
1550    parser->top --;
1551
1552    if (success && tag) {
1553        INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, tag, parser->context.info);
1554        if (parser->status != kCFXMLStatusParseInProgress) {
1555            _CFReportError(parser, parser->status, NULL);
1556            return false;
1557        }
1558    }
1559    return success;
1560}
1561
1562/*
1563 [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |  "'" ([^<&'] | Reference)* "'"
1564 [67] Reference ::= EntityRef | CharRef
1565 [68] EntityRef ::= '&' Name ';'
1566 */
1567// For the moment, we don't worry about references in the attribute values.
1568static Boolean parseAttributeValue(CFXMLParserRef parser, CFMutableStringRef str) {
1569    UniChar quote, ch;
1570    Boolean success = _inputStreamGetCharacter(&parser->input, &quote);
1571    if (!success || (quote != '\'' && quote != '\"')) return false;
1572    if (str) _inputStreamSetMark(&parser->input);
1573    while (_inputStreamGetCharacter(&parser->input, &ch) && ch != quote) {
1574        switch (ch) {
1575            case '<': success = false; break;
1576            case '&':
1577                if (!parseEntityReference(parser, false)) {
1578                    success = false;
1579                    break;
1580                }
1581            default:
1582                ;
1583        }
1584    }
1585
1586    if (success && _inputStreamAtEOF(&parser->input)) {
1587        success = false;
1588    }
1589    if (str) {
1590        if (success) {
1591            _inputStreamReturnCharacter(&parser->input, quote);
1592            _inputStreamGetCharactersFromMark(&parser->input, str);
1593            _inputStreamGetCharacter(&parser->input, &ch);
1594        }
1595        _inputStreamClearMark(&parser->input);
1596    }
1597    return success;
1598}
1599
1600/*
1601 [40] STag ::= '<' Name (S Attribute)* S? '>'
1602 [41] Attribute ::= Name Eq AttValue
1603 [25] Eq ::= S? '=' S?
1604*/
1605
1606// Expects parser->curr to be at the first content character; will consume the trailing whitespace.
1607Boolean parseAttributes(CFXMLParserRef parser) {
1608    UniChar ch;
1609    CFMutableDictionaryRef dict;
1610    CFMutableArrayRef array;
1611    Boolean failure = false;
1612    if (_inputStreamPeekCharacter(&parser->input, &ch) == '>') {
1613        if (parser->argDict) {
1614            CFDictionaryRemoveAllValues(parser->argDict);
1615            CFArrayRemoveAllValues(parser->argArray);
1616        }
1617        return true;  // No attributes; let caller deal with it
1618    }
1619    if (!parser->argDict) {
1620        parser->argDict = CFDictionaryCreateMutable(CFGetAllocator(parser), 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1621        parser->argArray = CFArrayCreateMutable(CFGetAllocator(parser), 0, &kCFTypeArrayCallBacks);
1622    } else {
1623        CFDictionaryRemoveAllValues(parser->argDict);
1624        CFArrayRemoveAllValues(parser->argArray);
1625    }
1626    dict = parser->argDict;
1627    array = parser->argArray;
1628    while (!failure && _inputStreamPeekCharacter(&parser->input, &ch) && ch != '>' && ch != '/') {
1629        CFStringRef key;
1630        CFMutableStringRef value;
1631        if (!_inputStreamScanXMLName(&parser->input, false, &key)) {
1632            failure = true;
1633            break;
1634        }
1635        if (CFArrayGetFirstIndexOfValue(array, CFRangeMake(0, CFArrayGetCount(array)), key) != kCFNotFound) {
1636                _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Found repeated attribute");
1637                return false;
1638        }
1639        _inputStreamSkipWhitespace(&parser->input, NULL);
1640        if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '=') {
1641            failure = true;
1642            break;
1643        }
1644        _inputStreamSkipWhitespace(&parser->input, NULL);
1645        value = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
1646        if (!parseAttributeValue(parser, value)) {
1647            CFRelease(value);
1648            failure = true;
1649            break;
1650        }
1651        CFArrayAppendValue(array, key);
1652        CFDictionarySetValue(dict, key, value);
1653        CFRelease(value);
1654        _inputStreamSkipWhitespace(&parser->input, NULL);
1655    }
1656    if (failure) {
1657//#warning CF:Include tag name in this error report
1658        _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Found illegal character while parsing element tag");
1659        return false;
1660    } else if (_inputStreamAtEOF(&parser->input)) {
1661        _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing element attributes");
1662        return false;
1663    } else {
1664        return true;
1665    }
1666}
1667
1668/*
1669 [1]  document ::= prolog element Misc*
1670 [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1671 [27] Misc ::= Comment | PI | S
1672 [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1673
1674 We treat XMLDecl as a plain old PI, since PI is part of Misc.  This changes the prolog and document productions to
1675 [22-1] prolog ::= Misc* (doctypedecl Misc*)?
1676 [1-1] document ::= Misc* (doctypedecl Misc*)? element Misc*
1677
1678 NOTE: This function assumes parser->stack has a valid top.  I.e. the document pointer has already been created!
1679*/
1680static Boolean parseXML(CFXMLParserRef parser) {
1681    Boolean success = true, sawDTD = false, sawElement = false;
1682    UniChar ch;
1683    while (success && _inputStreamPeekCharacter(&parser->input, &ch)) {
1684        switch (ch) {
1685            case ' ':
1686            case '\n':
1687            case '\t':
1688            case '\r':
1689                success = parseWhitespace(parser);
1690                break;
1691            case '<':
1692                _inputStreamGetCharacter(&parser->input, &ch);
1693                if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1694                    _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing top-level document");
1695                    return false;
1696                }
1697                if (ch == '!') {
1698                    // Comment or DTD
1699                    UniChar dashes[2] = {'-', '-'};
1700                    if (_inputStreamMatchString(&parser->input, dashes, 2)) {
1701                        // Comment
1702                        success = parseComment(parser, true);
1703                    } else {
1704                        // Should be DTD
1705                        if (sawDTD) {
1706                            _CFReportError(parser, kCFXMLErrorMalformedDocument, "Encountered a second DTD");
1707                            return false;
1708                        }
1709                        success = parseDTD(parser);
1710                        if (success) sawDTD = true;
1711                    }
1712                } else if (ch == '?') {
1713                    // Processing instruction
1714                    success = parseProcessingInstruction(parser, true);
1715                } else {
1716                    // Tag or malformed
1717                    if (sawElement) {
1718                        _CFReportError(parser, kCFXMLErrorMalformedDocument, "Encountered second top-level element");
1719                        return false;
1720                    }
1721                    _inputStreamReturnCharacter(&parser->input, ch);
1722                    success = parseTag(parser);
1723                    if (success) sawElement = true;
1724                }
1725                break;
1726            default: {
1727                parser->status = kCFXMLErrorMalformedDocument;
1728                parser->errorString = ch < 256 ?
1729                    CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected character 0x%x (\'%c\') at top-level"), ch, ch) :
1730                    CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected Unicode character 0x%x at top-level"), ch);
1731
1732                if (parser->callBacks.handleError) {
1733                    INVOKE_CALLBACK3(parser->callBacks.handleError, parser, parser->status, parser->context.info);
1734                }
1735                return false;
1736            }
1737        }
1738    }
1739
1740    if (!success) return false;
1741    if (!sawElement) {
1742        _CFReportError(parser, kCFXMLErrorElementlessDocument, "No element found in document");
1743        return false;
1744    }
1745    return true;
1746}
1747
1748static void _CFReportError(CFXMLParserRef parser, CFXMLParserStatusCode errNum, const char *str) {
1749    if (str) {
1750        parser->status = errNum;
1751        parser->errorString = CFStringCreateWithCString(CFGetAllocator(parser), str, kCFStringEncodingASCII);
1752    }
1753    if (parser->callBacks.handleError) {
1754        INVOKE_CALLBACK3(parser->callBacks.handleError, parser, errNum, parser->context.info);
1755    }
1756}
1757
1758// Assumes parser->node has been set and is ready to go
1759static Boolean reportNewLeaf(CFXMLParserRef parser) {
1760    void *xmlStruct;
1761    if (*(parser->top) == NULL) return true;
1762
1763    xmlStruct = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
1764    if (xmlStruct && parser->status == kCFXMLStatusParseInProgress) {
1765        INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *(parser->top), xmlStruct, parser->context.info);
1766        if (parser->status == kCFXMLStatusParseInProgress) INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, xmlStruct, parser->context.info);
1767    }
1768    if (parser->status != kCFXMLStatusParseInProgress) {
1769        _CFReportError(parser, parser->status, NULL);
1770        return false;
1771    }
1772    return true;
1773}
1774
1775static void pushXMLNode(CFXMLParserRef parser, void *node) {
1776    parser->top ++;
1777    if ((unsigned)(parser->top - parser->stack) == parser->capacity) {
1778        parser->stack = (void **)CFAllocatorReallocate(CFGetAllocator(parser), parser->stack, 2 * parser->capacity * sizeof(void *), 0);
1779        parser->top = parser->stack + parser->capacity;
1780        parser->capacity = 2*parser->capacity;
1781    }
1782    *(parser->top) = node;
1783}
1784
1785/**************************/
1786/* Parsing to a CFXMLTree */
1787/**************************/
1788
1789static void *_XMLTreeCreateXMLStructure(CFXMLParserRef parser, CFXMLNodeRef node, void *context) {
1790    CFXMLNodeRef myNode = CFXMLNodeCreateCopy(CFGetAllocator(parser), node);
1791    CFXMLTreeRef tree = CFXMLTreeCreateWithNode(CFGetAllocator(parser), myNode);
1792    CFRelease(myNode);
1793    return (void *)tree;
1794}
1795
1796static void _XMLTreeAddChild(CFXMLParserRef parser, void *parent, void *child, void *context) {
1797    CFTreeAppendChild((CFTreeRef)parent, (CFTreeRef)child);
1798}
1799
1800static void _XMLTreeEndXMLStructure(CFXMLParserRef parser, void *xmlType, void *context) {
1801    CFXMLTreeRef node = (CFXMLTreeRef)xmlType;
1802    if (CFTreeGetParent(node))
1803        CFRelease((CFXMLTreeRef)xmlType);
1804}
1805
1806CFXMLTreeRef CFXMLTreeCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex version) {
1807    CFXMLParserRef parser;
1808    CFXMLParserCallBacks callbacks;
1809    CFXMLTreeRef result;
1810
1811    CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
1812
1813    callbacks.createXMLStructure = _XMLTreeCreateXMLStructure;
1814    callbacks.addChild = _XMLTreeAddChild;
1815    callbacks.endXMLStructure = _XMLTreeEndXMLStructure;
1816    callbacks.resolveExternalEntity = NULL;
1817    callbacks.handleError = NULL;
1818    parser = CFXMLParserCreateWithDataFromURL(allocator, dataSource, parseOptions, version, &callbacks, NULL);
1819
1820    if (CFXMLParserParse(parser)) {
1821        result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1822    } else {
1823        result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1824        if (result) CFRelease(result);
1825        result = NULL;
1826    }
1827    CFRelease(parser);
1828    return result;
1829}
1830
1831CFXMLTreeRef CFXMLTreeCreateFromData(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex parserVersion) {
1832    return CFXMLTreeCreateFromDataWithError(allocator, xmlData, dataSource, parseOptions, parserVersion, NULL);
1833}
1834
1835CONST_STRING_DECL(kCFXMLTreeErrorDescription, "kCFXMLTreeErrorDescription");
1836CONST_STRING_DECL(kCFXMLTreeErrorLineNumber, "kCFXMLTreeErrorLineNumber");
1837CONST_STRING_DECL(kCFXMLTreeErrorLocation, "kCFXMLTreeErrorLocation");
1838CONST_STRING_DECL(kCFXMLTreeErrorStatusCode, "kCFXMLTreeErrorStatusCode");
1839
1840CFXMLTreeRef CFXMLTreeCreateFromDataWithError(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex parserVersion, CFDictionaryRef *errorDict) {
1841    CFXMLParserRef parser;
1842    CFXMLParserCallBacks callbacks;
1843    CFXMLTreeRef result;
1844
1845    __CFGenericValidateType(xmlData, CFDataGetTypeID());
1846    CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
1847
1848    callbacks.createXMLStructure = _XMLTreeCreateXMLStructure;
1849    callbacks.addChild = _XMLTreeAddChild;
1850    callbacks.endXMLStructure = _XMLTreeEndXMLStructure;
1851    callbacks.resolveExternalEntity = NULL;
1852    callbacks.handleError = NULL;
1853    parser = CFXMLParserCreate(allocator, xmlData, dataSource, parseOptions, parserVersion, &callbacks, NULL);
1854
1855    if (CFXMLParserParse(parser)) {
1856        result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1857    } else {
1858        if (errorDict) {	// collect the error dictionary
1859            *errorDict = CFDictionaryCreateMutable(allocator, 4, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1860            if (*errorDict) {
1861                CFIndex rawnum;
1862                CFNumberRef cfnum;
1863                CFStringRef errstring;
1864
1865                rawnum = CFXMLParserGetLocation(parser);
1866                cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
1867                if(cfnum) {
1868                    CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorLocation, cfnum);
1869                    CFRelease(cfnum);
1870                }
1871
1872                rawnum = CFXMLParserGetLineNumber(parser);
1873                cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
1874                if(cfnum) {
1875                    CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorLineNumber, cfnum);
1876                    CFRelease(cfnum);
1877                }
1878
1879                rawnum = CFXMLParserGetStatusCode(parser);
1880                cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
1881                if(cfnum) {
1882                    CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorStatusCode, cfnum);
1883                    CFRelease(cfnum);
1884                }
1885
1886                errstring = CFXMLParserCopyErrorDescription(parser);
1887                if(errstring) {
1888                    CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorDescription, errstring);
1889                    CFRelease(errstring);
1890                }
1891            }
1892        }
1893        result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1894        if (result) CFRelease(result);
1895        result = NULL;
1896    }
1897    CFRelease(parser);
1898    return result;
1899}
1900
1901/*
1902 At the very least we need to do <, >, &, ", and '. In addition, we'll have to do everything else in the string.
1903 We should also be handling items that are up over certain values correctly.
1904 */
1905CFStringRef CFXMLCreateStringByEscapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) {
1906    CFAssert1(string != NULL, __kCFLogAssertion, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__);
1907    CFMutableStringRef newString = CFStringCreateMutable(allocator, 0); // unbounded mutable string
1908    CFMutableCharacterSetRef startChars = CFCharacterSetCreateMutable(allocator);
1909
1910    CFStringInlineBuffer inlineBuf;
1911    CFIndex idx = 0;
1912    CFIndex mark = idx;
1913    CFIndex stringLength = CFStringGetLength(string);
1914    UniChar uc;
1915
1916    CFCharacterSetAddCharactersInString(startChars, CFSTR("&<>'\""));
1917
1918    CFStringInitInlineBuffer(string, &inlineBuf, CFRangeMake(0, stringLength));
1919    for(idx = 0; idx < stringLength; idx++) {
1920        uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, idx);
1921        if(CFCharacterSetIsCharacterMember(startChars, uc)) {
1922            CFStringRef previousSubstring = CFStringCreateWithSubstring(allocator, string, CFRangeMake(mark, idx - mark));
1923            CFStringAppend(newString, previousSubstring);
1924            CFRelease(previousSubstring);
1925            switch(uc) {
1926                case '&':
1927                    CFStringAppend(newString, CFSTR("&amp;"));
1928                    break;
1929                case '<':
1930                    CFStringAppend(newString, CFSTR("&lt;"));
1931                    break;
1932                case '>':
1933                    CFStringAppend(newString, CFSTR("&gt;"));
1934                    break;
1935                case '\'':
1936                    CFStringAppend(newString, CFSTR("&apos;"));
1937                    break;
1938                case '"':
1939                    CFStringAppend(newString, CFSTR("&quot;"));
1940                    break;
1941            }
1942            mark = idx + 1;
1943        }
1944    }
1945    // Copy the remainder to the output string before returning.
1946    CFStringRef remainder = CFStringCreateWithSubstring(allocator, string, CFRangeMake(mark, idx - mark));
1947    if (NULL != remainder) {
1948        CFStringAppend(newString, remainder);
1949        CFRelease(remainder);
1950    }
1951
1952    CFRelease(startChars);
1953    return newString;
1954}
1955
1956CFStringRef CFXMLCreateStringByUnescapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) {
1957    CFAssert1(string != NULL, __kCFLogAssertion, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__);
1958
1959    CFStringInlineBuffer inlineBuf; /* use this for fast traversal of the string in question */
1960    CFStringRef sub;
1961    CFIndex lastChunkStart, length = CFStringGetLength(string);
1962    CFIndex i, entityStart;
1963    UniChar uc;
1964    UInt32 entity;
1965    int base;
1966    CFMutableDictionaryRef fullReplDict = entitiesDictionary ? CFDictionaryCreateMutableCopy(allocator, 0, entitiesDictionary) : CFDictionaryCreateMutable(allocator, 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1967
1968    CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("amp"), (const void *)CFSTR("&"));
1969    CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("quot"), (const void *)CFSTR("\""));
1970    CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("lt"), (const void *)CFSTR("<"));
1971    CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("gt"), (const void *)CFSTR(">"));
1972    CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("apos"), (const void *)CFSTR("'"));
1973
1974    CFStringInitInlineBuffer(string, &inlineBuf, CFRangeMake(0, length - 1));
1975    CFMutableStringRef newString = CFStringCreateMutable(allocator, 0);
1976
1977    lastChunkStart = 0;
1978    // Scan through the string in its entirety
1979    for(i = 0; i < length; ) {
1980        uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;	// grab the next character and move i.
1981
1982        if(uc == '&') {
1983            entityStart = i - 1;
1984            entity = 0xFFFF;	// set this to a not-Unicode character as sentinel
1985                             // we've hit the beginning of an entity. Copy everything from lastChunkStart to this point.
1986            if(lastChunkStart < i - 1) {
1987                sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(lastChunkStart, (i - 1) - lastChunkStart));
1988                CFStringAppend(newString, sub);
1989                CFRelease(sub);
1990            }
1991
1992            uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;	// grab the next character and move i.
1993                                                                           // Now we can process the entity reference itself
1994            if(uc == '#') {	// this is a numeric entity.
1995                base = 10;
1996                entity = 0;
1997                uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
1998
1999                if(uc == 'x') {	// only lowercase x allowed. Translating numeric entity as hexadecimal.
2000                    base = 16;
2001                    uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
2002                }
2003
2004                // process the provided digits 'til we're finished
2005                while(true) {
2006                    if (uc >= '0' && uc <= '9')
2007                        entity = entity * base + (uc-'0');
2008                    else if (uc >= 'a' && uc <= 'f' && base == 16)
2009                        entity = entity * base + (uc-'a'+10);
2010                    else if (uc >= 'A' && uc <= 'F' && base == 16)
2011                        entity = entity * base + (uc-'A'+10);
2012                    else break;
2013
2014                    if (i < length) {
2015                        uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
2016                    }
2017                    else
2018                        break;
2019                }
2020            }
2021
2022            // Scan to the end of the entity
2023            while(uc != ';' && i < length) {
2024                uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
2025            }
2026
2027            if(0xFFFF != entity) { // it was numeric, and translated.
2028                // Now, output the result fo the entity
2029                if(entity >= 0x10000) {
2030                    UniChar characters[2] = { ((entity - 0x10000) >> 10) + 0xD800, ((entity - 0x10000) & 0x3ff) + 0xDC00 };
2031                    CFStringAppendCharacters(newString, characters, 2);
2032                } else {
2033                    UniChar character = entity;
2034                    CFStringAppendCharacters(newString, &character, 1);
2035                }
2036            } else {	// it wasn't numeric.
2037                sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(entityStart + 1, (i - entityStart - 2))); // This trims off the & and ; from the string, so we can use it against the dictionary itself.
2038                CFStringRef replacementString = (CFStringRef)CFDictionaryGetValue(fullReplDict, sub);
2039                if(replacementString) {
2040                    CFStringAppend(newString, replacementString);
2041                } else {
2042                    CFRelease(sub); // let the old substring go, since we didn't find it in the dictionary
2043                    sub =  CFStringCreateWithSubstring(allocator, string, CFRangeMake(entityStart, (i - entityStart))); // create a new one, including the & and ;
2044                    CFStringAppend(newString, sub); // ...and append that.
2045                }
2046                CFRelease(sub); // in either case, release the most-recent "sub"
2047            }
2048
2049            // move the lastChunkStart to the beginning of the next chunk.
2050            lastChunkStart = i;
2051        }
2052    }
2053    if(lastChunkStart < length) { // we've come out of the loop, let's get the rest of the string and tack it on.
2054        sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(lastChunkStart, i - lastChunkStart));
2055        CFStringAppend(newString, sub);
2056        CFRelease(sub);
2057    }
2058
2059    CFRelease(fullReplDict);
2060
2061    return newString;
2062}
2063
2064#pragma GCC diagnostic pop
2065