1/* 2 * Copyright (c) 2014 Apple Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. Please obtain a copy of the License at 10 * http://www.opensource.apple.com/apsl/ and read it before using this 11 * file. 12 * 13 * The Original Code and all software distributed under the License are 14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 18 * Please see the License for the specific language governing rights and 19 * limitations under the License. 20 * 21 * @APPLE_LICENSE_HEADER_END@ 22 */ 23 24/* CFXMLParser.c 25 Copyright (c) 1999-2013, Apple Inc. All rights reserved. 26 Responsibility: David Smith 27*/ 28 29#include <CoreFoundation/CFXMLParser.h> 30#include <CoreFoundation/CFNumber.h> 31#include "CFXMLInputStream.h" 32#include "CFUniChar.h" 33#include "CFInternal.h" 34 35#pragma GCC diagnostic push 36#pragma GCC diagnostic ignored "-Wdeprecated-declarations" 37 38struct __CFXMLParser { 39 CFRuntimeBase _cfBase; 40 41 _CFXMLInputStream input; 42 43 void **stack; 44 void **top; 45 UInt32 capacity; 46 47 struct __CFXMLNode *node; // Our private node; we use it to report back information 48 CFMutableDictionaryRef argDict; 49 CFMutableArrayRef argArray; 50 51 UInt32 options; 52 CFXMLParserCallBacks callBacks; 53 CFXMLParserContext context; 54 55 CFXMLParserStatusCode status; 56 CFStringRef errorString; 57}; 58 59static CFStringRef __CFXMLParserCopyDescription(CFTypeRef cf) { 60 const struct __CFXMLParser *parser = (const struct __CFXMLParser *)cf; 61 return CFStringCreateWithFormat(CFGetAllocator(cf), NULL, CFSTR("<CFXMLParser %p>"), parser); 62} 63 64static void __CFXMLParserDeallocate(CFTypeRef cf) { 65 struct __CFXMLParser *parser = (struct __CFXMLParser *)cf; 66 CFAllocatorRef alloc = CFGetAllocator(parser); 67 _freeInputStream(&(parser->input)); 68 if (parser->argDict) CFRelease(parser->argDict); 69 if (parser->argArray) CFRelease(parser->argArray); 70 if (parser->errorString) CFRelease(parser->errorString); 71 if (parser->node) CFRelease(parser->node); 72 CFAllocatorDeallocate(alloc, parser->stack); 73 if (parser->context.info && parser->context.release) { 74 parser->context.release(parser->context.info); 75 } 76} 77 78static CFTypeID __kCFXMLParserTypeID = _kCFRuntimeNotATypeID; 79 80static const CFRuntimeClass __CFXMLParserClass = { 81 0, 82 "CFXMLParser", 83 NULL, // init 84 NULL, // copy 85 __CFXMLParserDeallocate, 86 NULL, 87 NULL, 88 NULL, // 89 __CFXMLParserCopyDescription 90}; 91 92static void __CFXMLParserInitialize(void) { 93 __kCFXMLParserTypeID = _CFRuntimeRegisterClass(&__CFXMLParserClass); 94} 95 96CFTypeID CFXMLParserGetTypeID(void) { 97 if (_kCFRuntimeNotATypeID == __kCFXMLParserTypeID) __CFXMLParserInitialize(); 98 return __kCFXMLParserTypeID; 99} 100 101void CFXMLParserGetContext(CFXMLParserRef parser, CFXMLParserContext *context) { 102 CFAssert1(parser != NULL, __kCFLogAssertion, "%s(): NULL parser not permitted", __PRETTY_FUNCTION__); 103 __CFGenericValidateType(parser, CFXMLParserGetTypeID()); 104 if (context) { 105 context->version = parser->context.version; 106 context->info = parser->context.info; 107 context->retain = parser->context.retain; 108 context->release = parser->context.release; 109 context->copyDescription = parser->context.copyDescription; 110 UNFAULT_CALLBACK(context->retain); 111 UNFAULT_CALLBACK(context->release); 112 UNFAULT_CALLBACK(context->copyDescription); 113 } 114} 115 116void CFXMLParserGetCallBacks(CFXMLParserRef parser, CFXMLParserCallBacks *callBacks) { 117 __CFGenericValidateType(parser, CFXMLParserGetTypeID()); 118 if (callBacks) { 119 callBacks->version = parser->callBacks.version; 120 callBacks->createXMLStructure = parser->callBacks.createXMLStructure; 121 callBacks->addChild = parser->callBacks.addChild; 122 callBacks->endXMLStructure = parser->callBacks.endXMLStructure; 123 callBacks->resolveExternalEntity = parser->callBacks.resolveExternalEntity; 124 callBacks->handleError = parser->callBacks.handleError; 125 UNFAULT_CALLBACK(callBacks->createXMLStructure); 126 UNFAULT_CALLBACK(callBacks->addChild); 127 UNFAULT_CALLBACK(callBacks->endXMLStructure); 128 UNFAULT_CALLBACK(callBacks->resolveExternalEntity); 129 UNFAULT_CALLBACK(callBacks->handleError); 130 } 131} 132 133CFURLRef CFXMLParserGetSourceURL(CFXMLParserRef parser) { 134 __CFGenericValidateType(parser, CFXMLParserGetTypeID()); 135 return parser->input.url; 136} 137 138/* Returns the character index or line number of the current parse location */ 139CFIndex CFXMLParserGetLocation(CFXMLParserRef parser) { 140 __CFGenericValidateType(parser, CFXMLParserGetTypeID()); 141 return _inputStreamCurrentLocation(&parser->input); 142} 143 144CFIndex CFXMLParserGetLineNumber(CFXMLParserRef parser) { 145 __CFGenericValidateType(parser, CFXMLParserGetTypeID()); 146 return _inputStreamCurrentLine(&parser->input); 147} 148 149/* Returns the top-most object returned by the createXMLStructure callback */ 150void *CFXMLParserGetDocument(CFXMLParserRef parser) { 151 __CFGenericValidateType(parser, CFXMLParserGetTypeID()); 152 if (parser->capacity > 0) 153 return parser->stack[0]; 154 else 155 return NULL; 156} 157 158CFXMLParserStatusCode CFXMLParserGetStatusCode(CFXMLParserRef parser) { 159 __CFGenericValidateType(parser, CFXMLParserGetTypeID()); 160 return parser->status; 161} 162 163CFStringRef CFXMLParserCopyErrorDescription(CFXMLParserRef parser) { 164 __CFGenericValidateType(parser, CFXMLParserGetTypeID()); 165 return (CFStringRef)CFRetain(parser->errorString); 166} 167 168void CFXMLParserAbort(CFXMLParserRef parser, CFXMLParserStatusCode errorCode, CFStringRef errorDescription) { 169 __CFGenericValidateType(parser, CFXMLParserGetTypeID()); 170 CFAssert1(errorCode > 0, __kCFLogAssertion, "%s(): errorCode must be greater than zero", __PRETTY_FUNCTION__); 171 CFAssert1(errorDescription != NULL, __kCFLogAssertion, "%s(): errorDescription may not be NULL", __PRETTY_FUNCTION__); 172 __CFGenericValidateType(errorDescription, CFStringGetTypeID()); 173 174 parser->status = errorCode; 175 if (parser->errorString) CFRelease(parser->errorString); 176 parser->errorString = (CFStringRef)CFStringCreateCopy(kCFAllocatorSystemDefault, errorDescription); 177} 178 179 180static Boolean parseXML(CFXMLParserRef parser); 181static Boolean parseComment(CFXMLParserRef parser, Boolean report); 182static Boolean parseProcessingInstruction(CFXMLParserRef parser, Boolean report); 183static Boolean parseInlineDTD(CFXMLParserRef parser); 184static Boolean parseDTD(CFXMLParserRef parser); 185static Boolean parsePhysicalEntityReference(CFXMLParserRef parser); 186static Boolean parseCDSect(CFXMLParserRef parser); 187static Boolean parseEntityReference(CFXMLParserRef parser, Boolean report); 188static Boolean parsePCData(CFXMLParserRef parser); 189static Boolean parseWhitespace(CFXMLParserRef parser); 190static Boolean parseAttributeListDeclaration(CFXMLParserRef parser); 191static Boolean parseNotationDeclaration(CFXMLParserRef parser); 192static Boolean parseElementDeclaration(CFXMLParserRef parser); 193static Boolean parseEntityDeclaration(CFXMLParserRef parser); 194static Boolean parseExternalID(CFXMLParserRef parser, Boolean alsoAcceptPublicID, CFXMLExternalID *extID); 195static Boolean parseCloseTag(CFXMLParserRef parser, CFStringRef tag); 196static Boolean parseTagContent(CFXMLParserRef parser); 197static Boolean parseTag(CFXMLParserRef parser); 198static Boolean parseAttributes(CFXMLParserRef parser); 199static Boolean parseAttributeValue(CFXMLParserRef parser, CFMutableStringRef str); 200 201// Utilities; may need to make these accessible to the property list parser to avoid code duplication 202static void _CFReportError(CFXMLParserRef parser, CFXMLParserStatusCode errNum, const char *str); 203static Boolean reportNewLeaf(CFXMLParserRef parser); // Assumes parser->node has been set and is ready to go 204static void pushXMLNode(CFXMLParserRef parser, void *node); 205 206static CFXMLParserRef __CFXMLParserInit(CFAllocatorRef alloc, CFURLRef dataSource, CFOptionFlags options, CFDataRef xmlData, CFIndex version, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) { 207 struct __CFXMLParser *parser = (struct __CFXMLParser *)_CFRuntimeCreateInstance(alloc, CFXMLParserGetTypeID(), sizeof(struct __CFXMLParser) - sizeof(CFRuntimeBase), NULL); 208 struct __CFXMLNode *node = (struct __CFXMLNode *)_CFRuntimeCreateInstance(alloc, CFXMLNodeGetTypeID(), sizeof(struct __CFXMLNode) - sizeof(CFRuntimeBase), NULL); 209 UniChar *buf; 210 if (parser && node) { 211 alloc = CFGetAllocator(parser); 212 _initializeInputStream(&(parser->input), alloc, dataSource, xmlData); 213 parser->top = parser->stack; 214 parser->stack = NULL; 215 parser->capacity = 0; 216 217 buf = (UniChar *)CFAllocatorAllocate(alloc, 128*sizeof(UniChar), 0); 218 parser->node = node; 219 parser->node->dataString = CFStringCreateMutableWithExternalCharactersNoCopy(alloc, buf, 0, 128, alloc); 220 parser->node->additionalData = NULL; 221 parser->node->version = version; 222 parser->argDict = NULL; // don't create these until necessary 223 parser->argArray = NULL; 224 225 parser->options = options; 226 parser->callBacks = *callBacks; 227 228 FAULT_CALLBACK((void **)&(parser->callBacks.createXMLStructure)); 229 FAULT_CALLBACK((void **)&(parser->callBacks.addChild)); 230 FAULT_CALLBACK((void **)&(parser->callBacks.endXMLStructure)); 231 FAULT_CALLBACK((void **)&(parser->callBacks.resolveExternalEntity)); 232 FAULT_CALLBACK((void **)&(parser->callBacks.handleError)); 233 234 if (context) { 235 parser->context = *context; 236 if (parser->context.info && parser->context.retain) { 237 parser->context.retain(parser->context.info); 238 } 239 } else { 240 parser->context.version = 0; 241 parser->context.info = NULL; 242 parser->context.retain = NULL; 243 parser->context.release = NULL; 244 parser->context.copyDescription = NULL; 245 } 246 parser->status = kCFXMLStatusParseNotBegun; 247 parser->errorString = NULL; 248 } else { 249 if (parser) CFRelease(parser); 250 if (node) CFRelease(node); 251 parser = NULL; 252 } 253 return parser; 254} 255 256CFXMLParserRef CFXMLParserCreate(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) { 257 CFAssert1(xmlData != NULL, __kCFLogAssertion, "%s(): NULL data not permitted", __PRETTY_FUNCTION__); 258 __CFGenericValidateType(xmlData, CFDataGetTypeID()); 259 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__); 260 CFAssert1(callBacks != NULL && callBacks->createXMLStructure != NULL && callBacks->addChild != NULL && callBacks->endXMLStructure != NULL, __kCFLogAssertion, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__); 261 CFAssert2(versionOfNodes <= 1, __kCFLogAssertion, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__, versionOfNodes); 262 CFAssert1(versionOfNodes != 0, __kCFLogAssertion, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__); 263 return __CFXMLParserInit(allocator, dataSource, parseOptions, xmlData, versionOfNodes, callBacks, context); 264} 265 266CFXMLParserRef CFXMLParserCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) { 267 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__); 268 CFAssert1(callBacks != NULL && callBacks->createXMLStructure != NULL && callBacks->addChild != NULL && callBacks->endXMLStructure != NULL, __kCFLogAssertion, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__); 269 CFAssert2(versionOfNodes <= 1, __kCFLogAssertion, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__, versionOfNodes); 270 CFAssert1(versionOfNodes != 0, __kCFLogAssertion, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__); 271 272 return __CFXMLParserInit(allocator, dataSource, parseOptions, NULL, versionOfNodes, callBacks, context); 273} 274 275Boolean CFXMLParserParse(CFXMLParserRef parser) { 276 CFXMLDocumentInfo docData; 277 __CFGenericValidateType(parser, CFXMLParserGetTypeID()); 278 if (parser->status != kCFXMLStatusParseNotBegun) return false; 279 parser->status = kCFXMLStatusParseInProgress; 280 281 if (!_openInputStream(&parser->input)) { 282 if (!parser->input.data) { 283 // couldn't load URL 284 parser->status = kCFXMLErrorNoData; 285 parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("No data found at %@"), CFURLGetString(parser->input.url)); 286 } else { 287 // couldn't figure out the encoding 288 CFAssert(parser->input.encoding == kCFStringEncodingInvalidId, __kCFLogAssertion, "CFXMLParser internal error: input stream could not be opened"); 289 parser->status = kCFXMLErrorUnknownEncoding; 290 parser->errorString = CFStringCreateWithCString(CFGetAllocator(parser), "Encountered unknown encoding", kCFStringEncodingASCII); 291 } 292 if (parser->callBacks.handleError) { 293 INVOKE_CALLBACK3(parser->callBacks.handleError, parser, parser->status, parser->context.info); 294 } 295 return false; 296 } 297 298 // Create the document 299 parser->stack = (void **)CFAllocatorAllocate(CFGetAllocator(parser), 16 * sizeof(void *), 0); 300 parser->capacity = 16; 301 parser->node->dataTypeID = kCFXMLNodeTypeDocument; 302 docData.encoding = _inputStreamGetEncoding(&parser->input); 303 docData.sourceURL = parser->input.url; 304 parser->node->additionalData = &docData; 305 parser->stack[0] = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info); 306 parser->top = parser->stack; 307 parser->node->additionalData = NULL; 308 309 // Client may have called CFXMLParserAbort() during any callback, so we must always check to see if we have an error status after a callback 310 if (parser->status != kCFXMLStatusParseInProgress) { 311 _CFReportError(parser, parser->status, NULL); 312 return false; 313 } 314 return parseXML(parser); 315} 316 317/* The next several functions are all intended to parse past a particular XML structure. They expect parser->curr to be set to the first content character of their structure (e.g. parseXMLComment expects parser->curr to be set just past "<!--"). They parse to the end of their structure, calling any necessary callbacks along the way, and advancing parser->curr as they go. They either return void (not possible for the parse to fail) or they return a Boolean (success/failure). The calling routines are expected to catch returned Booleans and fail immediately if false is returned. */ 318 319// [3] S ::= (#x20 | #x9 | #xD | #xA)+ 320static Boolean parseWhitespace(CFXMLParserRef parser) { 321 CFIndex len; 322 Boolean report = !(parser->options & kCFXMLParserSkipWhitespace); 323 len = _inputStreamSkipWhitespace(&parser->input, report ? (CFMutableStringRef)(parser->node->dataString) : NULL); 324 if (report && len) { 325 parser->node->dataTypeID = kCFXMLNodeTypeWhitespace; 326 parser->node->additionalData = NULL; 327 return reportNewLeaf(parser); 328 } else { 329 return true; 330 } 331} 332 333// parser should be just past "<!--" 334static Boolean parseComment(CFXMLParserRef parser, Boolean report) { 335 const UniChar dashes[2] = {'-', '-'}; 336 UniChar ch; 337 report = report && (!(parser->options & kCFXMLParserSkipMetaData)); 338 if (!_inputStreamScanToCharacters(&parser->input, dashes, 2, report ? (CFMutableStringRef)(parser->node->dataString) : NULL) || !_inputStreamGetCharacter(&parser->input, &ch)) { 339 _CFReportError(parser, kCFXMLErrorUnexpectedEOF,"Found unexpected EOF while parsing comment"); 340 return false; 341 } else if (ch != '>') { 342 _CFReportError(parser, kCFXMLErrorMalformedComment, "Found \"--\" within a comment"); 343 return false; 344 } else if (report) { 345 parser->node->dataTypeID = kCFXMLNodeTypeComment; 346 parser->node->additionalData = NULL; 347 return reportNewLeaf(parser); 348 } else { 349 return true; 350 } 351} 352 353/* 354[16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 355[17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 356 */ 357// parser should be set to the first character after "<?" 358static Boolean parseProcessingInstruction(CFXMLParserRef parser, Boolean report) { 359 const UniChar piTermination[2] = {'?', '>'}; 360 CFMutableStringRef str; 361 CFStringRef name; 362 363 if (!_inputStreamScanXMLName(&parser->input, false, &name)) { 364 _CFReportError(parser, kCFXMLErrorMalformedProcessingInstruction, "Found malformed processing instruction"); 365 return false; 366 } 367 _inputStreamSkipWhitespace(&parser->input, NULL); 368 str = (report && *parser->top) ? CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)) : NULL; 369 if (!_inputStreamScanToCharacters(&parser->input, piTermination, 2, str)) { 370 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing processing instruction"); 371 if (str) CFRelease(str); 372 return false; 373 } 374 375 if (str) { 376 CFXMLProcessingInstructionInfo data; 377 Boolean result; 378 CFStringRef tmp = parser->node->dataString; 379 parser->node->dataTypeID = kCFXMLNodeTypeProcessingInstruction; 380 parser->node->dataString = name; 381 data.dataString = str; 382 parser->node->additionalData = &data; 383 result = reportNewLeaf(parser); 384 parser->node->additionalData = NULL; 385 parser->node->dataString = tmp; 386 CFRelease(str); 387 return result; 388 } else { 389 return true; 390 } 391} 392 393/* 394 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 395*/ 396static const UniChar _DoctypeOpening[7] = {'D', 'O', 'C', 'T', 'Y', 'P', 'E'}; 397// first character should be immediately after the "<!" 398static Boolean parseDTD(CFXMLParserRef parser) { 399 UniChar ch; 400 Boolean success, hasExtID = false; 401 CFXMLDocumentTypeInfo docData = {{NULL, NULL}}; 402 void *dtdStructure = NULL; 403 CFStringRef name; 404 405 // First pass "DOCTYPE" 406 success = _inputStreamMatchString(&parser->input, _DoctypeOpening, 7); 407 success = success && _inputStreamSkipWhitespace(&parser->input, NULL) != 0; 408 success = success && _inputStreamScanXMLName(&parser->input, false, &name); 409 if (success) { 410 _inputStreamSkipWhitespace(&parser->input, NULL); 411 success = _inputStreamPeekCharacter(&parser->input, &ch); 412 } else { 413 // didn't make it past "DOCTYPE" successfully. 414 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found malformed DTD"); 415 return false; 416 } 417 if (success && ch != '[' && ch != '>') { 418 // ExternalID 419 hasExtID = true; 420 success = parseExternalID(parser, false, &(docData.externalID)); 421 if (success) { 422 _inputStreamSkipWhitespace(&parser->input, NULL); 423 success = _inputStreamPeekCharacter(&parser->input, &ch); 424 } 425 } 426 427 if (!(parser->options & kCFXMLParserSkipMetaData) && *(parser->top)) { 428 CFStringRef tmp = parser->node->dataString; 429 parser->node->dataTypeID = kCFXMLNodeTypeDocumentType; 430 parser->node->dataString = name; 431 parser->node->additionalData = &docData; 432 dtdStructure = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info); 433 if (dtdStructure && parser->status == kCFXMLStatusParseInProgress) { 434 INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *parser->top, dtdStructure, parser->context.info); 435 } 436 parser->node->additionalData = NULL; 437 parser->node->dataString = tmp; 438 if (parser->status != kCFXMLStatusParseInProgress) { 439 // callback called CFXMLParserAbort() 440 _CFReportError(parser, parser->status, NULL); 441 return false; 442 } 443 } else { 444 dtdStructure = NULL; 445 } 446 if (docData.externalID.publicID) CFRelease(docData.externalID.publicID); 447 if (docData.externalID.systemID) CFRelease(docData.externalID.systemID); 448 pushXMLNode(parser, dtdStructure); 449 450 if (success && ch == '[') { 451 // inline DTD 452 _inputStreamGetCharacter(&parser->input, &ch); 453 if (!parseInlineDTD(parser)) return false; 454 _inputStreamSkipWhitespace(&parser->input, NULL); 455 success = _inputStreamGetCharacter(&parser->input, &ch) && ch == '>'; 456 } else if (success && ch == '>') { 457 // End of the DTD 458 _inputStreamGetCharacter(&parser->input, &ch); 459 } 460 if (!success) { 461 if (_inputStreamAtEOF(&parser->input)) { 462 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing DTD"); 463 } else { 464 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found malformed DTD"); 465 } 466 return false; 467 } 468 469 parser->top --; // Remove dtdStructure from the stack 470 471 if (success && dtdStructure) { 472 INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, dtdStructure, parser->context.info); 473 if (parser->status != kCFXMLStatusParseInProgress) { 474 _CFReportError(parser, parser->status, NULL); 475 return false; 476 } 477 } 478 return true; 479} 480 481/* 482 [69] PEReference ::= '%' Name ';' 483*/ 484static Boolean parsePhysicalEntityReference(CFXMLParserRef parser) { 485 UniChar ch; 486 CFStringRef name; 487 if (!_inputStreamScanXMLName(&parser->input, false, &name)) { 488 _CFReportError(parser, kCFXMLErrorMalformedName, "Found malformed name while parsing physical entity reference"); 489 return false; 490 } else if (!_inputStreamGetCharacter(&parser->input, &ch)) { 491 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing physical entity reference"); 492 return false; 493 } else if (ch != ';') { 494 _CFReportError(parser, kCFXMLErrorMalformedName, "Found malformed name while parsing physical entity reference"); 495 return false; 496 } else if (!(parser->options & kCFXMLParserSkipMetaData) && *(parser->top)) { 497 CFXMLEntityReferenceInfo myData; 498 Boolean result; 499 CFStringRef tmp = parser->node->dataString; 500 parser->node->dataTypeID = kCFXMLNodeTypeEntityReference; 501 parser->node->dataString = name; 502 myData.entityType = kCFXMLEntityTypeParameter; 503 parser->node->additionalData = &myData; 504 result = reportNewLeaf(parser); 505 parser->node->additionalData = NULL; 506 parser->node->dataString = tmp; 507 return result; 508 } else { 509 return true; 510 } 511} 512 513/* 514 [54] AttType ::= StringType | TokenizedType | EnumeratedType 515 [55] StringType ::= 'CDATA' 516 [56] TokenizedType ::= 'ID' | 'IDREF'| 'IDREFS'| 'ENTITY'| 'ENTITIES'| 'NMTOKEN'| 'NMTOKENS' 517 [57] EnumeratedType ::= NotationType | Enumeration 518 [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' 519 [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' 520*/ 521static Boolean parseEnumeration(CFXMLParserRef parser, Boolean useNMTokens) { 522 UniChar ch; 523 Boolean done = false; 524 if (!_inputStreamGetCharacter(&parser->input, &ch)) { 525 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); 526 return false; 527 } else if (ch != '(') { 528 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); 529 return false; 530 } 531 _inputStreamSkipWhitespace(&parser->input, NULL); 532 if (!_inputStreamScanXMLName(&parser->input, useNMTokens, NULL)) { 533 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); 534 return false; 535 } 536 while (!done) { 537 _inputStreamSkipWhitespace(&parser->input, NULL); 538 if (!_inputStreamGetCharacter(&parser->input, &ch)) { 539 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); 540 return false; 541 } else if (ch == ')') { 542 done = true; 543 } else if (ch == '|') { 544 _inputStreamSkipWhitespace(&parser->input, NULL); 545 if (!_inputStreamScanXMLName(&parser->input, useNMTokens, NULL)) { 546 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); 547 return false; 548 } 549 } else { 550 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); 551 return false; 552 } 553 } 554 return true; 555} 556 557static Boolean parseAttributeType(CFXMLParserRef parser, CFMutableStringRef str) { 558 Boolean success = false; 559 static const UniChar attTypeStrings[6][8] = { 560 {'C', 'D', 'A', 'T', 'A', '\0', '\0', '\0'}, 561 {'I', 'D', 'R', 'E', 'F', 'S', '\0', '\0'}, 562 {'E', 'N', 'T', 'I', 'T', 'Y', '\0', '\0'}, 563 {'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S'}, 564 {'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S'}, 565 {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'} }; 566 if (str) _inputStreamSetMark(&parser->input); 567 if (_inputStreamMatchString(&parser->input, attTypeStrings[0], 5) || 568 _inputStreamMatchString(&parser->input, attTypeStrings[1], 6) || 569 _inputStreamMatchString(&parser->input, attTypeStrings[1], 5) || 570 _inputStreamMatchString(&parser->input, attTypeStrings[1], 2) || 571 _inputStreamMatchString(&parser->input, attTypeStrings[2], 6) || 572 _inputStreamMatchString(&parser->input, attTypeStrings[3], 8) || 573 _inputStreamMatchString(&parser->input, attTypeStrings[4], 8) || 574 _inputStreamMatchString(&parser->input, attTypeStrings[4], 7)) { 575 success = true; 576 } else if (_inputStreamMatchString(&parser->input, attTypeStrings[5], 8)) { 577 // Notation 578 if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0) { 579 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); 580 success = false; 581 } else { 582 success = parseEnumeration(parser, false); 583 } 584 } else { 585 success = parseEnumeration(parser, true); 586 } 587 if (str) { 588 if (success) { 589 _inputStreamGetCharactersFromMark(&parser->input, str); 590 } 591 _inputStreamClearMark(&parser->input); 592 } 593 return success; 594} 595 596/* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) */ 597static Boolean parseAttributeDefaultDeclaration(CFXMLParserRef parser, CFMutableStringRef str) { 598 const UniChar strings[3][8] = { 599 {'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D'}, 600 {'I', 'M', 'P', 'L', 'I', 'E', 'D', '\0'}, 601 {'F', 'I', 'X', 'E', 'D', '\0', '\0', '\0'}}; 602 UniChar ch; 603 Boolean success; 604 if (str) _inputStreamSetMark(&parser->input); 605 if (!_inputStreamGetCharacter(&parser->input, &ch)) { 606 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); 607 success = false; 608 } else if (ch == '#') { 609 if (_inputStreamMatchString(&parser->input, strings[0], 8) || 610 _inputStreamMatchString(&parser->input, strings[1], 7)) { 611 success = true; 612 } else if (!_inputStreamMatchString(&parser->input, strings[2], 5) || _inputStreamSkipWhitespace(&parser->input, NULL) == 0) { 613 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); 614 success = false; 615 } else { 616 // we fall through if "#FIXED" was matched, and at least one whitespace character was stripped. 617 success = parseAttributeValue(parser, NULL); 618 } 619 } else { 620 _inputStreamReturnCharacter(&parser->input, ch); 621 success = parseAttributeValue(parser, NULL); 622 } 623 if (str) { 624 if (success) { 625 _inputStreamGetCharactersFromMark(&parser->input, str); 626 } 627 _inputStreamClearMark(&parser->input); 628 } 629 return success; 630} 631 632/* 633 [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' 634 [53] AttDef ::= S Name S AttType S DefaultDecl 635*/ 636static Boolean parseAttributeListDeclaration(CFXMLParserRef parser) { 637 const UniChar attList[7] = {'A', 'T', 'T', 'L', 'I', 'S', 'T'}; 638 CFXMLAttributeListDeclarationInfo attListData; 639 CFXMLAttributeDeclarationInfo attributeArray[8], *attributes=attributeArray; 640 CFIndex capacity = 8; 641 UniChar ch; 642 Boolean success = true; 643 CFStringRef name; 644 if (!_inputStreamMatchString(&parser->input, attList, 7) || 645 _inputStreamSkipWhitespace(&parser->input, NULL) == 0 || 646 !_inputStreamScanXMLName(&parser->input, false, &name)) { 647 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); 648 return false; 649 } 650 attListData.numberOfAttributes = 0; 651 if (!(*parser->top) || (parser->options & kCFXMLParserSkipMetaData)) { 652 // Use this to mark that we don't need to collect attribute information to report to the client. Ultimately, we may want to collect this for our own use (for validation, for instance), but for now, the only reason we would create it would be for the client. -- REW, 2/9/2000 653 attributes = NULL; 654 } 655 while (_inputStreamPeekCharacter(&parser->input, &ch) && ch != '>' && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) { 656 CFXMLAttributeDeclarationInfo *attribute = NULL; 657 if (_inputStreamPeekCharacter(&parser->input, &ch) && ch == '>') 658 break; 659 if (attributes) { 660 if (capacity == attListData.numberOfAttributes) { 661 capacity = 2*capacity; 662 if (attributes != attributeArray) { 663 attributes = (CFXMLAttributeDeclarationInfo *)CFAllocatorReallocate(CFGetAllocator(parser), attributes, capacity * sizeof(CFXMLAttributeDeclarationInfo), 0); 664 } else { 665 attributes = (CFXMLAttributeDeclarationInfo *)CFAllocatorAllocate(CFGetAllocator(parser), capacity * sizeof(CFXMLAttributeDeclarationInfo), 0); 666 } 667 } 668 attribute = &(attributes[attListData.numberOfAttributes]); 669 // Much better if we can somehow create these strings immutable - then if the client (or we ourselves) has to copy them, they will end up multiply-retained, rather than having a new alloc and data copy performed. -- REW, 2/9/2000 670 attribute->typeString = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)); 671 attribute->defaultString = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)); 672 } 673 if (!_inputStreamScanXMLName(&parser->input, false, &(attribute->attributeName)) || (_inputStreamSkipWhitespace(&parser->input, NULL) == 0)) { 674 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); 675 success = false; 676 break; 677 } 678 if (!parseAttributeType(parser, attribute ? (CFMutableStringRef)attribute->typeString : NULL)) { 679 success = false; 680 break; 681 } 682 if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0) { 683 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); 684 success = false; 685 break; 686 } 687 if (!parseAttributeDefaultDeclaration(parser, attribute ? (CFMutableStringRef)attribute->defaultString : NULL)) { 688 success = false; 689 break; 690 } 691 attListData.numberOfAttributes ++; 692 } 693 if (success) { 694 if (!_inputStreamGetCharacter(&parser->input, &ch)) { 695 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); 696 success = false; 697 } else if (ch != '>') { 698 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); 699 success = false; 700 } else if (attributes) { 701 CFStringRef tmp = parser->node->dataString; 702 parser->node->dataTypeID = kCFXMLNodeTypeAttributeListDeclaration; 703 parser->node->dataString = name; 704 attListData.attributes = attributes; 705 parser->node->additionalData = (void *)&attListData; 706 success = reportNewLeaf(parser); 707 parser->node->additionalData = NULL; 708 parser->node->dataString = tmp; 709 } 710 } 711 if (attributes) { 712 // Free up all that memory 713 CFIndex idx; 714 for (idx = 0; idx < attListData.numberOfAttributes; idx ++) { 715 // Do not release attributeName here; it's a uniqued string from scanXMLName 716 CFRelease(attributes[idx].typeString); 717 CFRelease(attributes[idx].defaultString); 718 } 719 if (attributes != attributeArray) { 720 CFAllocatorDeallocate(CFGetAllocator(parser), attributes); 721 } 722 } 723 return success; 724} 725 726CF_INLINE Boolean parseSystemLiteral(CFXMLParserRef parser, CFXMLExternalID *extID) { 727 Boolean success; 728 if (extID) { 729 CFMutableStringRef urlStr = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)); 730 if (_inputStreamScanQuotedString(&parser->input, urlStr)) { 731 success = true; 732 extID->systemID = CFURLCreateWithString(CFGetAllocator(parser), urlStr, parser->input.url); 733 } else { 734 extID->systemID = NULL; 735 success = false; 736 } 737 CFRelease(urlStr); 738 } else { 739 success = _inputStreamScanQuotedString(&parser->input, NULL); 740 } 741 return success; 742} 743 744/* 745 [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral 746 [83] PublicID ::= 'PUBLIC' S PubidLiteral 747 [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 748 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] 749 [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") 750*/ 751// This does NOT report errors itself; caller can check to see if parser->input is at EOF to determine whether the formatting failed or unexpected EOF occurred. -- REW, 2/2/2000 752static Boolean parseExternalID(CFXMLParserRef parser, Boolean alsoAcceptPublicID, CFXMLExternalID *extID) { 753 const UniChar publicString[6] = {'P', 'U', 'B', 'L', 'I', 'C'}; 754 const UniChar systemString[6] = {'S', 'Y', 'S', 'T', 'E', 'M'}; 755 Boolean success; 756 if (extID) { 757 extID->systemID = NULL; 758 extID->publicID = NULL; 759 } 760 if (_inputStreamMatchString(&parser->input, publicString, 6)) { 761 success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0; 762 if (extID) { 763 extID->publicID = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)); 764 success = success && _inputStreamScanQuotedString(&parser->input, (CFMutableStringRef)extID->publicID); 765 } else { 766 success = success && _inputStreamScanQuotedString(&parser->input, NULL); 767 } 768 if (success) { 769 UniChar ch; 770 if (alsoAcceptPublicID) { 771 _inputStreamSetMark(&parser->input); // In case we need to roll back the parser 772 } 773 if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0 774 || !_inputStreamPeekCharacter(&parser->input, &ch) 775 || (ch != '\'' && ch != '\"') 776 || !parseSystemLiteral(parser, extID)) { 777 success = alsoAcceptPublicID; 778 if (alsoAcceptPublicID) { 779 _inputStreamBackUpToMark(&parser->input); 780 } 781 } else { 782 success = true; 783 } 784 if (alsoAcceptPublicID) { 785 _inputStreamClearMark(&parser->input); 786 } 787 } 788 } else if (_inputStreamMatchString(&parser->input, systemString, 6)) { 789 success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0 && parseSystemLiteral(parser, extID); 790 } else { 791 success = false; 792 } 793 return success; 794} 795 796/* 797 [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' 798*/ 799static Boolean parseNotationDeclaration(CFXMLParserRef parser) { 800 static UniChar notationString[8] = {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'}; 801 Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData); 802 CFXMLNotationInfo notationData = {{NULL, NULL}}; 803 CFStringRef name; 804 Boolean success = 805 _inputStreamMatchString(&parser->input, notationString, 8) && 806 _inputStreamSkipWhitespace(&parser->input, NULL) != 0 && 807 _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) && 808 _inputStreamSkipWhitespace(&parser->input, NULL) != 0 && 809 parseExternalID(parser, true, report ? &(notationData.externalID) : NULL); 810 811 if (success) { 812 UniChar ch; 813 _inputStreamSkipWhitespace(&parser->input, NULL); 814 success = (_inputStreamGetCharacter(&parser->input, &ch) && ch == '>'); 815 } 816 if (!success) { 817 if (_inputStreamAtEOF(&parser->input)) { 818 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); 819 } else { 820 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); 821 } 822 } else if (report) { 823 CFStringRef tmp = parser->node->dataString; 824 parser->node->dataTypeID = kCFXMLNodeTypeNotation; 825 parser->node->dataString = name; 826 parser->node->additionalData = ¬ationData; 827 success = reportNewLeaf(parser); 828 parser->node->additionalData = NULL; 829 parser->node->dataString = tmp; 830 } 831 if (notationData.externalID.systemID) CFRelease(notationData.externalID.systemID); 832 if (notationData.externalID.publicID) CFRelease(notationData.externalID.publicID); 833 return success; 834} 835 836/* 837 [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 838 [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' 839 [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' 840*/ 841static Boolean parseChoiceOrSequence(CFXMLParserRef parser, Boolean pastParen) { 842 UniChar ch, separator; 843 if (!pastParen) { 844 if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '(') return false; 845 _inputStreamSkipWhitespace(&parser->input, NULL); 846 } 847 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false; 848 849 /* Now scanning cp, production [48] */ 850 if (ch == '(') { 851 if (!parseChoiceOrSequence(parser, false)) return false; 852 } else { 853 if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false; 854 } 855 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false; 856 if (ch == '?' || ch == '*' || ch == '+') _inputStreamGetCharacter(&parser->input, &ch); 857 858 /* Now past cp */ 859 _inputStreamSkipWhitespace(&parser->input, NULL); 860 if (!_inputStreamGetCharacter(&parser->input, &ch)) return false; 861 if (ch == ')') return true; 862 if (ch != '|' && ch != ',') return false; 863 separator = ch; 864 while (ch == separator) { 865 _inputStreamSkipWhitespace(&parser->input, NULL); 866 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false; 867 if (ch != '(') { 868 if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false; 869 } else if (!parseChoiceOrSequence(parser, false)) { 870 return false; 871 } 872 _inputStreamSkipWhitespace(&parser->input, NULL); 873 if (!_inputStreamGetCharacter(&parser->input, &ch)) return false; 874 } 875 return ch == ')'; 876} 877 878/* 879 [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')' 880*/ 881static Boolean parseMixedElementContent(CFXMLParserRef parser) { 882 static const UniChar pcdataString[7] = {'#', 'P', 'C', 'D', 'A', 'T', 'A'}; 883 UniChar ch; 884 if (!_inputStreamMatchString(&parser->input, pcdataString, 7)) return false; 885 _inputStreamSkipWhitespace(&parser->input, NULL); 886 if (!_inputStreamGetCharacter(&parser->input, &ch) && (ch == ')' || ch == '|')) return false; 887 if (ch == ')') return true; 888 889 while (ch == '|') { 890 _inputStreamSkipWhitespace(&parser->input, NULL); 891 if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false; 892 _inputStreamSkipWhitespace(&parser->input, NULL); 893 if (!_inputStreamGetCharacter(&parser->input, &ch)) return false; 894 } 895 if (ch != ')') return false; 896 if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '*') return false; 897 return true; 898} 899 900/* 901 [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children 902 [47] children ::= (choice | seq) ('?' | '*' | '+')? 903 */ 904static Boolean parseElementContentSpec(CFXMLParserRef parser) { 905 static const UniChar eltContentEmpty[5] = {'E', 'M', 'P', 'T', 'Y'}; 906 static const UniChar eltContentAny[3] = {'A', 'N', 'Y'}; 907 UniChar ch; 908 if (_inputStreamMatchString(&parser->input, eltContentEmpty, 5) || _inputStreamMatchString(&parser->input, eltContentAny, 3)) { 909 return true; 910 } else if (!_inputStreamPeekCharacter(&parser->input, &ch) || ch != '(') { 911 return false; 912 } else { 913 // We want to know if we have a Mixed per production [51]. If we don't, we will need to back up and call the parseChoiceOrSequence function. So we set the mark now. -- REW, 2/10/2000 914 _inputStreamGetCharacter(&parser->input, &ch); 915 _inputStreamSkipWhitespace(&parser->input, NULL); 916 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false; 917 if (ch == '#') { 918 // Mixed 919 return parseMixedElementContent(parser); 920 } else { 921 if (parseChoiceOrSequence(parser, true)) { 922 if (_inputStreamPeekCharacter(&parser->input, &ch) && (ch == '*' || ch == '?' || ch == '+')) { 923 _inputStreamGetCharacter(&parser->input, &ch); 924 } 925 return true; 926 } else { 927 return false; 928 } 929 } 930 } 931} 932 933/* 934 [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' 935 */ 936static Boolean parseElementDeclaration(CFXMLParserRef parser) { 937 Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData); 938 Boolean success; 939 static const UniChar eltChars[7] = {'E', 'L', 'E', 'M', 'E', 'N', 'T'}; 940 UniChar ch = '>'; 941 CFMutableStringRef contentDesc = NULL; 942 CFStringRef name; 943 success = _inputStreamMatchString(&parser->input, eltChars, 7) 944 && _inputStreamSkipWhitespace(&parser->input, NULL) != 0 945 && _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) 946 && _inputStreamSkipWhitespace(&parser->input, NULL) != 0; 947 if (success) { 948 if (report) _inputStreamSetMark(&parser->input); 949 success = parseElementContentSpec(parser); 950 if (success && report) { 951 contentDesc = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)); 952 _inputStreamGetCharactersFromMark(&parser->input, contentDesc); 953 } 954 if (report) _inputStreamClearMark(&parser->input); 955 if (success) _inputStreamSkipWhitespace(&parser->input, NULL); 956 success = success && _inputStreamMatchString(&parser->input, &ch, 1); 957 } 958 if (!success) { 959 if (_inputStreamAtEOF(&parser->input)) { 960 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); 961 } else { 962 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); 963 } 964 } else if (report) { 965 CFXMLElementTypeDeclarationInfo eltData; 966 CFStringRef tmp = parser->node->dataString; 967 parser->node->dataTypeID = kCFXMLNodeTypeElementTypeDeclaration; 968 parser->node->dataString = name; 969 eltData.contentDescription = contentDesc; 970 parser->node->additionalData = &eltData; 971 success = reportNewLeaf(parser); 972 parser->node->additionalData = NULL; 973 parser->node->dataString = tmp; 974 } 975 if (contentDesc) CFRelease(contentDesc); 976 return success; 977} 978 979/* 980 [70] EntityDecl ::= GEDecl | PEDecl 981 [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' 982 [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' 983 [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 984 [74] PEDef ::= EntityValue | ExternalID 985 [76] NDataDecl ::= S 'NDATA' S Name 986 [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'" 987*/ 988static Boolean parseEntityDeclaration(CFXMLParserRef parser) { 989 const UniChar entityStr[6] = {'E', 'N', 'T', 'I', 'T', 'Y'}; 990 UniChar ch; 991 Boolean isPEDecl = false; 992 CFXMLEntityInfo entityData; 993 CFStringRef name; 994 Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData); 995 Boolean success = 996 _inputStreamMatchString(&parser->input, entityStr, 6) && 997 (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && 998 _inputStreamPeekCharacter(&parser->input, &ch); 999 1000 entityData.replacementText = NULL; 1001 entityData.entityID.publicID = NULL; 1002 entityData.entityID.systemID = NULL; 1003 entityData.notationName = NULL; 1004 // We will set entityType immediately before reporting 1005 1006 if (success && ch == '%') { 1007 _inputStreamGetCharacter(&parser->input, &ch); 1008 success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0; 1009 isPEDecl = true; 1010 } 1011 success = success && _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) && (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && _inputStreamPeekCharacter(&parser->input, &ch); 1012 if (success && (ch == '\"' || ch == '\'')) { 1013 // EntityValue 1014 // This is not quite correct - the string scanned cannot contain '%' or '&' unless it's as part of a valid entity reference -- REW, 2/2/2000 1015 if (report) { 1016 entityData.replacementText = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)); 1017 success = _inputStreamScanQuotedString(&parser->input, (CFMutableStringRef)entityData.replacementText); 1018 } else { 1019 success = _inputStreamScanQuotedString(&parser->input, NULL); 1020 } 1021 } else if (success) { 1022 // ExternalID 1023 success = parseExternalID(parser, false, report ? &(entityData.entityID) : NULL); 1024 if (success && !isPEDecl && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) { 1025 // There could be an option NDataDecl 1026 // Don't we need to set entityData.notationName? -- REW, 3/6/2000 1027 const UniChar nDataStr[5] = {'N', 'D', 'A', 'T', 'A'}; 1028 if (_inputStreamMatchString(&parser->input, nDataStr, 5)) { 1029 success = (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && _inputStreamScanXMLName(&parser->input, false, NULL); 1030 } 1031 } 1032 } 1033 if (success) { 1034 _inputStreamSkipWhitespace(&parser->input, NULL); 1035 success = _inputStreamGetCharacter(&parser->input, &ch) && ch == '>'; 1036 } 1037 if (!success) { 1038 if (_inputStreamAtEOF(&parser->input)) { 1039 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); 1040 } else { 1041 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); 1042 } 1043 } else { 1044 CFStringRef tmp = parser->node->dataString; 1045 if (isPEDecl) entityData.entityType = kCFXMLEntityTypeParameter; 1046 else if (entityData.replacementText) entityData.entityType = kCFXMLEntityTypeParsedInternal; 1047 else if (!entityData.notationName) entityData.entityType = kCFXMLEntityTypeParsedExternal; 1048 else entityData.entityType = kCFXMLEntityTypeUnparsed; 1049 parser->node->dataTypeID = kCFXMLNodeTypeEntity; 1050 parser->node->dataString = name; 1051 parser->node->additionalData = &entityData; 1052 success = reportNewLeaf(parser); 1053 parser->node->additionalData = NULL; 1054 parser->node->dataString = tmp; 1055 if (entityData.replacementText) CFRelease(entityData.replacementText); 1056 } 1057 if (entityData.entityID.publicID) CFRelease(entityData.entityID.publicID); 1058 if (entityData.entityID.systemID) CFRelease(entityData.entityID.systemID); 1059 return success; 1060} 1061 1062/* 1063 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>' 1064 [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment 1065*/ 1066// First character should be just past '[' 1067static Boolean parseInlineDTD(CFXMLParserRef parser) { 1068 Boolean success = true; 1069 while (success && !_inputStreamAtEOF(&parser->input)) { 1070 UniChar ch; 1071 1072 parseWhitespace(parser); 1073 if (!_inputStreamGetCharacter(&parser->input, &ch)) break; 1074 if (ch == '%') { 1075 // PEReference 1076 success = parsePhysicalEntityReference(parser); 1077 } else if (ch == '<') { 1078 // markupdecl 1079 if (!_inputStreamGetCharacter(&parser->input, &ch)) { 1080 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); 1081 return false; 1082 } 1083 if (ch == '?') { 1084 // Processing Instruction 1085 success = parseProcessingInstruction(parser, true); // We can safely pass true here, because *parser->top will be NULL if kCFXMLParserSkipMetaData is true 1086 } else if (ch == '!') { 1087 UniChar dashes[2] = {'-', '-'}; 1088 if (_inputStreamMatchString(&parser->input, dashes, 2)) { 1089 // Comment 1090 success = parseComment(parser, true); 1091 } else { 1092 // elementdecl | AttListDecl | EntityDecl | NotationDecl 1093 if (!_inputStreamPeekCharacter(&parser->input, &ch)) { 1094 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); 1095 return false; 1096 } else if (ch == 'A') { 1097 // AttListDecl 1098 success = parseAttributeListDeclaration(parser); 1099 } else if (ch == 'N') { 1100 success = parseNotationDeclaration(parser); 1101 } else if (ch == 'E') { 1102 // elementdecl | EntityDecl 1103 _inputStreamGetCharacter(&parser->input, &ch); 1104 if (!_inputStreamPeekCharacter(&parser->input, &ch)) { 1105 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); 1106 return false; 1107 } 1108 _inputStreamReturnCharacter(&parser->input, 'E'); 1109 if (ch == 'L') { 1110 success = parseElementDeclaration(parser); 1111 } else if (ch == 'N') { 1112 success = parseEntityDeclaration(parser); 1113 } else { 1114 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); 1115 return false; 1116 } 1117 } else { 1118 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); 1119 return false; 1120 } 1121 } 1122 } else { 1123 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); 1124 return false; 1125 } 1126 } else if (ch == ']') { 1127 return true; 1128 } else { 1129 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); 1130 return false; 1131 } 1132 } 1133 if (success) { 1134 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); 1135 } 1136 return false; 1137} 1138 1139/* 1140[43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* 1141 */ 1142static Boolean parseTagContent(CFXMLParserRef parser) { 1143 while (!_inputStreamAtEOF(&parser->input)) { 1144 UniChar ch; 1145 CFIndex numWhitespaceCharacters; 1146 1147 _inputStreamSetMark(&parser->input); 1148 numWhitespaceCharacters = _inputStreamSkipWhitespace(&parser->input, NULL); 1149 // Don't report the whitespace yet; if the first thing we see is character data, we put the whitespace back and report it as part of the character data. 1150 if (!_inputStreamGetCharacter(&parser->input, &ch)) break; // break == report unexpected EOF 1151 1152 if (ch != '<' && ch != '&') { // CharData 1153 // Back off the whitespace; we'll report it with the PCData 1154 _inputStreamBackUpToMark(&parser->input); 1155 _inputStreamClearMark(&parser->input); 1156 if (!parsePCData(parser)) return false; 1157 if(_inputStreamComposingErrorOccurred(&parser->input)) { 1158 _CFReportError(parser, kCFXMLErrorEncodingConversionFailure, "Encountered string encoding error"); 1159 return false; 1160 } 1161 continue; 1162 } 1163 1164 // element | Reference | CDSect | PI | Comment 1165 // We can safely report any whitespace now 1166 if (!(parser->options & kCFXMLParserSkipWhitespace) && numWhitespaceCharacters != 0 && *(parser->top)) { 1167 _inputStreamReturnCharacter(&parser->input, ch); 1168 _inputStreamGetCharactersFromMark(&parser->input, (CFMutableStringRef)(parser->node->dataString)); 1169 parser->node->dataTypeID = kCFXMLNodeTypeWhitespace; 1170 parser->node->additionalData = NULL; 1171 if (!reportNewLeaf(parser)) return false; 1172 _inputStreamGetCharacter(&parser->input, &ch); 1173 } 1174 _inputStreamClearMark(&parser->input); 1175 1176 if (ch == '&') { 1177 // Reference; for the time being, we don't worry about processing these; just report them as Entity references 1178 if (!parseEntityReference(parser, true)) return false; 1179 continue; 1180 } 1181 1182 // ch == '<'; element | CDSect | PI | Comment 1183 if (!_inputStreamPeekCharacter(&parser->input, &ch)) break; 1184 if (ch == '?') { // PI 1185 _inputStreamGetCharacter(&parser->input, &ch); 1186 if (!parseProcessingInstruction(parser, true)) 1187 return false; 1188 } else if (ch == '/') { // end tag; we're passing outside of content's production 1189 _inputStreamReturnCharacter(&parser->input, '<'); // Back off to the '<' 1190 return true; 1191 } else if (ch != '!') { // element 1192 if (!parseTag(parser)) return false; 1193 } else { 1194 // Comment | CDSect 1195 UniChar dashes[3] = {'!', '-', '-'}; 1196 if (_inputStreamMatchString(&parser->input, dashes, 3)) { 1197 // Comment 1198 if (!parseComment(parser, true)) return false; 1199 } else { 1200 // Should have a CDSect; back off the "<!" and call parseCDSect 1201 _inputStreamReturnCharacter(&parser->input, '<'); 1202 if (!parseCDSect(parser)) return false; 1203 } 1204 } 1205 } 1206 1207 if(_inputStreamComposingErrorOccurred(&parser->input)) { 1208 _CFReportError(parser, kCFXMLErrorEncodingConversionFailure, "Encountered string encoding error"); 1209 return false; 1210 } 1211 // Only way to get here is if premature EOF was found 1212//#warning CF:Include the tag name here 1213 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing tag content"); 1214 return false; 1215} 1216 1217static Boolean parseCDSect(CFXMLParserRef parser) { 1218 const UniChar _CDSectOpening[9] = {'<', '!', '[', 'C', 'D', 'A', 'T', 'A', '['}; 1219 const UniChar _CDSectClose[3] = {']', ']', '>'}; 1220 if (!_inputStreamMatchString(&parser->input, _CDSectOpening, 9)) { 1221 _CFReportError(parser, kCFXMLErrorMalformedCDSect, "Encountered bad prefix to a presumed CDATA section"); 1222 return false; 1223 } 1224 if (!_inputStreamScanToCharacters(&parser->input, _CDSectClose, 3, (CFMutableStringRef)(parser->node->dataString))) { 1225 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing CDATA section"); 1226 return false; 1227 } 1228 1229 parser->node->dataTypeID = kCFXMLNodeTypeCDATASection; 1230 parser->node->additionalData = NULL; 1231 return reportNewLeaf(parser); 1232} 1233 1234/* 1235 [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 1236*/ 1237static Boolean validateCharacterReference(CFStringRef str) { 1238 Boolean isHex; 1239 CFIndex idx, len = CFStringGetLength(str); 1240 if (len < 2) return false; 1241 if (CFStringGetCharacterAtIndex(str, 0) != '#') return false; 1242 if (CFStringGetCharacterAtIndex(str, 1) == 'x') { 1243 isHex = true; 1244 idx = 2; 1245 if (len == 2) return false; 1246 } else { 1247 isHex = false; 1248 idx = 1; 1249 } 1250 1251 while (idx < len) { 1252 UniChar ch; 1253 ch = CFStringGetCharacterAtIndex(str, idx); 1254 idx ++; 1255 if (!(ch <= '9' && ch >= '0') && 1256 !(isHex && ((ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')))) { 1257 break; 1258 } 1259 } 1260 return (idx == len); 1261} 1262 1263/* 1264 [67] Reference ::= EntityRef | CharRef 1265 [68] EntityRef ::= '&' Name ';' 1266*/ 1267static Boolean parseEntityReference(CFXMLParserRef parser, Boolean report) { 1268 UniChar ch; 1269 CFXMLEntityReferenceInfo entData; 1270 CFStringRef name = NULL; 1271 if (!_inputStreamPeekCharacter(&parser->input, &ch)) { 1272 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference"); 1273 return false; 1274 } 1275 if (ch == '#') { 1276 ch = ';'; 1277 if (!_inputStreamScanToCharacters(&parser->input, &ch, 1, (CFMutableStringRef)parser->node->dataString)) { 1278 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference"); 1279 return false; 1280 } else if (!validateCharacterReference(parser->node->dataString)) { 1281 _CFReportError(parser, kCFXMLErrorMalformedCharacterReference, "Encountered illegal character while parsing character reference"); 1282 return false; 1283 } 1284 entData.entityType = kCFXMLEntityTypeCharacter; 1285 name = parser->node->dataString; 1286 } else if (!_inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) || !_inputStreamGetCharacter(&parser->input, &ch) || ch != ';') { 1287 if (_inputStreamAtEOF(&parser->input)) { 1288 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference"); 1289 return false; 1290 } else { 1291 _CFReportError(parser, kCFXMLErrorMalformedName, "Encountered malformed name while parsing EntityReference"); 1292 return false; 1293 } 1294 } else { 1295 entData.entityType = kCFXMLEntityTypeParsedInternal; 1296 } 1297 if (report) { 1298 CFStringRef tmp = parser->node->dataString; 1299 Boolean success; 1300 parser->node->dataTypeID = kCFXMLNodeTypeEntityReference; 1301 parser->node->dataString = name; 1302 parser->node->additionalData = &entData; 1303 success = reportNewLeaf(parser); 1304 parser->node->additionalData = NULL; 1305 parser->node->dataString = tmp; 1306 return success; 1307 } else { 1308 return true; 1309 } 1310} 1311 1312#if 0 1313// Kept from old entity reference parsing.... 1314{ 1315 switch (*(parser->curr)) { 1316 case 'l': // "lt" 1317 if (len >= 3 && *(parser->curr+1) == 't' && *(parser->curr+2) == ';') { 1318 ch = '<'; 1319 parser->curr += 3; 1320 break; 1321 } 1322 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser)); 1323 return; 1324 case 'g': // "gt" 1325 if (len >= 3 && *(parser->curr+1) == 't' && *(parser->curr+2) == ';') { 1326 ch = '>'; 1327 parser->curr += 3; 1328 break; 1329 } 1330 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser)); 1331 return; 1332 case 'a': // "apos" or "amp" 1333 if (len < 4) { // Not enough characters for either conversion 1334 parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII); 1335 return; 1336 } 1337 if (*(parser->curr+1) == 'm') { 1338 // "amp" 1339 if (*(parser->curr+2) == 'p' && *(parser->curr+3) == ';') { 1340 ch = '&'; 1341 parser->curr += 4; 1342 break; 1343 } 1344 } else if (*(parser->curr+1) == 'p') { 1345 // "apos" 1346 if (len > 4 && *(parser->curr+2) == 'o' && *(parser->curr+3) == 's' && *(parser->curr+4) == ';') { 1347 ch = '\''; 1348 parser->curr += 5; 1349 break; 1350 } 1351 } 1352 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser)); 1353 return; 1354 case 'q': // "quote" 1355 if (len >= 6 && *(parser->curr+1) == 'u' && *(parser->curr+2) == 'o' && *(parser->curr+3) == 't' && *(parser->curr+4) == 'e' && *(parser->curr+5) == ';') { 1356 ch = '\"'; 1357 parser->curr += 6; 1358 break; 1359 } 1360 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser)); 1361 return; 1362 case '#': 1363 { 1364 UniChar num = 0; 1365 Boolean isHex = false; 1366 if ( len < 4) { // Not enough characters to make it all fit! Need at least "&#d;" 1367 parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII); 1368 return; 1369 } 1370 parser->curr ++; 1371 if (*(parser->curr) == 'x') { 1372 isHex = true; 1373 parser->curr ++; 1374 } 1375 while (parser->curr < parser->end) { 1376 ch = *(parser->curr); 1377 if (ch == ';') { 1378 CFStringAppendCharacters(string, &num, 1); 1379 parser->curr ++; 1380 return; 1381 } 1382 if (!isHex) num = num*10; 1383 else num = num << 4; 1384 if (ch <= '9' && ch >= '0') { 1385 num += (ch - '0'); 1386 } else if (!isHex) { 1387 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unexpected character %c at line %d"), ch, lineNumber(parser)); 1388 return; 1389 } else if (ch >= 'a' && ch <= 'f') { 1390 num += 10 + (ch - 'a'); 1391 } else if (ch >= 'A' && ch <= 'F') { 1392 num += 10 + (ch - 'A'); 1393 } else { 1394 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unexpected character %c at line %d"), ch, lineNumber(parser)); 1395 return; 1396 } 1397 } 1398 parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII); 1399 return; 1400 } 1401 default: 1402 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser)); 1403 return; 1404 } 1405 CFStringAppendCharacters(string, &ch, 1); 1406} 1407#endif 1408 1409/* 1410[14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) 1411*/ 1412static Boolean parsePCData(CFXMLParserRef parser) { 1413 UniChar ch; 1414 Boolean done = false; 1415 _inputStreamSetMark(&parser->input); 1416 while (!done && _inputStreamGetCharacter(&parser->input, &ch)) { 1417 switch (ch) { 1418 case '<': 1419 case '&': 1420 _inputStreamReturnCharacter(&parser->input, ch); 1421 done = true; 1422 break; 1423 case ']': 1424 { 1425 const UniChar endSequence[2] = {']', '>'}; 1426 if (_inputStreamMatchString(&parser->input, endSequence, 2)) { 1427 _CFReportError(parser, kCFXMLErrorMalformedParsedCharacterData, "Encountered \"]]>\" in parsed character data"); 1428 _inputStreamClearMark(&parser->input); 1429 return false; 1430 } 1431 break; 1432 } 1433 default: 1434 ; 1435 } 1436 } 1437 _inputStreamGetCharactersFromMark(&parser->input, (CFMutableStringRef)(parser->node->dataString)); 1438 _inputStreamClearMark(&parser->input); 1439 parser->node->dataTypeID = kCFXMLNodeTypeText; 1440 parser->node->additionalData = NULL; 1441 return reportNewLeaf(parser); 1442} 1443 1444/* 1445[42] ETag ::= '</' Name S? '>' 1446 */ 1447static Boolean parseCloseTag(CFXMLParserRef parser, CFStringRef tag) { 1448 const UniChar beginEndTag[2] = {'<', '/'}; 1449 Boolean unexpectedEOF = false, mismatch = false; 1450 CFStringRef closeTag; 1451 1452 // We can get away with testing pointer equality between tag & closeTag because scanXMLName guarantees the strings it returns are unique. 1453 if (_inputStreamMatchString(&parser->input, beginEndTag, 2) && _inputStreamScanXMLName(&parser->input, false, &closeTag) && closeTag == tag) { 1454 1455 UniChar ch; 1456 _inputStreamSkipWhitespace(&parser->input, NULL); 1457 if (!_inputStreamGetCharacter(&parser->input, &ch)) { 1458 unexpectedEOF = true; 1459 } else if (ch != '>') { 1460 mismatch = true; 1461 } 1462 } else if (_inputStreamAtEOF(&parser->input)) { 1463 unexpectedEOF = true; 1464 } else { 1465 mismatch = true; 1466 } 1467 1468 if (unexpectedEOF || mismatch) { 1469 if (unexpectedEOF) { 1470 parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected EOF while parsing close tag for <%@>"), tag); 1471 parser->status = kCFXMLErrorUnexpectedEOF; 1472 if(parser->callBacks.handleError) INVOKE_CALLBACK3(parser->callBacks.handleError, parser, kCFXMLErrorUnexpectedEOF, parser->context.info); 1473 } else { 1474 parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered malformed close tag for <%@>"), tag); 1475 parser->status = kCFXMLErrorMalformedCloseTag; 1476 if(parser->callBacks.handleError) INVOKE_CALLBACK3(parser->callBacks.handleError, parser, kCFXMLErrorMalformedCloseTag, parser->context.info); 1477 } 1478 return false; 1479 } 1480 return true; 1481} 1482 1483/* 1484 [39] element ::= EmptyElementTag | STag content ETag 1485 [40] STag ::= '<' Name (S Attribute)* S? '>' 1486 [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 1487*/ 1488static Boolean parseTag(CFXMLParserRef parser) { 1489 UniChar ch; 1490 void *tag; 1491 CFXMLElementInfo data; 1492 Boolean success = true; 1493 CFStringRef tagName; 1494 1495 if (!_inputStreamScanXMLName(&parser->input, false, &tagName)) { 1496 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Encountered malformed start tag"); 1497 return false; 1498 } 1499 1500 _inputStreamSkipWhitespace(&parser->input, NULL); 1501 1502 if (!parseAttributes(parser)) return false; // parsed directly into parser->argDict ; parseAttributes consumes any trailing whitespace 1503 data.attributes = parser->argDict; 1504 data.attributeOrder = parser->argArray; 1505 if (!_inputStreamGetCharacter(&parser->input, &ch)) { 1506 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF"); 1507 return false; 1508 } 1509 if (ch == '/') { 1510 data.isEmpty = true; 1511 if (!_inputStreamGetCharacter(&parser->input, &ch)) { 1512 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF"); 1513 return false; 1514 } 1515 } else { 1516 data.isEmpty = false; 1517 } 1518 if (ch != '>') { 1519 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Encountered malformed start tag"); 1520 return false; 1521 } 1522 1523 if (*parser->top || parser->top == parser->stack) { 1524 CFStringRef oldStr = parser->node->dataString; 1525 parser->node->dataTypeID = kCFXMLNodeTypeElement; 1526 parser->node->dataString = tagName; 1527 parser->node->additionalData = &data; 1528 tag = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info); 1529 if (tag && parser->status == kCFXMLStatusParseInProgress) { 1530 INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *parser->top, tag, parser->context.info); 1531 } 1532 parser->node->additionalData = NULL; 1533 parser->node->dataString = oldStr; 1534 if (parser->status != kCFXMLStatusParseInProgress) { 1535 // callback called CFXMLParserAbort() 1536 _CFReportError(parser, parser->status, NULL); 1537 return false; 1538 } 1539 } else { 1540 tag = NULL; 1541 } 1542 1543 pushXMLNode(parser, tag); 1544 if (!data.isEmpty) { 1545 success = parseTagContent(parser); 1546 if (success) { 1547 success = parseCloseTag(parser, tagName); 1548 } 1549 } 1550 parser->top --; 1551 1552 if (success && tag) { 1553 INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, tag, parser->context.info); 1554 if (parser->status != kCFXMLStatusParseInProgress) { 1555 _CFReportError(parser, parser->status, NULL); 1556 return false; 1557 } 1558 } 1559 return success; 1560} 1561 1562/* 1563 [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" 1564 [67] Reference ::= EntityRef | CharRef 1565 [68] EntityRef ::= '&' Name ';' 1566 */ 1567// For the moment, we don't worry about references in the attribute values. 1568static Boolean parseAttributeValue(CFXMLParserRef parser, CFMutableStringRef str) { 1569 UniChar quote, ch; 1570 Boolean success = _inputStreamGetCharacter(&parser->input, "e); 1571 if (!success || (quote != '\'' && quote != '\"')) return false; 1572 if (str) _inputStreamSetMark(&parser->input); 1573 while (_inputStreamGetCharacter(&parser->input, &ch) && ch != quote) { 1574 switch (ch) { 1575 case '<': success = false; break; 1576 case '&': 1577 if (!parseEntityReference(parser, false)) { 1578 success = false; 1579 break; 1580 } 1581 default: 1582 ; 1583 } 1584 } 1585 1586 if (success && _inputStreamAtEOF(&parser->input)) { 1587 success = false; 1588 } 1589 if (str) { 1590 if (success) { 1591 _inputStreamReturnCharacter(&parser->input, quote); 1592 _inputStreamGetCharactersFromMark(&parser->input, str); 1593 _inputStreamGetCharacter(&parser->input, &ch); 1594 } 1595 _inputStreamClearMark(&parser->input); 1596 } 1597 return success; 1598} 1599 1600/* 1601 [40] STag ::= '<' Name (S Attribute)* S? '>' 1602 [41] Attribute ::= Name Eq AttValue 1603 [25] Eq ::= S? '=' S? 1604*/ 1605 1606// Expects parser->curr to be at the first content character; will consume the trailing whitespace. 1607Boolean parseAttributes(CFXMLParserRef parser) { 1608 UniChar ch; 1609 CFMutableDictionaryRef dict; 1610 CFMutableArrayRef array; 1611 Boolean failure = false; 1612 if (_inputStreamPeekCharacter(&parser->input, &ch) == '>') { 1613 if (parser->argDict) { 1614 CFDictionaryRemoveAllValues(parser->argDict); 1615 CFArrayRemoveAllValues(parser->argArray); 1616 } 1617 return true; // No attributes; let caller deal with it 1618 } 1619 if (!parser->argDict) { 1620 parser->argDict = CFDictionaryCreateMutable(CFGetAllocator(parser), 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); 1621 parser->argArray = CFArrayCreateMutable(CFGetAllocator(parser), 0, &kCFTypeArrayCallBacks); 1622 } else { 1623 CFDictionaryRemoveAllValues(parser->argDict); 1624 CFArrayRemoveAllValues(parser->argArray); 1625 } 1626 dict = parser->argDict; 1627 array = parser->argArray; 1628 while (!failure && _inputStreamPeekCharacter(&parser->input, &ch) && ch != '>' && ch != '/') { 1629 CFStringRef key; 1630 CFMutableStringRef value; 1631 if (!_inputStreamScanXMLName(&parser->input, false, &key)) { 1632 failure = true; 1633 break; 1634 } 1635 if (CFArrayGetFirstIndexOfValue(array, CFRangeMake(0, CFArrayGetCount(array)), key) != kCFNotFound) { 1636 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Found repeated attribute"); 1637 return false; 1638 } 1639 _inputStreamSkipWhitespace(&parser->input, NULL); 1640 if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '=') { 1641 failure = true; 1642 break; 1643 } 1644 _inputStreamSkipWhitespace(&parser->input, NULL); 1645 value = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)); 1646 if (!parseAttributeValue(parser, value)) { 1647 CFRelease(value); 1648 failure = true; 1649 break; 1650 } 1651 CFArrayAppendValue(array, key); 1652 CFDictionarySetValue(dict, key, value); 1653 CFRelease(value); 1654 _inputStreamSkipWhitespace(&parser->input, NULL); 1655 } 1656 if (failure) { 1657//#warning CF:Include tag name in this error report 1658 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Found illegal character while parsing element tag"); 1659 return false; 1660 } else if (_inputStreamAtEOF(&parser->input)) { 1661 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing element attributes"); 1662 return false; 1663 } else { 1664 return true; 1665 } 1666} 1667 1668/* 1669 [1] document ::= prolog element Misc* 1670 [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? 1671 [27] Misc ::= Comment | PI | S 1672 [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 1673 1674 We treat XMLDecl as a plain old PI, since PI is part of Misc. This changes the prolog and document productions to 1675 [22-1] prolog ::= Misc* (doctypedecl Misc*)? 1676 [1-1] document ::= Misc* (doctypedecl Misc*)? element Misc* 1677 1678 NOTE: This function assumes parser->stack has a valid top. I.e. the document pointer has already been created! 1679*/ 1680static Boolean parseXML(CFXMLParserRef parser) { 1681 Boolean success = true, sawDTD = false, sawElement = false; 1682 UniChar ch; 1683 while (success && _inputStreamPeekCharacter(&parser->input, &ch)) { 1684 switch (ch) { 1685 case ' ': 1686 case '\n': 1687 case '\t': 1688 case '\r': 1689 success = parseWhitespace(parser); 1690 break; 1691 case '<': 1692 _inputStreamGetCharacter(&parser->input, &ch); 1693 if (!_inputStreamGetCharacter(&parser->input, &ch)) { 1694 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing top-level document"); 1695 return false; 1696 } 1697 if (ch == '!') { 1698 // Comment or DTD 1699 UniChar dashes[2] = {'-', '-'}; 1700 if (_inputStreamMatchString(&parser->input, dashes, 2)) { 1701 // Comment 1702 success = parseComment(parser, true); 1703 } else { 1704 // Should be DTD 1705 if (sawDTD) { 1706 _CFReportError(parser, kCFXMLErrorMalformedDocument, "Encountered a second DTD"); 1707 return false; 1708 } 1709 success = parseDTD(parser); 1710 if (success) sawDTD = true; 1711 } 1712 } else if (ch == '?') { 1713 // Processing instruction 1714 success = parseProcessingInstruction(parser, true); 1715 } else { 1716 // Tag or malformed 1717 if (sawElement) { 1718 _CFReportError(parser, kCFXMLErrorMalformedDocument, "Encountered second top-level element"); 1719 return false; 1720 } 1721 _inputStreamReturnCharacter(&parser->input, ch); 1722 success = parseTag(parser); 1723 if (success) sawElement = true; 1724 } 1725 break; 1726 default: { 1727 parser->status = kCFXMLErrorMalformedDocument; 1728 parser->errorString = ch < 256 ? 1729 CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected character 0x%x (\'%c\') at top-level"), ch, ch) : 1730 CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected Unicode character 0x%x at top-level"), ch); 1731 1732 if (parser->callBacks.handleError) { 1733 INVOKE_CALLBACK3(parser->callBacks.handleError, parser, parser->status, parser->context.info); 1734 } 1735 return false; 1736 } 1737 } 1738 } 1739 1740 if (!success) return false; 1741 if (!sawElement) { 1742 _CFReportError(parser, kCFXMLErrorElementlessDocument, "No element found in document"); 1743 return false; 1744 } 1745 return true; 1746} 1747 1748static void _CFReportError(CFXMLParserRef parser, CFXMLParserStatusCode errNum, const char *str) { 1749 if (str) { 1750 parser->status = errNum; 1751 parser->errorString = CFStringCreateWithCString(CFGetAllocator(parser), str, kCFStringEncodingASCII); 1752 } 1753 if (parser->callBacks.handleError) { 1754 INVOKE_CALLBACK3(parser->callBacks.handleError, parser, errNum, parser->context.info); 1755 } 1756} 1757 1758// Assumes parser->node has been set and is ready to go 1759static Boolean reportNewLeaf(CFXMLParserRef parser) { 1760 void *xmlStruct; 1761 if (*(parser->top) == NULL) return true; 1762 1763 xmlStruct = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info); 1764 if (xmlStruct && parser->status == kCFXMLStatusParseInProgress) { 1765 INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *(parser->top), xmlStruct, parser->context.info); 1766 if (parser->status == kCFXMLStatusParseInProgress) INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, xmlStruct, parser->context.info); 1767 } 1768 if (parser->status != kCFXMLStatusParseInProgress) { 1769 _CFReportError(parser, parser->status, NULL); 1770 return false; 1771 } 1772 return true; 1773} 1774 1775static void pushXMLNode(CFXMLParserRef parser, void *node) { 1776 parser->top ++; 1777 if ((unsigned)(parser->top - parser->stack) == parser->capacity) { 1778 parser->stack = (void **)CFAllocatorReallocate(CFGetAllocator(parser), parser->stack, 2 * parser->capacity * sizeof(void *), 0); 1779 parser->top = parser->stack + parser->capacity; 1780 parser->capacity = 2*parser->capacity; 1781 } 1782 *(parser->top) = node; 1783} 1784 1785/**************************/ 1786/* Parsing to a CFXMLTree */ 1787/**************************/ 1788 1789static void *_XMLTreeCreateXMLStructure(CFXMLParserRef parser, CFXMLNodeRef node, void *context) { 1790 CFXMLNodeRef myNode = CFXMLNodeCreateCopy(CFGetAllocator(parser), node); 1791 CFXMLTreeRef tree = CFXMLTreeCreateWithNode(CFGetAllocator(parser), myNode); 1792 CFRelease(myNode); 1793 return (void *)tree; 1794} 1795 1796static void _XMLTreeAddChild(CFXMLParserRef parser, void *parent, void *child, void *context) { 1797 CFTreeAppendChild((CFTreeRef)parent, (CFTreeRef)child); 1798} 1799 1800static void _XMLTreeEndXMLStructure(CFXMLParserRef parser, void *xmlType, void *context) { 1801 CFXMLTreeRef node = (CFXMLTreeRef)xmlType; 1802 if (CFTreeGetParent(node)) 1803 CFRelease((CFXMLTreeRef)xmlType); 1804} 1805 1806CFXMLTreeRef CFXMLTreeCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex version) { 1807 CFXMLParserRef parser; 1808 CFXMLParserCallBacks callbacks; 1809 CFXMLTreeRef result; 1810 1811 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__); 1812 1813 callbacks.createXMLStructure = _XMLTreeCreateXMLStructure; 1814 callbacks.addChild = _XMLTreeAddChild; 1815 callbacks.endXMLStructure = _XMLTreeEndXMLStructure; 1816 callbacks.resolveExternalEntity = NULL; 1817 callbacks.handleError = NULL; 1818 parser = CFXMLParserCreateWithDataFromURL(allocator, dataSource, parseOptions, version, &callbacks, NULL); 1819 1820 if (CFXMLParserParse(parser)) { 1821 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser); 1822 } else { 1823 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser); 1824 if (result) CFRelease(result); 1825 result = NULL; 1826 } 1827 CFRelease(parser); 1828 return result; 1829} 1830 1831CFXMLTreeRef CFXMLTreeCreateFromData(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex parserVersion) { 1832 return CFXMLTreeCreateFromDataWithError(allocator, xmlData, dataSource, parseOptions, parserVersion, NULL); 1833} 1834 1835CONST_STRING_DECL(kCFXMLTreeErrorDescription, "kCFXMLTreeErrorDescription"); 1836CONST_STRING_DECL(kCFXMLTreeErrorLineNumber, "kCFXMLTreeErrorLineNumber"); 1837CONST_STRING_DECL(kCFXMLTreeErrorLocation, "kCFXMLTreeErrorLocation"); 1838CONST_STRING_DECL(kCFXMLTreeErrorStatusCode, "kCFXMLTreeErrorStatusCode"); 1839 1840CFXMLTreeRef CFXMLTreeCreateFromDataWithError(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex parserVersion, CFDictionaryRef *errorDict) { 1841 CFXMLParserRef parser; 1842 CFXMLParserCallBacks callbacks; 1843 CFXMLTreeRef result; 1844 1845 __CFGenericValidateType(xmlData, CFDataGetTypeID()); 1846 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__); 1847 1848 callbacks.createXMLStructure = _XMLTreeCreateXMLStructure; 1849 callbacks.addChild = _XMLTreeAddChild; 1850 callbacks.endXMLStructure = _XMLTreeEndXMLStructure; 1851 callbacks.resolveExternalEntity = NULL; 1852 callbacks.handleError = NULL; 1853 parser = CFXMLParserCreate(allocator, xmlData, dataSource, parseOptions, parserVersion, &callbacks, NULL); 1854 1855 if (CFXMLParserParse(parser)) { 1856 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser); 1857 } else { 1858 if (errorDict) { // collect the error dictionary 1859 *errorDict = CFDictionaryCreateMutable(allocator, 4, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); 1860 if (*errorDict) { 1861 CFIndex rawnum; 1862 CFNumberRef cfnum; 1863 CFStringRef errstring; 1864 1865 rawnum = CFXMLParserGetLocation(parser); 1866 cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum); 1867 if(cfnum) { 1868 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorLocation, cfnum); 1869 CFRelease(cfnum); 1870 } 1871 1872 rawnum = CFXMLParserGetLineNumber(parser); 1873 cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum); 1874 if(cfnum) { 1875 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorLineNumber, cfnum); 1876 CFRelease(cfnum); 1877 } 1878 1879 rawnum = CFXMLParserGetStatusCode(parser); 1880 cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum); 1881 if(cfnum) { 1882 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorStatusCode, cfnum); 1883 CFRelease(cfnum); 1884 } 1885 1886 errstring = CFXMLParserCopyErrorDescription(parser); 1887 if(errstring) { 1888 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorDescription, errstring); 1889 CFRelease(errstring); 1890 } 1891 } 1892 } 1893 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser); 1894 if (result) CFRelease(result); 1895 result = NULL; 1896 } 1897 CFRelease(parser); 1898 return result; 1899} 1900 1901/* 1902 At the very least we need to do <, >, &, ", and '. In addition, we'll have to do everything else in the string. 1903 We should also be handling items that are up over certain values correctly. 1904 */ 1905CFStringRef CFXMLCreateStringByEscapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) { 1906 CFAssert1(string != NULL, __kCFLogAssertion, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__); 1907 CFMutableStringRef newString = CFStringCreateMutable(allocator, 0); // unbounded mutable string 1908 CFMutableCharacterSetRef startChars = CFCharacterSetCreateMutable(allocator); 1909 1910 CFStringInlineBuffer inlineBuf; 1911 CFIndex idx = 0; 1912 CFIndex mark = idx; 1913 CFIndex stringLength = CFStringGetLength(string); 1914 UniChar uc; 1915 1916 CFCharacterSetAddCharactersInString(startChars, CFSTR("&<>'\"")); 1917 1918 CFStringInitInlineBuffer(string, &inlineBuf, CFRangeMake(0, stringLength)); 1919 for(idx = 0; idx < stringLength; idx++) { 1920 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, idx); 1921 if(CFCharacterSetIsCharacterMember(startChars, uc)) { 1922 CFStringRef previousSubstring = CFStringCreateWithSubstring(allocator, string, CFRangeMake(mark, idx - mark)); 1923 CFStringAppend(newString, previousSubstring); 1924 CFRelease(previousSubstring); 1925 switch(uc) { 1926 case '&': 1927 CFStringAppend(newString, CFSTR("&")); 1928 break; 1929 case '<': 1930 CFStringAppend(newString, CFSTR("<")); 1931 break; 1932 case '>': 1933 CFStringAppend(newString, CFSTR(">")); 1934 break; 1935 case '\'': 1936 CFStringAppend(newString, CFSTR("'")); 1937 break; 1938 case '"': 1939 CFStringAppend(newString, CFSTR(""")); 1940 break; 1941 } 1942 mark = idx + 1; 1943 } 1944 } 1945 // Copy the remainder to the output string before returning. 1946 CFStringRef remainder = CFStringCreateWithSubstring(allocator, string, CFRangeMake(mark, idx - mark)); 1947 if (NULL != remainder) { 1948 CFStringAppend(newString, remainder); 1949 CFRelease(remainder); 1950 } 1951 1952 CFRelease(startChars); 1953 return newString; 1954} 1955 1956CFStringRef CFXMLCreateStringByUnescapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) { 1957 CFAssert1(string != NULL, __kCFLogAssertion, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__); 1958 1959 CFStringInlineBuffer inlineBuf; /* use this for fast traversal of the string in question */ 1960 CFStringRef sub; 1961 CFIndex lastChunkStart, length = CFStringGetLength(string); 1962 CFIndex i, entityStart; 1963 UniChar uc; 1964 UInt32 entity; 1965 int base; 1966 CFMutableDictionaryRef fullReplDict = entitiesDictionary ? CFDictionaryCreateMutableCopy(allocator, 0, entitiesDictionary) : CFDictionaryCreateMutable(allocator, 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); 1967 1968 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("amp"), (const void *)CFSTR("&")); 1969 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("quot"), (const void *)CFSTR("\"")); 1970 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("lt"), (const void *)CFSTR("<")); 1971 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("gt"), (const void *)CFSTR(">")); 1972 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("apos"), (const void *)CFSTR("'")); 1973 1974 CFStringInitInlineBuffer(string, &inlineBuf, CFRangeMake(0, length - 1)); 1975 CFMutableStringRef newString = CFStringCreateMutable(allocator, 0); 1976 1977 lastChunkStart = 0; 1978 // Scan through the string in its entirety 1979 for(i = 0; i < length; ) { 1980 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; // grab the next character and move i. 1981 1982 if(uc == '&') { 1983 entityStart = i - 1; 1984 entity = 0xFFFF; // set this to a not-Unicode character as sentinel 1985 // we've hit the beginning of an entity. Copy everything from lastChunkStart to this point. 1986 if(lastChunkStart < i - 1) { 1987 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(lastChunkStart, (i - 1) - lastChunkStart)); 1988 CFStringAppend(newString, sub); 1989 CFRelease(sub); 1990 } 1991 1992 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; // grab the next character and move i. 1993 // Now we can process the entity reference itself 1994 if(uc == '#') { // this is a numeric entity. 1995 base = 10; 1996 entity = 0; 1997 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; 1998 1999 if(uc == 'x') { // only lowercase x allowed. Translating numeric entity as hexadecimal. 2000 base = 16; 2001 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; 2002 } 2003 2004 // process the provided digits 'til we're finished 2005 while(true) { 2006 if (uc >= '0' && uc <= '9') 2007 entity = entity * base + (uc-'0'); 2008 else if (uc >= 'a' && uc <= 'f' && base == 16) 2009 entity = entity * base + (uc-'a'+10); 2010 else if (uc >= 'A' && uc <= 'F' && base == 16) 2011 entity = entity * base + (uc-'A'+10); 2012 else break; 2013 2014 if (i < length) { 2015 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; 2016 } 2017 else 2018 break; 2019 } 2020 } 2021 2022 // Scan to the end of the entity 2023 while(uc != ';' && i < length) { 2024 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; 2025 } 2026 2027 if(0xFFFF != entity) { // it was numeric, and translated. 2028 // Now, output the result fo the entity 2029 if(entity >= 0x10000) { 2030 UniChar characters[2] = { ((entity - 0x10000) >> 10) + 0xD800, ((entity - 0x10000) & 0x3ff) + 0xDC00 }; 2031 CFStringAppendCharacters(newString, characters, 2); 2032 } else { 2033 UniChar character = entity; 2034 CFStringAppendCharacters(newString, &character, 1); 2035 } 2036 } else { // it wasn't numeric. 2037 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(entityStart + 1, (i - entityStart - 2))); // This trims off the & and ; from the string, so we can use it against the dictionary itself. 2038 CFStringRef replacementString = (CFStringRef)CFDictionaryGetValue(fullReplDict, sub); 2039 if(replacementString) { 2040 CFStringAppend(newString, replacementString); 2041 } else { 2042 CFRelease(sub); // let the old substring go, since we didn't find it in the dictionary 2043 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(entityStart, (i - entityStart))); // create a new one, including the & and ; 2044 CFStringAppend(newString, sub); // ...and append that. 2045 } 2046 CFRelease(sub); // in either case, release the most-recent "sub" 2047 } 2048 2049 // move the lastChunkStart to the beginning of the next chunk. 2050 lastChunkStart = i; 2051 } 2052 } 2053 if(lastChunkStart < length) { // we've come out of the loop, let's get the rest of the string and tack it on. 2054 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(lastChunkStart, i - lastChunkStart)); 2055 CFStringAppend(newString, sub); 2056 CFRelease(sub); 2057 } 2058 2059 CFRelease(fullReplDict); 2060 2061 return newString; 2062} 2063 2064#pragma GCC diagnostic pop 2065