/* Tests in the "basic" test case for the Expat test suite
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
   Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net>
   Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
   Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
   Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
   Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
   Copyright (c) 2017 Joe Orton <jorton@redhat.com>
   Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com>
   Copyright (c) 2018 Marco Maggi <marco.maggi-ipsu@poste.it>
   Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
   Copyright (c) 2020 Tim Gates <tim.gates@iress.com>
   Copyright (c) 2021 Donghee Na <donghee.na@python.org>
   Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
   Licensed under the MIT license:

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons to whom the Software is furnished to do so, subject to the
   following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN 38 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 39 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 40 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 41 USE OR OTHER DEALINGS IN THE SOFTWARE. 42*/ 43 44#if defined(NDEBUG) 45# undef NDEBUG /* because test suite relies on assert(...) at the moment */ 46#endif 47 48#include <assert.h> 49 50#include <stdio.h> 51#include <string.h> 52#include <time.h> 53 54#if ! defined(__cplusplus) 55# include <stdbool.h> 56#endif 57 58#include "expat_config.h" 59 60#include "expat.h" 61#include "internal.h" 62#include "minicheck.h" 63#include "structdata.h" 64#include "common.h" 65#include "dummy.h" 66#include "handlers.h" 67#include "siphash.h" 68#include "basic_tests.h" 69 70static void 71basic_setup(void) { 72 g_parser = XML_ParserCreate(NULL); 73 if (g_parser == NULL) 74 fail("Parser not created."); 75} 76 77/* 78 * Character & encoding tests. 79 */ 80 81START_TEST(test_nul_byte) { 82 char text[] = "<doc>\0</doc>"; 83 84 /* test that a NUL byte (in US-ASCII data) is an error */ 85 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 86 == XML_STATUS_OK) 87 fail("Parser did not report error on NUL-byte."); 88 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 89 xml_failure(g_parser); 90} 91END_TEST 92 93START_TEST(test_u0000_char) { 94 /* test that a NUL byte (in US-ASCII data) is an error */ 95 expect_failure("<doc>�</doc>", XML_ERROR_BAD_CHAR_REF, 96 "Parser did not report error on NUL-byte."); 97} 98END_TEST 99 100START_TEST(test_siphash_self) { 101 if (! 
sip24_valid()) 102 fail("SipHash self-test failed"); 103} 104END_TEST 105 106START_TEST(test_siphash_spec) { 107 /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */ 108 const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" 109 "\x0a\x0b\x0c\x0d\x0e"; 110 const size_t len = sizeof(message) - 1; 111 const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U); 112 struct siphash state; 113 struct sipkey key; 114 115 sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" 116 "\x0a\x0b\x0c\x0d\x0e\x0f"); 117 sip24_init(&state, &key); 118 119 /* Cover spread across calls */ 120 sip24_update(&state, message, 4); 121 sip24_update(&state, message + 4, len - 4); 122 123 /* Cover null length */ 124 sip24_update(&state, message, 0); 125 126 if (sip24_final(&state) != expected) 127 fail("sip24_final failed spec test\n"); 128 129 /* Cover wrapper */ 130 if (siphash24(message, len, &key) != expected) 131 fail("siphash24 failed spec test\n"); 132} 133END_TEST 134 135START_TEST(test_bom_utf8) { 136 /* This test is really just making sure we don't core on a UTF-8 BOM. */ 137 const char *text = "\357\273\277<e/>"; 138 139 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 140 == XML_STATUS_ERROR) 141 xml_failure(g_parser); 142} 143END_TEST 144 145START_TEST(test_bom_utf16_be) { 146 char text[] = "\376\377\0<\0e\0/\0>"; 147 148 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 149 == XML_STATUS_ERROR) 150 xml_failure(g_parser); 151} 152END_TEST 153 154START_TEST(test_bom_utf16_le) { 155 char text[] = "\377\376<\0e\0/\0>\0"; 156 157 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 158 == XML_STATUS_ERROR) 159 xml_failure(g_parser); 160} 161END_TEST 162 163START_TEST(test_nobom_utf16_le) { 164 char text[] = " \0<\0e\0/\0>\0"; 165 166 if (g_chunkSize == 1) { 167 // TODO: with just the first byte, we can't tell the difference between 168 // UTF-16-LE and UTF-8. Avoid the failure for now. 
169 return; 170 } 171 172 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 173 == XML_STATUS_ERROR) 174 xml_failure(g_parser); 175} 176END_TEST 177 178START_TEST(test_hash_collision) { 179 /* For full coverage of the lookup routine, we need to ensure a 180 * hash collision even though we can only tell that we have one 181 * through breakpoint debugging or coverage statistics. The 182 * following will cause a hash collision on machines with a 64-bit 183 * long type; others will have to experiment. The full coverage 184 * tests invoked from qa.sh usually provide a hash collision, but 185 * not always. This is an attempt to provide insurance. 186 */ 187#define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U) 188 const char *text 189 = "<doc>\n" 190 "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n" 191 "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n" 192 "<b5></b5><b6></b6><b7></b7><b8></b8>\n" 193 "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n" 194 "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n" 195 "<d8>This triggers the table growth and collides with b2</d8>\n" 196 "</doc>\n"; 197 198 XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT); 199 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 200 == XML_STATUS_ERROR) 201 xml_failure(g_parser); 202} 203END_TEST 204#undef COLLIDING_HASH_SALT 205 206/* Regression test for SF bug #491986. */ 207START_TEST(test_danish_latin1) { 208 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 209 "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>"; 210#ifdef XML_UNICODE 211 const XML_Char *expected 212 = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5"); 213#else 214 const XML_Char *expected 215 = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85"); 216#endif 217 run_character_check(text, expected); 218} 219END_TEST 220 221/* Regression test for SF bug #514281. 
*/ 222START_TEST(test_french_charref_hexidecimal) { 223 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 224 "<doc>éèàçêÈ</doc>"; 225#ifdef XML_UNICODE 226 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); 227#else 228 const XML_Char *expected 229 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 230#endif 231 run_character_check(text, expected); 232} 233END_TEST 234 235START_TEST(test_french_charref_decimal) { 236 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 237 "<doc>éèàçêÈ</doc>"; 238#ifdef XML_UNICODE 239 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); 240#else 241 const XML_Char *expected 242 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 243#endif 244 run_character_check(text, expected); 245} 246END_TEST 247 248START_TEST(test_french_latin1) { 249 const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 250 "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>"; 251#ifdef XML_UNICODE 252 const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8"); 253#else 254 const XML_Char *expected 255 = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88"); 256#endif 257 run_character_check(text, expected); 258} 259END_TEST 260 261START_TEST(test_french_utf8) { 262 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n" 263 "<doc>\xC3\xA9</doc>"; 264#ifdef XML_UNICODE 265 const XML_Char *expected = XCS("\x00e9"); 266#else 267 const XML_Char *expected = XCS("\xC3\xA9"); 268#endif 269 run_character_check(text, expected); 270} 271END_TEST 272 273/* Regression test for SF bug #600479. 274 XXX There should be a test that exercises all legal XML Unicode 275 characters as PCDATA and attribute value content, and XML Name 276 characters as part of element and attribute names. 
277*/ 278START_TEST(test_utf8_false_rejection) { 279 const char *text = "<doc>\xEF\xBA\xBF</doc>"; 280#ifdef XML_UNICODE 281 const XML_Char *expected = XCS("\xfebf"); 282#else 283 const XML_Char *expected = XCS("\xEF\xBA\xBF"); 284#endif 285 run_character_check(text, expected); 286} 287END_TEST 288 289/* Regression test for SF bug #477667. 290 This test assures that any 8-bit character followed by a 7-bit 291 character will not be mistakenly interpreted as a valid UTF-8 292 sequence. 293*/ 294START_TEST(test_illegal_utf8) { 295 char text[100]; 296 int i; 297 298 for (i = 128; i <= 255; ++i) { 299 snprintf(text, sizeof(text), "<e>%ccd</e>", i); 300 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 301 == XML_STATUS_OK) { 302 snprintf(text, sizeof(text), 303 "expected token error for '%c' (ordinal %d) in UTF-8 text", i, 304 i); 305 fail(text); 306 } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 307 xml_failure(g_parser); 308 /* Reset the parser since we use the same parser repeatedly. 
*/ 309 XML_ParserReset(g_parser, NULL); 310 } 311} 312END_TEST 313 314/* Examples, not masks: */ 315#define UTF8_LEAD_1 "\x7f" /* 0b01111111 */ 316#define UTF8_LEAD_2 "\xdf" /* 0b11011111 */ 317#define UTF8_LEAD_3 "\xef" /* 0b11101111 */ 318#define UTF8_LEAD_4 "\xf7" /* 0b11110111 */ 319#define UTF8_FOLLOW "\xbf" /* 0b10111111 */ 320 321START_TEST(test_utf8_auto_align) { 322 struct TestCase { 323 ptrdiff_t expectedMovementInChars; 324 const char *input; 325 }; 326 327 struct TestCase cases[] = { 328 {00, ""}, 329 330 {00, UTF8_LEAD_1}, 331 332 {-1, UTF8_LEAD_2}, 333 {00, UTF8_LEAD_2 UTF8_FOLLOW}, 334 335 {-1, UTF8_LEAD_3}, 336 {-2, UTF8_LEAD_3 UTF8_FOLLOW}, 337 {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW}, 338 339 {-1, UTF8_LEAD_4}, 340 {-2, UTF8_LEAD_4 UTF8_FOLLOW}, 341 {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW}, 342 {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW}, 343 }; 344 345 size_t i = 0; 346 bool success = true; 347 for (; i < sizeof(cases) / sizeof(*cases); i++) { 348 const char *fromLim = cases[i].input + strlen(cases[i].input); 349 const char *const fromLimInitially = fromLim; 350 ptrdiff_t actualMovementInChars; 351 352 _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim); 353 354 actualMovementInChars = (fromLim - fromLimInitially); 355 if (actualMovementInChars != cases[i].expectedMovementInChars) { 356 size_t j = 0; 357 success = false; 358 printf("[-] UTF-8 case %2u: Expected movement by %2d chars" 359 ", actually moved by %2d chars: \"", 360 (unsigned)(i + 1), (int)cases[i].expectedMovementInChars, 361 (int)actualMovementInChars); 362 for (; j < strlen(cases[i].input); j++) { 363 printf("\\x%02x", (unsigned char)cases[i].input[j]); 364 } 365 printf("\"\n"); 366 } 367 } 368 369 if (! 
success) { 370 fail("UTF-8 auto-alignment is not bullet-proof\n"); 371 } 372} 373END_TEST 374 375START_TEST(test_utf16) { 376 /* <?xml version="1.0" encoding="UTF-16"?> 377 * <doc a='123'>some {A} text</doc> 378 * 379 * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A 380 */ 381 char text[] 382 = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o" 383 "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o" 384 "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066" 385 "\000'\000?\000>\000\n" 386 "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>" 387 "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000" 388 "<\000/\000d\000o\000c\000>"; 389#ifdef XML_UNICODE 390 const XML_Char *expected = XCS("some \xff21 text"); 391#else 392 const XML_Char *expected = XCS("some \357\274\241 text"); 393#endif 394 CharData storage; 395 396 CharData_Init(&storage); 397 XML_SetUserData(g_parser, &storage); 398 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 399 if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE) 400 == XML_STATUS_ERROR) 401 xml_failure(g_parser); 402 CharData_CheckXMLChars(&storage, expected); 403} 404END_TEST 405 406START_TEST(test_utf16_le_epilog_newline) { 407 unsigned int first_chunk_bytes = 17; 408 char text[] = "\xFF\xFE" /* BOM */ 409 "<\000e\000/\000>\000" /* document element */ 410 "\r\000\n\000\r\000\n\000"; /* epilog */ 411 412 if (first_chunk_bytes >= sizeof(text) - 1) 413 fail("bad value of first_chunk_bytes"); 414 if (_XML_Parse_SINGLE_BYTES(g_parser, text, first_chunk_bytes, XML_FALSE) 415 == XML_STATUS_ERROR) 416 xml_failure(g_parser); 417 else { 418 enum XML_Status rc; 419 rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes, 420 sizeof(text) - first_chunk_bytes - 1, 421 XML_TRUE); 422 if (rc == XML_STATUS_ERROR) 423 xml_failure(g_parser); 424 } 425} 426END_TEST 427 428/* Test that an outright lie in the encoding is faulted */ 
429START_TEST(test_not_utf16) { 430 const char *text = "<?xml version='1.0' encoding='utf-16'?>" 431 "<doc>Hi</doc>"; 432 433 /* Use a handler to provoke the appropriate code paths */ 434 XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler); 435 expect_failure(text, XML_ERROR_INCORRECT_ENCODING, 436 "UTF-16 declared in UTF-8 not faulted"); 437} 438END_TEST 439 440/* Test that an unknown encoding is rejected */ 441START_TEST(test_bad_encoding) { 442 const char *text = "<doc>Hi</doc>"; 443 444 if (! XML_SetEncoding(g_parser, XCS("unknown-encoding"))) 445 fail("XML_SetEncoding failed"); 446 expect_failure(text, XML_ERROR_UNKNOWN_ENCODING, 447 "Unknown encoding not faulted"); 448} 449END_TEST 450 451/* Regression test for SF bug #481609, #774028. */ 452START_TEST(test_latin1_umlauts) { 453 const char *text 454 = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 455 "<e a='\xE4 \xF6 \xFC ä ö ü ä ö ü >'\n" 456 " >\xE4 \xF6 \xFC ä ö ü ä ö ü ></e>"; 457#ifdef XML_UNICODE 458 /* Expected results in UTF-16 */ 459 const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ") 460 XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >"); 461#else 462 /* Expected results in UTF-8 */ 463 const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") 464 XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >"); 465#endif 466 467 run_character_check(text, expected); 468 XML_ParserReset(g_parser, NULL); 469 run_attribute_check(text, expected); 470 /* Repeat with a default handler */ 471 XML_ParserReset(g_parser, NULL); 472 XML_SetDefaultHandler(g_parser, dummy_default_handler); 473 run_character_check(text, expected); 474 XML_ParserReset(g_parser, NULL); 475 XML_SetDefaultHandler(g_parser, dummy_default_handler); 476 run_attribute_check(text, expected); 477} 478END_TEST 479 480/* Test that an element name with a 4-byte UTF-8 character is rejected */ 481START_TEST(test_long_utf8_character) { 482 const char *text 483 = "<?xml version='1.0' encoding='utf-8'?>\n" 484 /* 0xf0 0x90 
0x80 0x80 = U+10000, the first Linear B character */ 485 "<do\xf0\x90\x80\x80/>"; 486 expect_failure(text, XML_ERROR_INVALID_TOKEN, 487 "4-byte UTF-8 character in element name not faulted"); 488} 489END_TEST 490 491/* Test that a long latin-1 attribute (too long to convert in one go) 492 * is correctly converted 493 */ 494START_TEST(test_long_latin1_attribute) { 495 const char *text 496 = "<?xml version='1.0' encoding='iso-8859-1'?>\n" 497 "<doc att='" 498 /* 64 characters per line */ 499 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 500 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 501 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 502 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 503 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 504 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 505 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 506 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 507 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 508 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 509 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 510 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 511 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 512 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 513 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 514 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO" 515 /* Last character splits across a buffer boundary */ 516 "\xe4'>\n</doc>"; 517 518 const XML_Char *expected = 519 /* 64 characters per line */ 520 /* clang-format off */ 521 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 522 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 523 
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 524 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 525 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 526 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 527 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 528 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 529 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 530 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 531 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 532 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 533 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 534 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 535 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 536 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO") 537 /* clang-format on */ 538#ifdef XML_UNICODE 539 XCS("\x00e4"); 540#else 541 XCS("\xc3\xa4"); 542#endif 543 544 run_attribute_check(text, expected); 545} 546END_TEST 547 548/* Test that a long ASCII attribute (too long to convert in one go) 549 * is correctly converted 550 */ 551START_TEST(test_long_ascii_attribute) { 552 const char *text 553 = "<?xml version='1.0' encoding='us-ascii'?>\n" 554 "<doc att='" 555 /* 64 characters per line */ 556 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 557 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 558 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 559 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 560 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 561 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 562 
"ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 563 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 564 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 565 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 566 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 567 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 568 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 569 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 570 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 571 "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP" 572 "01234'>\n</doc>"; 573 const XML_Char *expected = 574 /* 64 characters per line */ 575 /* clang-format off */ 576 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 577 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 578 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 579 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 580 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 581 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 582 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 583 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 584 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 585 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 586 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 587 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 588 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 589 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 590 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 591 
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 592 XCS("01234"); 593 /* clang-format on */ 594 595 run_attribute_check(text, expected); 596} 597END_TEST 598 599/* Regression test #1 for SF bug #653180. */ 600START_TEST(test_line_number_after_parse) { 601 const char *text = "<tag>\n" 602 "\n" 603 "\n</tag>"; 604 XML_Size lineno; 605 606 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 607 == XML_STATUS_ERROR) 608 xml_failure(g_parser); 609 lineno = XML_GetCurrentLineNumber(g_parser); 610 if (lineno != 4) { 611 char buffer[100]; 612 snprintf(buffer, sizeof(buffer), 613 "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno); 614 fail(buffer); 615 } 616} 617END_TEST 618 619/* Regression test #2 for SF bug #653180. */ 620START_TEST(test_column_number_after_parse) { 621 const char *text = "<tag></tag>"; 622 XML_Size colno; 623 624 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 625 == XML_STATUS_ERROR) 626 xml_failure(g_parser); 627 colno = XML_GetCurrentColumnNumber(g_parser); 628 if (colno != 11) { 629 char buffer[100]; 630 snprintf(buffer, sizeof(buffer), 631 "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno); 632 fail(buffer); 633 } 634} 635END_TEST 636 637/* Regression test #3 for SF bug #653180. 
*/ 638START_TEST(test_line_and_column_numbers_inside_handlers) { 639 const char *text = "<a>\n" /* Unix end-of-line */ 640 " <b>\r\n" /* Windows end-of-line */ 641 " <c/>\r" /* Mac OS end-of-line */ 642 " </b>\n" 643 " <d>\n" 644 " <f/>\n" 645 " </d>\n" 646 "</a>"; 647 const StructDataEntry expected[] 648 = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG}, 649 {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG}, 650 {XCS("b"), 2, 4, STRUCT_END_TAG}, {XCS("d"), 2, 5, STRUCT_START_TAG}, 651 {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG}, 652 {XCS("d"), 2, 7, STRUCT_END_TAG}, {XCS("a"), 0, 8, STRUCT_END_TAG}}; 653 const int expected_count = sizeof(expected) / sizeof(StructDataEntry); 654 StructData storage; 655 656 StructData_Init(&storage); 657 XML_SetUserData(g_parser, &storage); 658 XML_SetStartElementHandler(g_parser, start_element_event_handler2); 659 XML_SetEndElementHandler(g_parser, end_element_event_handler2); 660 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 661 == XML_STATUS_ERROR) 662 xml_failure(g_parser); 663 664 StructData_CheckItems(&storage, expected, expected_count); 665 StructData_Dispose(&storage); 666} 667END_TEST 668 669/* Regression test #4 for SF bug #653180. */ 670START_TEST(test_line_number_after_error) { 671 const char *text = "<a>\n" 672 " <b>\n" 673 " </a>"; /* missing </b> */ 674 XML_Size lineno; 675 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 676 != XML_STATUS_ERROR) 677 fail("Expected a parse error"); 678 679 lineno = XML_GetCurrentLineNumber(g_parser); 680 if (lineno != 3) { 681 char buffer[100]; 682 snprintf(buffer, sizeof(buffer), 683 "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno); 684 fail(buffer); 685 } 686} 687END_TEST 688 689/* Regression test #5 for SF bug #653180. 
*/ 690START_TEST(test_column_number_after_error) { 691 const char *text = "<a>\n" 692 " <b>\n" 693 " </a>"; /* missing </b> */ 694 XML_Size colno; 695 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 696 != XML_STATUS_ERROR) 697 fail("Expected a parse error"); 698 699 colno = XML_GetCurrentColumnNumber(g_parser); 700 if (colno != 4) { 701 char buffer[100]; 702 snprintf(buffer, sizeof(buffer), 703 "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno); 704 fail(buffer); 705 } 706} 707END_TEST 708 709/* Regression test for SF bug #478332. */ 710START_TEST(test_really_long_lines) { 711 /* This parses an input line longer than INIT_DATA_BUF_SIZE 712 characters long (defined to be 1024 in xmlparse.c). We take a 713 really cheesy approach to building the input buffer, because 714 this avoids writing bugs in buffer-filling code. 715 */ 716 const char *text 717 = "<e>" 718 /* 64 chars */ 719 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 720 /* until we have at least 1024 characters on the line: */ 721 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 722 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 723 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 724 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 725 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 726 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 727 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 728 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 729 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 730 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 731 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 732 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 733 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 734 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 735 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 736 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 737 "</e>"; 738 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 739 == XML_STATUS_ERROR) 740 xml_failure(g_parser); 741} 742END_TEST 743 744/* Test cdata processing across a buffer boundary */ 745START_TEST(test_really_long_encoded_lines) { 746 /* As above, except that we want to provoke an output buffer 747 * overflow with a non-trivial encoding. For this we need to pass 748 * the whole cdata in one go, not byte-by-byte. 749 */ 750 void *buffer; 751 const char *text 752 = "<?xml version='1.0' encoding='iso-8859-1'?>" 753 "<e>" 754 /* 64 chars */ 755 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 756 /* until we have at least 1024 characters on the line: */ 757 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 758 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 759 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 760 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 761 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 762 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 763 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 764 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 765 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 766 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 767 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 768 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 769 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 770 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 771 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 772 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+" 773 "</e>"; 774 int parse_len = (int)strlen(text); 775 776 /* Need a cdata handler to provoke the code path we want to test */ 777 XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler); 778 buffer = XML_GetBuffer(g_parser, parse_len); 779 if (buffer == NULL) 780 fail("Could not allocate parse buffer"); 781 assert(buffer != NULL); 782 memcpy(buffer, text, parse_len); 783 if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR) 784 xml_failure(g_parser); 785} 786END_TEST 787 788/* 789 * Element event tests. 790 */ 791 792START_TEST(test_end_element_events) { 793 const char *text = "<a><b><c/></b><d><f/></d></a>"; 794 const XML_Char *expected = XCS("/c/b/f/d/a"); 795 CharData storage; 796 797 CharData_Init(&storage); 798 XML_SetUserData(g_parser, &storage); 799 XML_SetEndElementHandler(g_parser, end_element_event_handler); 800 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 801 == XML_STATUS_ERROR) 802 xml_failure(g_parser); 803 CharData_CheckXMLChars(&storage, expected); 804} 805END_TEST 806 807/* 808 * Attribute tests. 809 */ 810 811/* Helper used by the following tests; this checks any "attr" and "refs" 812 attributes to make sure whitespace has been normalized. 813 814 Return true if whitespace has been normalized in a string, using 815 the rules for attribute value normalization. The 'is_cdata' flag 816 is needed since CDATA attributes don't need to have multiple 817 whitespace characters collapsed to a single space, while other 818 attribute data types do. (Section 3.3.3 of the recommendation.) 819*/ 820static int 821is_whitespace_normalized(const XML_Char *s, int is_cdata) { 822 int blanks = 0; 823 int at_start = 1; 824 while (*s) { 825 if (*s == XCS(' ')) 826 ++blanks; 827 else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r')) 828 return 0; 829 else { 830 if (at_start) { 831 at_start = 0; 832 if (blanks && ! 
is_cdata) 833 /* illegal leading blanks */ 834 return 0; 835 } else if (blanks > 1 && ! is_cdata) 836 return 0; 837 blanks = 0; 838 } 839 ++s; 840 } 841 if (blanks && ! is_cdata) 842 return 0; 843 return 1; 844} 845 846/* Check the attribute whitespace checker: */ 847START_TEST(test_helper_is_whitespace_normalized) { 848 assert(is_whitespace_normalized(XCS("abc"), 0)); 849 assert(is_whitespace_normalized(XCS("abc"), 1)); 850 assert(is_whitespace_normalized(XCS("abc def ghi"), 0)); 851 assert(is_whitespace_normalized(XCS("abc def ghi"), 1)); 852 assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0)); 853 assert(is_whitespace_normalized(XCS(" abc def ghi"), 1)); 854 assert(! is_whitespace_normalized(XCS("abc def ghi"), 0)); 855 assert(is_whitespace_normalized(XCS("abc def ghi"), 1)); 856 assert(! is_whitespace_normalized(XCS("abc def ghi "), 0)); 857 assert(is_whitespace_normalized(XCS("abc def ghi "), 1)); 858 assert(! is_whitespace_normalized(XCS(" "), 0)); 859 assert(is_whitespace_normalized(XCS(" "), 1)); 860 assert(! is_whitespace_normalized(XCS("\t"), 0)); 861 assert(! is_whitespace_normalized(XCS("\t"), 1)); 862 assert(! is_whitespace_normalized(XCS("\n"), 0)); 863 assert(! is_whitespace_normalized(XCS("\n"), 1)); 864 assert(! is_whitespace_normalized(XCS("\r"), 0)); 865 assert(! is_whitespace_normalized(XCS("\r"), 1)); 866 assert(! is_whitespace_normalized(XCS("abc\t def"), 1)); 867} 868END_TEST 869 870static void XMLCALL 871check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name, 872 const XML_Char **atts) { 873 int i; 874 UNUSED_P(userData); 875 UNUSED_P(name); 876 for (i = 0; atts[i] != NULL; i += 2) { 877 const XML_Char *attrname = atts[i]; 878 const XML_Char *value = atts[i + 1]; 879 if (xcstrcmp(XCS("attr"), attrname) == 0 880 || xcstrcmp(XCS("ents"), attrname) == 0 881 || xcstrcmp(XCS("refs"), attrname) == 0) { 882 if (! 
is_whitespace_normalized(value, 0)) { 883 char buffer[256]; 884 snprintf(buffer, sizeof(buffer), 885 "attribute value not normalized: %" XML_FMT_STR 886 "='%" XML_FMT_STR "'", 887 attrname, value); 888 fail(buffer); 889 } 890 } 891 } 892} 893 894START_TEST(test_attr_whitespace_normalization) { 895 const char *text 896 = "<!DOCTYPE doc [\n" 897 " <!ATTLIST doc\n" 898 " attr NMTOKENS #REQUIRED\n" 899 " ents ENTITIES #REQUIRED\n" 900 " refs IDREFS #REQUIRED>\n" 901 "]>\n" 902 "<doc attr=' a b c\t\td\te\t' refs=' id-1 \t id-2\t\t' \n" 903 " ents=' ent-1 \t\r\n" 904 " ent-2 ' >\n" 905 " <e id='id-1'/>\n" 906 " <e id='id-2'/>\n" 907 "</doc>"; 908 909 XML_SetStartElementHandler(g_parser, 910 check_attr_contains_normalized_whitespace); 911 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 912 == XML_STATUS_ERROR) 913 xml_failure(g_parser); 914} 915END_TEST 916 917/* 918 * XML declaration tests. 919 */ 920 921START_TEST(test_xmldecl_misplaced) { 922 expect_failure("\n" 923 "<?xml version='1.0'?>\n" 924 "<a/>", 925 XML_ERROR_MISPLACED_XML_PI, 926 "failed to report misplaced XML declaration"); 927} 928END_TEST 929 930START_TEST(test_xmldecl_invalid) { 931 expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL, 932 "Failed to report invalid XML declaration"); 933} 934END_TEST 935 936START_TEST(test_xmldecl_missing_attr) { 937 expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL, 938 "Failed to report missing XML declaration attribute"); 939} 940END_TEST 941 942START_TEST(test_xmldecl_missing_value) { 943 expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n" 944 "<doc/>", 945 XML_ERROR_XML_DECL, 946 "Failed to report missing attribute value"); 947} 948END_TEST 949 950/* Regression test for SF bug #584832. 
*/ 951START_TEST(test_unknown_encoding_internal_entity) { 952 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n" 953 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n" 954 "<test a='&foo;'/>"; 955 956 XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL); 957 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 958 == XML_STATUS_ERROR) 959 xml_failure(g_parser); 960} 961END_TEST 962 963/* Test unrecognised encoding handler */ 964START_TEST(test_unrecognised_encoding_internal_entity) { 965 const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n" 966 "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n" 967 "<test a='&foo;'/>"; 968 969 XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL); 970 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 971 != XML_STATUS_ERROR) 972 fail("Unrecognised encoding not rejected"); 973} 974END_TEST 975 976/* Regression test for SF bug #620106. */ 977START_TEST(test_ext_entity_set_encoding) { 978 const char *text = "<!DOCTYPE doc [\n" 979 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 980 "]>\n" 981 "<doc>&en;</doc>"; 982 ExtTest test_data 983 = {/* This text says it's an unsupported encoding, but it's really 984 UTF-8, which we tell Expat using XML_SetEncoding(). 
985 */ 986 "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL}; 987#ifdef XML_UNICODE 988 const XML_Char *expected = XCS("\x00e9"); 989#else 990 const XML_Char *expected = XCS("\xc3\xa9"); 991#endif 992 993 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 994 run_ext_character_check(text, &test_data, expected); 995} 996END_TEST 997 998/* Test external entities with no handler */ 999START_TEST(test_ext_entity_no_handler) { 1000 const char *text = "<!DOCTYPE doc [\n" 1001 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1002 "]>\n" 1003 "<doc>&en;</doc>"; 1004 1005 XML_SetDefaultHandler(g_parser, dummy_default_handler); 1006 run_character_check(text, XCS("")); 1007} 1008END_TEST 1009 1010/* Test UTF-8 BOM is accepted */ 1011START_TEST(test_ext_entity_set_bom) { 1012 const char *text = "<!DOCTYPE doc [\n" 1013 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1014 "]>\n" 1015 "<doc>&en;</doc>"; 1016 ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */ 1017 "<?xml encoding='iso-8859-3'?>" 1018 "\xC3\xA9", 1019 XCS("utf-8"), NULL}; 1020#ifdef XML_UNICODE 1021 const XML_Char *expected = XCS("\x00e9"); 1022#else 1023 const XML_Char *expected = XCS("\xc3\xa9"); 1024#endif 1025 1026 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1027 run_ext_character_check(text, &test_data, expected); 1028} 1029END_TEST 1030 1031/* Test that bad encodings are faulted */ 1032START_TEST(test_ext_entity_bad_encoding) { 1033 const char *text = "<!DOCTYPE doc [\n" 1034 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1035 "]>\n" 1036 "<doc>&en;</doc>"; 1037 ExtFaults fault 1038 = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted", 1039 XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING}; 1040 1041 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 1042 XML_SetUserData(g_parser, &fault); 1043 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 1044 "Bad encoding should not have been accepted"); 1045} 
1046END_TEST 1047 1048/* Try handing an invalid encoding to an external entity parser */ 1049START_TEST(test_ext_entity_bad_encoding_2) { 1050 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1051 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1052 "<doc>&entity;</doc>"; 1053 ExtFaults fault 1054 = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted", 1055 XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING}; 1056 1057 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1058 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 1059 XML_SetUserData(g_parser, &fault); 1060 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 1061 "Bad encoding not faulted in external entity handler"); 1062} 1063END_TEST 1064 1065/* Test that no error is reported for unknown entities if we don't 1066 read an external subset. This was fixed in Expat 1.95.5. 1067*/ 1068START_TEST(test_wfc_undeclared_entity_unread_external_subset) { 1069 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 1070 "<doc>&entity;</doc>"; 1071 1072 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1073 == XML_STATUS_ERROR) 1074 xml_failure(g_parser); 1075} 1076END_TEST 1077 1078/* Test that an error is reported for unknown entities if we don't 1079 have an external subset. 1080*/ 1081START_TEST(test_wfc_undeclared_entity_no_external_subset) { 1082 expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY, 1083 "Parser did not report undefined entity w/out a DTD."); 1084} 1085END_TEST 1086 1087/* Test that an error is reported for unknown entities if we don't 1088 read an external subset, but have been declared standalone. 
1089*/ 1090START_TEST(test_wfc_undeclared_entity_standalone) { 1091 const char *text 1092 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 1093 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1094 "<doc>&entity;</doc>"; 1095 1096 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 1097 "Parser did not report undefined entity (standalone)."); 1098} 1099END_TEST 1100 1101/* Test that an error is reported for unknown entities if we have read 1102 an external subset, and standalone is true. 1103*/ 1104START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) { 1105 const char *text 1106 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 1107 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1108 "<doc>&entity;</doc>"; 1109 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1110 1111 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1112 XML_SetUserData(g_parser, &test_data); 1113 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1114 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 1115 "Parser did not report undefined entity (external DTD)."); 1116} 1117END_TEST 1118 1119/* Test that external entity handling is not done if the parsing flag 1120 * is set to UNLESS_STANDALONE 1121 */ 1122START_TEST(test_entity_with_external_subset_unless_standalone) { 1123 const char *text 1124 = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n" 1125 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1126 "<doc>&entity;</doc>"; 1127 ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL}; 1128 1129 XML_SetParamEntityParsing(g_parser, 1130 XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE); 1131 XML_SetUserData(g_parser, &test_data); 1132 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1133 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 1134 "Parser did not report undefined entity"); 1135} 1136END_TEST 1137 1138/* Test that no error is reported for unknown entities if we have read 1139 an external subset, and standalone is 
false. 1140*/ 1141START_TEST(test_wfc_undeclared_entity_with_external_subset) { 1142 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1143 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1144 "<doc>&entity;</doc>"; 1145 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1146 1147 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1148 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1149 run_ext_character_check(text, &test_data, XCS("")); 1150} 1151END_TEST 1152 1153/* Test that an error is reported if our NotStandalone handler fails */ 1154START_TEST(test_not_standalone_handler_reject) { 1155 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1156 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1157 "<doc>&entity;</doc>"; 1158 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1159 1160 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1161 XML_SetUserData(g_parser, &test_data); 1162 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 1163 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); 1164 expect_failure(text, XML_ERROR_NOT_STANDALONE, 1165 "NotStandalone handler failed to reject"); 1166 1167 /* Try again but without external entity handling */ 1168 XML_ParserReset(g_parser, NULL); 1169 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); 1170 expect_failure(text, XML_ERROR_NOT_STANDALONE, 1171 "NotStandalone handler failed to reject"); 1172} 1173END_TEST 1174 1175/* Test that no error is reported if our NotStandalone handler succeeds */ 1176START_TEST(test_not_standalone_handler_accept) { 1177 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 1178 "<!DOCTYPE doc SYSTEM 'foo'>\n" 1179 "<doc>&entity;</doc>"; 1180 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 1181 1182 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1183 XML_SetExternalEntityRefHandler(g_parser, 
external_entity_loader); 1184 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler); 1185 run_ext_character_check(text, &test_data, XCS("")); 1186 1187 /* Repeat without the external entity handler */ 1188 XML_ParserReset(g_parser, NULL); 1189 XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler); 1190 run_character_check(text, XCS("")); 1191} 1192END_TEST 1193 1194START_TEST(test_wfc_no_recursive_entity_refs) { 1195 const char *text = "<!DOCTYPE doc [\n" 1196 " <!ENTITY entity '&entity;'>\n" 1197 "]>\n" 1198 "<doc>&entity;</doc>"; 1199 1200 expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF, 1201 "Parser did not report recursive entity reference."); 1202} 1203END_TEST 1204 1205START_TEST(test_recursive_external_parameter_entity_2) { 1206 struct TestCase { 1207 const char *doc; 1208 enum XML_Status expectedStatus; 1209 }; 1210 1211 struct TestCase cases[] = { 1212 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR}, 1213 {"<!ENTITY % p1 '%p1;'>" 1214 "<!ENTITY % p1 'first declaration wins'>", 1215 XML_STATUS_ERROR}, 1216 {"<!ENTITY % p1 'first declaration wins'>" 1217 "<!ENTITY % p1 '%p1;'>", 1218 XML_STATUS_OK}, 1219 {"<!ENTITY % p1 '%p1;'>", XML_STATUS_OK}, 1220 }; 1221 1222 for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { 1223 const char *const doc = cases[i].doc; 1224 const enum XML_Status expectedStatus = cases[i].expectedStatus; 1225 set_subtest("%s", doc); 1226 1227 XML_Parser parser = XML_ParserCreate(NULL); 1228 assert_true(parser != NULL); 1229 1230 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL); 1231 assert_true(ext_parser != NULL); 1232 1233 const enum XML_Status actualStatus 1234 = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE); 1235 1236 assert_true(actualStatus == expectedStatus); 1237 if (actualStatus != XML_STATUS_OK) { 1238 assert_true(XML_GetErrorCode(ext_parser) 1239 == XML_ERROR_RECURSIVE_ENTITY_REF); 1240 } 1241 1242 XML_ParserFree(ext_parser); 1243 
XML_ParserFree(parser); 1244 } 1245} 1246END_TEST 1247 1248/* Test incomplete external entities are faulted */ 1249START_TEST(test_ext_entity_invalid_parse) { 1250 const char *text = "<!DOCTYPE doc [\n" 1251 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 1252 "]>\n" 1253 "<doc>&en;</doc>"; 1254 const ExtFaults faults[] 1255 = {{"<", "Incomplete element declaration not faulted", NULL, 1256 XML_ERROR_UNCLOSED_TOKEN}, 1257 {"<\xe2\x82", /* First two bytes of a three-byte char */ 1258 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR}, 1259 {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL, 1260 XML_ERROR_PARTIAL_CHAR}, 1261 {NULL, NULL, NULL, XML_ERROR_NONE}}; 1262 const ExtFaults *fault = faults; 1263 1264 for (; fault->parse_text != NULL; fault++) { 1265 set_subtest("\"%s\"", fault->parse_text); 1266 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 1267 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 1268 XML_SetUserData(g_parser, (void *)fault); 1269 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 1270 "Parser did not report external entity error"); 1271 XML_ParserReset(g_parser, NULL); 1272 } 1273} 1274END_TEST 1275 1276/* Regression test for SF bug #483514. 
*/ 1277START_TEST(test_dtd_default_handling) { 1278 const char *text = "<!DOCTYPE doc [\n" 1279 "<!ENTITY e SYSTEM 'http://example.org/e'>\n" 1280 "<!NOTATION n SYSTEM 'http://example.org/n'>\n" 1281 "<!ELEMENT doc EMPTY>\n" 1282 "<!ATTLIST doc a CDATA #IMPLIED>\n" 1283 "<?pi in dtd?>\n" 1284 "<!--comment in dtd-->\n" 1285 "]><doc/>"; 1286 1287 XML_SetDefaultHandler(g_parser, accumulate_characters); 1288 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 1289 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 1290 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); 1291 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler); 1292 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 1293 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); 1294 XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler); 1295 XML_SetCommentHandler(g_parser, dummy_comment_handler); 1296 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler); 1297 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler); 1298 run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>")); 1299} 1300END_TEST 1301 1302/* Test handling of attribute declarations */ 1303START_TEST(test_dtd_attr_handling) { 1304 const char *prolog = "<!DOCTYPE doc [\n" 1305 "<!ELEMENT doc EMPTY>\n"; 1306 AttTest attr_data[] 1307 = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n" 1308 "]>" 1309 "<doc a='two'/>", 1310 XCS("doc"), XCS("a"), 1311 XCS("(one|two|three)"), /* Extraneous spaces will be removed */ 1312 NULL, XML_TRUE}, 1313 {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n" 1314 "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n" 1315 "]>" 1316 "<doc/>", 1317 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE}, 1318 {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n" 1319 "]>" 1320 "<doc/>", 1321 XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE}, 1322 {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n" 1323 "]>" 
1324 "<doc/>", 1325 XCS("doc"), XCS("a"), XCS("CDATA"), 1326#ifdef XML_UNICODE 1327 XCS("\x06f2"), 1328#else 1329 XCS("\xdb\xb2"), 1330#endif 1331 XML_FALSE}, 1332 {NULL, NULL, NULL, NULL, NULL, XML_FALSE}}; 1333 AttTest *test; 1334 1335 for (test = attr_data; test->definition != NULL; test++) { 1336 set_subtest("%s", test->definition); 1337 XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler); 1338 XML_SetUserData(g_parser, test); 1339 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog), 1340 XML_FALSE) 1341 == XML_STATUS_ERROR) 1342 xml_failure(g_parser); 1343 if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition, 1344 (int)strlen(test->definition), XML_TRUE) 1345 == XML_STATUS_ERROR) 1346 xml_failure(g_parser); 1347 XML_ParserReset(g_parser, NULL); 1348 } 1349} 1350END_TEST 1351 1352/* See related SF bug #673791. 1353 When namespace processing is enabled, setting the namespace URI for 1354 a prefix is not allowed; this test ensures that it *is* allowed 1355 when namespace processing is not enabled. 1356 (See Namespaces in XML, section 2.) 1357*/ 1358START_TEST(test_empty_ns_without_namespaces) { 1359 const char *text = "<doc xmlns:prefix='http://example.org/'>\n" 1360 " <e xmlns:prefix=''/>\n" 1361 "</doc>"; 1362 1363 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1364 == XML_STATUS_ERROR) 1365 xml_failure(g_parser); 1366} 1367END_TEST 1368 1369/* Regression test for SF bug #824420. 1370 Checks that an xmlns:prefix attribute set in an attribute's default 1371 value isn't misinterpreted. 
1372*/ 1373START_TEST(test_ns_in_attribute_default_without_namespaces) { 1374 const char *text = "<!DOCTYPE e:element [\n" 1375 " <!ATTLIST e:element\n" 1376 " xmlns:e CDATA 'http://example.org/'>\n" 1377 " ]>\n" 1378 "<e:element/>"; 1379 1380 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1381 == XML_STATUS_ERROR) 1382 xml_failure(g_parser); 1383} 1384END_TEST 1385 1386/* Regression test for SF bug #1515266: missing check of stopped 1387 parser in doContext() 'for' loop. */ 1388START_TEST(test_stop_parser_between_char_data_calls) { 1389 /* The sample data must be big enough that there are two calls to 1390 the character data handler from within the inner "for" loop of 1391 the XML_TOK_DATA_CHARS case in doContent(), and the character 1392 handler must stop the parser and clear the character data 1393 handler. 1394 */ 1395 const char *text = long_character_data_text; 1396 1397 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1398 g_resumable = XML_FALSE; 1399 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1400 != XML_STATUS_ERROR) 1401 xml_failure(g_parser); 1402 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED) 1403 xml_failure(g_parser); 1404} 1405END_TEST 1406 1407/* Regression test for SF bug #1515266: missing check of stopped 1408 parser in doContext() 'for' loop. */ 1409START_TEST(test_suspend_parser_between_char_data_calls) { 1410 /* The sample data must be big enough that there are two calls to 1411 the character data handler from within the inner "for" loop of 1412 the XML_TOK_DATA_CHARS case in doContent(), and the character 1413 handler must stop the parser and clear the character data 1414 handler. 
1415 */ 1416 const char *text = long_character_data_text; 1417 1418 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1419 g_resumable = XML_TRUE; 1420 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1421 != XML_STATUS_SUSPENDED) 1422 xml_failure(g_parser); 1423 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) 1424 xml_failure(g_parser); 1425 /* Try parsing directly */ 1426 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1427 != XML_STATUS_ERROR) 1428 fail("Attempt to continue parse while suspended not faulted"); 1429 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED) 1430 fail("Suspended parse not faulted with correct error"); 1431} 1432END_TEST 1433 1434/* Test repeated calls to XML_StopParser are handled correctly */ 1435START_TEST(test_repeated_stop_parser_between_char_data_calls) { 1436 const char *text = long_character_data_text; 1437 1438 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); 1439 g_resumable = XML_FALSE; 1440 g_abortable = XML_FALSE; 1441 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1442 != XML_STATUS_ERROR) 1443 fail("Failed to double-stop parser"); 1444 1445 XML_ParserReset(g_parser, NULL); 1446 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); 1447 g_resumable = XML_TRUE; 1448 g_abortable = XML_FALSE; 1449 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1450 != XML_STATUS_SUSPENDED) 1451 fail("Failed to double-suspend parser"); 1452 1453 XML_ParserReset(g_parser, NULL); 1454 XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler); 1455 g_resumable = XML_TRUE; 1456 g_abortable = XML_TRUE; 1457 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1458 != XML_STATUS_ERROR) 1459 fail("Failed to suspend-abort parser"); 1460} 1461END_TEST 1462 1463START_TEST(test_good_cdata_ascii) { 1464 const char *text = "<a><![CDATA[<greeting>Hello, 
world!</greeting>]]></a>"; 1465 const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>"); 1466 1467 CharData storage; 1468 CharData_Init(&storage); 1469 XML_SetUserData(g_parser, &storage); 1470 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1471 /* Add start and end handlers for coverage */ 1472 XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler); 1473 XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler); 1474 1475 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1476 == XML_STATUS_ERROR) 1477 xml_failure(g_parser); 1478 CharData_CheckXMLChars(&storage, expected); 1479 1480 /* Try again, this time with a default handler */ 1481 XML_ParserReset(g_parser, NULL); 1482 CharData_Init(&storage); 1483 XML_SetUserData(g_parser, &storage); 1484 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1485 XML_SetDefaultHandler(g_parser, dummy_default_handler); 1486 1487 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1488 == XML_STATUS_ERROR) 1489 xml_failure(g_parser); 1490 CharData_CheckXMLChars(&storage, expected); 1491} 1492END_TEST 1493 1494START_TEST(test_good_cdata_utf16) { 1495 /* Test data is: 1496 * <?xml version='1.0' encoding='utf-16'?> 1497 * <a><![CDATA[hello]]></a> 1498 */ 1499 const char text[] 1500 = "\0<\0?\0x\0m\0l\0" 1501 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1502 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1503 "1\0" 1504 "6\0'" 1505 "\0?\0>\0\n" 1506 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>"; 1507 const XML_Char *expected = XCS("hello"); 1508 1509 CharData storage; 1510 CharData_Init(&storage); 1511 XML_SetUserData(g_parser, &storage); 1512 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1513 1514 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1515 == XML_STATUS_ERROR) 1516 xml_failure(g_parser); 1517 CharData_CheckXMLChars(&storage, expected); 1518} 
1519END_TEST 1520 1521START_TEST(test_good_cdata_utf16_le) { 1522 /* Test data is: 1523 * <?xml version='1.0' encoding='utf-16'?> 1524 * <a><![CDATA[hello]]></a> 1525 */ 1526 const char text[] 1527 = "<\0?\0x\0m\0l\0" 1528 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1529 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1530 "1\0" 1531 "6\0'" 1532 "\0?\0>\0\n" 1533 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0"; 1534 const XML_Char *expected = XCS("hello"); 1535 1536 CharData storage; 1537 CharData_Init(&storage); 1538 XML_SetUserData(g_parser, &storage); 1539 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1540 1541 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1542 == XML_STATUS_ERROR) 1543 xml_failure(g_parser); 1544 CharData_CheckXMLChars(&storage, expected); 1545} 1546END_TEST 1547 1548/* Test UTF16 conversion of a long cdata string */ 1549 1550/* 16 characters: handy macro to reduce visual clutter */ 1551#define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P" 1552 1553START_TEST(test_long_cdata_utf16) { 1554 /* Test data is: 1555 * <?xlm version='1.0' encoding='utf-16'?> 1556 * <a><![CDATA[ 1557 * ABCDEFGHIJKLMNOP 1558 * ]]></a> 1559 */ 1560 const char text[] 1561 = "\0<\0?\0x\0m\0l\0 " 1562 "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 " 1563 "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>" 1564 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" 1565 /* 64 characters per line */ 1566 /* clang-format off */ 1567 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1568 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1569 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1570 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1571 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1572 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1573 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 
A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1574 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1575 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1576 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1577 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1578 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1579 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1580 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1581 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1582 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 A_TO_P_IN_UTF16 1583 A_TO_P_IN_UTF16 1584 /* clang-format on */ 1585 "\0]\0]\0>\0<\0/\0a\0>"; 1586 const XML_Char *expected = 1587 /* clang-format off */ 1588 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1589 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1590 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1591 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1592 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1593 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1594 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1595 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1596 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1597 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1598 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1599 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1600 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1601 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1602 XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1603 
XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP") 1604 XCS("ABCDEFGHIJKLMNOP"); 1605 /* clang-format on */ 1606 CharData storage; 1607 void *buffer; 1608 1609 CharData_Init(&storage); 1610 XML_SetUserData(g_parser, &storage); 1611 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1612 buffer = XML_GetBuffer(g_parser, sizeof(text) - 1); 1613 if (buffer == NULL) 1614 fail("Could not allocate parse buffer"); 1615 assert(buffer != NULL); 1616 memcpy(buffer, text, sizeof(text) - 1); 1617 if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR) 1618 xml_failure(g_parser); 1619 CharData_CheckXMLChars(&storage, expected); 1620} 1621END_TEST 1622 1623/* Test handling of multiple unit UTF-16 characters */ 1624START_TEST(test_multichar_cdata_utf16) { 1625 /* Test data is: 1626 * <?xml version='1.0' encoding='utf-16'?> 1627 * <a><![CDATA[{MINIM}{CROTCHET}]]></a> 1628 * 1629 * where {MINIM} is U+1d15e (a minim or half-note) 1630 * UTF-16: 0xd834 0xdd5e 1631 * UTF-8: 0xf0 0x9d 0x85 0x9e 1632 * and {CROTCHET} is U+1d15f (a crotchet or quarter-note) 1633 * UTF-16: 0xd834 0xdd5f 1634 * UTF-8: 0xf0 0x9d 0x85 0x9f 1635 */ 1636 const char text[] = "\0<\0?\0x\0m\0l\0" 1637 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1638 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1639 "1\0" 1640 "6\0'" 1641 "\0?\0>\0\n" 1642 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" 1643 "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f" 1644 "\0]\0]\0>\0<\0/\0a\0>"; 1645#ifdef XML_UNICODE 1646 const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f"); 1647#else 1648 const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f"); 1649#endif 1650 CharData storage; 1651 1652 CharData_Init(&storage); 1653 XML_SetUserData(g_parser, &storage); 1654 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 1655 1656 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1657 == XML_STATUS_ERROR) 1658 xml_failure(g_parser); 1659 
CharData_CheckXMLChars(&storage, expected); 1660} 1661END_TEST 1662 1663/* Test that an element name with a UTF-16 surrogate pair is rejected */ 1664START_TEST(test_utf16_bad_surrogate_pair) { 1665 /* Test data is: 1666 * <?xml version='1.0' encoding='utf-16'?> 1667 * <a><![CDATA[{BADLINB}]]></a> 1668 * 1669 * where {BADLINB} is U+10000 (the first Linear B character) 1670 * with the UTF-16 surrogate pair in the wrong order, i.e. 1671 * 0xdc00 0xd800 1672 */ 1673 const char text[] = "\0<\0?\0x\0m\0l\0" 1674 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1675 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1676 "1\0" 1677 "6\0'" 1678 "\0?\0>\0\n" 1679 "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[" 1680 "\xdc\x00\xd8\x00" 1681 "\0]\0]\0>\0<\0/\0a\0>"; 1682 1683 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 1684 != XML_STATUS_ERROR) 1685 fail("Reversed UTF-16 surrogate pair not faulted"); 1686 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 1687 xml_failure(g_parser); 1688} 1689END_TEST 1690 1691START_TEST(test_bad_cdata) { 1692 struct CaseData { 1693 const char *text; 1694 enum XML_Error expectedError; 1695 }; 1696 1697 struct CaseData cases[] 1698 = {{"<a><", XML_ERROR_UNCLOSED_TOKEN}, 1699 {"<a><!", XML_ERROR_UNCLOSED_TOKEN}, 1700 {"<a><![", XML_ERROR_UNCLOSED_TOKEN}, 1701 {"<a><![C", XML_ERROR_UNCLOSED_TOKEN}, 1702 {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN}, 1703 {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN}, 1704 {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN}, 1705 {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN}, 1706 1707 {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1708 {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1709 {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1710 1711 {"<a><!<a/>", XML_ERROR_INVALID_TOKEN}, 1712 {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */ 1713 {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! 
*/ 1714 {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN}, 1715 {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN}, 1716 {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN}, 1717 {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN}, 1718 1719 {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1720 {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1721 {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}}; 1722 1723 size_t i = 0; 1724 for (; i < sizeof(cases) / sizeof(struct CaseData); i++) { 1725 set_subtest("%s", cases[i].text); 1726 const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES( 1727 g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE); 1728 const enum XML_Error actualError = XML_GetErrorCode(g_parser); 1729 1730 assert(actualStatus == XML_STATUS_ERROR); 1731 1732 if (actualError != cases[i].expectedError) { 1733 char message[100]; 1734 snprintf(message, sizeof(message), 1735 "Expected error %d but got error %d for case %u: \"%s\"\n", 1736 cases[i].expectedError, actualError, (unsigned int)i + 1, 1737 cases[i].text); 1738 fail(message); 1739 } 1740 1741 XML_ParserReset(g_parser, NULL); 1742 } 1743} 1744END_TEST 1745 1746/* Test failures in UTF-16 CDATA */ 1747START_TEST(test_bad_cdata_utf16) { 1748 struct CaseData { 1749 size_t text_bytes; 1750 const char *text; 1751 enum XML_Error expected_error; 1752 }; 1753 1754 const char prolog[] = "\0<\0?\0x\0m\0l\0" 1755 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0" 1756 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0" 1757 "1\0" 1758 "6\0'" 1759 "\0?\0>\0\n" 1760 "\0<\0a\0>"; 1761 struct CaseData cases[] = { 1762 {1, "\0", XML_ERROR_UNCLOSED_TOKEN}, 1763 {2, "\0<", XML_ERROR_UNCLOSED_TOKEN}, 1764 {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN}, 1765 {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN}, 1766 {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN}, 1767 {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN}, 1768 {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN}, 1769 {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN}, 1770 {9, "\0<\0!\0[\0C\0", 
XML_ERROR_UNCLOSED_TOKEN}, 1771 {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN}, 1772 {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN}, 1773 {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN}, 1774 {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN}, 1775 {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN}, 1776 {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN}, 1777 {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN}, 1778 {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN}, 1779 {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1780 {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1781 {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION}, 1782 /* Now add a four-byte UTF-16 character */ 1783 {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8", 1784 XML_ERROR_UNCLOSED_CDATA_SECTION}, 1785 {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR}, 1786 {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd", 1787 XML_ERROR_PARTIAL_CHAR}, 1788 {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e", 1789 XML_ERROR_UNCLOSED_CDATA_SECTION}}; 1790 size_t i; 1791 1792 for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) { 1793 set_subtest("case %lu", (long unsigned)(i + 1)); 1794 enum XML_Status actual_status; 1795 enum XML_Error actual_error; 1796 1797 if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1, 1798 XML_FALSE) 1799 == XML_STATUS_ERROR) 1800 xml_failure(g_parser); 1801 actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text, 1802 (int)cases[i].text_bytes, XML_TRUE); 1803 assert(actual_status == XML_STATUS_ERROR); 1804 actual_error = XML_GetErrorCode(g_parser); 1805 if (actual_error != cases[i].expected_error) { 1806 char message[1024]; 1807 1808 snprintf(message, sizeof(message), 1809 "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR 1810 ") for case %lu\n", 1811 cases[i].expected_error, 1812 
XML_ErrorString(cases[i].expected_error), actual_error, 1813 XML_ErrorString(actual_error), (long unsigned)(i + 1)); 1814 fail(message); 1815 } 1816 XML_ParserReset(g_parser, NULL); 1817 } 1818} 1819END_TEST 1820 1821/* Test stopping the parser in cdata handler */ 1822START_TEST(test_stop_parser_between_cdata_calls) { 1823 const char *text = long_cdata_text; 1824 1825 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1826 g_resumable = XML_FALSE; 1827 expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler"); 1828} 1829END_TEST 1830 1831/* Test suspending the parser in cdata handler */ 1832START_TEST(test_suspend_parser_between_cdata_calls) { 1833 const char *text = long_cdata_text; 1834 enum XML_Status result; 1835 1836 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 1837 g_resumable = XML_TRUE; 1838 result = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE); 1839 if (result != XML_STATUS_SUSPENDED) { 1840 if (result == XML_STATUS_ERROR) 1841 xml_failure(g_parser); 1842 fail("Parse not suspended in CDATA handler"); 1843 } 1844 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) 1845 xml_failure(g_parser); 1846} 1847END_TEST 1848 1849/* Test memory allocation functions */ 1850START_TEST(test_memory_allocation) { 1851 char *buffer = (char *)XML_MemMalloc(g_parser, 256); 1852 char *p; 1853 1854 if (buffer == NULL) { 1855 fail("Allocation failed"); 1856 } else { 1857 /* Try writing to memory; some OSes try to cheat! 
*/ 1858 buffer[0] = 'T'; 1859 buffer[1] = 'E'; 1860 buffer[2] = 'S'; 1861 buffer[3] = 'T'; 1862 buffer[4] = '\0'; 1863 if (strcmp(buffer, "TEST") != 0) { 1864 fail("Memory not writable"); 1865 } else { 1866 p = (char *)XML_MemRealloc(g_parser, buffer, 512); 1867 if (p == NULL) { 1868 fail("Reallocation failed"); 1869 } else { 1870 /* Write again, just to be sure */ 1871 buffer = p; 1872 buffer[0] = 'V'; 1873 if (strcmp(buffer, "VEST") != 0) { 1874 fail("Reallocated memory not writable"); 1875 } 1876 } 1877 } 1878 XML_MemFree(g_parser, buffer); 1879 } 1880} 1881END_TEST 1882 1883/* Test XML_DefaultCurrent() passes handling on correctly */ 1884START_TEST(test_default_current) { 1885 const char *text = "<doc>hell]</doc>"; 1886 const char *entity_text = "<!DOCTYPE doc [\n" 1887 "<!ENTITY entity '%'>\n" 1888 "]>\n" 1889 "<doc>&entity;</doc>"; 1890 1891 set_subtest("with defaulting"); 1892 { 1893 struct handler_record_list storage; 1894 storage.count = 0; 1895 XML_SetDefaultHandler(g_parser, record_default_handler); 1896 XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 1897 XML_SetUserData(g_parser, &storage); 1898 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1899 == XML_STATUS_ERROR) 1900 xml_failure(g_parser); 1901 int i = 0; 1902 assert_record_handler_called(&storage, i++, "record_default_handler", 5); 1903 // we should have gotten one or more cdata callbacks, totaling 5 chars 1904 int cdata_len_remaining = 5; 1905 while (cdata_len_remaining > 0) { 1906 const struct handler_record_entry *c_entry 1907 = handler_record_get(&storage, i++); 1908 assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0); 1909 assert_true(c_entry->arg > 0); 1910 assert_true(c_entry->arg <= cdata_len_remaining); 1911 cdata_len_remaining -= c_entry->arg; 1912 // default handler must follow, with the exact same len argument. 
1913 assert_record_handler_called(&storage, i++, "record_default_handler", 1914 c_entry->arg); 1915 } 1916 assert_record_handler_called(&storage, i++, "record_default_handler", 6); 1917 assert_true(storage.count == i); 1918 } 1919 1920 /* Again, without the defaulting */ 1921 set_subtest("no defaulting"); 1922 { 1923 struct handler_record_list storage; 1924 storage.count = 0; 1925 XML_ParserReset(g_parser, NULL); 1926 XML_SetDefaultHandler(g_parser, record_default_handler); 1927 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler); 1928 XML_SetUserData(g_parser, &storage); 1929 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 1930 == XML_STATUS_ERROR) 1931 xml_failure(g_parser); 1932 int i = 0; 1933 assert_record_handler_called(&storage, i++, "record_default_handler", 5); 1934 // we should have gotten one or more cdata callbacks, totaling 5 chars 1935 int cdata_len_remaining = 5; 1936 while (cdata_len_remaining > 0) { 1937 const struct handler_record_entry *c_entry 1938 = handler_record_get(&storage, i++); 1939 assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0); 1940 assert_true(c_entry->arg > 0); 1941 assert_true(c_entry->arg <= cdata_len_remaining); 1942 cdata_len_remaining -= c_entry->arg; 1943 } 1944 assert_record_handler_called(&storage, i++, "record_default_handler", 6); 1945 assert_true(storage.count == i); 1946 } 1947 1948 /* Now with an internal entity to complicate matters */ 1949 set_subtest("with internal entity"); 1950 { 1951 struct handler_record_list storage; 1952 storage.count = 0; 1953 XML_ParserReset(g_parser, NULL); 1954 XML_SetDefaultHandler(g_parser, record_default_handler); 1955 XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 1956 XML_SetUserData(g_parser, &storage); 1957 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 1958 XML_TRUE) 1959 == XML_STATUS_ERROR) 1960 xml_failure(g_parser); 1961 /* The default handler suppresses the entity */ 
1962 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 1963 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 1964 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 1965 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 1966 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 1967 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 1968 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 1969 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 1970 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 1971 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 1972 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 1973 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 1974 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 1975 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 1976 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 1977 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 1978 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 1979 assert_record_handler_called(&storage, 17, "record_default_handler", 8); 1980 assert_record_handler_called(&storage, 18, "record_default_handler", 6); 1981 assert_true(storage.count == 19); 1982 } 1983 1984 /* Again, with a skip handler */ 1985 set_subtest("with skip handler"); 1986 { 1987 struct handler_record_list storage; 1988 storage.count = 0; 1989 XML_ParserReset(g_parser, NULL); 1990 XML_SetDefaultHandler(g_parser, record_default_handler); 1991 XML_SetCharacterDataHandler(g_parser, record_cdata_handler); 1992 XML_SetSkippedEntityHandler(g_parser, record_skip_handler); 1993 XML_SetUserData(g_parser, &storage); 1994 if (_XML_Parse_SINGLE_BYTES(g_parser, 
entity_text, (int)strlen(entity_text), 1995 XML_TRUE) 1996 == XML_STATUS_ERROR) 1997 xml_failure(g_parser); 1998 /* The default handler suppresses the entity */ 1999 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2000 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2001 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2002 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2003 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2004 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2005 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2006 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2007 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2008 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2009 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2010 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2011 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2012 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2013 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2014 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2015 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2016 assert_record_handler_called(&storage, 17, "record_skip_handler", 0); 2017 assert_record_handler_called(&storage, 18, "record_default_handler", 6); 2018 assert_true(storage.count == 19); 2019 } 2020 2021 /* This time, allow the entity through */ 2022 set_subtest("allow entity"); 2023 { 2024 struct handler_record_list storage; 2025 storage.count = 0; 2026 XML_ParserReset(g_parser, NULL); 2027 XML_SetDefaultHandlerExpand(g_parser, record_default_handler); 2028 XML_SetCharacterDataHandler(g_parser, 
record_cdata_handler); 2029 XML_SetUserData(g_parser, &storage); 2030 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 2031 XML_TRUE) 2032 == XML_STATUS_ERROR) 2033 xml_failure(g_parser); 2034 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2035 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2036 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2037 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2038 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2039 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2040 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2041 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2042 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2043 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2044 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2045 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2046 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2047 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2048 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2049 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2050 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2051 assert_record_handler_called(&storage, 17, "record_cdata_handler", 1); 2052 assert_record_handler_called(&storage, 18, "record_default_handler", 1); 2053 assert_record_handler_called(&storage, 19, "record_default_handler", 6); 2054 assert_true(storage.count == 20); 2055 } 2056 2057 /* Finally, without passing the cdata to the default handler */ 2058 set_subtest("not passing cdata"); 2059 { 2060 struct handler_record_list storage; 2061 storage.count = 0; 2062 
XML_ParserReset(g_parser, NULL); 2063 XML_SetDefaultHandlerExpand(g_parser, record_default_handler); 2064 XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler); 2065 XML_SetUserData(g_parser, &storage); 2066 if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), 2067 XML_TRUE) 2068 == XML_STATUS_ERROR) 2069 xml_failure(g_parser); 2070 assert_record_handler_called(&storage, 0, "record_default_handler", 9); 2071 assert_record_handler_called(&storage, 1, "record_default_handler", 1); 2072 assert_record_handler_called(&storage, 2, "record_default_handler", 3); 2073 assert_record_handler_called(&storage, 3, "record_default_handler", 1); 2074 assert_record_handler_called(&storage, 4, "record_default_handler", 1); 2075 assert_record_handler_called(&storage, 5, "record_default_handler", 1); 2076 assert_record_handler_called(&storage, 6, "record_default_handler", 8); 2077 assert_record_handler_called(&storage, 7, "record_default_handler", 1); 2078 assert_record_handler_called(&storage, 8, "record_default_handler", 6); 2079 assert_record_handler_called(&storage, 9, "record_default_handler", 1); 2080 assert_record_handler_called(&storage, 10, "record_default_handler", 7); 2081 assert_record_handler_called(&storage, 11, "record_default_handler", 1); 2082 assert_record_handler_called(&storage, 12, "record_default_handler", 1); 2083 assert_record_handler_called(&storage, 13, "record_default_handler", 1); 2084 assert_record_handler_called(&storage, 14, "record_default_handler", 1); 2085 assert_record_handler_called(&storage, 15, "record_default_handler", 1); 2086 assert_record_handler_called(&storage, 16, "record_default_handler", 5); 2087 assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler", 2088 1); 2089 assert_record_handler_called(&storage, 18, "record_default_handler", 6); 2090 assert_true(storage.count == 19); 2091 } 2092} 2093END_TEST 2094 2095/* Test DTD element parsing code paths */ 
2096START_TEST(test_dtd_elements) { 2097 const char *text = "<!DOCTYPE doc [\n" 2098 "<!ELEMENT doc (chapter)>\n" 2099 "<!ELEMENT chapter (#PCDATA)>\n" 2100 "]>\n" 2101 "<doc><chapter>Wombats are go</chapter></doc>"; 2102 2103 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 2104 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2105 == XML_STATUS_ERROR) 2106 xml_failure(g_parser); 2107} 2108END_TEST 2109 2110static void XMLCALL 2111element_decl_check_model(void *userData, const XML_Char *name, 2112 XML_Content *model) { 2113 UNUSED_P(userData); 2114 uint32_t errorFlags = 0; 2115 2116 /* Expected model array structure is this: 2117 * [0] (type 6, quant 0) 2118 * [1] (type 5, quant 0) 2119 * [3] (type 4, quant 0, name "bar") 2120 * [4] (type 4, quant 0, name "foo") 2121 * [5] (type 4, quant 3, name "xyz") 2122 * [2] (type 4, quant 2, name "zebra") 2123 */ 2124 errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0)); 2125 errorFlags |= ((model != NULL) ? 0 : (1u << 1)); 2126 2127 if (model != NULL) { 2128 errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2)); 2129 errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3)); 2130 errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4)); 2131 errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5)); 2132 errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6)); 2133 2134 errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7)); 2135 errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8)); 2136 errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9)); 2137 errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10)); 2138 errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11)); 2139 2140 errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12)); 2141 errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13)); 2142 errorFlags |= ((model[2].numchildren == 0) ? 
0 : (1u << 14)); 2143 errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15)); 2144 errorFlags 2145 |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16)); 2146 2147 errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17)); 2148 errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18)); 2149 errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19)); 2150 errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20)); 2151 errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21)); 2152 2153 errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22)); 2154 errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23)); 2155 errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24)); 2156 errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25)); 2157 errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26)); 2158 2159 errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27)); 2160 errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28)); 2161 errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29)); 2162 errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30)); 2163 errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 
0 : (1u << 31)); 2164 } 2165 2166 XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags); 2167 XML_FreeContentModel(g_parser, model); 2168} 2169 2170START_TEST(test_dtd_elements_nesting) { 2171 // Payload inspired by a test in Perl's XML::Parser 2172 const char *text = "<!DOCTYPE foo [\n" 2173 "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n" 2174 "]>\n" 2175 "<foo/>"; 2176 2177 XML_SetUserData(g_parser, (void *)(uintptr_t)-1); 2178 2179 XML_SetElementDeclHandler(g_parser, element_decl_check_model); 2180 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2181 == XML_STATUS_ERROR) 2182 xml_failure(g_parser); 2183 2184 if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0) 2185 fail("Element declaration model regression detected"); 2186} 2187END_TEST 2188 2189/* Test foreign DTD handling */ 2190START_TEST(test_set_foreign_dtd) { 2191 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"; 2192 const char *text2 = "<doc>&entity;</doc>"; 2193 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 2194 2195 /* Check hash salt is passed through too */ 2196 XML_SetHashSalt(g_parser, 0x12345678); 2197 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2198 XML_SetUserData(g_parser, &test_data); 2199 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 2200 /* Add a default handler to exercise more code paths */ 2201 XML_SetDefaultHandler(g_parser, dummy_default_handler); 2202 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) 2203 fail("Could not set foreign DTD"); 2204 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) 2205 == XML_STATUS_ERROR) 2206 xml_failure(g_parser); 2207 2208 /* Ensure that trying to set the DTD after parsing has started 2209 * is faulted, even if it's the same setting. 
2210 */ 2211 if (XML_UseForeignDTD(g_parser, XML_TRUE) 2212 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING) 2213 fail("Failed to reject late foreign DTD setting"); 2214 /* Ditto for the hash salt */ 2215 if (XML_SetHashSalt(g_parser, 0x23456789)) 2216 fail("Failed to reject late hash salt change"); 2217 2218 /* Now finish the parse */ 2219 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) 2220 == XML_STATUS_ERROR) 2221 xml_failure(g_parser); 2222} 2223END_TEST 2224 2225/* Test foreign DTD handling with a failing NotStandalone handler */ 2226START_TEST(test_foreign_dtd_not_standalone) { 2227 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2228 "<doc>&entity;</doc>"; 2229 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 2230 2231 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2232 XML_SetUserData(g_parser, &test_data); 2233 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 2234 XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler); 2235 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) 2236 fail("Could not set foreign DTD"); 2237 expect_failure(text, XML_ERROR_NOT_STANDALONE, 2238 "NotStandalonehandler failed to reject"); 2239} 2240END_TEST 2241 2242/* Test invalid character in a foreign DTD is faulted */ 2243START_TEST(test_invalid_foreign_dtd) { 2244 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2245 "<doc>&entity;</doc>"; 2246 ExtFaults test_data 2247 = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN}; 2248 2249 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2250 XML_SetUserData(g_parser, &test_data); 2251 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 2252 XML_UseForeignDTD(g_parser, XML_TRUE); 2253 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 2254 "Bad DTD should not have been accepted"); 2255} 2256END_TEST 2257 2258/* Test foreign DTD use with a doctype */ 
2259START_TEST(test_foreign_dtd_with_doctype) { 2260 const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n" 2261 "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n"; 2262 const char *text2 = "<doc>&entity;</doc>"; 2263 ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL}; 2264 2265 /* Check hash salt is passed through too */ 2266 XML_SetHashSalt(g_parser, 0x12345678); 2267 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2268 XML_SetUserData(g_parser, &test_data); 2269 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 2270 /* Add a default handler to exercise more code paths */ 2271 XML_SetDefaultHandler(g_parser, dummy_default_handler); 2272 if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE) 2273 fail("Could not set foreign DTD"); 2274 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) 2275 == XML_STATUS_ERROR) 2276 xml_failure(g_parser); 2277 2278 /* Ensure that trying to set the DTD after parsing has started 2279 * is faulted, even if it's the same setting. 
2280 */ 2281 if (XML_UseForeignDTD(g_parser, XML_TRUE) 2282 != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING) 2283 fail("Failed to reject late foreign DTD setting"); 2284 /* Ditto for the hash salt */ 2285 if (XML_SetHashSalt(g_parser, 0x23456789)) 2286 fail("Failed to reject late hash salt change"); 2287 2288 /* Now finish the parse */ 2289 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) 2290 == XML_STATUS_ERROR) 2291 xml_failure(g_parser); 2292} 2293END_TEST 2294 2295/* Test XML_UseForeignDTD with no external subset present */ 2296START_TEST(test_foreign_dtd_without_external_subset) { 2297 const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n" 2298 "<doc>&foo;</doc>"; 2299 2300 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2301 XML_SetUserData(g_parser, NULL); 2302 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader); 2303 XML_UseForeignDTD(g_parser, XML_TRUE); 2304 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2305 == XML_STATUS_ERROR) 2306 xml_failure(g_parser); 2307} 2308END_TEST 2309 2310START_TEST(test_empty_foreign_dtd) { 2311 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2312 "<doc>&entity;</doc>"; 2313 2314 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2315 XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader); 2316 XML_UseForeignDTD(g_parser, XML_TRUE); 2317 expect_failure(text, XML_ERROR_UNDEFINED_ENTITY, 2318 "Undefined entity not faulted"); 2319} 2320END_TEST 2321 2322/* Test XML Base is set and unset appropriately */ 2323START_TEST(test_set_base) { 2324 const XML_Char *old_base; 2325 const XML_Char *new_base = XCS("/local/file/name.xml"); 2326 2327 old_base = XML_GetBase(g_parser); 2328 if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK) 2329 fail("Unable to set base"); 2330 if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0) 2331 fail("Base setting not correct"); 2332 if 
(XML_SetBase(g_parser, NULL) != XML_STATUS_OK) 2333 fail("Unable to NULL base"); 2334 if (XML_GetBase(g_parser) != NULL) 2335 fail("Base setting not nulled"); 2336 XML_SetBase(g_parser, old_base); 2337} 2338END_TEST 2339 2340/* Test attribute counts, indexing, etc */ 2341START_TEST(test_attributes) { 2342 const char *text = "<!DOCTYPE doc [\n" 2343 "<!ELEMENT doc (tag)>\n" 2344 "<!ATTLIST doc id ID #REQUIRED>\n" 2345 "]>" 2346 "<doc a='1' id='one' b='2'>" 2347 "<tag c='3'/>" 2348 "</doc>"; 2349 AttrInfo doc_info[] = {{XCS("a"), XCS("1")}, 2350 {XCS("b"), XCS("2")}, 2351 {XCS("id"), XCS("one")}, 2352 {NULL, NULL}}; 2353 AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}}; 2354 ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL}, 2355 {XCS("tag"), 1, NULL, NULL}, 2356 {NULL, 0, NULL, NULL}}; 2357 info[0].attributes = doc_info; 2358 info[1].attributes = tag_info; 2359 2360 XML_SetStartElementHandler(g_parser, counting_start_element_handler); 2361 XML_SetUserData(g_parser, info); 2362 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2363 == XML_STATUS_ERROR) 2364 xml_failure(g_parser); 2365} 2366END_TEST 2367 2368/* Test reset works correctly in the middle of processing an internal 2369 * entity. Exercises some obscure code in XML_ParserReset(). 
2370 */ 2371START_TEST(test_reset_in_entity) { 2372 const char *text = "<!DOCTYPE doc [\n" 2373 "<!ENTITY wombat 'wom'>\n" 2374 "<!ENTITY entity 'hi &wom; there'>\n" 2375 "]>\n" 2376 "<doc>&entity;</doc>"; 2377 XML_ParsingStatus status; 2378 2379 g_resumable = XML_TRUE; 2380 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2381 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2382 == XML_STATUS_ERROR) 2383 xml_failure(g_parser); 2384 XML_GetParsingStatus(g_parser, &status); 2385 if (status.parsing != XML_SUSPENDED) 2386 fail("Parsing status not SUSPENDED"); 2387 XML_ParserReset(g_parser, NULL); 2388 XML_GetParsingStatus(g_parser, &status); 2389 if (status.parsing != XML_INITIALIZED) 2390 fail("Parsing status doesn't reset to INITIALIZED"); 2391} 2392END_TEST 2393 2394/* Test that resume correctly passes through parse errors */ 2395START_TEST(test_resume_invalid_parse) { 2396 const char *text = "<doc>Hello</doc"; /* Missing closing wedge */ 2397 2398 g_resumable = XML_TRUE; 2399 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2400 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 2401 == XML_STATUS_ERROR) 2402 xml_failure(g_parser); 2403 if (XML_ResumeParser(g_parser) == XML_STATUS_OK) 2404 fail("Resumed invalid parse not faulted"); 2405 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN) 2406 fail("Invalid parse not correctly faulted"); 2407} 2408END_TEST 2409 2410/* Test that re-suspended parses are correctly passed through */ 2411START_TEST(test_resume_resuspended) { 2412 const char *text = "<doc>Hello<meep/>world</doc>"; 2413 2414 g_resumable = XML_TRUE; 2415 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2416 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 2417 == XML_STATUS_ERROR) 2418 xml_failure(g_parser); 2419 g_resumable = XML_TRUE; 2420 XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler); 2421 if 
(XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED) 2422 fail("Resumption not suspended"); 2423 /* This one should succeed and finish up */ 2424 if (XML_ResumeParser(g_parser) != XML_STATUS_OK) 2425 xml_failure(g_parser); 2426} 2427END_TEST 2428 2429/* Test that CDATA shows up correctly through a default handler */ 2430START_TEST(test_cdata_default) { 2431 const char *text = "<doc><![CDATA[Hello\nworld]]></doc>"; 2432 const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>"); 2433 CharData storage; 2434 2435 CharData_Init(&storage); 2436 XML_SetUserData(g_parser, &storage); 2437 XML_SetDefaultHandler(g_parser, accumulate_characters); 2438 2439 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2440 == XML_STATUS_ERROR) 2441 xml_failure(g_parser); 2442 CharData_CheckXMLChars(&storage, expected); 2443} 2444END_TEST 2445 2446/* Test resetting a subordinate parser does exactly nothing */ 2447START_TEST(test_subordinate_reset) { 2448 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2449 "<!DOCTYPE doc SYSTEM 'foo'>\n" 2450 "<doc>&entity;</doc>"; 2451 2452 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2453 XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter); 2454 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2455 == XML_STATUS_ERROR) 2456 xml_failure(g_parser); 2457} 2458END_TEST 2459 2460/* Test suspending a subordinate parser */ 2461START_TEST(test_subordinate_suspend) { 2462 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2463 "<!DOCTYPE doc SYSTEM 'foo'>\n" 2464 "<doc>&entity;</doc>"; 2465 2466 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2467 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender); 2468 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2469 == XML_STATUS_ERROR) 2470 xml_failure(g_parser); 2471} 2472END_TEST 2473 2474/* Test suspending a subordinate parser 
from an XML declaration */ 2475/* Increases code coverage of the tests */ 2476 2477START_TEST(test_subordinate_xdecl_suspend) { 2478 const char *text 2479 = "<!DOCTYPE doc [\n" 2480 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n" 2481 "]>\n" 2482 "<doc>&entity;</doc>"; 2483 2484 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2485 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl); 2486 g_resumable = XML_TRUE; 2487 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2488 == XML_STATUS_ERROR) 2489 xml_failure(g_parser); 2490} 2491END_TEST 2492 2493START_TEST(test_subordinate_xdecl_abort) { 2494 const char *text 2495 = "<!DOCTYPE doc [\n" 2496 " <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n" 2497 "]>\n" 2498 "<doc>&entity;</doc>"; 2499 2500 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2501 XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl); 2502 g_resumable = XML_FALSE; 2503 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2504 == XML_STATUS_ERROR) 2505 xml_failure(g_parser); 2506} 2507END_TEST 2508 2509/* Test external entity fault handling with suspension */ 2510START_TEST(test_ext_entity_invalid_suspended_parse) { 2511 const char *text = "<!DOCTYPE doc [\n" 2512 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2513 "]>\n" 2514 "<doc>&en;</doc>"; 2515 ExtFaults faults[] 2516 = {{"<?xml version='1.0' encoding='us-ascii'?><", 2517 "Incomplete element declaration not faulted", NULL, 2518 XML_ERROR_UNCLOSED_TOKEN}, 2519 {/* First two bytes of a three-byte char */ 2520 "<?xml version='1.0' encoding='utf-8'?>\xe2\x82", 2521 "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR}, 2522 {NULL, NULL, NULL, XML_ERROR_NONE}}; 2523 ExtFaults *fault; 2524 2525 for (fault = &faults[0]; fault->parse_text != NULL; fault++) { 2526 set_subtest("%s", fault->parse_text); 2527 
XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2528 XML_SetExternalEntityRefHandler(g_parser, 2529 external_entity_suspending_faulter); 2530 XML_SetUserData(g_parser, fault); 2531 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 2532 "Parser did not report external entity error"); 2533 XML_ParserReset(g_parser, NULL); 2534 } 2535} 2536END_TEST 2537 2538/* Test setting an explicit encoding */ 2539START_TEST(test_explicit_encoding) { 2540 const char *text1 = "<doc>Hello "; 2541 const char *text2 = " World</doc>"; 2542 2543 /* Just check that we can set the encoding to NULL before starting */ 2544 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK) 2545 fail("Failed to initialise encoding to NULL"); 2546 /* Say we are UTF-8 */ 2547 if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK) 2548 fail("Failed to set explicit encoding"); 2549 if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE) 2550 == XML_STATUS_ERROR) 2551 xml_failure(g_parser); 2552 /* Try to switch encodings mid-parse */ 2553 if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR) 2554 fail("Allowed encoding change"); 2555 if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE) 2556 == XML_STATUS_ERROR) 2557 xml_failure(g_parser); 2558 /* Try now the parse is over */ 2559 if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK) 2560 fail("Failed to unset encoding"); 2561} 2562END_TEST 2563 2564/* Test handling of trailing CR (rather than newline) */ 2565START_TEST(test_trailing_cr) { 2566 const char *text = "<doc>\r"; 2567 int found_cr; 2568 2569 /* Try with a character handler, for code coverage */ 2570 XML_SetCharacterDataHandler(g_parser, cr_cdata_handler); 2571 XML_SetUserData(g_parser, &found_cr); 2572 found_cr = 0; 2573 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2574 == XML_STATUS_OK) 2575 fail("Failed to fault unclosed doc"); 2576 if (found_cr == 0) 2577 fail("Did not catch the 
carriage return"); 2578 XML_ParserReset(g_parser, NULL); 2579 2580 /* Now with a default handler instead */ 2581 XML_SetDefaultHandler(g_parser, cr_cdata_handler); 2582 XML_SetUserData(g_parser, &found_cr); 2583 found_cr = 0; 2584 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2585 == XML_STATUS_OK) 2586 fail("Failed to fault unclosed doc"); 2587 if (found_cr == 0) 2588 fail("Did not catch default carriage return"); 2589} 2590END_TEST 2591 2592/* Test trailing CR in an external entity parse */ 2593START_TEST(test_ext_entity_trailing_cr) { 2594 const char *text = "<!DOCTYPE doc [\n" 2595 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2596 "]>\n" 2597 "<doc>&en;</doc>"; 2598 int found_cr; 2599 2600 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2601 XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher); 2602 XML_SetUserData(g_parser, &found_cr); 2603 found_cr = 0; 2604 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2605 != XML_STATUS_OK) 2606 xml_failure(g_parser); 2607 if (found_cr == 0) 2608 fail("No carriage return found"); 2609 XML_ParserReset(g_parser, NULL); 2610 2611 /* Try again with a different trailing CR */ 2612 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2613 XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher); 2614 XML_SetUserData(g_parser, &found_cr); 2615 found_cr = 0; 2616 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2617 != XML_STATUS_OK) 2618 xml_failure(g_parser); 2619 if (found_cr == 0) 2620 fail("No carriage return found"); 2621} 2622END_TEST 2623 2624/* Test handling of trailing square bracket */ 2625START_TEST(test_trailing_rsqb) { 2626 const char *text8 = "<doc>]"; 2627 const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000"; 2628 int found_rsqb; 2629 int text8_len = (int)strlen(text8); 2630 2631 XML_SetCharacterDataHandler(g_parser, rsqb_handler); 2632 
XML_SetUserData(g_parser, &found_rsqb); 2633 found_rsqb = 0; 2634 if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE) 2635 == XML_STATUS_OK) 2636 fail("Failed to fault unclosed doc"); 2637 if (found_rsqb == 0) 2638 fail("Did not catch the right square bracket"); 2639 2640 /* Try again with a different encoding */ 2641 XML_ParserReset(g_parser, NULL); 2642 XML_SetCharacterDataHandler(g_parser, rsqb_handler); 2643 XML_SetUserData(g_parser, &found_rsqb); 2644 found_rsqb = 0; 2645 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1, 2646 XML_TRUE) 2647 == XML_STATUS_OK) 2648 fail("Failed to fault unclosed doc"); 2649 if (found_rsqb == 0) 2650 fail("Did not catch the right square bracket"); 2651 2652 /* And finally with a default handler */ 2653 XML_ParserReset(g_parser, NULL); 2654 XML_SetDefaultHandler(g_parser, rsqb_handler); 2655 XML_SetUserData(g_parser, &found_rsqb); 2656 found_rsqb = 0; 2657 if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1, 2658 XML_TRUE) 2659 == XML_STATUS_OK) 2660 fail("Failed to fault unclosed doc"); 2661 if (found_rsqb == 0) 2662 fail("Did not catch the right square bracket"); 2663} 2664END_TEST 2665 2666/* Test trailing right square bracket in an external entity parse */ 2667START_TEST(test_ext_entity_trailing_rsqb) { 2668 const char *text = "<!DOCTYPE doc [\n" 2669 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2670 "]>\n" 2671 "<doc>&en;</doc>"; 2672 int found_rsqb; 2673 2674 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2675 XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher); 2676 XML_SetUserData(g_parser, &found_rsqb); 2677 found_rsqb = 0; 2678 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2679 != XML_STATUS_OK) 2680 xml_failure(g_parser); 2681 if (found_rsqb == 0) 2682 fail("No right square bracket found"); 2683} 2684END_TEST 2685 2686/* Test CDATA handling in an external entity */ 
2687START_TEST(test_ext_entity_good_cdata) { 2688 const char *text = "<!DOCTYPE doc [\n" 2689 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 2690 "]>\n" 2691 "<doc>&en;</doc>"; 2692 2693 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2694 XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii); 2695 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2696 != XML_STATUS_OK) 2697 xml_failure(g_parser); 2698} 2699END_TEST 2700 2701/* Test user parameter settings */ 2702START_TEST(test_user_parameters) { 2703 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2704 "<!-- Primary parse -->\n" 2705 "<!DOCTYPE doc SYSTEM 'foo'>\n" 2706 "<doc>&entity;"; 2707 const char *epilog = "<!-- Back to primary parser -->\n" 2708 "</doc>"; 2709 2710 g_comment_count = 0; 2711 g_skip_count = 0; 2712 g_xdecl_count = 0; 2713 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2714 XML_SetXmlDeclHandler(g_parser, xml_decl_handler); 2715 XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker); 2716 XML_SetCommentHandler(g_parser, data_check_comment_handler); 2717 XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler); 2718 XML_UseParserAsHandlerArg(g_parser); 2719 XML_SetUserData(g_parser, (void *)1); 2720 g_handler_data = g_parser; 2721 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) 2722 == XML_STATUS_ERROR) 2723 xml_failure(g_parser); 2724 /* Ensure we can't change policy mid-parse */ 2725 if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER)) 2726 fail("Changed param entity parsing policy while parsing"); 2727 if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE) 2728 == XML_STATUS_ERROR) 2729 xml_failure(g_parser); 2730 if (g_comment_count != 3) 2731 fail("Comment handler not invoked enough times"); 2732 if (g_skip_count != 1) 2733 fail("Skip handler not invoked enough times"); 2734 if 
(g_xdecl_count != 1) 2735 fail("XML declaration handler not invoked"); 2736} 2737END_TEST 2738 2739/* Test that an explicit external entity handler argument replaces 2740 * the parser as the first argument. 2741 * 2742 * We do not call the first parameter to the external entity handler 2743 * 'parser' for once, since the first time the handler is called it 2744 * will actually be a text string. We need to be able to access the 2745 * global 'parser' variable to create our external entity parser from, 2746 * since there are code paths we need to ensure get executed. 2747 */ 2748START_TEST(test_ext_entity_ref_parameter) { 2749 const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n" 2750 "<!DOCTYPE doc SYSTEM 'foo'>\n" 2751 "<doc>&entity;</doc>"; 2752 2753 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2754 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker); 2755 /* Set a handler arg that is not NULL and not parser (which is 2756 * what NULL would cause to be passed. 
2757 */ 2758 XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text); 2759 g_handler_data = text; 2760 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2761 == XML_STATUS_ERROR) 2762 xml_failure(g_parser); 2763 2764 /* Now try again with unset args */ 2765 XML_ParserReset(g_parser, NULL); 2766 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 2767 XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker); 2768 XML_SetExternalEntityRefHandlerArg(g_parser, NULL); 2769 g_handler_data = g_parser; 2770 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2771 == XML_STATUS_ERROR) 2772 xml_failure(g_parser); 2773} 2774END_TEST 2775 2776/* Test the parsing of an empty string */ 2777START_TEST(test_empty_parse) { 2778 const char *text = "<doc></doc>"; 2779 const char *partial = "<doc>"; 2780 2781 if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR) 2782 fail("Parsing empty string faulted"); 2783 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) 2784 fail("Parsing final empty string not faulted"); 2785 if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS) 2786 fail("Parsing final empty string faulted for wrong reason"); 2787 2788 /* Now try with valid text before the empty end */ 2789 XML_ParserReset(g_parser, NULL); 2790 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) 2791 == XML_STATUS_ERROR) 2792 xml_failure(g_parser); 2793 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR) 2794 fail("Parsing final empty string faulted"); 2795 2796 /* Now try with invalid text before the empty end */ 2797 XML_ParserReset(g_parser, NULL); 2798 if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial), 2799 XML_FALSE) 2800 == XML_STATUS_ERROR) 2801 xml_failure(g_parser); 2802 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) 2803 fail("Parsing final incomplete empty string not faulted"); 2804} 2805END_TEST 2806 
2807/* Test odd corners of the XML_GetBuffer interface */ 2808static enum XML_Status 2809get_feature(enum XML_FeatureEnum feature_id, long *presult) { 2810 const XML_Feature *feature = XML_GetFeatureList(); 2811 2812 if (feature == NULL) 2813 return XML_STATUS_ERROR; 2814 for (; feature->feature != XML_FEATURE_END; feature++) { 2815 if (feature->feature == feature_id) { 2816 *presult = feature->value; 2817 return XML_STATUS_OK; 2818 } 2819 } 2820 return XML_STATUS_ERROR; 2821} 2822 2823/* Test odd corners of the XML_GetBuffer interface */ 2824START_TEST(test_get_buffer_1) { 2825 const char *text = get_buffer_test_text; 2826 void *buffer; 2827 long context_bytes; 2828 2829 /* Attempt to allocate a negative length buffer */ 2830 if (XML_GetBuffer(g_parser, -12) != NULL) 2831 fail("Negative length buffer not failed"); 2832 2833 /* Now get a small buffer and extend it past valid length */ 2834 buffer = XML_GetBuffer(g_parser, 1536); 2835 if (buffer == NULL) 2836 fail("1.5K buffer failed"); 2837 assert(buffer != NULL); 2838 memcpy(buffer, text, strlen(text)); 2839 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE) 2840 == XML_STATUS_ERROR) 2841 xml_failure(g_parser); 2842 if (XML_GetBuffer(g_parser, INT_MAX) != NULL) 2843 fail("INT_MAX buffer not failed"); 2844 2845 /* Now try extending it a more reasonable but still too large 2846 * amount. The allocator in XML_GetBuffer() doubles the buffer 2847 * size until it exceeds the requested amount or INT_MAX. If it 2848 * exceeds INT_MAX, it rejects the request, so we want a request 2849 * between INT_MAX and INT_MAX/2. A gap of 1K seems comfortable, 2850 * with an extra byte just to ensure that the request is off any 2851 * boundary. The request will be inflated internally by 2852 * XML_CONTEXT_BYTES (if >=1), so we subtract that from our 2853 * request. 
2854 */ 2855 if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK) 2856 context_bytes = 0; 2857 if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL) 2858 fail("INT_MAX- buffer not failed"); 2859 2860 /* Now try extending it a carefully crafted amount */ 2861 if (XML_GetBuffer(g_parser, 1000) == NULL) 2862 fail("1000 buffer failed"); 2863} 2864END_TEST 2865 2866/* Test more corners of the XML_GetBuffer interface */ 2867START_TEST(test_get_buffer_2) { 2868 const char *text = get_buffer_test_text; 2869 void *buffer; 2870 2871 /* Now get a decent buffer */ 2872 buffer = XML_GetBuffer(g_parser, 1536); 2873 if (buffer == NULL) 2874 fail("1.5K buffer failed"); 2875 assert(buffer != NULL); 2876 memcpy(buffer, text, strlen(text)); 2877 if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE) 2878 == XML_STATUS_ERROR) 2879 xml_failure(g_parser); 2880 2881 /* Extend it, to catch a different code path */ 2882 if (XML_GetBuffer(g_parser, 1024) == NULL) 2883 fail("1024 buffer failed"); 2884} 2885END_TEST 2886 2887/* Test for signed integer overflow CVE-2022-23852 */ 2888#if XML_CONTEXT_BYTES > 0 2889START_TEST(test_get_buffer_3_overflow) { 2890 XML_Parser parser = XML_ParserCreate(NULL); 2891 assert(parser != NULL); 2892 2893 const char *const text = "\n"; 2894 const int expectedKeepValue = (int)strlen(text); 2895 2896 // After this call, variable "keep" in XML_GetBuffer will 2897 // have value expectedKeepValue 2898 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), 2899 XML_FALSE /* isFinal */) 2900 == XML_STATUS_ERROR) 2901 xml_failure(parser); 2902 2903 assert(expectedKeepValue > 0); 2904 if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL) 2905 fail("enlarging buffer not failed"); 2906 2907 XML_ParserFree(parser); 2908} 2909END_TEST 2910#endif // XML_CONTEXT_BYTES > 0 2911 2912START_TEST(test_buffer_can_grow_to_max) { 2913 const char *const prefixes[] = { 2914 "", 2915 "<", 2916 "<x a='", 2917 "<doc><x 
a='", 2918 "<document><x a='", 2919 "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand" 2920 "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif" 2921 "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin" 2922 "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping" 2923 "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"}; 2924 const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]); 2925 int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow 2926#if defined(__MINGW32__) && ! defined(__MINGW64__) 2927 // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB 2928 // Can we make a big allocation? 2929 void *big = malloc(maxbuf); 2930 if (! big) { 2931 // The big allocation failed. Let's be a little lenient. 2932 maxbuf = maxbuf / 2; 2933 } 2934 free(big); 2935#endif 2936 2937 for (int i = 0; i < num_prefixes; ++i) { 2938 set_subtest("\"%s\"", prefixes[i]); 2939 XML_Parser parser = XML_ParserCreate(NULL); 2940 const int prefix_len = (int)strlen(prefixes[i]); 2941 const enum XML_Status s 2942 = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE); 2943 if (s != XML_STATUS_OK) 2944 xml_failure(parser); 2945 2946 // XML_CONTEXT_BYTES of the prefix may remain in the buffer; 2947 // subtracting the whole prefix is easiest, and close enough. 2948 assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL); 2949 // The limit should be consistent; no prefix should allow us to 2950 // reach above the max buffer size. 
2951 assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL); 2952 XML_ParserFree(parser); 2953 } 2954} 2955END_TEST 2956 2957START_TEST(test_getbuffer_allocates_on_zero_len) { 2958 for (int first_len = 1; first_len >= 0; first_len--) { 2959 set_subtest("with len=%d first", first_len); 2960 XML_Parser parser = XML_ParserCreate(NULL); 2961 assert_true(parser != NULL); 2962 assert_true(XML_GetBuffer(parser, first_len) != NULL); 2963 assert_true(XML_GetBuffer(parser, 0) != NULL); 2964 if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK) 2965 xml_failure(parser); 2966 XML_ParserFree(parser); 2967 } 2968} 2969END_TEST 2970 2971/* Test position information macros */ 2972START_TEST(test_byte_info_at_end) { 2973 const char *text = "<doc></doc>"; 2974 2975 if (XML_GetCurrentByteIndex(g_parser) != -1 2976 || XML_GetCurrentByteCount(g_parser) != 0) 2977 fail("Byte index/count incorrect at start of parse"); 2978 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2979 == XML_STATUS_ERROR) 2980 xml_failure(g_parser); 2981 /* At end, the count will be zero and the index the end of string */ 2982 if (XML_GetCurrentByteCount(g_parser) != 0) 2983 fail("Terminal byte count incorrect"); 2984 if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text)) 2985 fail("Terminal byte index incorrect"); 2986} 2987END_TEST 2988 2989/* Test position information from errors */ 2990#define PRE_ERROR_STR "<doc></" 2991#define POST_ERROR_STR "wombat></doc>" 2992START_TEST(test_byte_info_at_error) { 2993 const char *text = PRE_ERROR_STR POST_ERROR_STR; 2994 2995 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 2996 == XML_STATUS_OK) 2997 fail("Syntax error not faulted"); 2998 if (XML_GetCurrentByteCount(g_parser) != 0) 2999 fail("Error byte count incorrect"); 3000 if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR)) 3001 fail("Error byte index incorrect"); 3002} 3003END_TEST 3004#undef PRE_ERROR_STR 3005#undef POST_ERROR_STR 3006 
3007/* Test position information in handler */ 3008#define START_ELEMENT "<e>" 3009#define CDATA_TEXT "Hello" 3010#define END_ELEMENT "</e>" 3011START_TEST(test_byte_info_at_cdata) { 3012 const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT; 3013 int offset, size; 3014 ByteTestData data; 3015 3016 /* Check initial context is empty */ 3017 if (XML_GetInputContext(g_parser, &offset, &size) != NULL) 3018 fail("Unexpected context at start of parse"); 3019 3020 data.start_element_len = (int)strlen(START_ELEMENT); 3021 data.cdata_len = (int)strlen(CDATA_TEXT); 3022 data.total_string_len = (int)strlen(text); 3023 XML_SetCharacterDataHandler(g_parser, byte_character_handler); 3024 XML_SetUserData(g_parser, &data); 3025 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) 3026 xml_failure(g_parser); 3027} 3028END_TEST 3029#undef START_ELEMENT 3030#undef CDATA_TEXT 3031#undef END_ELEMENT 3032 3033/* Test predefined entities are correctly recognised */ 3034START_TEST(test_predefined_entities) { 3035 const char *text = "<doc><>&"'</doc>"; 3036 const XML_Char *expected = XCS("<doc><>&"'</doc>"); 3037 const XML_Char *result = XCS("<>&\"'"); 3038 CharData storage; 3039 3040 XML_SetDefaultHandler(g_parser, accumulate_characters); 3041 /* run_character_check uses XML_SetCharacterDataHandler(), which 3042 * unfortunately heads off a code path that we need to exercise. 
3043 */ 3044 CharData_Init(&storage); 3045 XML_SetUserData(g_parser, &storage); 3046 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3047 == XML_STATUS_ERROR) 3048 xml_failure(g_parser); 3049 /* The default handler doesn't translate the entities */ 3050 CharData_CheckXMLChars(&storage, expected); 3051 3052 /* Now try again and check the translation */ 3053 XML_ParserReset(g_parser, NULL); 3054 run_character_check(text, result); 3055} 3056END_TEST 3057 3058/* Regression test that an invalid tag in an external parameter 3059 * reference in an external DTD is correctly faulted. 3060 * 3061 * Only a few specific tags are legal in DTDs ignoring comments and 3062 * processing instructions, all of which begin with an exclamation 3063 * mark. "<el/>" is not one of them, so the parser should raise an 3064 * error on encountering it. 3065 */ 3066START_TEST(test_invalid_tag_in_dtd) { 3067 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3068 "<doc></doc>\n"; 3069 3070 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3071 XML_SetExternalEntityRefHandler(g_parser, external_entity_param); 3072 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3073 "Invalid tag IN DTD external param not rejected"); 3074} 3075END_TEST 3076 3077/* Test entities not quite the predefined ones are not mis-recognised */ 3078START_TEST(test_not_predefined_entities) { 3079 const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>", 3080 "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL}; 3081 int i = 0; 3082 3083 while (text[i] != NULL) { 3084 expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY, 3085 "Undefined entity not rejected"); 3086 XML_ParserReset(g_parser, NULL); 3087 i++; 3088 } 3089} 3090END_TEST 3091 3092/* Test conditional inclusion (IGNORE) */ 3093START_TEST(test_ignore_section) { 3094 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3095 "<doc><e>&entity;</e></doc>"; 3096 const XML_Char *expected 3097 = XCS("<![IGNORE[<!ELEMENT e 
(#PCDATA)*>]]>\n&entity;"); 3098 CharData storage; 3099 3100 CharData_Init(&storage); 3101 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3102 XML_SetUserData(g_parser, &storage); 3103 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore); 3104 XML_SetDefaultHandler(g_parser, accumulate_characters); 3105 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 3106 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 3107 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3108 XML_SetStartElementHandler(g_parser, dummy_start_element); 3109 XML_SetEndElementHandler(g_parser, dummy_end_element); 3110 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3111 == XML_STATUS_ERROR) 3112 xml_failure(g_parser); 3113 CharData_CheckXMLChars(&storage, expected); 3114} 3115END_TEST 3116 3117START_TEST(test_ignore_section_utf16) { 3118 const char text[] = 3119 /* <!DOCTYPE d SYSTEM 's'> */ 3120 "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 " 3121 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0" 3122 /* <d><e>&en;</e></d> */ 3123 "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0"; 3124 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;"); 3125 CharData storage; 3126 3127 CharData_Init(&storage); 3128 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3129 XML_SetUserData(g_parser, &storage); 3130 XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16); 3131 XML_SetDefaultHandler(g_parser, accumulate_characters); 3132 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 3133 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 3134 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3135 XML_SetStartElementHandler(g_parser, dummy_start_element); 3136 XML_SetEndElementHandler(g_parser, dummy_end_element); 3137 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 
3138 == XML_STATUS_ERROR) 3139 xml_failure(g_parser); 3140 CharData_CheckXMLChars(&storage, expected); 3141} 3142END_TEST 3143 3144START_TEST(test_ignore_section_utf16_be) { 3145 const char text[] = 3146 /* <!DOCTYPE d SYSTEM 's'> */ 3147 "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 " 3148 "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n" 3149 /* <d><e>&en;</e></d> */ 3150 "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>"; 3151 const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;"); 3152 CharData storage; 3153 3154 CharData_Init(&storage); 3155 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3156 XML_SetUserData(g_parser, &storage); 3157 XML_SetExternalEntityRefHandler(g_parser, 3158 external_entity_load_ignore_utf16_be); 3159 XML_SetDefaultHandler(g_parser, accumulate_characters); 3160 XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler); 3161 XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler); 3162 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3163 XML_SetStartElementHandler(g_parser, dummy_start_element); 3164 XML_SetEndElementHandler(g_parser, dummy_end_element); 3165 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 3166 == XML_STATUS_ERROR) 3167 xml_failure(g_parser); 3168 CharData_CheckXMLChars(&storage, expected); 3169} 3170END_TEST 3171 3172/* Test mis-formatted conditional exclusion */ 3173START_TEST(test_bad_ignore_section) { 3174 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3175 "<doc><e>&entity;</e></doc>"; 3176 ExtFaults faults[] 3177 = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL, 3178 XML_ERROR_SYNTAX}, 3179 {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL, 3180 XML_ERROR_INVALID_TOKEN}, 3181 {/* FIrst two bytes of a three-byte char */ 3182 "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL, 3183 XML_ERROR_PARTIAL_CHAR}, 3184 {NULL, NULL, NULL, XML_ERROR_NONE}}; 3185 ExtFaults *fault; 
3186 3187 for (fault = &faults[0]; fault->parse_text != NULL; fault++) { 3188 set_subtest("%s", fault->parse_text); 3189 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3190 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 3191 XML_SetUserData(g_parser, fault); 3192 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3193 "Incomplete IGNORE section not failed"); 3194 XML_ParserReset(g_parser, NULL); 3195 } 3196} 3197END_TEST 3198 3199struct bom_testdata { 3200 const char *external; 3201 int split; 3202 XML_Bool nested_callback_happened; 3203}; 3204 3205static int XMLCALL 3206external_bom_checker(XML_Parser parser, const XML_Char *context, 3207 const XML_Char *base, const XML_Char *systemId, 3208 const XML_Char *publicId) { 3209 const char *text; 3210 UNUSED_P(base); 3211 UNUSED_P(systemId); 3212 UNUSED_P(publicId); 3213 3214 XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL); 3215 if (ext_parser == NULL) 3216 fail("Could not create external entity parser"); 3217 3218 if (! xcstrcmp(systemId, XCS("004-2.ent"))) { 3219 struct bom_testdata *const testdata 3220 = (struct bom_testdata *)XML_GetUserData(parser); 3221 const char *const external = testdata->external; 3222 const int split = testdata->split; 3223 testdata->nested_callback_happened = XML_TRUE; 3224 3225 if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE) 3226 != XML_STATUS_OK) { 3227 xml_failure(ext_parser); 3228 } 3229 text = external + split; // the parse below will continue where we left off. 3230 } else if (! 
xcstrcmp(systemId, XCS("004-1.ent"))) { 3231 text = "<!ELEMENT doc EMPTY>\n" 3232 "<!ENTITY % e1 SYSTEM '004-2.ent'>\n" 3233 "<!ENTITY % e2 '%e1;'>\n"; 3234 } else { 3235 fail("unknown systemId"); 3236 } 3237 3238 if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE) 3239 != XML_STATUS_OK) 3240 xml_failure(ext_parser); 3241 3242 XML_ParserFree(ext_parser); 3243 return XML_STATUS_OK; 3244} 3245 3246/* regression test: BOM should be consumed when followed by a partial token. */ 3247START_TEST(test_external_bom_consumed) { 3248 const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3249 "<doc></doc>\n"; 3250 const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>"; 3251 const int len = (int)strlen(external); 3252 for (int split = 0; split <= len; ++split) { 3253 set_subtest("split at byte %d", split); 3254 3255 struct bom_testdata testdata; 3256 testdata.external = external; 3257 testdata.split = split; 3258 testdata.nested_callback_happened = XML_FALSE; 3259 3260 XML_Parser parser = XML_ParserCreate(NULL); 3261 if (parser == NULL) { 3262 fail("Couldn't create parser"); 3263 } 3264 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3265 XML_SetExternalEntityRefHandler(parser, external_bom_checker); 3266 XML_SetUserData(parser, &testdata); 3267 if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) 3268 == XML_STATUS_ERROR) 3269 xml_failure(parser); 3270 if (! 
testdata.nested_callback_happened) { 3271 fail("ref handler not called"); 3272 } 3273 XML_ParserFree(parser); 3274 } 3275} 3276END_TEST 3277 3278/* Test recursive parsing */ 3279START_TEST(test_external_entity_values) { 3280 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3281 "<doc></doc>\n"; 3282 ExtFaults data_004_2[] = { 3283 {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE}, 3284 {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL, 3285 XML_ERROR_INVALID_TOKEN}, 3286 {"'wombat", "Unterminated string not faulted", NULL, 3287 XML_ERROR_UNCLOSED_TOKEN}, 3288 {"\xe2\x82", "Partial UTF-8 character not faulted", NULL, 3289 XML_ERROR_PARTIAL_CHAR}, 3290 {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE}, 3291 {"<?xml?>", "Malformed XML declaration not faulted", NULL, 3292 XML_ERROR_XML_DECL}, 3293 {/* UTF-8 BOM */ 3294 "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, 3295 XML_ERROR_NONE}, 3296 {"<?xml version='1.0' encoding='utf-8'?>\n$", 3297 "Invalid token after text declaration not faulted", NULL, 3298 XML_ERROR_INVALID_TOKEN}, 3299 {"<?xml version='1.0' encoding='utf-8'?>\n'wombat", 3300 "Unterminated string after text decl not faulted", NULL, 3301 XML_ERROR_UNCLOSED_TOKEN}, 3302 {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82", 3303 "Partial UTF-8 character after text decl not faulted", NULL, 3304 XML_ERROR_PARTIAL_CHAR}, 3305 {"%e1;", "Recursive parameter entity not faulted", NULL, 3306 XML_ERROR_RECURSIVE_ENTITY_REF}, 3307 {NULL, NULL, NULL, XML_ERROR_NONE}}; 3308 int i; 3309 3310 for (i = 0; data_004_2[i].parse_text != NULL; i++) { 3311 set_subtest("%s", data_004_2[i].parse_text); 3312 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3313 XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer); 3314 XML_SetUserData(g_parser, &data_004_2[i]); 3315 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3316 == XML_STATUS_ERROR) 3317 
xml_failure(g_parser); 3318 XML_ParserReset(g_parser, NULL); 3319 } 3320} 3321END_TEST 3322 3323/* Test the recursive parse interacts with a not standalone handler */ 3324START_TEST(test_ext_entity_not_standalone) { 3325 const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n" 3326 "<doc></doc>"; 3327 3328 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3329 XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone); 3330 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3331 "Standalone rejection not caught"); 3332} 3333END_TEST 3334 3335START_TEST(test_ext_entity_value_abort) { 3336 const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n" 3337 "<doc></doc>\n"; 3338 3339 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3340 XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter); 3341 g_resumable = XML_FALSE; 3342 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3343 == XML_STATUS_ERROR) 3344 xml_failure(g_parser); 3345} 3346END_TEST 3347 3348START_TEST(test_bad_public_doctype) { 3349 const char *text = "<?xml version='1.0' encoding='utf-8'?>\n" 3350 "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n" 3351 "<doc></doc>"; 3352 3353 /* Setting a handler provokes a particular code path */ 3354 XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler, 3355 dummy_end_doctype_handler); 3356 expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed"); 3357} 3358END_TEST 3359 3360/* Test based on ibm/valid/P32/ibm32v04.xml */ 3361START_TEST(test_attribute_enum_value) { 3362 const char *text = "<?xml version='1.0' standalone='no'?>\n" 3363 "<!DOCTYPE animal SYSTEM 'test.dtd'>\n" 3364 "<animal>This is a \n <a/> \n\nyellow tiger</animal>"; 3365 ExtTest dtd_data 3366 = {"<!ELEMENT animal (#PCDATA|a)*>\n" 3367 "<!ELEMENT a EMPTY>\n" 3368 "<!ATTLIST animal xml:space (default|preserve) 'preserve'>", 3369 NULL, NULL}; 3370 const XML_Char *expected = XCS("This is a \n 
\n\nyellow tiger"); 3371 3372 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 3373 XML_SetUserData(g_parser, &dtd_data); 3374 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3375 /* An attribute list handler provokes a different code path */ 3376 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); 3377 run_ext_character_check(text, &dtd_data, expected); 3378} 3379END_TEST 3380 3381/* Slightly bizarrely, the library seems to silently ignore entity 3382 * definitions for predefined entities, even when they are wrong. The 3383 * language of the XML 1.0 spec is somewhat unhelpful as to what ought 3384 * to happen, so this is currently treated as acceptable. 3385 */ 3386START_TEST(test_predefined_entity_redefinition) { 3387 const char *text = "<!DOCTYPE doc [\n" 3388 "<!ENTITY apos 'foo'>\n" 3389 "]>\n" 3390 "<doc>'</doc>"; 3391 run_character_check(text, XCS("'")); 3392} 3393END_TEST 3394 3395/* Test that the parser stops processing the DTD after an unresolved 3396 * parameter entity is encountered. 
3397 */ 3398START_TEST(test_dtd_stop_processing) { 3399 const char *text = "<!DOCTYPE doc [\n" 3400 "%foo;\n" 3401 "<!ENTITY bar 'bas'>\n" 3402 "]><doc/>"; 3403 3404 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); 3405 init_dummy_handlers(); 3406 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3407 == XML_STATUS_ERROR) 3408 xml_failure(g_parser); 3409 if (get_dummy_handler_flags() != 0) 3410 fail("DTD processing still going after undefined PE"); 3411} 3412END_TEST 3413 3414/* Test public notations with no system ID */ 3415START_TEST(test_public_notation_no_sysid) { 3416 const char *text = "<!DOCTYPE doc [\n" 3417 "<!NOTATION note PUBLIC 'foo'>\n" 3418 "<!ELEMENT doc EMPTY>\n" 3419 "]>\n<doc/>"; 3420 3421 init_dummy_handlers(); 3422 XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler); 3423 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3424 == XML_STATUS_ERROR) 3425 xml_failure(g_parser); 3426 if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG) 3427 fail("Notation declaration handler not called"); 3428} 3429END_TEST 3430 3431START_TEST(test_nested_groups) { 3432 const char *text 3433 = "<!DOCTYPE doc [\n" 3434 "<!ELEMENT doc " 3435 /* Sixteen elements per line */ 3436 "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?," 3437 "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?" 
3438 "))))))))))))))))))))))))))))))))>\n" 3439 "<!ELEMENT e EMPTY>" 3440 "]>\n" 3441 "<doc><e/></doc>"; 3442 CharData storage; 3443 3444 CharData_Init(&storage); 3445 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3446 XML_SetStartElementHandler(g_parser, record_element_start_handler); 3447 XML_SetUserData(g_parser, &storage); 3448 init_dummy_handlers(); 3449 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3450 == XML_STATUS_ERROR) 3451 xml_failure(g_parser); 3452 CharData_CheckXMLChars(&storage, XCS("doce")); 3453 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG) 3454 fail("Element handler not fired"); 3455} 3456END_TEST 3457 3458START_TEST(test_group_choice) { 3459 const char *text = "<!DOCTYPE doc [\n" 3460 "<!ELEMENT doc (a|b|c)+>\n" 3461 "<!ELEMENT a EMPTY>\n" 3462 "<!ELEMENT b (#PCDATA)>\n" 3463 "<!ELEMENT c ANY>\n" 3464 "]>\n" 3465 "<doc>\n" 3466 "<a/>\n" 3467 "<b attr='foo'>This is a foo</b>\n" 3468 "<c></c>\n" 3469 "</doc>\n"; 3470 3471 XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler); 3472 init_dummy_handlers(); 3473 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3474 == XML_STATUS_ERROR) 3475 xml_failure(g_parser); 3476 if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG) 3477 fail("Element handler flag not raised"); 3478} 3479END_TEST 3480 3481START_TEST(test_standalone_parameter_entity) { 3482 const char *text = "<?xml version='1.0' standalone='yes'?>\n" 3483 "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n" 3484 "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n" 3485 "%entity;\n" 3486 "]>\n" 3487 "<doc></doc>"; 3488 char dtd_data[] = "<!ENTITY % e1 'foo'>\n"; 3489 3490 XML_SetUserData(g_parser, dtd_data); 3491 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3492 XML_SetExternalEntityRefHandler(g_parser, external_entity_public); 3493 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3494 == 
XML_STATUS_ERROR) 3495 xml_failure(g_parser); 3496} 3497END_TEST 3498 3499/* Test skipping of parameter entity in an external DTD */ 3500/* Derived from ibm/invalid/P69/ibm69i01.xml */ 3501START_TEST(test_skipped_parameter_entity) { 3502 const char *text = "<?xml version='1.0'?>\n" 3503 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n" 3504 "<!ELEMENT root (#PCDATA|a)* >\n" 3505 "]>\n" 3506 "<root></root>"; 3507 ExtTest dtd_data = {"%pe2;", NULL, NULL}; 3508 3509 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 3510 XML_SetUserData(g_parser, &dtd_data); 3511 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3512 XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler); 3513 init_dummy_handlers(); 3514 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3515 == XML_STATUS_ERROR) 3516 xml_failure(g_parser); 3517 if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG) 3518 fail("Skip handler not executed"); 3519} 3520END_TEST 3521 3522/* Test recursive parameter entity definition rejected in external DTD */ 3523START_TEST(test_recursive_external_parameter_entity) { 3524 const char *text = "<?xml version='1.0'?>\n" 3525 "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n" 3526 "<!ELEMENT root (#PCDATA|a)* >\n" 3527 "]>\n" 3528 "<root></root>"; 3529 ExtFaults dtd_data = {"<!ENTITY % pe2 '%pe2;'>\n%pe2;", 3530 "Recursive external parameter entity not faulted", NULL, 3531 XML_ERROR_RECURSIVE_ENTITY_REF}; 3532 3533 XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter); 3534 XML_SetUserData(g_parser, &dtd_data); 3535 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3536 expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING, 3537 "Recursive external parameter not spotted"); 3538} 3539END_TEST 3540 3541/* Test undefined parameter entity in external entity handler */ 3542START_TEST(test_undefined_ext_entity_in_external_dtd) { 3543 const char *text = "<!DOCTYPE doc 
SYSTEM 'foo'>\n" 3544 "<doc></doc>\n"; 3545 3546 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3547 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer); 3548 XML_SetUserData(g_parser, NULL); 3549 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3550 == XML_STATUS_ERROR) 3551 xml_failure(g_parser); 3552 3553 /* Now repeat without the external entity ref handler invoking 3554 * another copy of itself. 3555 */ 3556 XML_ParserReset(g_parser, NULL); 3557 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3558 XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer); 3559 XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */ 3560 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3561 == XML_STATUS_ERROR) 3562 xml_failure(g_parser); 3563} 3564END_TEST 3565 3566/* Test suspending the parse on receiving an XML declaration works */ 3567START_TEST(test_suspend_xdecl) { 3568 const char *text = long_character_data_text; 3569 3570 XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler); 3571 XML_SetUserData(g_parser, g_parser); 3572 g_resumable = XML_TRUE; 3573 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3574 != XML_STATUS_SUSPENDED) 3575 xml_failure(g_parser); 3576 if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE) 3577 xml_failure(g_parser); 3578 /* Attempt to start a new parse while suspended */ 3579 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3580 != XML_STATUS_ERROR) 3581 fail("Attempt to parse while suspended not faulted"); 3582 if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED) 3583 fail("Suspended parse not faulted with correct error"); 3584} 3585END_TEST 3586 3587/* Test aborting the parse in an epilog works */ 3588START_TEST(test_abort_epilog) { 3589 const char *text = "<doc></doc>\n\r\n"; 3590 XML_Char trigger_char = XCS('\r'); 3591 3592 XML_SetDefaultHandler(g_parser, 
selective_aborting_default_handler); 3593 XML_SetUserData(g_parser, &trigger_char); 3594 g_resumable = XML_FALSE; 3595 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3596 != XML_STATUS_ERROR) 3597 fail("Abort not triggered"); 3598 if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED) 3599 xml_failure(g_parser); 3600} 3601END_TEST 3602 3603/* Test a different code path for abort in the epilog */ 3604START_TEST(test_abort_epilog_2) { 3605 const char *text = "<doc></doc>\n"; 3606 XML_Char trigger_char = XCS('\n'); 3607 3608 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); 3609 XML_SetUserData(g_parser, &trigger_char); 3610 g_resumable = XML_FALSE; 3611 expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered"); 3612} 3613END_TEST 3614 3615/* Test suspension from the epilog */ 3616START_TEST(test_suspend_epilog) { 3617 const char *text = "<doc></doc>\n"; 3618 XML_Char trigger_char = XCS('\n'); 3619 3620 XML_SetDefaultHandler(g_parser, selective_aborting_default_handler); 3621 XML_SetUserData(g_parser, &trigger_char); 3622 g_resumable = XML_TRUE; 3623 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3624 != XML_STATUS_SUSPENDED) 3625 xml_failure(g_parser); 3626} 3627END_TEST 3628 3629START_TEST(test_suspend_in_sole_empty_tag) { 3630 const char *text = "<doc/>"; 3631 enum XML_Status rc; 3632 3633 XML_SetEndElementHandler(g_parser, suspending_end_handler); 3634 XML_SetUserData(g_parser, g_parser); 3635 rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE); 3636 if (rc == XML_STATUS_ERROR) 3637 xml_failure(g_parser); 3638 else if (rc != XML_STATUS_SUSPENDED) 3639 fail("Suspend not triggered"); 3640 rc = XML_ResumeParser(g_parser); 3641 if (rc == XML_STATUS_ERROR) 3642 xml_failure(g_parser); 3643 else if (rc != XML_STATUS_OK) 3644 fail("Resume failed"); 3645} 3646END_TEST 3647 3648START_TEST(test_unfinished_epilog) { 3649 const char *text = "<doc></doc><"; 3650 3651 
expect_failure(text, XML_ERROR_UNCLOSED_TOKEN, 3652 "Incomplete epilog entry not faulted"); 3653} 3654END_TEST 3655 3656START_TEST(test_partial_char_in_epilog) { 3657 const char *text = "<doc></doc>\xe2\x82"; 3658 3659 /* First check that no fault is raised if the parse is not finished */ 3660 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) 3661 == XML_STATUS_ERROR) 3662 xml_failure(g_parser); 3663 /* Now check that it is faulted once we finish */ 3664 if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR) 3665 fail("Partial character in epilog not faulted"); 3666 if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR) 3667 xml_failure(g_parser); 3668} 3669END_TEST 3670 3671/* Test resuming a parse suspended in entity substitution */ 3672START_TEST(test_suspend_resume_internal_entity) { 3673 const char *text 3674 = "<!DOCTYPE doc [\n" 3675 "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n" 3676 "]>\n" 3677 "<doc>&foo;</doc>\n"; 3678 const XML_Char *expected1 = XCS("Hi"); 3679 const XML_Char *expected2 = XCS("HiHo"); 3680 CharData storage; 3681 3682 CharData_Init(&storage); 3683 XML_SetStartElementHandler(g_parser, start_element_suspender); 3684 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 3685 XML_SetUserData(g_parser, &storage); 3686 // can't use SINGLE_BYTES here, because it'll return early on suspension, and 3687 // we won't know exactly how much input we actually managed to give Expat. 
3688 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 3689 != XML_STATUS_SUSPENDED) 3690 xml_failure(g_parser); 3691 CharData_CheckXMLChars(&storage, XCS("")); 3692 if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED) 3693 xml_failure(g_parser); 3694 CharData_CheckXMLChars(&storage, expected1); 3695 if (XML_ResumeParser(g_parser) != XML_STATUS_OK) 3696 xml_failure(g_parser); 3697 CharData_CheckXMLChars(&storage, expected2); 3698} 3699END_TEST 3700 3701START_TEST(test_suspend_resume_internal_entity_issue_629) { 3702 const char *const text 3703 = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n" 3704 "<" 3705 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3706 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3707 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3708 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3709 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3710 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3711 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3712 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3713 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3714 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3715 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3716 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3717 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3718 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3719 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3720 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3721 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3722 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3723 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3724 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3725 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3726 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3727 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3728 
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3729 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3730 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3731 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3732 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3733 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3734 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3735 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3736 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3737 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3738 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3739 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3740 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3741 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3742 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3743 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3744 "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 3745 "/>" 3746 "</b></a>"; 3747 const size_t firstChunkSizeBytes = 54; 3748 3749 XML_Parser parser = XML_ParserCreate(NULL); 3750 XML_SetUserData(parser, parser); 3751 XML_SetCommentHandler(parser, suspending_comment_handler); 3752 3753 if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE) 3754 != XML_STATUS_SUSPENDED) 3755 xml_failure(parser); 3756 if (XML_ResumeParser(parser) != XML_STATUS_OK) 3757 xml_failure(parser); 3758 if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes, 3759 (int)(strlen(text) - firstChunkSizeBytes), 3760 XML_TRUE) 3761 != XML_STATUS_OK) 3762 xml_failure(parser); 3763 XML_ParserFree(parser); 3764} 3765END_TEST 3766 3767/* Test syntax error is caught at parse resumption */ 3768START_TEST(test_resume_entity_with_syntax_error) { 3769 const char *text = "<!DOCTYPE doc [\n" 3770 "<!ENTITY foo '<suspend>Hi</wombat>'>\n" 3771 "]>\n" 3772 "<doc>&foo;</doc>\n"; 3773 3774 XML_SetStartElementHandler(g_parser, start_element_suspender); 3775 if 
(_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3776 != XML_STATUS_SUSPENDED) 3777 xml_failure(g_parser); 3778 if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR) 3779 fail("Syntax error in entity not faulted"); 3780 if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH) 3781 xml_failure(g_parser); 3782} 3783END_TEST 3784 3785/* Test suspending and resuming in a parameter entity substitution */ 3786START_TEST(test_suspend_resume_parameter_entity) { 3787 const char *text = "<!DOCTYPE doc [\n" 3788 "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n" 3789 "%foo;\n" 3790 "]>\n" 3791 "<doc>Hello, world</doc>"; 3792 const XML_Char *expected = XCS("Hello, world"); 3793 CharData storage; 3794 3795 CharData_Init(&storage); 3796 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3797 XML_SetElementDeclHandler(g_parser, element_decl_suspender); 3798 XML_SetCharacterDataHandler(g_parser, accumulate_characters); 3799 XML_SetUserData(g_parser, &storage); 3800 if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) 3801 != XML_STATUS_SUSPENDED) 3802 xml_failure(g_parser); 3803 CharData_CheckXMLChars(&storage, XCS("")); 3804 if (XML_ResumeParser(g_parser) != XML_STATUS_OK) 3805 xml_failure(g_parser); 3806 CharData_CheckXMLChars(&storage, expected); 3807} 3808END_TEST 3809 3810/* Test attempting to use parser after an error is faulted */ 3811START_TEST(test_restart_on_error) { 3812 const char *text = "<$doc><doc></doc>"; 3813 3814 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3815 != XML_STATUS_ERROR) 3816 fail("Invalid tag name not faulted"); 3817 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 3818 xml_failure(g_parser); 3819 if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR) 3820 fail("Restarting invalid parse not faulted"); 3821 if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN) 3822 xml_failure(g_parser); 3823} 3824END_TEST 3825 3826/* Test that angle brackets in an attribute 
default value are faulted */ 3827START_TEST(test_reject_lt_in_attribute_value) { 3828 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n" 3829 "<doc></doc>"; 3830 3831 expect_failure(text, XML_ERROR_INVALID_TOKEN, 3832 "Bad attribute default not faulted"); 3833} 3834END_TEST 3835 3836START_TEST(test_reject_unfinished_param_in_att_value) { 3837 const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n" 3838 "<doc></doc>"; 3839 3840 expect_failure(text, XML_ERROR_INVALID_TOKEN, 3841 "Bad attribute default not faulted"); 3842} 3843END_TEST 3844 3845START_TEST(test_trailing_cr_in_att_value) { 3846 const char *text = "<doc a='value\r'/>"; 3847 3848 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3849 == XML_STATUS_ERROR) 3850 xml_failure(g_parser); 3851} 3852END_TEST 3853 3854/* Try parsing a general entity within a parameter entity in a 3855 * standalone internal DTD. Covers a corner case in the parser. 3856 */ 3857START_TEST(test_standalone_internal_entity) { 3858 const char *text = "<?xml version='1.0' standalone='yes' ?>\n" 3859 "<!DOCTYPE doc [\n" 3860 " <!ELEMENT doc (#PCDATA)>\n" 3861 " <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"≥\">'>\n" 3862 " <!ENTITY ge 'AttDefaultValue'>\n" 3863 " %pe;\n" 3864 "]>\n" 3865 "<doc att2='any'/>"; 3866 3867 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3868 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3869 == XML_STATUS_ERROR) 3870 xml_failure(g_parser); 3871} 3872END_TEST 3873 3874/* Test that a reference to an unknown external entity is skipped */ 3875START_TEST(test_skipped_external_entity) { 3876 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n" 3877 "<doc></doc>\n"; 3878 ExtTest test_data = {"<!ELEMENT doc EMPTY>\n" 3879 "<!ENTITY % e2 '%e1;'>\n", 3880 NULL, NULL}; 3881 3882 XML_SetUserData(g_parser, &test_data); 3883 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3884 
XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 3885 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3886 == XML_STATUS_ERROR) 3887 xml_failure(g_parser); 3888} 3889END_TEST 3890 3891/* Test a different form of unknown external entity */ 3892START_TEST(test_skipped_null_loaded_ext_entity) { 3893 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n" 3894 "<doc />"; 3895 ExtHdlrData test_data 3896 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n" 3897 "<!ENTITY % pe2 '%pe1;'>\n" 3898 "%pe2;\n", 3899 external_entity_null_loader}; 3900 3901 XML_SetUserData(g_parser, &test_data); 3902 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3903 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader); 3904 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3905 == XML_STATUS_ERROR) 3906 xml_failure(g_parser); 3907} 3908END_TEST 3909 3910START_TEST(test_skipped_unloaded_ext_entity) { 3911 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n" 3912 "<doc />"; 3913 ExtHdlrData test_data 3914 = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n" 3915 "<!ENTITY % pe2 '%pe1;'>\n" 3916 "%pe2;\n", 3917 NULL}; 3918 3919 XML_SetUserData(g_parser, &test_data); 3920 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3921 XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader); 3922 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3923 == XML_STATUS_ERROR) 3924 xml_failure(g_parser); 3925} 3926END_TEST 3927 3928/* Test that a parameter entity value ending with a carriage return 3929 * has it translated internally into a newline. 
3930 */ 3931START_TEST(test_param_entity_with_trailing_cr) { 3932#define PARAM_ENTITY_NAME "pe" 3933#define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">" 3934 const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n" 3935 "<doc/>"; 3936 ExtTest test_data 3937 = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n" 3938 "%" PARAM_ENTITY_NAME ";\n", 3939 NULL, NULL}; 3940 3941 XML_SetUserData(g_parser, &test_data); 3942 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 3943 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader); 3944 XML_SetEntityDeclHandler(g_parser, param_entity_match_handler); 3945 param_entity_match_init(XCS(PARAM_ENTITY_NAME), 3946 XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n")); 3947 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 3948 == XML_STATUS_ERROR) 3949 xml_failure(g_parser); 3950 int entity_match_flag = get_param_entity_match_flag(); 3951 if (entity_match_flag == ENTITY_MATCH_FAIL) 3952 fail("Parameter entity CR->NEWLINE conversion failed"); 3953 else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND) 3954 fail("Parameter entity not parsed"); 3955} 3956#undef PARAM_ENTITY_NAME 3957#undef PARAM_ENTITY_CORE_VALUE 3958END_TEST 3959 3960START_TEST(test_invalid_character_entity) { 3961 const char *text = "<!DOCTYPE doc [\n" 3962 " <!ENTITY entity '�'>\n" 3963 "]>\n" 3964 "<doc>&entity;</doc>"; 3965 3966 expect_failure(text, XML_ERROR_BAD_CHAR_REF, 3967 "Out of range character reference not faulted"); 3968} 3969END_TEST 3970 3971START_TEST(test_invalid_character_entity_2) { 3972 const char *text = "<!DOCTYPE doc [\n" 3973 " <!ENTITY entity '&#xg0;'>\n" 3974 "]>\n" 3975 "<doc>&entity;</doc>"; 3976 3977 expect_failure(text, XML_ERROR_INVALID_TOKEN, 3978 "Out of range character reference not faulted"); 3979} 3980END_TEST 3981 3982START_TEST(test_invalid_character_entity_3) { 3983 const char text[] = 3984 /* <!DOCTYPE doc [\n */ 3985 
"\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n" 3986 /* U+0E04 = KHO KHWAI 3987 * U+0E08 = CHO CHAN */ 3988 /* <!ENTITY entity '&\u0e04\u0e08;'>\n */ 3989 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 " 3990 "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n" 3991 /* ]>\n */ 3992 "\0]\0>\0\n" 3993 /* <doc>&entity;</doc> */ 3994 "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>"; 3995 3996 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 3997 != XML_STATUS_ERROR) 3998 fail("Invalid start of entity name not faulted"); 3999 if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY) 4000 xml_failure(g_parser); 4001} 4002END_TEST 4003 4004START_TEST(test_invalid_character_entity_4) { 4005 const char *text = "<!DOCTYPE doc [\n" 4006 " <!ENTITY entity '�'>\n" /* = � */ 4007 "]>\n" 4008 "<doc>&entity;</doc>"; 4009 4010 expect_failure(text, XML_ERROR_BAD_CHAR_REF, 4011 "Out of range character reference not faulted"); 4012} 4013END_TEST 4014 4015/* Test that processing instructions are picked up by a default handler */ 4016START_TEST(test_pi_handled_in_default) { 4017 const char *text = "<?test processing instruction?>\n<doc/>"; 4018 const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>"); 4019 CharData storage; 4020 4021 CharData_Init(&storage); 4022 XML_SetDefaultHandler(g_parser, accumulate_characters); 4023 XML_SetUserData(g_parser, &storage); 4024 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4025 == XML_STATUS_ERROR) 4026 xml_failure(g_parser); 4027 CharData_CheckXMLChars(&storage, expected); 4028} 4029END_TEST 4030 4031/* Test that comments are picked up by a default handler */ 4032START_TEST(test_comment_handled_in_default) { 4033 const char *text = "<!-- This is a comment -->\n<doc/>"; 4034 const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>"); 4035 CharData storage; 4036 4037 CharData_Init(&storage); 4038 XML_SetDefaultHandler(g_parser, accumulate_characters); 4039 
XML_SetUserData(g_parser, &storage); 4040 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4041 == XML_STATUS_ERROR) 4042 xml_failure(g_parser); 4043 CharData_CheckXMLChars(&storage, expected); 4044} 4045END_TEST 4046 4047/* Test PIs that look almost but not quite like XML declarations */ 4048START_TEST(test_pi_yml) { 4049 const char *text = "<?yml something like data?><doc/>"; 4050 const XML_Char *expected = XCS("yml: something like data\n"); 4051 CharData storage; 4052 4053 CharData_Init(&storage); 4054 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4055 XML_SetUserData(g_parser, &storage); 4056 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4057 == XML_STATUS_ERROR) 4058 xml_failure(g_parser); 4059 CharData_CheckXMLChars(&storage, expected); 4060} 4061END_TEST 4062 4063START_TEST(test_pi_xnl) { 4064 const char *text = "<?xnl nothing like data?><doc/>"; 4065 const XML_Char *expected = XCS("xnl: nothing like data\n"); 4066 CharData storage; 4067 4068 CharData_Init(&storage); 4069 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4070 XML_SetUserData(g_parser, &storage); 4071 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4072 == XML_STATUS_ERROR) 4073 xml_failure(g_parser); 4074 CharData_CheckXMLChars(&storage, expected); 4075} 4076END_TEST 4077 4078START_TEST(test_pi_xmm) { 4079 const char *text = "<?xmm everything like data?><doc/>"; 4080 const XML_Char *expected = XCS("xmm: everything like data\n"); 4081 CharData storage; 4082 4083 CharData_Init(&storage); 4084 XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters); 4085 XML_SetUserData(g_parser, &storage); 4086 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4087 == XML_STATUS_ERROR) 4088 xml_failure(g_parser); 4089 CharData_CheckXMLChars(&storage, expected); 4090} 4091END_TEST 4092 4093START_TEST(test_utf16_pi) { 4094 const char text[] = 4095 /* 
<?{KHO KHWAI}{CHO CHAN}?>
       * where {KHO KHWAI} = U+0E04
       * and {CHO CHAN} = U+0E08
       */
      "<\0?\0\x04\x0e\x08\x0e?\0>\0"
      /* <q/> */
      "<\0q\0/\0>\0";
#ifdef XML_UNICODE
  const XML_Char *expected = XCS("\x0e04\x0e08: \n");
#else
  const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
#endif
  CharData storage;

  CharData_Init(&storage);
  XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
  XML_SetUserData(g_parser, &storage);
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
  CharData_CheckXMLChars(&storage, expected);
}
END_TEST

/* Big-endian UTF-16 document holding a processing instruction whose
 * target is U+0E04 U+0E08; the text collected through
 * accumulate_pi_characters is compared with the expected string.
 */
START_TEST(test_utf16_be_pi) {
  const char text[] =
      /* <?{KHO KHWAI}{CHO CHAN}?>
       * where {KHO KHWAI} = U+0E04
       * and {CHO CHAN} = U+0E08
       */
      "\0<\0?\x0e\x04\x0e\x08\0?\0>"
      /* <q/> */
      "\0<\0q\0/\0>";
#ifdef XML_UNICODE
  const XML_Char *expected = XCS("\x0e04\x0e08: \n");
#else
  const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
#endif
  CharData storage;

  CharData_Init(&storage);
  XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
  XML_SetUserData(g_parser, &storage);
  /* sizeof - 1: the literal contains NUL bytes, so strlen() cannot be used */
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
  CharData_CheckXMLChars(&storage, expected);
}
END_TEST

/* Test that comments can be picked up and translated */
START_TEST(test_utf16_be_comment) {
  const char text[] =
      /* <!-- Comment A --> */
      "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
      /* <doc/> */
      "\0<\0d\0o\0c\0/\0>";
  const XML_Char *expected = XCS(" Comment A ");
  CharData storage;

  CharData_Init(&storage);
  XML_SetCommentHandler(g_parser, accumulate_comment);
  XML_SetUserData(g_parser, &storage);
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
  CharData_CheckXMLChars(&storage, expected);
}
END_TEST

/* Little-endian variant of the UTF-16 comment test */
START_TEST(test_utf16_le_comment) {
  const char text[] =
      /* <!-- Comment B --> */
      "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
      /* <doc/> */
      "<\0d\0o\0c\0/\0>\0";
  const XML_Char *expected = XCS(" Comment B ");
  CharData storage;

  CharData_Init(&storage);
  XML_SetCommentHandler(g_parser, accumulate_comment);
  XML_SetUserData(g_parser, &storage);
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
  CharData_CheckXMLChars(&storage, expected);
}
END_TEST

/* Test that the unknown encoding handler with map entries that expect
 * conversion but no conversion function is faulted
 */
START_TEST(test_missing_encoding_conversion_fn) {
  const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
                     "<doc>\x81</doc>";

  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  /* MiscEncodingHandler sets up an encoding with every top-bit-set
   * character introducing a two-byte sequence. For this, it
   * requires a convert function. The above function call doesn't
   * pass one through, so when MiscEncodingHandler actually gets
   * called it should supply an invalid encoding.
   */
  expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
                 "Encoding with missing convert() not faulted");
}
END_TEST

/* Test that an unknown encoding whose convert function rejects its
 * input leads to XML_ERROR_INVALID_TOKEN
 */
START_TEST(test_failing_encoding_conversion_fn) {
  const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
                     "<doc>\x81</doc>";

  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  /* MiscEncodingHandler sets up an encoding with every top-bit-set
   * character introducing a two-byte sequence. For this, it
   * requires a convert function. The above function call passes
   * one that insists all possible sequences are invalid anyway.
   */
  expect_failure(text, XML_ERROR_INVALID_TOKEN,
                 "Encoding with failing convert() not faulted");
}
END_TEST

/* Test unknown encoding conversions */
START_TEST(test_unknown_encoding_success) {
  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
                     /* Equivalent to <eoc>Hello, world</eoc> */
                     "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";

  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  run_character_check(text, XCS("Hello, world"));
}
END_TEST

/* Test bad name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name) {
  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
                     "<\xff\x64oc>Hello, world</\xff\x64oc>";

  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  expect_failure(text, XML_ERROR_INVALID_TOKEN,
                 "Bad name start in unknown encoding not faulted");
}
END_TEST

/* Test bad mid-name character in unknown encoding */
START_TEST(test_unknown_encoding_bad_name_2) {
  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
                     "<d\xffoc>Hello, world</d\xffoc>";

  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  expect_failure(text, XML_ERROR_INVALID_TOKEN,
                 "Bad name in unknown encoding not faulted");
}
END_TEST

/* Test element name that is long enough to fill the conversion buffer
 * in an unknown encoding, finishing with an encoded character.
 */
START_TEST(test_unknown_encoding_long_name_1) {
  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
                     "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
                     "Hi"
                     "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
  const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
  CharData storage;

  CharData_Init(&storage);
  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  XML_SetStartElementHandler(g_parser, record_element_start_handler);
  XML_SetUserData(g_parser, &storage);
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
  CharData_CheckXMLChars(&storage, expected);
}
END_TEST

/* Test element name that is long enough to fill the conversion buffer
 * in an unknown encoding, finishing with a simple character.
*/
START_TEST(test_unknown_encoding_long_name_2) {
  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
                     "<abcdefghabcdefghabcdefghijklmnop>"
                     "Hi"
                     "</abcdefghabcdefghabcdefghijklmnop>";
  const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
  CharData storage;

  CharData_Init(&storage);
  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  XML_SetStartElementHandler(g_parser, record_element_start_handler);
  XML_SetUserData(g_parser, &storage);
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
  CharData_CheckXMLChars(&storage, expected);
}
END_TEST

/* The 'invalid-9' encoding name must end in XML_ERROR_UNKNOWN_ENCODING */
START_TEST(test_invalid_unknown_encoding) {
  const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
                     "<doc>Hello world</doc>";

  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
                 "Invalid unknown encoding not faulted");
}
END_TEST

/* Plain ASCII content must parse under the 'ascii-like' encoding */
START_TEST(test_unknown_ascii_encoding_ok) {
  const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
                     "<doc>Hello, world</doc>";

  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  run_character_check(text, XCS("Hello, world"));
}
END_TEST

/* A top-bit-set byte (0x80) under the 'ascii-like' encoding must be
 * reported as XML_ERROR_INVALID_TOKEN
 */
START_TEST(test_unknown_ascii_encoding_fail) {
  const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
                     "<doc>Hello, \x80 world</doc>";

  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  expect_failure(text, XML_ERROR_INVALID_TOKEN,
                 "Invalid character not faulted");
}
END_TEST

/* The 'invalid-len' encoding name must end in XML_ERROR_UNKNOWN_ENCODING */
START_TEST(test_unknown_encoding_invalid_length) {
  const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
                     "<doc>Hello, world</doc>";

  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
                 "Invalid unknown encoding not faulted");
}
END_TEST

/* The 'invalid-a' encoding name must end in XML_ERROR_UNKNOWN_ENCODING */
START_TEST(test_unknown_encoding_invalid_topbit) {
  const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
                     "<doc>Hello, world</doc>";

  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
                 "Invalid unknown encoding not faulted");
}
END_TEST

/* Under 'invalid-surrogate' the encoding itself is accepted; the failure
 * expected here is XML_ERROR_INVALID_TOKEN once byte 0x82 is parsed
 */
START_TEST(test_unknown_encoding_invalid_surrogate) {
  const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
                     "<doc>Hello, \x82 world</doc>";

  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  expect_failure(text, XML_ERROR_INVALID_TOKEN,
                 "Invalid unknown encoding not faulted");
}
END_TEST

/* The 'invalid-high' encoding name must end in XML_ERROR_UNKNOWN_ENCODING */
START_TEST(test_unknown_encoding_invalid_high) {
  const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
                     "<doc>Hello, world</doc>";

  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
                 "Invalid unknown encoding not faulted");
}
END_TEST

/* Bytes 0xff 0x30 inside an attribute value under 'prefix-conv' must be
 * reported as XML_ERROR_INVALID_TOKEN
 */
START_TEST(test_unknown_encoding_invalid_attr_value) {
  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
                     "<doc attr='\xff\x30'/>";

  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  /* NOTE(review): the message presumably means "attribute value" */
  expect_failure(text, XML_ERROR_INVALID_TOKEN,
                 "Invalid attribute valid not faulted");
}
END_TEST

/* Test an external entity parser set to use latin-1 detects UTF-16
 * BOMs correctly.
4376 */ 4377/* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */ 4378START_TEST(test_ext_entity_latin1_utf16le_bom) { 4379 const char *text = "<!DOCTYPE doc [\n" 4380 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4381 "]>\n" 4382 "<doc>&en;</doc>"; 4383 ExtTest2 test_data 4384 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4385 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4386 * 0x4c = L and 0x20 is a space 4387 */ 4388 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL}; 4389#ifdef XML_UNICODE 4390 const XML_Char *expected = XCS("\x00ff\x00feL "); 4391#else 4392 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4393 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL "); 4394#endif 4395 CharData storage; 4396 4397 CharData_Init(&storage); 4398 test_data.storage = &storage; 4399 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4400 XML_SetUserData(g_parser, &test_data); 4401 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4402 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4403 == XML_STATUS_ERROR) 4404 xml_failure(g_parser); 4405 CharData_CheckXMLChars(&storage, expected); 4406} 4407END_TEST 4408 4409START_TEST(test_ext_entity_latin1_utf16be_bom) { 4410 const char *text = "<!DOCTYPE doc [\n" 4411 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4412 "]>\n" 4413 "<doc>&en;</doc>"; 4414 ExtTest2 test_data 4415 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4416 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4417 * 0x4c = L and 0x20 is a space 4418 */ 4419 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL}; 4420#ifdef XML_UNICODE 4421 const XML_Char *expected = XCS("\x00fe\x00ff L"); 4422#else 4423 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4424 const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L"); 4425#endif 4426 CharData storage; 4427 4428 
CharData_Init(&storage); 4429 test_data.storage = &storage; 4430 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4431 XML_SetUserData(g_parser, &test_data); 4432 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4433 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4434 == XML_STATUS_ERROR) 4435 xml_failure(g_parser); 4436 CharData_CheckXMLChars(&storage, expected); 4437} 4438END_TEST 4439 4440/* Parsing the full buffer rather than a byte at a time makes a 4441 * difference to the encoding scanning code, so repeat the above tests 4442 * without breaking them down by byte. 4443 */ 4444START_TEST(test_ext_entity_latin1_utf16le_bom2) { 4445 const char *text = "<!DOCTYPE doc [\n" 4446 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4447 "]>\n" 4448 "<doc>&en;</doc>"; 4449 ExtTest2 test_data 4450 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4451 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4452 * 0x4c = L and 0x20 is a space 4453 */ 4454 "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL}; 4455#ifdef XML_UNICODE 4456 const XML_Char *expected = XCS("\x00ff\x00feL "); 4457#else 4458 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4459 const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL "); 4460#endif 4461 CharData storage; 4462 4463 CharData_Init(&storage); 4464 test_data.storage = &storage; 4465 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4466 XML_SetUserData(g_parser, &test_data); 4467 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4468 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4469 == XML_STATUS_ERROR) 4470 xml_failure(g_parser); 4471 CharData_CheckXMLChars(&storage, expected); 4472} 4473END_TEST 4474 4475START_TEST(test_ext_entity_latin1_utf16be_bom2) { 4476 const char *text = "<!DOCTYPE doc [\n" 4477 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4478 "]>\n" 
4479 "<doc>&en;</doc>"; 4480 ExtTest2 test_data 4481 = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */ 4482 /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn, 4483 * 0x4c = L and 0x20 is a space 4484 */ 4485 "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL}; 4486#ifdef XML_UNICODE 4487 const XML_Char *expected = XCS("\x00fe\x00ff L"); 4488#else 4489 /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */ 4490 const XML_Char *expected = "\xc3\xbe\xc3\xbf L"; 4491#endif 4492 CharData storage; 4493 4494 CharData_Init(&storage); 4495 test_data.storage = &storage; 4496 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4497 XML_SetUserData(g_parser, &test_data); 4498 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4499 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4500 == XML_STATUS_ERROR) 4501 xml_failure(g_parser); 4502 CharData_CheckXMLChars(&storage, expected); 4503} 4504END_TEST 4505 4506/* Test little-endian UTF-16 given an explicit big-endian encoding */ 4507START_TEST(test_ext_entity_utf16_be) { 4508 const char *text = "<!DOCTYPE doc [\n" 4509 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4510 "]>\n" 4511 "<doc>&en;</doc>"; 4512 ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL}; 4513#ifdef XML_UNICODE 4514 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00"); 4515#else 4516 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */ 4517 "\xe6\x94\x80" /* U+6500 */ 4518 "\xe2\xbc\x80" /* U+2F00 */ 4519 "\xe3\xb8\x80"); /* U+3E00 */ 4520#endif 4521 CharData storage; 4522 4523 CharData_Init(&storage); 4524 test_data.storage = &storage; 4525 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4526 XML_SetUserData(g_parser, &test_data); 4527 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4528 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4529 == XML_STATUS_ERROR) 4530 
xml_failure(g_parser); 4531 CharData_CheckXMLChars(&storage, expected); 4532} 4533END_TEST 4534 4535/* Test big-endian UTF-16 given an explicit little-endian encoding */ 4536START_TEST(test_ext_entity_utf16_le) { 4537 const char *text = "<!DOCTYPE doc [\n" 4538 " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n" 4539 "]>\n" 4540 "<doc>&en;</doc>"; 4541 ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL}; 4542#ifdef XML_UNICODE 4543 const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00"); 4544#else 4545 const XML_Char *expected = XCS("\xe3\xb0\x80" /* U+3C00 */ 4546 "\xe6\x94\x80" /* U+6500 */ 4547 "\xe2\xbc\x80" /* U+2F00 */ 4548 "\xe3\xb8\x80"); /* U+3E00 */ 4549#endif 4550 CharData storage; 4551 4552 CharData_Init(&storage); 4553 test_data.storage = &storage; 4554 XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2); 4555 XML_SetUserData(g_parser, &test_data); 4556 XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters); 4557 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4558 == XML_STATUS_ERROR) 4559 xml_failure(g_parser); 4560 CharData_CheckXMLChars(&storage, expected); 4561} 4562END_TEST 4563 4564/* Test little-endian UTF-16 given no explicit encoding. 4565 * The existing default encoding (UTF-8) is assumed to hold without a 4566 * BOM to contradict it, so the entity value will in fact provoke an 4567 * error because 0x00 is not a valid XML character. We parse the 4568 * whole buffer in one go rather than feeding it in byte by byte to 4569 * exercise different code paths in the initial scanning routines. 
*/
START_TEST(test_ext_entity_utf16_unknown) {
  const char *text = "<!DOCTYPE doc [\n"
                     " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
                     "]>\n"
                     "<doc>&en;</doc>";
  /* Entity body is "abc" with a NUL after each letter, 6 bytes in all */
  ExtFaults2 test_data
      = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
         XML_ERROR_INVALID_TOKEN};

  XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
  XML_SetUserData(g_parser, &test_data);
  /* The outer parse is expected to fail with the external-entity error */
  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
                 "Invalid character should not have been accepted");
}
END_TEST

/* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
START_TEST(test_ext_entity_utf8_non_bom) {
  const char *text = "<!DOCTYPE doc [\n"
                     " <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
                     "]>\n"
                     "<doc>&en;</doc>";
  ExtTest2 test_data
      = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
         3, NULL, NULL};
#ifdef XML_UNICODE
  const XML_Char *expected = XCS("\xfec0");
#else
  const XML_Char *expected = XCS("\xef\xbb\x80");
#endif
  CharData storage;

  CharData_Init(&storage);
  test_data.storage = &storage;
  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
  XML_SetUserData(g_parser, &test_data);
  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
  CharData_CheckXMLChars(&storage, expected);
}
END_TEST

/* Test that UTF-8 in a CDATA section is correctly passed through */
START_TEST(test_utf8_in_cdata_section) {
  const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
#ifdef XML_UNICODE
  const XML_Char *expected = XCS("one \x00e9 two");
#else
  const XML_Char *expected = XCS("one \xc3\xa9 two");
#endif

  run_character_check(text, expected);
}
END_TEST

/*
Test that little-endian UTF-16 in a CDATA section is handled */ 4629START_TEST(test_utf8_in_cdata_section_2) { 4630 const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>"; 4631#ifdef XML_UNICODE 4632 const XML_Char *expected = XCS("\x00e9]\x00e9two"); 4633#else 4634 const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two"); 4635#endif 4636 4637 run_character_check(text, expected); 4638} 4639END_TEST 4640 4641START_TEST(test_utf8_in_start_tags) { 4642 struct test_case { 4643 bool goodName; 4644 bool goodNameStart; 4645 const char *tagName; 4646 }; 4647 4648 // The idea with the tests below is this: 4649 // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences 4650 // go to isNever and are hence not a concern. 4651 // 4652 // We start with a character that is a valid name character 4653 // (or even name-start character, see XML 1.0r4 spec) and then we flip 4654 // single bits at places where (1) the result leaves the UTF-8 encoding space 4655 // and (2) we stay in the same n-byte sequence family. 4656 // 4657 // The flipped bits are highlighted in angle brackets in comments, 4658 // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped 4659 // the most significant bit to 1 to leave UTF-8 encoding space. 
4660 struct test_case cases[] = { 4661 // 1-byte UTF-8: [0xxx xxxx] 4662 {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':' 4663 {false, false, "\xBA"}, // [<1>011 1010] 4664 {true, false, "\x39"}, // [0011 1001] = ASCII nine '9' 4665 {false, false, "\xB9"}, // [<1>011 1001] 4666 4667 // 2-byte UTF-8: [110x xxxx] [10xx xxxx] 4668 {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] = 4669 // Arabic small waw U+06E5 4670 {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101] 4671 {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101] 4672 {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101] 4673 {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] = 4674 // combining char U+0301 4675 {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001] 4676 {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001] 4677 {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001] 4678 4679 // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx] 4680 {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] = 4681 // Devanagari Letter A U+0905 4682 {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101] 4683 {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101] 4684 {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101] 4685 {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101] 4686 {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101] 4687 {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] = 4688 // combining char U+0901 4689 {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001] 4690 {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001] 4691 {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001] 4692 {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001] 4693 {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001] 4694 }; 4695 const bool atNameStart[] = {true, 
false}; 4696 4697 size_t i = 0; 4698 char doc[1024]; 4699 size_t failCount = 0; 4700 4701 // we need all the bytes to be parsed, but we don't want the errors that can 4702 // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on. 4703 if (g_reparseDeferralEnabledDefault) { 4704 return; 4705 } 4706 4707 for (; i < sizeof(cases) / sizeof(cases[0]); i++) { 4708 size_t j = 0; 4709 for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) { 4710 const bool expectedSuccess 4711 = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName; 4712 snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a", 4713 cases[i].tagName); 4714 XML_Parser parser = XML_ParserCreate(NULL); 4715 4716 const enum XML_Status status = _XML_Parse_SINGLE_BYTES( 4717 parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE); 4718 4719 bool success = true; 4720 if ((status == XML_STATUS_OK) != expectedSuccess) { 4721 success = false; 4722 } 4723 if ((status == XML_STATUS_ERROR) 4724 && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) { 4725 success = false; 4726 } 4727 4728 if (! success) { 4729 fprintf( 4730 stderr, 4731 "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n", 4732 (unsigned)i + 1u, atNameStart[j] ? 
" " : "not ", 4733 (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser)); 4734 failCount++; 4735 } 4736 4737 XML_ParserFree(parser); 4738 } 4739 } 4740 4741 if (failCount > 0) { 4742 fail("UTF-8 regression detected"); 4743 } 4744} 4745END_TEST 4746 4747/* Test trailing spaces in elements are accepted */ 4748START_TEST(test_trailing_spaces_in_elements) { 4749 const char *text = "<doc >Hi</doc >"; 4750 const XML_Char *expected = XCS("doc/doc"); 4751 CharData storage; 4752 4753 CharData_Init(&storage); 4754 XML_SetElementHandler(g_parser, record_element_start_handler, 4755 record_element_end_handler); 4756 XML_SetUserData(g_parser, &storage); 4757 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 4758 == XML_STATUS_ERROR) 4759 xml_failure(g_parser); 4760 CharData_CheckXMLChars(&storage, expected); 4761} 4762END_TEST 4763 4764START_TEST(test_utf16_attribute) { 4765 const char text[] = 4766 /* <d {KHO KHWAI}{CHO CHAN}='a'/> 4767 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 4768 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 4769 */ 4770 "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0"; 4771 const XML_Char *expected = XCS("a"); 4772 CharData storage; 4773 4774 CharData_Init(&storage); 4775 XML_SetStartElementHandler(g_parser, accumulate_attribute); 4776 XML_SetUserData(g_parser, &storage); 4777 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4778 == XML_STATUS_ERROR) 4779 xml_failure(g_parser); 4780 CharData_CheckXMLChars(&storage, expected); 4781} 4782END_TEST 4783 4784START_TEST(test_utf16_second_attr) { 4785 /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/> 4786 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 4787 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 4788 */ 4789 const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0" 4790 "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0"; 4791 const XML_Char *expected = XCS("1"); 4792 CharData storage; 4793 4794 CharData_Init(&storage); 4795 
XML_SetStartElementHandler(g_parser, accumulate_attribute); 4796 XML_SetUserData(g_parser, &storage); 4797 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4798 == XML_STATUS_ERROR) 4799 xml_failure(g_parser); 4800 CharData_CheckXMLChars(&storage, expected); 4801} 4802END_TEST 4803 4804START_TEST(test_attr_after_solidus) { 4805 const char *text = "<doc attr1='a' / attr2='b'>"; 4806 4807 expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted"); 4808} 4809END_TEST 4810 4811START_TEST(test_utf16_pe) { 4812 /* <!DOCTYPE doc [ 4813 * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'> 4814 * %{KHO KHWAI}{CHO CHAN}; 4815 * ]> 4816 * <doc></doc> 4817 * 4818 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 4819 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 4820 */ 4821 const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n" 4822 "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 " 4823 "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 " 4824 "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n" 4825 "\0%\x0e\x04\x0e\x08\0;\0\n" 4826 "\0]\0>\0\n" 4827 "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>"; 4828#ifdef XML_UNICODE 4829 const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n"); 4830#else 4831 const XML_Char *expected 4832 = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n"); 4833#endif 4834 CharData storage; 4835 4836 CharData_Init(&storage); 4837 XML_SetUserData(g_parser, &storage); 4838 XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl); 4839 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4840 == XML_STATUS_ERROR) 4841 xml_failure(g_parser); 4842 CharData_CheckXMLChars(&storage, expected); 4843} 4844END_TEST 4845 4846/* Test that duff attribute description keywords are rejected */ 4847START_TEST(test_bad_attr_desc_keyword) { 4848 const char *text = "<!DOCTYPE doc [\n" 4849 " <!ATTLIST doc attr CDATA #!IMPLIED>\n" 4850 "]>\n" 4851 "<doc />"; 4852 
4853 expect_failure(text, XML_ERROR_INVALID_TOKEN, 4854 "Bad keyword !IMPLIED not faulted"); 4855} 4856END_TEST 4857 4858/* Test that an invalid attribute description keyword consisting of 4859 * UTF-16 characters with their top bytes non-zero are correctly 4860 * faulted 4861 */ 4862START_TEST(test_bad_attr_desc_keyword_utf16) { 4863 /* <!DOCTYPE d [ 4864 * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}> 4865 * ]><d/> 4866 * 4867 * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8 4868 * and {CHO CHAN} = U+0E08 = 0xe0 0xb8 0x88 in UTF-8 4869 */ 4870 const char text[] 4871 = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n" 4872 "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 " 4873 "\0#\x0e\x04\x0e\x08\0>\0\n" 4874 "\0]\0>\0<\0d\0/\0>"; 4875 4876 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE) 4877 != XML_STATUS_ERROR) 4878 fail("Invalid UTF16 attribute keyword not faulted"); 4879 if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX) 4880 xml_failure(g_parser); 4881} 4882END_TEST 4883 4884/* Test that invalid syntax in a <!DOCTYPE> is rejected. 
Do this
 * using prefix-encoding (see above) to trigger specific code paths
 */
START_TEST(test_bad_doctype) {
  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
                     "<!DOCTYPE doc [ \x80\x44 ]><doc/>";

  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  expect_failure(text, XML_ERROR_SYNTAX,
                 "Invalid bytes in DOCTYPE not faulted");
}
END_TEST

/* A malformed two-byte UTF-8 sequence in the DOCTYPE name must be
 * reported as XML_ERROR_INVALID_TOKEN
 */
START_TEST(test_bad_doctype_utf8) {
  const char *text = "<!DOCTYPE \xDB\x25"
                     "doc><doc/>"; // [1101 1011] [<0>010 0101]
  expect_failure(text, XML_ERROR_INVALID_TOKEN,
                 "Invalid UTF-8 in DOCTYPE not faulted");
}
END_TEST

/* Big-endian UTF-16 document with a stray U+06F2 in the internal subset;
 * the parse must fail with XML_ERROR_SYNTAX
 */
START_TEST(test_bad_doctype_utf16) {
  const char text[] =
      /* <!DOCTYPE doc [ \x06f2 ]><doc/>
       *
       * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
       * (name character) but not a valid letter (name start character)
       */
      "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
      "\x06\xf2"
      "\0 \0]\0>\0<\0d\0o\0c\0/\0>";

  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
      != XML_STATUS_ERROR)
    fail("Invalid bytes in DOCTYPE not faulted");
  if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
    xml_failure(g_parser);
}
END_TEST

/* The document name "1+" must be reported as XML_ERROR_INVALID_TOKEN */
START_TEST(test_bad_doctype_plus) {
  const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
                     "<1+>&foo;</1+>";

  expect_failure(text, XML_ERROR_INVALID_TOKEN,
                 "'+' in document name not faulted");
}
END_TEST

/* The document name "1*" must be reported as XML_ERROR_INVALID_TOKEN */
START_TEST(test_bad_doctype_star) {
  const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
                     "<1*>&foo;</1*>";

  expect_failure(text, XML_ERROR_INVALID_TOKEN,
                 "'*' in document name not faulted");
}
END_TEST

/* The document name "1?" must be reported as XML_ERROR_INVALID_TOKEN */
START_TEST(test_bad_doctype_query) {
  const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
                     "<1?>&foo;</1?>";

  expect_failure(text, XML_ERROR_INVALID_TOKEN,
                 "'?' in document name not faulted");
}
END_TEST

/* An invalid byte inside an IGNORE section of an external subset in the
 * 'prefix-conv' encoding: the outer parse must fail with
 * XML_ERROR_EXTERNAL_ENTITY_HANDLING
 */
START_TEST(test_unknown_encoding_bad_ignore) {
  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
                     "<!DOCTYPE doc SYSTEM 'foo'>"
                     "<doc><e>&entity;</e></doc>";
  ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
                     "Invalid character not faulted", XCS("prefix-conv"),
                     XML_ERROR_INVALID_TOKEN};

  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
  XML_SetUserData(g_parser, &fault);
  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
                 "Bad IGNORE section with unknown encoding not failed");
}
END_TEST

/* Decimal and hexadecimal character references for U+00E4 inside a
 * big-endian UTF-16 attribute value
 */
START_TEST(test_entity_in_utf16_be_attr) {
  const char text[] =
      /* <e a='&#228; &#x00E4;'></e> */
      "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
      "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
#ifdef XML_UNICODE
  const XML_Char *expected = XCS("\x00e4 \x00e4");
#else
  const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
#endif
  CharData storage;

  CharData_Init(&storage);
  XML_SetUserData(g_parser, &storage);
  XML_SetStartElementHandler(g_parser, accumulate_attribute);
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
  CharData_CheckXMLChars(&storage, expected);
}
END_TEST

/* Little-endian variant of the character-reference attribute test */
START_TEST(test_entity_in_utf16_le_attr) {
  const char text[] =
      /* <e a='&#228; &#x00E4;'></e> */
      "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
      "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
#ifdef XML_UNICODE
  const XML_Char *expected = XCS("\x00e4 \x00e4");
#else
  const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
#endif
  CharData storage;

  CharData_Init(&storage);
  XML_SetUserData(g_parser, &storage);
  XML_SetStartElementHandler(g_parser, accumulate_attribute);
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
  CharData_CheckXMLChars(&storage, expected);
}
END_TEST

/* Big-endian UTF-16 document that pulls in a PUBLIC parameter entity;
 * the external text declares entity j, and the character data collected
 * for &j; must be "baz"
 */
START_TEST(test_entity_public_utf16_be) {
  const char text[] =
      /* <!DOCTYPE d [ */
      "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
      /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
      "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
      "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
      /* %e; */
      "\0%\0e\0;\0\n"
      /* ]> */
      "\0]\0>\0\n"
      /* <d>&j;</d> */
      "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
  ExtTest2 test_data
      = {/* <!ENTITY j 'baz'> */
         "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
  const XML_Char *expected = XCS("baz");
  CharData storage;

  CharData_Init(&storage);
  test_data.storage = &storage;
  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
  XML_SetUserData(g_parser, &test_data);
  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
  CharData_CheckXMLChars(&storage, expected);
}
END_TEST

/* Little-endian variant of the PUBLIC parameter entity test */
START_TEST(test_entity_public_utf16_le) {
  const char text[] =
      /* <!DOCTYPE d [ */
      "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
      /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
      "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
      "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
      /* %e; */
      "%\0e\0;\0\n\0"
      /* ]> */
      "]\0>\0\n\0"
      /* <d>&j;</d> */
      "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
  ExtTest2 test_data
      = {/* <!ENTITY j 'baz'> */
         "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
  const XML_Char *expected = XCS("baz");
  CharData storage;

  CharData_Init(&storage);
  test_data.storage = &storage;
  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
  XML_SetUserData(g_parser, &test_data);
  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
      == XML_STATUS_ERROR)
    xml_failure(g_parser);
  CharData_CheckXMLChars(&storage, expected);
}
END_TEST

/* Test that a doctype with neither an internal nor external subset is
 * faulted
 */
START_TEST(test_short_doctype) {
  const char *text = "<!DOCTYPE doc></doc>";
  expect_failure(text, XML_ERROR_INVALID_TOKEN,
                 "DOCTYPE without subset not rejected");
}
END_TEST

/* PUBLIC keyword with no identifiers after it: XML_ERROR_SYNTAX */
START_TEST(test_short_doctype_2) {
  const char *text = "<!DOCTYPE doc PUBLIC></doc>";
  expect_failure(text, XML_ERROR_SYNTAX,
                 "DOCTYPE without Public ID not rejected");
}
END_TEST

/* SYSTEM keyword with no identifier after it: XML_ERROR_SYNTAX */
START_TEST(test_short_doctype_3) {
  const char *text = "<!DOCTYPE doc SYSTEM></doc>";
  expect_failure(text, XML_ERROR_SYNTAX,
                 "DOCTYPE without System ID not rejected");
}
END_TEST

/* PUBLIC followed by three literals instead of two: XML_ERROR_SYNTAX */
START_TEST(test_long_doctype) {
  const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
  expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
}
END_TEST

/* General entity declared PUBLIC with no identifiers: XML_ERROR_SYNTAX */
START_TEST(test_bad_entity) {
  const char *text = "<!DOCTYPE doc [\n"
                     " <!ENTITY foo PUBLIC>\n"
                     "]>\n"
                     "<doc/>";
  expect_failure(text, XML_ERROR_SYNTAX,
                 "ENTITY without Public ID is not rejected");
}
5114END_TEST 5115 5116/* Test unquoted value is faulted */ 5117START_TEST(test_bad_entity_2) { 5118 const char *text = "<!DOCTYPE doc [\n" 5119 " <!ENTITY % foo bar>\n" 5120 "]>\n" 5121 "<doc/>"; 5122 expect_failure(text, XML_ERROR_SYNTAX, 5123 "ENTITY without Public ID is not rejected"); 5124} 5125END_TEST 5126 5127START_TEST(test_bad_entity_3) { 5128 const char *text = "<!DOCTYPE doc [\n" 5129 " <!ENTITY % foo PUBLIC>\n" 5130 "]>\n" 5131 "<doc/>"; 5132 expect_failure(text, XML_ERROR_SYNTAX, 5133 "Parameter ENTITY without Public ID is not rejected"); 5134} 5135END_TEST 5136 5137START_TEST(test_bad_entity_4) { 5138 const char *text = "<!DOCTYPE doc [\n" 5139 " <!ENTITY % foo SYSTEM>\n" 5140 "]>\n" 5141 "<doc/>"; 5142 expect_failure(text, XML_ERROR_SYNTAX, 5143 "Parameter ENTITY without Public ID is not rejected"); 5144} 5145END_TEST 5146 5147START_TEST(test_bad_notation) { 5148 const char *text = "<!DOCTYPE doc [\n" 5149 " <!NOTATION n SYSTEM>\n" 5150 "]>\n" 5151 "<doc/>"; 5152 expect_failure(text, XML_ERROR_SYNTAX, 5153 "Notation without System ID is not rejected"); 5154} 5155END_TEST 5156 5157/* Test for issue #11, wrongly suppressed default handler */ 5158START_TEST(test_default_doctype_handler) { 5159 const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n" 5160 " <!ENTITY foo 'bar'>\n" 5161 "]>\n" 5162 "<doc>&foo;</doc>"; 5163 DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE}, 5164 {XCS("'test.dtd'"), 10, XML_FALSE}, 5165 {NULL, 0, XML_FALSE}}; 5166 int i; 5167 5168 XML_SetUserData(g_parser, &test_data); 5169 XML_SetDefaultHandler(g_parser, checking_default_handler); 5170 XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler); 5171 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5172 == XML_STATUS_ERROR) 5173 xml_failure(g_parser); 5174 for (i = 0; test_data[i].expected != NULL; i++) 5175 if (! 
test_data[i].seen) 5176 fail("Default handler not run for public !DOCTYPE"); 5177} 5178END_TEST 5179 5180START_TEST(test_empty_element_abort) { 5181 const char *text = "<abort/>"; 5182 5183 XML_SetStartElementHandler(g_parser, start_element_suspender); 5184 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5185 != XML_STATUS_ERROR) 5186 fail("Expected to error on abort"); 5187} 5188END_TEST 5189 5190/* Regression test for GH issue #612: unfinished m_declAttributeType 5191 * allocation in ->m_tempPool can corrupt following allocation. 5192 */ 5193START_TEST(test_pool_integrity_with_unfinished_attr) { 5194 const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n" 5195 "<!DOCTYPE foo [\n" 5196 "<!ELEMENT foo ANY>\n" 5197 "<!ENTITY % entp SYSTEM \"external.dtd\">\n" 5198 "%entp;\n" 5199 "]>\n" 5200 "<a></a>\n"; 5201 const XML_Char *expected = XCS("COMMENT"); 5202 CharData storage; 5203 5204 CharData_Init(&storage); 5205 XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5206 XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist); 5207 XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler); 5208 XML_SetCommentHandler(g_parser, accumulate_comment); 5209 XML_SetUserData(g_parser, &storage); 5210 if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) 5211 == XML_STATUS_ERROR) 5212 xml_failure(g_parser); 5213 CharData_CheckXMLChars(&storage, expected); 5214} 5215END_TEST 5216 5217START_TEST(test_nested_entity_suspend) { 5218 const char *const text = "<!DOCTYPE a [\n" 5219 " <!ENTITY e1 '<!--e1-->'>\n" 5220 " <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n" 5221 " <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n" 5222 "]>\n" 5223 "<a><!--start-->&e3;<!--end--></a>"; 5224 const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head") 5225 XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end"); 5226 CharData storage; 5227 CharData_Init(&storage); 5228 XML_Parser parser = 
XML_ParserCreate(NULL); 5229 ParserPlusStorage parserPlusStorage = {parser, &storage}; 5230 5231 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5232 XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler); 5233 XML_SetUserData(parser, &parserPlusStorage); 5234 5235 enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE); 5236 while (status == XML_STATUS_SUSPENDED) { 5237 status = XML_ResumeParser(parser); 5238 } 5239 if (status != XML_STATUS_OK) 5240 xml_failure(parser); 5241 5242 CharData_CheckXMLChars(&storage, expected); 5243 XML_ParserFree(parser); 5244} 5245END_TEST 5246 5247#if defined(XML_TESTING) 5248/* Regression test for quadratic parsing on large tokens */ 5249START_TEST(test_big_tokens_scale_linearly) { 5250 const struct { 5251 const char *pre; 5252 const char *post; 5253 } text[] = { 5254 {"<a>", "</a>"}, // assumed good, used as baseline 5255 {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch 5256 {"<c attr='", "'></c>"}, // big attribute, used to be O(N��) 5257 {"<d><!-- ", " --></d>"}, // long comment, used to be O(N��) 5258 {"<e><", "/></e>"}, // big elem name, used to be O(N��) 5259 }; 5260 const int num_cases = sizeof(text) / sizeof(text[0]); 5261 char aaaaaa[4096]; 5262 const int fillsize = (int)sizeof(aaaaaa); 5263 const int fillcount = 100; 5264 const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post. 5265 const unsigned max_factor = 4; 5266 const unsigned max_scanned = max_factor * approx_bytes; 5267 5268 memset(aaaaaa, 'a', fillsize); 5269 5270 if (! g_reparseDeferralEnabledDefault) { 5271 return; // heuristic is disabled; we would get O(n^2) and fail. 
5272 } 5273 5274 for (int i = 0; i < num_cases; ++i) { 5275 XML_Parser parser = XML_ParserCreate(NULL); 5276 assert_true(parser != NULL); 5277 enum XML_Status status; 5278 set_subtest("text=\"%saaaaaa%s\"", text[i].pre, text[i].post); 5279 5280 // parse the start text 5281 g_bytesScanned = 0; 5282 status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre, 5283 (int)strlen(text[i].pre), XML_FALSE); 5284 if (status != XML_STATUS_OK) { 5285 xml_failure(parser); 5286 } 5287 5288 // parse lots of 'a', failing the test early if it takes too long 5289 unsigned past_max_count = 0; 5290 for (int f = 0; f < fillcount; ++f) { 5291 status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE); 5292 if (status != XML_STATUS_OK) { 5293 xml_failure(parser); 5294 } 5295 if (g_bytesScanned > max_scanned) { 5296 // We're not done, and have already passed the limit -- the test will 5297 // definitely fail. This block allows us to save time by failing early. 5298 const unsigned pushed 5299 = (unsigned)strlen(text[i].pre) + (f + 1) * fillsize; 5300 fprintf( 5301 stderr, 5302 "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n", 5303 f + 1, fillcount, pushed, g_bytesScanned, 5304 g_bytesScanned / (double)pushed, max_scanned, max_factor); 5305 past_max_count++; 5306 // We are failing, but allow a few log prints first. If we don't reach 5307 // a count of five, the test will fail after the loop instead. 
5308 assert_true(past_max_count < 5); 5309 } 5310 } 5311 5312 // parse the end text 5313 status = _XML_Parse_SINGLE_BYTES(parser, text[i].post, 5314 (int)strlen(text[i].post), XML_TRUE); 5315 if (status != XML_STATUS_OK) { 5316 xml_failure(parser); 5317 } 5318 5319 assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working 5320 if (g_bytesScanned > max_scanned) { 5321 fprintf( 5322 stderr, 5323 "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n", 5324 g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned, 5325 max_factor); 5326 fail("scanned too many bytes"); 5327 } 5328 5329 XML_ParserFree(parser); 5330 } 5331} 5332END_TEST 5333#endif 5334 5335START_TEST(test_set_reparse_deferral) { 5336 const char *const pre = "<d>"; 5337 const char *const start = "<x attr='"; 5338 const char *const end = "'></x>"; 5339 char eeeeee[100]; 5340 const int fillsize = (int)sizeof(eeeeee); 5341 memset(eeeeee, 'e', fillsize); 5342 5343 for (int enabled = 0; enabled <= 1; enabled += 1) { 5344 set_subtest("deferral=%d", enabled); 5345 5346 XML_Parser parser = XML_ParserCreate(NULL); 5347 assert_true(parser != NULL); 5348 assert_true(XML_SetReparseDeferralEnabled(parser, enabled)); 5349 // pre-grow the buffer to avoid reparsing due to almost-fullness 5350 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL); 5351 5352 CharData storage; 5353 CharData_Init(&storage); 5354 XML_SetUserData(parser, &storage); 5355 XML_SetStartElementHandler(parser, start_element_event_handler); 5356 5357 enum XML_Status status; 5358 // parse the start text 5359 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); 5360 if (status != XML_STATUS_OK) { 5361 xml_failure(parser); 5362 } 5363 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done 5364 5365 // ..and the start of the token 5366 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE); 5367 if (status != XML_STATUS_OK) { 5368 xml_failure(parser); 5369 } 
5370 CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one 5371 5372 // try to parse lots of 'e', but the token isn't finished 5373 for (int c = 0; c < 100; ++c) { 5374 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); 5375 if (status != XML_STATUS_OK) { 5376 xml_failure(parser); 5377 } 5378 } 5379 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one 5380 5381 // end the <x> token. 5382 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); 5383 if (status != XML_STATUS_OK) { 5384 xml_failure(parser); 5385 } 5386 5387 if (enabled) { 5388 // In general, we may need to push more data to trigger a reparse attempt, 5389 // but in this test, the data is constructed to always require it. 5390 CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect 5391 // 2x the token length should suffice; the +1 covers the start and end. 5392 for (int c = 0; c < 101; ++c) { 5393 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); 5394 if (status != XML_STATUS_OK) { 5395 xml_failure(parser); 5396 } 5397 } 5398 } 5399 CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done 5400 5401 XML_ParserFree(parser); 5402 } 5403} 5404END_TEST 5405 5406struct element_decl_data { 5407 XML_Parser parser; 5408 int count; 5409}; 5410 5411static void 5412element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) { 5413 UNUSED_P(name); 5414 struct element_decl_data *testdata = (struct element_decl_data *)userData; 5415 testdata->count += 1; 5416 XML_FreeContentModel(testdata->parser, model); 5417} 5418 5419static int 5420external_inherited_parser(XML_Parser p, const XML_Char *context, 5421 const XML_Char *base, const XML_Char *systemId, 5422 const XML_Char *publicId) { 5423 UNUSED_P(base); 5424 UNUSED_P(systemId); 5425 UNUSED_P(publicId); 5426 const char *const pre = "<!ELEMENT document ANY>\n"; 5427 const char *const start = "<!ELEMENT "; 5428 const char *const end = " ANY>\n"; 5429 const char 
*const post = "<!ELEMENT xyz ANY>\n"; 5430 const int enabled = *(int *)XML_GetUserData(p); 5431 char eeeeee[100]; 5432 char spaces[100]; 5433 const int fillsize = (int)sizeof(eeeeee); 5434 assert_true(fillsize == (int)sizeof(spaces)); 5435 memset(eeeeee, 'e', fillsize); 5436 memset(spaces, ' ', fillsize); 5437 5438 XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL); 5439 assert_true(parser != NULL); 5440 // pre-grow the buffer to avoid reparsing due to almost-fullness 5441 assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL); 5442 5443 struct element_decl_data testdata; 5444 testdata.parser = parser; 5445 testdata.count = 0; 5446 XML_SetUserData(parser, &testdata); 5447 XML_SetElementDeclHandler(parser, element_decl_counter); 5448 5449 enum XML_Status status; 5450 // parse the initial text 5451 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); 5452 if (status != XML_STATUS_OK) { 5453 xml_failure(parser); 5454 } 5455 assert_true(testdata.count == 1); // first element should be done 5456 5457 // ..and the start of the big token 5458 status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE); 5459 if (status != XML_STATUS_OK) { 5460 xml_failure(parser); 5461 } 5462 assert_true(testdata.count == 1); // still just the first one 5463 5464 // try to parse lots of 'e', but the token isn't finished 5465 for (int c = 0; c < 100; ++c) { 5466 status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE); 5467 if (status != XML_STATUS_OK) { 5468 xml_failure(parser); 5469 } 5470 } 5471 assert_true(testdata.count == 1); // *still* just the first one 5472 5473 // end the big token. 5474 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); 5475 if (status != XML_STATUS_OK) { 5476 xml_failure(parser); 5477 } 5478 5479 if (enabled) { 5480 // In general, we may need to push more data to trigger a reparse attempt, 5481 // but in this test, the data is constructed to always require it. 
5482 assert_true(testdata.count == 1); // or the test is incorrect 5483 // 2x the token length should suffice; the +1 covers the start and end. 5484 for (int c = 0; c < 101; ++c) { 5485 status = XML_Parse(parser, spaces, fillsize, XML_FALSE); 5486 if (status != XML_STATUS_OK) { 5487 xml_failure(parser); 5488 } 5489 } 5490 } 5491 assert_true(testdata.count == 2); // the big token should be done 5492 5493 // parse the final text 5494 status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE); 5495 if (status != XML_STATUS_OK) { 5496 xml_failure(parser); 5497 } 5498 assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done 5499 5500 XML_ParserFree(parser); 5501 return XML_STATUS_OK; 5502} 5503 5504START_TEST(test_reparse_deferral_is_inherited) { 5505 const char *const text 5506 = "<!DOCTYPE document SYSTEM 'something.ext'><document/>"; 5507 for (int enabled = 0; enabled <= 1; ++enabled) { 5508 set_subtest("deferral=%d", enabled); 5509 5510 XML_Parser parser = XML_ParserCreate(NULL); 5511 assert_true(parser != NULL); 5512 XML_SetUserData(parser, (void *)&enabled); 5513 XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); 5514 // this handler creates a sub-parser and checks that its deferral behavior 5515 // is what we expected, based on the value of `enabled` (in userdata). 
5516 XML_SetExternalEntityRefHandler(parser, external_inherited_parser); 5517 assert_true(XML_SetReparseDeferralEnabled(parser, enabled)); 5518 if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK) 5519 xml_failure(parser); 5520 5521 XML_ParserFree(parser); 5522 } 5523} 5524END_TEST 5525 5526START_TEST(test_set_reparse_deferral_on_null_parser) { 5527 assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE); 5528 assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE); 5529 assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE); 5530 assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE); 5531 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN) 5532 == XML_FALSE); 5533 assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX) 5534 == XML_FALSE); 5535} 5536END_TEST 5537 5538START_TEST(test_set_reparse_deferral_on_the_fly) { 5539 const char *const pre = "<d><x attr='"; 5540 const char *const end = "'></x>"; 5541 char iiiiii[100]; 5542 const int fillsize = (int)sizeof(iiiiii); 5543 memset(iiiiii, 'i', fillsize); 5544 5545 XML_Parser parser = XML_ParserCreate(NULL); 5546 assert_true(parser != NULL); 5547 assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE)); 5548 5549 CharData storage; 5550 CharData_Init(&storage); 5551 XML_SetUserData(parser, &storage); 5552 XML_SetStartElementHandler(parser, start_element_event_handler); 5553 5554 enum XML_Status status; 5555 // parse the start text 5556 status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE); 5557 if (status != XML_STATUS_OK) { 5558 xml_failure(parser); 5559 } 5560 CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done 5561 5562 // try to parse some 'i', but the token isn't finished 5563 status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE); 5564 if (status != XML_STATUS_OK) { 5565 xml_failure(parser); 5566 } 5567 CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one 5568 5569 // 
end the <x> token. 5570 status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE); 5571 if (status != XML_STATUS_OK) { 5572 xml_failure(parser); 5573 } 5574 CharData_CheckXMLChars(&storage, XCS("d")); // not yet. 5575 5576 // now change the heuristic setting and add *no* data 5577 assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE)); 5578 // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic. 5579 status = XML_Parse(parser, "", 0, XML_FALSE); 5580 if (status != XML_STATUS_OK) { 5581 xml_failure(parser); 5582 } 5583 CharData_CheckXMLChars(&storage, XCS("dx")); 5584 5585 XML_ParserFree(parser); 5586} 5587END_TEST 5588 5589START_TEST(test_set_bad_reparse_option) { 5590 XML_Parser parser = XML_ParserCreate(NULL); 5591 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2)); 5592 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3)); 5593 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99)); 5594 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127)); 5595 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128)); 5596 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129)); 5597 assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255)); 5598 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0)); 5599 assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1)); 5600 XML_ParserFree(parser); 5601} 5602END_TEST 5603 5604static size_t g_totalAlloc = 0; 5605static size_t g_biggestAlloc = 0; 5606 5607static void * 5608counting_realloc(void *ptr, size_t size) { 5609 g_totalAlloc += size; 5610 if (size > g_biggestAlloc) { 5611 g_biggestAlloc = size; 5612 } 5613 return realloc(ptr, size); 5614} 5615 5616static void * 5617counting_malloc(size_t size) { 5618 return counting_realloc(NULL, size); 5619} 5620 5621START_TEST(test_bypass_heuristic_when_close_to_bufsize) { 5622 if (g_chunkSize != 0) { 5623 // this test does not use SINGLE_BYTES, 
because it depends on very precise 5624 // buffer fills. 5625 return; 5626 } 5627 if (! g_reparseDeferralEnabledDefault) { 5628 return; // this test is irrelevant when the deferral heuristic is disabled. 5629 } 5630 5631 const int document_length = 65536; 5632 char *const document = (char *)malloc(document_length); 5633 5634 const XML_Memory_Handling_Suite memfuncs = { 5635 counting_malloc, 5636 counting_realloc, 5637 free, 5638 }; 5639 5640 const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1}; 5641 const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1}; 5642 const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1}; 5643 5644 for (const int *leading = leading_list; *leading >= 0; leading++) { 5645 for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) { 5646 for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) { 5647 set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken, 5648 *fillsize); 5649 // start by checking that the test looks reasonably valid 5650 assert_true(*leading + *bigtoken <= document_length); 5651 5652 // put 'x' everywhere; some will be overwritten by elements. 5653 memset(document, 'x', document_length); 5654 // maybe add an initial tag 5655 if (*leading) { 5656 assert_true(*leading >= 3); // or the test case is invalid 5657 memcpy(document, "<a>", 3); 5658 } 5659 // add the large token 5660 document[*leading + 0] = '<'; 5661 document[*leading + 1] = 'b'; 5662 memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token 5663 document[*leading + *bigtoken - 1] = '>'; 5664 5665 // 1 for 'b', plus 1 or 0 depending on the presence of 'a' 5666 const int expected_elem_total = 1 + (*leading ? 
1 : 0); 5667 5668 XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL); 5669 assert_true(parser != NULL); 5670 5671 CharData storage; 5672 CharData_Init(&storage); 5673 XML_SetUserData(parser, &storage); 5674 XML_SetStartElementHandler(parser, start_element_event_handler); 5675 5676 g_biggestAlloc = 0; 5677 g_totalAlloc = 0; 5678 int offset = 0; 5679 // fill data until the big token is covered (but not necessarily parsed) 5680 while (offset < *leading + *bigtoken) { 5681 assert_true(offset + *fillsize <= document_length); 5682 const enum XML_Status status 5683 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); 5684 if (status != XML_STATUS_OK) { 5685 xml_failure(parser); 5686 } 5687 offset += *fillsize; 5688 } 5689 // Now, check that we've had a buffer allocation that could fit the 5690 // context bytes and our big token. In order to detect a special case, 5691 // we need to know how many bytes of our big token were included in the 5692 // first push that contained _any_ bytes of the big token: 5693 const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize); 5694 if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) { 5695 // Special case: we aren't saving any context, and the whole big token 5696 // was covered by a single fill, so Expat may have parsed directly 5697 // from our input pointer, without allocating an internal buffer. 
5698 } else if (*leading < XML_CONTEXT_BYTES) { 5699 assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken); 5700 } else { 5701 assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken); 5702 } 5703 // fill data until the big token is actually parsed 5704 while (storage.count < expected_elem_total) { 5705 const size_t alloc_before = g_totalAlloc; 5706 assert_true(offset + *fillsize <= document_length); 5707 const enum XML_Status status 5708 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); 5709 if (status != XML_STATUS_OK) { 5710 xml_failure(parser); 5711 } 5712 offset += *fillsize; 5713 // since all the bytes of the big token are already in the buffer, 5714 // the bufsize ceiling should make us finish its parsing without any 5715 // further buffer allocations. We assume that there will be no other 5716 // large allocations in this test. 5717 assert_true(g_totalAlloc - alloc_before < 4096); 5718 } 5719 // test-the-test: was our alloc even called? 5720 assert_true(g_totalAlloc > 0); 5721 // test-the-test: there shouldn't be any extra start elements 5722 assert_true(storage.count == expected_elem_total); 5723 5724 XML_ParserFree(parser); 5725 } 5726 } 5727 } 5728 free(document); 5729} 5730END_TEST 5731 5732#if defined(XML_TESTING) 5733START_TEST(test_varying_buffer_fills) { 5734 const int KiB = 1024; 5735 const int MiB = 1024 * KiB; 5736 const int document_length = 16 * MiB; 5737 const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB 5738 5739 if (g_chunkSize != 0) { 5740 return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES(). 5741 } 5742 5743 char *const document = (char *)malloc(document_length); 5744 assert_true(document != NULL); 5745 memset(document, 'x', document_length); 5746 document[0] = '<'; 5747 document[1] = 't'; 5748 memset(&document[2], ' ', big - 2); // a very spacy token 5749 document[big - 1] = '>'; 5750 5751 // Each testcase is a list of buffer fill sizes, terminated by a value < 0. 
5752 // When reparse deferral is enabled, the final (negated) value is the expected 5753 // maximum number of bytes scanned in parse attempts. 5754 const int testcases[][30] = { 5755 {8 * MiB, -8 * MiB}, 5756 {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total 5757 // zero-size fills shouldn't trigger the bypass 5758 {4 * MiB, 0, 4 * MiB, -12 * MiB}, 5759 {4 * MiB, 0, 0, 4 * MiB, -12 * MiB}, 5760 {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB}, 5761 // try to hit the buffer ceiling only once (at the end) 5762 {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB}, 5763 // try to hit the same buffer ceiling multiple times 5764 {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB}, 5765 5766 // try to hit every ceiling, by always landing 1K shy of the buffer size 5767 {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB, 5768 128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB}, 5769 5770 // try to avoid every ceiling, by always landing 1B past the buffer size 5771 // the normal 2x heuristic threshold still forces parse attempts. 5772 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1 5773 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2 5774 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3 5775 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4 5776 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5 5777 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6 5778 2 * MiB, 4 * MiB, // will attempt 8MiB + 1 ==> total 10M + 682K + 7 5779 -(10 * MiB + 682 * KiB + 7)}, 5780 // try to avoid every ceiling again, except on our last fill. 
5781 {2 * KiB + 1, // will attempt 2KiB + 1 ==> total 2KiB + 1 5782 2 * KiB, 4 * KiB, // will attempt 8KiB + 1 ==> total 10KiB + 2 5783 8 * KiB, 16 * KiB, // will attempt 32KiB + 1 ==> total 42KiB + 3 5784 32 * KiB, 64 * KiB, // will attempt 128KiB + 1 ==> total 170KiB + 4 5785 128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5 5786 512 * KiB, 1 * MiB, // will attempt 2MiB + 1 ==> total 2M + 682K + 6 5787 2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6 5788 -(10 * MiB + 682 * KiB + 6)}, 5789 5790 // try to hit ceilings on the way multiple times 5791 {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer 5792 512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer 5793 1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1, // 4 MiB buffer 5794 2 * MiB + 1, 1 * MiB, 512 * KiB, // 8 MiB buffer 5795 // we'll make a parse attempt at every parse call 5796 -(45 * MiB + 12)}, 5797 }; 5798 const int testcount = sizeof(testcases) / sizeof(testcases[0]); 5799 for (int test_i = 0; test_i < testcount; test_i++) { 5800 const int *fillsize = testcases[test_i]; 5801 set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1], 5802 fillsize[2], fillsize[3]); 5803 XML_Parser parser = XML_ParserCreate(NULL); 5804 assert_true(parser != NULL); 5805 5806 CharData storage; 5807 CharData_Init(&storage); 5808 XML_SetUserData(parser, &storage); 5809 XML_SetStartElementHandler(parser, start_element_event_handler); 5810 5811 g_bytesScanned = 0; 5812 int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call) 5813 int offset = 0; 5814 while (*fillsize >= 0) { 5815 assert_true(offset + *fillsize <= document_length); // or test is invalid 5816 const enum XML_Status status 5817 = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE); 5818 if (status != XML_STATUS_OK) { 5819 xml_failure(parser); 5820 } 5821 offset += *fillsize; 5822 fillsize++; 5823 assert_true(offset <= INT_MAX - worstcase_bytes); // avoid 
overflow 5824 worstcase_bytes += offset; // we might've tried to parse all pending bytes 5825 } 5826 assert_true(storage.count == 1); // the big token should've been parsed 5827 assert_true(g_bytesScanned > 0); // test-the-test: does our counter work? 5828 if (g_reparseDeferralEnabledDefault) { 5829 // heuristic is enabled; some XML_Parse calls may have deferred reparsing 5830 const unsigned max_bytes_scanned = -*fillsize; 5831 if (g_bytesScanned > max_bytes_scanned) { 5832 fprintf(stderr, 5833 "bytes scanned in parse attempts: actual=%u limit=%u \n", 5834 g_bytesScanned, max_bytes_scanned); 5835 fail("too many bytes scanned in parse attempts"); 5836 } 5837 } 5838 assert_true(g_bytesScanned <= (unsigned)worstcase_bytes); 5839 5840 XML_ParserFree(parser); 5841 } 5842 free(document); 5843} 5844END_TEST 5845#endif 5846 5847void 5848make_basic_test_case(Suite *s) { 5849 TCase *tc_basic = tcase_create("basic tests"); 5850 5851 suite_add_tcase(s, tc_basic); 5852 tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown); 5853 5854 tcase_add_test(tc_basic, test_nul_byte); 5855 tcase_add_test(tc_basic, test_u0000_char); 5856 tcase_add_test(tc_basic, test_siphash_self); 5857 tcase_add_test(tc_basic, test_siphash_spec); 5858 tcase_add_test(tc_basic, test_bom_utf8); 5859 tcase_add_test(tc_basic, test_bom_utf16_be); 5860 tcase_add_test(tc_basic, test_bom_utf16_le); 5861 tcase_add_test(tc_basic, test_nobom_utf16_le); 5862 tcase_add_test(tc_basic, test_hash_collision); 5863 tcase_add_test(tc_basic, test_illegal_utf8); 5864 tcase_add_test(tc_basic, test_utf8_auto_align); 5865 tcase_add_test(tc_basic, test_utf16); 5866 tcase_add_test(tc_basic, test_utf16_le_epilog_newline); 5867 tcase_add_test(tc_basic, test_not_utf16); 5868 tcase_add_test(tc_basic, test_bad_encoding); 5869 tcase_add_test(tc_basic, test_latin1_umlauts); 5870 tcase_add_test(tc_basic, test_long_utf8_character); 5871 tcase_add_test(tc_basic, test_long_latin1_attribute); 5872 tcase_add_test(tc_basic, 
test_long_ascii_attribute); 5873 /* Regression test for SF bug #491986. */ 5874 tcase_add_test(tc_basic, test_danish_latin1); 5875 /* Regression test for SF bug #514281. */ 5876 tcase_add_test(tc_basic, test_french_charref_hexidecimal); 5877 tcase_add_test(tc_basic, test_french_charref_decimal); 5878 tcase_add_test(tc_basic, test_french_latin1); 5879 tcase_add_test(tc_basic, test_french_utf8); 5880 tcase_add_test(tc_basic, test_utf8_false_rejection); 5881 tcase_add_test(tc_basic, test_line_number_after_parse); 5882 tcase_add_test(tc_basic, test_column_number_after_parse); 5883 tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers); 5884 tcase_add_test(tc_basic, test_line_number_after_error); 5885 tcase_add_test(tc_basic, test_column_number_after_error); 5886 tcase_add_test(tc_basic, test_really_long_lines); 5887 tcase_add_test(tc_basic, test_really_long_encoded_lines); 5888 tcase_add_test(tc_basic, test_end_element_events); 5889 tcase_add_test(tc_basic, test_helper_is_whitespace_normalized); 5890 tcase_add_test(tc_basic, test_attr_whitespace_normalization); 5891 tcase_add_test(tc_basic, test_xmldecl_misplaced); 5892 tcase_add_test(tc_basic, test_xmldecl_invalid); 5893 tcase_add_test(tc_basic, test_xmldecl_missing_attr); 5894 tcase_add_test(tc_basic, test_xmldecl_missing_value); 5895 tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity); 5896 tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity); 5897 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding); 5898 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler); 5899 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom); 5900 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding); 5901 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2); 5902 tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset); 5903 tcase_add_test(tc_basic, 
test_wfc_undeclared_entity_no_external_subset); 5904 tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone); 5905 tcase_add_test(tc_basic, 5906 test_wfc_undeclared_entity_with_external_subset_standalone); 5907 tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone); 5908 tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset); 5909 tcase_add_test(tc_basic, test_not_standalone_handler_reject); 5910 tcase_add_test(tc_basic, test_not_standalone_handler_accept); 5911 tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs); 5912 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse); 5913 tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling); 5914 tcase_add_test(tc_basic, test_dtd_attr_handling); 5915 tcase_add_test(tc_basic, test_empty_ns_without_namespaces); 5916 tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces); 5917 tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls); 5918 tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls); 5919 tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls); 5920 tcase_add_test(tc_basic, test_good_cdata_ascii); 5921 tcase_add_test(tc_basic, test_good_cdata_utf16); 5922 tcase_add_test(tc_basic, test_good_cdata_utf16_le); 5923 tcase_add_test(tc_basic, test_long_cdata_utf16); 5924 tcase_add_test(tc_basic, test_multichar_cdata_utf16); 5925 tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair); 5926 tcase_add_test(tc_basic, test_bad_cdata); 5927 tcase_add_test(tc_basic, test_bad_cdata_utf16); 5928 tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls); 5929 tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls); 5930 tcase_add_test(tc_basic, test_memory_allocation); 5931 tcase_add_test__if_xml_ge(tc_basic, test_default_current); 5932 tcase_add_test(tc_basic, test_dtd_elements); 5933 tcase_add_test(tc_basic, test_dtd_elements_nesting); 5934 
tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd); 5935 tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone); 5936 tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd); 5937 tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype); 5938 tcase_add_test__ifdef_xml_dtd(tc_basic, 5939 test_foreign_dtd_without_external_subset); 5940 tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd); 5941 tcase_add_test(tc_basic, test_set_base); 5942 tcase_add_test(tc_basic, test_attributes); 5943 tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity); 5944 tcase_add_test(tc_basic, test_resume_invalid_parse); 5945 tcase_add_test(tc_basic, test_resume_resuspended); 5946 tcase_add_test(tc_basic, test_cdata_default); 5947 tcase_add_test(tc_basic, test_subordinate_reset); 5948 tcase_add_test(tc_basic, test_subordinate_suspend); 5949 tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend); 5950 tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort); 5951 tcase_add_test__ifdef_xml_dtd(tc_basic, 5952 test_ext_entity_invalid_suspended_parse); 5953 tcase_add_test(tc_basic, test_explicit_encoding); 5954 tcase_add_test(tc_basic, test_trailing_cr); 5955 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr); 5956 tcase_add_test(tc_basic, test_trailing_rsqb); 5957 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb); 5958 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata); 5959 tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters); 5960 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter); 5961 tcase_add_test(tc_basic, test_empty_parse); 5962 tcase_add_test(tc_basic, test_get_buffer_1); 5963 tcase_add_test(tc_basic, test_get_buffer_2); 5964#if XML_CONTEXT_BYTES > 0 5965 tcase_add_test(tc_basic, test_get_buffer_3_overflow); 5966#endif 5967 tcase_add_test(tc_basic, test_buffer_can_grow_to_max); 5968 tcase_add_test(tc_basic, 
test_getbuffer_allocates_on_zero_len); 5969 tcase_add_test(tc_basic, test_byte_info_at_end); 5970 tcase_add_test(tc_basic, test_byte_info_at_error); 5971 tcase_add_test(tc_basic, test_byte_info_at_cdata); 5972 tcase_add_test(tc_basic, test_predefined_entities); 5973 tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd); 5974 tcase_add_test(tc_basic, test_not_predefined_entities); 5975 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section); 5976 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16); 5977 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be); 5978 tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section); 5979 tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed); 5980 tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values); 5981 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone); 5982 tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort); 5983 tcase_add_test(tc_basic, test_bad_public_doctype); 5984 tcase_add_test(tc_basic, test_attribute_enum_value); 5985 tcase_add_test(tc_basic, test_predefined_entity_redefinition); 5986 tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing); 5987 tcase_add_test(tc_basic, test_public_notation_no_sysid); 5988 tcase_add_test(tc_basic, test_nested_groups); 5989 tcase_add_test(tc_basic, test_group_choice); 5990 tcase_add_test(tc_basic, test_standalone_parameter_entity); 5991 tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity); 5992 tcase_add_test__ifdef_xml_dtd(tc_basic, 5993 test_recursive_external_parameter_entity); 5994 tcase_add_test__ifdef_xml_dtd(tc_basic, 5995 test_recursive_external_parameter_entity_2); 5996 tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd); 5997 tcase_add_test(tc_basic, test_suspend_xdecl); 5998 tcase_add_test(tc_basic, test_abort_epilog); 5999 tcase_add_test(tc_basic, test_abort_epilog_2); 6000 tcase_add_test(tc_basic, 
test_suspend_epilog); 6001 tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag); 6002 tcase_add_test(tc_basic, test_unfinished_epilog); 6003 tcase_add_test(tc_basic, test_partial_char_in_epilog); 6004 tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity); 6005 tcase_add_test__ifdef_xml_dtd(tc_basic, 6006 test_suspend_resume_internal_entity_issue_629); 6007 tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error); 6008 tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity); 6009 tcase_add_test(tc_basic, test_restart_on_error); 6010 tcase_add_test(tc_basic, test_reject_lt_in_attribute_value); 6011 tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value); 6012 tcase_add_test(tc_basic, test_trailing_cr_in_att_value); 6013 tcase_add_test(tc_basic, test_standalone_internal_entity); 6014 tcase_add_test(tc_basic, test_skipped_external_entity); 6015 tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity); 6016 tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity); 6017 tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr); 6018 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity); 6019 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2); 6020 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3); 6021 tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4); 6022 tcase_add_test(tc_basic, test_pi_handled_in_default); 6023 tcase_add_test(tc_basic, test_comment_handled_in_default); 6024 tcase_add_test(tc_basic, test_pi_yml); 6025 tcase_add_test(tc_basic, test_pi_xnl); 6026 tcase_add_test(tc_basic, test_pi_xmm); 6027 tcase_add_test(tc_basic, test_utf16_pi); 6028 tcase_add_test(tc_basic, test_utf16_be_pi); 6029 tcase_add_test(tc_basic, test_utf16_be_comment); 6030 tcase_add_test(tc_basic, test_utf16_le_comment); 6031 tcase_add_test(tc_basic, test_missing_encoding_conversion_fn); 6032 tcase_add_test(tc_basic, 
test_failing_encoding_conversion_fn); 6033 tcase_add_test(tc_basic, test_unknown_encoding_success); 6034 tcase_add_test(tc_basic, test_unknown_encoding_bad_name); 6035 tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2); 6036 tcase_add_test(tc_basic, test_unknown_encoding_long_name_1); 6037 tcase_add_test(tc_basic, test_unknown_encoding_long_name_2); 6038 tcase_add_test(tc_basic, test_invalid_unknown_encoding); 6039 tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok); 6040 tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail); 6041 tcase_add_test(tc_basic, test_unknown_encoding_invalid_length); 6042 tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit); 6043 tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate); 6044 tcase_add_test(tc_basic, test_unknown_encoding_invalid_high); 6045 tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value); 6046 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom); 6047 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom); 6048 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2); 6049 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2); 6050 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be); 6051 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le); 6052 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown); 6053 tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom); 6054 tcase_add_test(tc_basic, test_utf8_in_cdata_section); 6055 tcase_add_test(tc_basic, test_utf8_in_cdata_section_2); 6056 tcase_add_test(tc_basic, test_utf8_in_start_tags); 6057 tcase_add_test(tc_basic, test_trailing_spaces_in_elements); 6058 tcase_add_test(tc_basic, test_utf16_attribute); 6059 tcase_add_test(tc_basic, test_utf16_second_attr); 6060 tcase_add_test(tc_basic, test_attr_after_solidus); 6061 tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe); 6062 tcase_add_test(tc_basic, 
test_bad_attr_desc_keyword); 6063 tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16); 6064 tcase_add_test(tc_basic, test_bad_doctype); 6065 tcase_add_test(tc_basic, test_bad_doctype_utf8); 6066 tcase_add_test(tc_basic, test_bad_doctype_utf16); 6067 tcase_add_test(tc_basic, test_bad_doctype_plus); 6068 tcase_add_test(tc_basic, test_bad_doctype_star); 6069 tcase_add_test(tc_basic, test_bad_doctype_query); 6070 tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore); 6071 tcase_add_test(tc_basic, test_entity_in_utf16_be_attr); 6072 tcase_add_test(tc_basic, test_entity_in_utf16_le_attr); 6073 tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be); 6074 tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le); 6075 tcase_add_test(tc_basic, test_short_doctype); 6076 tcase_add_test(tc_basic, test_short_doctype_2); 6077 tcase_add_test(tc_basic, test_short_doctype_3); 6078 tcase_add_test(tc_basic, test_long_doctype); 6079 tcase_add_test(tc_basic, test_bad_entity); 6080 tcase_add_test(tc_basic, test_bad_entity_2); 6081 tcase_add_test(tc_basic, test_bad_entity_3); 6082 tcase_add_test(tc_basic, test_bad_entity_4); 6083 tcase_add_test(tc_basic, test_bad_notation); 6084 tcase_add_test(tc_basic, test_default_doctype_handler); 6085 tcase_add_test(tc_basic, test_empty_element_abort); 6086 tcase_add_test__ifdef_xml_dtd(tc_basic, 6087 test_pool_integrity_with_unfinished_attr); 6088 tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend); 6089#if defined(XML_TESTING) 6090 tcase_add_test(tc_basic, test_big_tokens_scale_linearly); 6091#endif 6092 tcase_add_test(tc_basic, test_set_reparse_deferral); 6093 tcase_add_test(tc_basic, test_reparse_deferral_is_inherited); 6094 tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser); 6095 tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly); 6096 tcase_add_test(tc_basic, test_set_bad_reparse_option); 6097 tcase_add_test(tc_basic, 
test_bypass_heuristic_when_close_to_bufsize); 6098#if defined(XML_TESTING) 6099 tcase_add_test(tc_basic, test_varying_buffer_fills); 6100#endif 6101} 6102