1/* Tests in the "basic" test case for the Expat test suite
2                            __  __            _
3                         ___\ \/ /_ __   __ _| |_
4                        / _ \\  /| '_ \ / _` | __|
5                       |  __//  \| |_) | (_| | |_
6                        \___/_/\_\ .__/ \__,_|\__|
7                                 |_| XML parser
8
9   Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10   Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
11   Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12   Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13   Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
14   Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15   Copyright (c) 2017      Joe Orton <jorton@redhat.com>
   Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
17   Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
18   Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
19   Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
20   Copyright (c) 2021      Donghee Na <donghee.na@python.org>
21   Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
22   Licensed under the MIT license:
23
24   Permission is  hereby granted,  free of charge,  to any  person obtaining
25   a  copy  of  this  software   and  associated  documentation  files  (the
26   "Software"),  to  deal in  the  Software  without restriction,  including
27   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
28   distribute, sublicense, and/or sell copies of the Software, and to permit
29   persons  to whom  the Software  is  furnished to  do so,  subject to  the
30   following conditions:
31
32   The above copyright  notice and this permission notice  shall be included
33   in all copies or substantial portions of the Software.
34
35   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
36   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
37   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
38   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
39   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
40   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
41   USE OR OTHER DEALINGS IN THE SOFTWARE.
42*/
43
44#if defined(NDEBUG)
45#  undef NDEBUG /* because test suite relies on assert(...) at the moment */
46#endif
47
48#include <assert.h>
49
50#include <stdio.h>
51#include <string.h>
52#include <time.h>
53
54#if ! defined(__cplusplus)
55#  include <stdbool.h>
56#endif
57
58#include "expat_config.h"
59
60#include "expat.h"
61#include "internal.h"
62#include "minicheck.h"
63#include "structdata.h"
64#include "common.h"
65#include "dummy.h"
66#include "handlers.h"
67#include "siphash.h"
68#include "basic_tests.h"
69
70static void
71basic_setup(void) {
72  g_parser = XML_ParserCreate(NULL);
73  if (g_parser == NULL)
74    fail("Parser not created.");
75}
76
77/*
78 * Character & encoding tests.
79 */
80
81START_TEST(test_nul_byte) {
82  char text[] = "<doc>\0</doc>";
83
84  /* test that a NUL byte (in US-ASCII data) is an error */
85  if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
86      == XML_STATUS_OK)
87    fail("Parser did not report error on NUL-byte.");
88  if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
89    xml_failure(g_parser);
90}
91END_TEST
92
93START_TEST(test_u0000_char) {
94  /* test that a NUL byte (in US-ASCII data) is an error */
95  expect_failure("<doc>&#0;</doc>", XML_ERROR_BAD_CHAR_REF,
96                 "Parser did not report error on NUL-byte.");
97}
98END_TEST
99
100START_TEST(test_siphash_self) {
101  if (! sip24_valid())
102    fail("SipHash self-test failed");
103}
104END_TEST
105
106START_TEST(test_siphash_spec) {
107  /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
108  const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
109                         "\x0a\x0b\x0c\x0d\x0e";
110  const size_t len = sizeof(message) - 1;
111  const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
112  struct siphash state;
113  struct sipkey key;
114
115  sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
116                  "\x0a\x0b\x0c\x0d\x0e\x0f");
117  sip24_init(&state, &key);
118
119  /* Cover spread across calls */
120  sip24_update(&state, message, 4);
121  sip24_update(&state, message + 4, len - 4);
122
123  /* Cover null length */
124  sip24_update(&state, message, 0);
125
126  if (sip24_final(&state) != expected)
127    fail("sip24_final failed spec test\n");
128
129  /* Cover wrapper */
130  if (siphash24(message, len, &key) != expected)
131    fail("siphash24 failed spec test\n");
132}
133END_TEST
134
135START_TEST(test_bom_utf8) {
136  /* This test is really just making sure we don't core on a UTF-8 BOM. */
137  const char *text = "\357\273\277<e/>";
138
139  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
140      == XML_STATUS_ERROR)
141    xml_failure(g_parser);
142}
143END_TEST
144
145START_TEST(test_bom_utf16_be) {
146  char text[] = "\376\377\0<\0e\0/\0>";
147
148  if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
149      == XML_STATUS_ERROR)
150    xml_failure(g_parser);
151}
152END_TEST
153
154START_TEST(test_bom_utf16_le) {
155  char text[] = "\377\376<\0e\0/\0>\0";
156
157  if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
158      == XML_STATUS_ERROR)
159    xml_failure(g_parser);
160}
161END_TEST
162
163START_TEST(test_nobom_utf16_le) {
164  char text[] = " \0<\0e\0/\0>\0";
165
166  if (g_chunkSize == 1) {
167    // TODO: with just the first byte, we can't tell the difference between
168    // UTF-16-LE and UTF-8. Avoid the failure for now.
169    return;
170  }
171
172  if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
173      == XML_STATUS_ERROR)
174    xml_failure(g_parser);
175}
176END_TEST
177
178START_TEST(test_hash_collision) {
179  /* For full coverage of the lookup routine, we need to ensure a
180   * hash collision even though we can only tell that we have one
181   * through breakpoint debugging or coverage statistics.  The
182   * following will cause a hash collision on machines with a 64-bit
183   * long type; others will have to experiment.  The full coverage
184   * tests invoked from qa.sh usually provide a hash collision, but
185   * not always.  This is an attempt to provide insurance.
186   */
187#define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
188  const char *text
189      = "<doc>\n"
190        "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
191        "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
192        "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
193        "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
194        "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
195        "<d8>This triggers the table growth and collides with b2</d8>\n"
196        "</doc>\n";
197
198  XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
199  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
200      == XML_STATUS_ERROR)
201    xml_failure(g_parser);
202}
203END_TEST
204#undef COLLIDING_HASH_SALT
205
206/* Regression test for SF bug #491986. */
207START_TEST(test_danish_latin1) {
208  const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
209                     "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
210#ifdef XML_UNICODE
211  const XML_Char *expected
212      = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
213#else
214  const XML_Char *expected
215      = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
216#endif
217  run_character_check(text, expected);
218}
219END_TEST
220
221/* Regression test for SF bug #514281. */
222START_TEST(test_french_charref_hexidecimal) {
223  const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
224                     "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
225#ifdef XML_UNICODE
226  const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
227#else
228  const XML_Char *expected
229      = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
230#endif
231  run_character_check(text, expected);
232}
233END_TEST
234
235START_TEST(test_french_charref_decimal) {
236  const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
237                     "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
238#ifdef XML_UNICODE
239  const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
240#else
241  const XML_Char *expected
242      = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
243#endif
244  run_character_check(text, expected);
245}
246END_TEST
247
248START_TEST(test_french_latin1) {
249  const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
250                     "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
251#ifdef XML_UNICODE
252  const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
253#else
254  const XML_Char *expected
255      = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
256#endif
257  run_character_check(text, expected);
258}
259END_TEST
260
261START_TEST(test_french_utf8) {
262  const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
263                     "<doc>\xC3\xA9</doc>";
264#ifdef XML_UNICODE
265  const XML_Char *expected = XCS("\x00e9");
266#else
267  const XML_Char *expected = XCS("\xC3\xA9");
268#endif
269  run_character_check(text, expected);
270}
271END_TEST
272
273/* Regression test for SF bug #600479.
274   XXX There should be a test that exercises all legal XML Unicode
275   characters as PCDATA and attribute value content, and XML Name
276   characters as part of element and attribute names.
277*/
278START_TEST(test_utf8_false_rejection) {
279  const char *text = "<doc>\xEF\xBA\xBF</doc>";
280#ifdef XML_UNICODE
281  const XML_Char *expected = XCS("\xfebf");
282#else
283  const XML_Char *expected = XCS("\xEF\xBA\xBF");
284#endif
285  run_character_check(text, expected);
286}
287END_TEST
288
289/* Regression test for SF bug #477667.
290   This test assures that any 8-bit character followed by a 7-bit
291   character will not be mistakenly interpreted as a valid UTF-8
292   sequence.
293*/
294START_TEST(test_illegal_utf8) {
295  char text[100];
296  int i;
297
298  for (i = 128; i <= 255; ++i) {
299    snprintf(text, sizeof(text), "<e>%ccd</e>", i);
300    if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
301        == XML_STATUS_OK) {
302      snprintf(text, sizeof(text),
303               "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
304               i);
305      fail(text);
306    } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
307      xml_failure(g_parser);
308    /* Reset the parser since we use the same parser repeatedly. */
309    XML_ParserReset(g_parser, NULL);
310  }
311}
312END_TEST
313
314/* Examples, not masks: */
315#define UTF8_LEAD_1 "\x7f" /* 0b01111111 */
316#define UTF8_LEAD_2 "\xdf" /* 0b11011111 */
317#define UTF8_LEAD_3 "\xef" /* 0b11101111 */
318#define UTF8_LEAD_4 "\xf7" /* 0b11110111 */
319#define UTF8_FOLLOW "\xbf" /* 0b10111111 */
320
321START_TEST(test_utf8_auto_align) {
322  struct TestCase {
323    ptrdiff_t expectedMovementInChars;
324    const char *input;
325  };
326
327  struct TestCase cases[] = {
328      {00, ""},
329
330      {00, UTF8_LEAD_1},
331
332      {-1, UTF8_LEAD_2},
333      {00, UTF8_LEAD_2 UTF8_FOLLOW},
334
335      {-1, UTF8_LEAD_3},
336      {-2, UTF8_LEAD_3 UTF8_FOLLOW},
337      {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
338
339      {-1, UTF8_LEAD_4},
340      {-2, UTF8_LEAD_4 UTF8_FOLLOW},
341      {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
342      {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
343  };
344
345  size_t i = 0;
346  bool success = true;
347  for (; i < sizeof(cases) / sizeof(*cases); i++) {
348    const char *fromLim = cases[i].input + strlen(cases[i].input);
349    const char *const fromLimInitially = fromLim;
350    ptrdiff_t actualMovementInChars;
351
352    _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
353
354    actualMovementInChars = (fromLim - fromLimInitially);
355    if (actualMovementInChars != cases[i].expectedMovementInChars) {
356      size_t j = 0;
357      success = false;
358      printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
359             ", actually moved by %2d chars: \"",
360             (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
361             (int)actualMovementInChars);
362      for (; j < strlen(cases[i].input); j++) {
363        printf("\\x%02x", (unsigned char)cases[i].input[j]);
364      }
365      printf("\"\n");
366    }
367  }
368
369  if (! success) {
370    fail("UTF-8 auto-alignment is not bullet-proof\n");
371  }
372}
373END_TEST
374
375START_TEST(test_utf16) {
376  /* <?xml version="1.0" encoding="UTF-16"?>
377   *  <doc a='123'>some {A} text</doc>
378   *
379   * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
380   */
381  char text[]
382      = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
383        "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
384        "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
385        "\000'\000?\000>\000\n"
386        "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
387        "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
388        "<\000/\000d\000o\000c\000>";
389#ifdef XML_UNICODE
390  const XML_Char *expected = XCS("some \xff21 text");
391#else
392  const XML_Char *expected = XCS("some \357\274\241 text");
393#endif
394  CharData storage;
395
396  CharData_Init(&storage);
397  XML_SetUserData(g_parser, &storage);
398  XML_SetCharacterDataHandler(g_parser, accumulate_characters);
399  if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
400      == XML_STATUS_ERROR)
401    xml_failure(g_parser);
402  CharData_CheckXMLChars(&storage, expected);
403}
404END_TEST
405
406START_TEST(test_utf16_le_epilog_newline) {
407  unsigned int first_chunk_bytes = 17;
408  char text[] = "\xFF\xFE"                  /* BOM */
409                "<\000e\000/\000>\000"      /* document element */
410                "\r\000\n\000\r\000\n\000"; /* epilog */
411
412  if (first_chunk_bytes >= sizeof(text) - 1)
413    fail("bad value of first_chunk_bytes");
414  if (_XML_Parse_SINGLE_BYTES(g_parser, text, first_chunk_bytes, XML_FALSE)
415      == XML_STATUS_ERROR)
416    xml_failure(g_parser);
417  else {
418    enum XML_Status rc;
419    rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
420                                 sizeof(text) - first_chunk_bytes - 1,
421                                 XML_TRUE);
422    if (rc == XML_STATUS_ERROR)
423      xml_failure(g_parser);
424  }
425}
426END_TEST
427
428/* Test that an outright lie in the encoding is faulted */
429START_TEST(test_not_utf16) {
430  const char *text = "<?xml version='1.0' encoding='utf-16'?>"
431                     "<doc>Hi</doc>";
432
433  /* Use a handler to provoke the appropriate code paths */
434  XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
435  expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
436                 "UTF-16 declared in UTF-8 not faulted");
437}
438END_TEST
439
440/* Test that an unknown encoding is rejected */
441START_TEST(test_bad_encoding) {
442  const char *text = "<doc>Hi</doc>";
443
444  if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
445    fail("XML_SetEncoding failed");
446  expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
447                 "Unknown encoding not faulted");
448}
449END_TEST
450
451/* Regression test for SF bug #481609, #774028. */
452START_TEST(test_latin1_umlauts) {
453  const char *text
454      = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
455        "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
456        "  >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
457#ifdef XML_UNICODE
458  /* Expected results in UTF-16 */
459  const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
460      XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
461#else
462  /* Expected results in UTF-8 */
463  const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
464      XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
465#endif
466
467  run_character_check(text, expected);
468  XML_ParserReset(g_parser, NULL);
469  run_attribute_check(text, expected);
470  /* Repeat with a default handler */
471  XML_ParserReset(g_parser, NULL);
472  XML_SetDefaultHandler(g_parser, dummy_default_handler);
473  run_character_check(text, expected);
474  XML_ParserReset(g_parser, NULL);
475  XML_SetDefaultHandler(g_parser, dummy_default_handler);
476  run_attribute_check(text, expected);
477}
478END_TEST
479
480/* Test that an element name with a 4-byte UTF-8 character is rejected */
481START_TEST(test_long_utf8_character) {
482  const char *text
483      = "<?xml version='1.0' encoding='utf-8'?>\n"
484        /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
485        "<do\xf0\x90\x80\x80/>";
486  expect_failure(text, XML_ERROR_INVALID_TOKEN,
487                 "4-byte UTF-8 character in element name not faulted");
488}
489END_TEST
490
491/* Test that a long latin-1 attribute (too long to convert in one go)
492 * is correctly converted
493 */
494START_TEST(test_long_latin1_attribute) {
495  const char *text
496      = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
497        "<doc att='"
498        /* 64 characters per line */
499        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
500        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
501        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
502        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
503        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
504        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
505        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
506        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
507        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
508        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
509        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
510        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
511        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
512        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
513        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
514        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
515        /* Last character splits across a buffer boundary */
516        "\xe4'>\n</doc>";
517
518  const XML_Char *expected =
519      /* 64 characters per line */
520      /* clang-format off */
521        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
522        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
523        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
524        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
525        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
526        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
527        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
528        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
529        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
530        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
531        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
532        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
533        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
534        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
535        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
536        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
537  /* clang-format on */
538#ifdef XML_UNICODE
539                                                  XCS("\x00e4");
540#else
541                                                  XCS("\xc3\xa4");
542#endif
543
544  run_attribute_check(text, expected);
545}
546END_TEST
547
548/* Test that a long ASCII attribute (too long to convert in one go)
549 * is correctly converted
550 */
551START_TEST(test_long_ascii_attribute) {
552  const char *text
553      = "<?xml version='1.0' encoding='us-ascii'?>\n"
554        "<doc att='"
555        /* 64 characters per line */
556        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
557        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
558        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
559        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
560        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
561        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
562        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
563        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
564        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
565        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
566        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
567        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
568        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
569        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
570        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
571        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
572        "01234'>\n</doc>";
573  const XML_Char *expected =
574      /* 64 characters per line */
575      /* clang-format off */
576        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
577        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
578        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
579        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
580        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
581        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
582        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
583        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
584        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
585        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
586        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
587        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
588        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
589        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
590        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
591        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
592        XCS("01234");
593  /* clang-format on */
594
595  run_attribute_check(text, expected);
596}
597END_TEST
598
599/* Regression test #1 for SF bug #653180. */
600START_TEST(test_line_number_after_parse) {
601  const char *text = "<tag>\n"
602                     "\n"
603                     "\n</tag>";
604  XML_Size lineno;
605
606  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
607      == XML_STATUS_ERROR)
608    xml_failure(g_parser);
609  lineno = XML_GetCurrentLineNumber(g_parser);
610  if (lineno != 4) {
611    char buffer[100];
612    snprintf(buffer, sizeof(buffer),
613             "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
614    fail(buffer);
615  }
616}
617END_TEST
618
619/* Regression test #2 for SF bug #653180. */
620START_TEST(test_column_number_after_parse) {
621  const char *text = "<tag></tag>";
622  XML_Size colno;
623
624  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
625      == XML_STATUS_ERROR)
626    xml_failure(g_parser);
627  colno = XML_GetCurrentColumnNumber(g_parser);
628  if (colno != 11) {
629    char buffer[100];
630    snprintf(buffer, sizeof(buffer),
631             "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
632    fail(buffer);
633  }
634}
635END_TEST
636
637/* Regression test #3 for SF bug #653180. */
638START_TEST(test_line_and_column_numbers_inside_handlers) {
639  const char *text = "<a>\n"      /* Unix end-of-line */
640                     "  <b>\r\n"  /* Windows end-of-line */
641                     "    <c/>\r" /* Mac OS end-of-line */
642                     "  </b>\n"
643                     "  <d>\n"
644                     "    <f/>\n"
645                     "  </d>\n"
646                     "</a>";
647  const StructDataEntry expected[]
648      = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
649         {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
650         {XCS("b"), 2, 4, STRUCT_END_TAG},   {XCS("d"), 2, 5, STRUCT_START_TAG},
651         {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
652         {XCS("d"), 2, 7, STRUCT_END_TAG},   {XCS("a"), 0, 8, STRUCT_END_TAG}};
653  const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
654  StructData storage;
655
656  StructData_Init(&storage);
657  XML_SetUserData(g_parser, &storage);
658  XML_SetStartElementHandler(g_parser, start_element_event_handler2);
659  XML_SetEndElementHandler(g_parser, end_element_event_handler2);
660  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
661      == XML_STATUS_ERROR)
662    xml_failure(g_parser);
663
664  StructData_CheckItems(&storage, expected, expected_count);
665  StructData_Dispose(&storage);
666}
667END_TEST
668
669/* Regression test #4 for SF bug #653180. */
670START_TEST(test_line_number_after_error) {
671  const char *text = "<a>\n"
672                     "  <b>\n"
673                     "  </a>"; /* missing </b> */
674  XML_Size lineno;
675  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
676      != XML_STATUS_ERROR)
677    fail("Expected a parse error");
678
679  lineno = XML_GetCurrentLineNumber(g_parser);
680  if (lineno != 3) {
681    char buffer[100];
682    snprintf(buffer, sizeof(buffer),
683             "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
684    fail(buffer);
685  }
686}
687END_TEST
688
689/* Regression test #5 for SF bug #653180. */
690START_TEST(test_column_number_after_error) {
691  const char *text = "<a>\n"
692                     "  <b>\n"
693                     "  </a>"; /* missing </b> */
694  XML_Size colno;
695  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
696      != XML_STATUS_ERROR)
697    fail("Expected a parse error");
698
699  colno = XML_GetCurrentColumnNumber(g_parser);
700  if (colno != 4) {
701    char buffer[100];
702    snprintf(buffer, sizeof(buffer),
703             "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
704    fail(buffer);
705  }
706}
707END_TEST
708
709/* Regression test for SF bug #478332. */
710START_TEST(test_really_long_lines) {
711  /* This parses an input line longer than INIT_DATA_BUF_SIZE
712     characters long (defined to be 1024 in xmlparse.c).  We take a
713     really cheesy approach to building the input buffer, because
714     this avoids writing bugs in buffer-filling code.
715  */
716  const char *text
717      = "<e>"
718        /* 64 chars */
719        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
720        /* until we have at least 1024 characters on the line: */
721        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
722        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
723        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
724        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
725        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
726        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
727        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
728        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
729        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
730        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
731        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
732        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
733        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
734        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
735        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
736        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
737        "</e>";
738  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
739      == XML_STATUS_ERROR)
740    xml_failure(g_parser);
741}
742END_TEST
743
744/* Test cdata processing across a buffer boundary */
745START_TEST(test_really_long_encoded_lines) {
746  /* As above, except that we want to provoke an output buffer
747   * overflow with a non-trivial encoding.  For this we need to pass
748   * the whole cdata in one go, not byte-by-byte.
749   */
750  void *buffer;
751  const char *text
752      = "<?xml version='1.0' encoding='iso-8859-1'?>"
753        "<e>"
754        /* 64 chars */
755        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
756        /* until we have at least 1024 characters on the line: */
757        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
758        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
759        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
760        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
761        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
762        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
763        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
764        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
765        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
766        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
767        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
768        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
769        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
770        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
771        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
772        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
773        "</e>";
774  int parse_len = (int)strlen(text);
775
776  /* Need a cdata handler to provoke the code path we want to test */
777  XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
778  buffer = XML_GetBuffer(g_parser, parse_len);
779  if (buffer == NULL)
780    fail("Could not allocate parse buffer");
781  assert(buffer != NULL);
782  memcpy(buffer, text, parse_len);
783  if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
784    xml_failure(g_parser);
785}
786END_TEST
787
788/*
789 * Element event tests.
790 */
791
792START_TEST(test_end_element_events) {
793  const char *text = "<a><b><c/></b><d><f/></d></a>";
794  const XML_Char *expected = XCS("/c/b/f/d/a");
795  CharData storage;
796
797  CharData_Init(&storage);
798  XML_SetUserData(g_parser, &storage);
799  XML_SetEndElementHandler(g_parser, end_element_event_handler);
800  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
801      == XML_STATUS_ERROR)
802    xml_failure(g_parser);
803  CharData_CheckXMLChars(&storage, expected);
804}
805END_TEST
806
807/*
808 * Attribute tests.
809 */
810
811/* Helper used by the following tests; this checks any "attr" and "refs"
812   attributes to make sure whitespace has been normalized.
813
814   Return true if whitespace has been normalized in a string, using
815   the rules for attribute value normalization.  The 'is_cdata' flag
816   is needed since CDATA attributes don't need to have multiple
817   whitespace characters collapsed to a single space, while other
818   attribute data types do.  (Section 3.3.3 of the recommendation.)
819*/
820static int
821is_whitespace_normalized(const XML_Char *s, int is_cdata) {
822  int blanks = 0;
823  int at_start = 1;
824  while (*s) {
825    if (*s == XCS(' '))
826      ++blanks;
827    else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
828      return 0;
829    else {
830      if (at_start) {
831        at_start = 0;
832        if (blanks && ! is_cdata)
833          /* illegal leading blanks */
834          return 0;
835      } else if (blanks > 1 && ! is_cdata)
836        return 0;
837      blanks = 0;
838    }
839    ++s;
840  }
841  if (blanks && ! is_cdata)
842    return 0;
843  return 1;
844}
845
846/* Check the attribute whitespace checker: */
847START_TEST(test_helper_is_whitespace_normalized) {
848  assert(is_whitespace_normalized(XCS("abc"), 0));
849  assert(is_whitespace_normalized(XCS("abc"), 1));
850  assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
851  assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
852  assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
853  assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
854  assert(! is_whitespace_normalized(XCS("abc  def ghi"), 0));
855  assert(is_whitespace_normalized(XCS("abc  def ghi"), 1));
856  assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
857  assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
858  assert(! is_whitespace_normalized(XCS(" "), 0));
859  assert(is_whitespace_normalized(XCS(" "), 1));
860  assert(! is_whitespace_normalized(XCS("\t"), 0));
861  assert(! is_whitespace_normalized(XCS("\t"), 1));
862  assert(! is_whitespace_normalized(XCS("\n"), 0));
863  assert(! is_whitespace_normalized(XCS("\n"), 1));
864  assert(! is_whitespace_normalized(XCS("\r"), 0));
865  assert(! is_whitespace_normalized(XCS("\r"), 1));
866  assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
867}
868END_TEST
869
870static void XMLCALL
871check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
872                                          const XML_Char **atts) {
873  int i;
874  UNUSED_P(userData);
875  UNUSED_P(name);
876  for (i = 0; atts[i] != NULL; i += 2) {
877    const XML_Char *attrname = atts[i];
878    const XML_Char *value = atts[i + 1];
879    if (xcstrcmp(XCS("attr"), attrname) == 0
880        || xcstrcmp(XCS("ents"), attrname) == 0
881        || xcstrcmp(XCS("refs"), attrname) == 0) {
882      if (! is_whitespace_normalized(value, 0)) {
883        char buffer[256];
884        snprintf(buffer, sizeof(buffer),
885                 "attribute value not normalized: %" XML_FMT_STR
886                 "='%" XML_FMT_STR "'",
887                 attrname, value);
888        fail(buffer);
889      }
890    }
891  }
892}
893
894START_TEST(test_attr_whitespace_normalization) {
895  const char *text
896      = "<!DOCTYPE doc [\n"
897        "  <!ATTLIST doc\n"
898        "            attr NMTOKENS #REQUIRED\n"
899        "            ents ENTITIES #REQUIRED\n"
900        "            refs IDREFS   #REQUIRED>\n"
901        "]>\n"
902        "<doc attr='    a  b c\t\td\te\t' refs=' id-1   \t  id-2\t\t'  \n"
903        "     ents=' ent-1   \t\r\n"
904        "            ent-2  ' >\n"
905        "  <e id='id-1'/>\n"
906        "  <e id='id-2'/>\n"
907        "</doc>";
908
909  XML_SetStartElementHandler(g_parser,
910                             check_attr_contains_normalized_whitespace);
911  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
912      == XML_STATUS_ERROR)
913    xml_failure(g_parser);
914}
915END_TEST
916
917/*
918 * XML declaration tests.
919 */
920
921START_TEST(test_xmldecl_misplaced) {
922  expect_failure("\n"
923                 "<?xml version='1.0'?>\n"
924                 "<a/>",
925                 XML_ERROR_MISPLACED_XML_PI,
926                 "failed to report misplaced XML declaration");
927}
928END_TEST
929
930START_TEST(test_xmldecl_invalid) {
931  expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
932                 "Failed to report invalid XML declaration");
933}
934END_TEST
935
936START_TEST(test_xmldecl_missing_attr) {
937  expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
938                 "Failed to report missing XML declaration attribute");
939}
940END_TEST
941
942START_TEST(test_xmldecl_missing_value) {
943  expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
944                 "<doc/>",
945                 XML_ERROR_XML_DECL,
946                 "Failed to report missing attribute value");
947}
948END_TEST
949
950/* Regression test for SF bug #584832. */
951START_TEST(test_unknown_encoding_internal_entity) {
952  const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
953                     "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
954                     "<test a='&foo;'/>";
955
956  XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
957  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
958      == XML_STATUS_ERROR)
959    xml_failure(g_parser);
960}
961END_TEST
962
963/* Test unrecognised encoding handler */
964START_TEST(test_unrecognised_encoding_internal_entity) {
965  const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
966                     "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
967                     "<test a='&foo;'/>";
968
969  XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
970  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
971      != XML_STATUS_ERROR)
972    fail("Unrecognised encoding not rejected");
973}
974END_TEST
975
976/* Regression test for SF bug #620106. */
977START_TEST(test_ext_entity_set_encoding) {
978  const char *text = "<!DOCTYPE doc [\n"
979                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
980                     "]>\n"
981                     "<doc>&en;</doc>";
982  ExtTest test_data
983      = {/* This text says it's an unsupported encoding, but it's really
984            UTF-8, which we tell Expat using XML_SetEncoding().
985         */
986         "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
987#ifdef XML_UNICODE
988  const XML_Char *expected = XCS("\x00e9");
989#else
990  const XML_Char *expected = XCS("\xc3\xa9");
991#endif
992
993  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
994  run_ext_character_check(text, &test_data, expected);
995}
996END_TEST
997
998/* Test external entities with no handler */
999START_TEST(test_ext_entity_no_handler) {
1000  const char *text = "<!DOCTYPE doc [\n"
1001                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1002                     "]>\n"
1003                     "<doc>&en;</doc>";
1004
1005  XML_SetDefaultHandler(g_parser, dummy_default_handler);
1006  run_character_check(text, XCS(""));
1007}
1008END_TEST
1009
1010/* Test UTF-8 BOM is accepted */
1011START_TEST(test_ext_entity_set_bom) {
1012  const char *text = "<!DOCTYPE doc [\n"
1013                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1014                     "]>\n"
1015                     "<doc>&en;</doc>";
1016  ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
1017                       "<?xml encoding='iso-8859-3'?>"
1018                       "\xC3\xA9",
1019                       XCS("utf-8"), NULL};
1020#ifdef XML_UNICODE
1021  const XML_Char *expected = XCS("\x00e9");
1022#else
1023  const XML_Char *expected = XCS("\xc3\xa9");
1024#endif
1025
1026  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1027  run_ext_character_check(text, &test_data, expected);
1028}
1029END_TEST
1030
1031/* Test that bad encodings are faulted */
1032START_TEST(test_ext_entity_bad_encoding) {
1033  const char *text = "<!DOCTYPE doc [\n"
1034                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1035                     "]>\n"
1036                     "<doc>&en;</doc>";
1037  ExtFaults fault
1038      = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
1039         XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
1040
1041  XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1042  XML_SetUserData(g_parser, &fault);
1043  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1044                 "Bad encoding should not have been accepted");
1045}
1046END_TEST
1047
1048/* Try handing an invalid encoding to an external entity parser */
1049START_TEST(test_ext_entity_bad_encoding_2) {
1050  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1051                     "<!DOCTYPE doc SYSTEM 'foo'>\n"
1052                     "<doc>&entity;</doc>";
1053  ExtFaults fault
1054      = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
1055         XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
1056
1057  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1058  XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1059  XML_SetUserData(g_parser, &fault);
1060  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1061                 "Bad encoding not faulted in external entity handler");
1062}
1063END_TEST
1064
1065/* Test that no error is reported for unknown entities if we don't
1066   read an external subset.  This was fixed in Expat 1.95.5.
1067*/
1068START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
1069  const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
1070                     "<doc>&entity;</doc>";
1071
1072  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1073      == XML_STATUS_ERROR)
1074    xml_failure(g_parser);
1075}
1076END_TEST
1077
1078/* Test that an error is reported for unknown entities if we don't
1079   have an external subset.
1080*/
1081START_TEST(test_wfc_undeclared_entity_no_external_subset) {
1082  expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
1083                 "Parser did not report undefined entity w/out a DTD.");
1084}
1085END_TEST
1086
1087/* Test that an error is reported for unknown entities if we don't
1088   read an external subset, but have been declared standalone.
1089*/
1090START_TEST(test_wfc_undeclared_entity_standalone) {
1091  const char *text
1092      = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1093        "<!DOCTYPE doc SYSTEM 'foo'>\n"
1094        "<doc>&entity;</doc>";
1095
1096  expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1097                 "Parser did not report undefined entity (standalone).");
1098}
1099END_TEST
1100
1101/* Test that an error is reported for unknown entities if we have read
1102   an external subset, and standalone is true.
1103*/
1104START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
1105  const char *text
1106      = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1107        "<!DOCTYPE doc SYSTEM 'foo'>\n"
1108        "<doc>&entity;</doc>";
1109  ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1110
1111  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1112  XML_SetUserData(g_parser, &test_data);
1113  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1114  expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1115                 "Parser did not report undefined entity (external DTD).");
1116}
1117END_TEST
1118
1119/* Test that external entity handling is not done if the parsing flag
1120 * is set to UNLESS_STANDALONE
1121 */
1122START_TEST(test_entity_with_external_subset_unless_standalone) {
1123  const char *text
1124      = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1125        "<!DOCTYPE doc SYSTEM 'foo'>\n"
1126        "<doc>&entity;</doc>";
1127  ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
1128
1129  XML_SetParamEntityParsing(g_parser,
1130                            XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1131  XML_SetUserData(g_parser, &test_data);
1132  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1133  expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1134                 "Parser did not report undefined entity");
1135}
1136END_TEST
1137
1138/* Test that no error is reported for unknown entities if we have read
1139   an external subset, and standalone is false.
1140*/
1141START_TEST(test_wfc_undeclared_entity_with_external_subset) {
1142  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1143                     "<!DOCTYPE doc SYSTEM 'foo'>\n"
1144                     "<doc>&entity;</doc>";
1145  ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1146
1147  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1148  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1149  run_ext_character_check(text, &test_data, XCS(""));
1150}
1151END_TEST
1152
1153/* Test that an error is reported if our NotStandalone handler fails */
1154START_TEST(test_not_standalone_handler_reject) {
1155  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1156                     "<!DOCTYPE doc SYSTEM 'foo'>\n"
1157                     "<doc>&entity;</doc>";
1158  ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1159
1160  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1161  XML_SetUserData(g_parser, &test_data);
1162  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1163  XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1164  expect_failure(text, XML_ERROR_NOT_STANDALONE,
1165                 "NotStandalone handler failed to reject");
1166
1167  /* Try again but without external entity handling */
1168  XML_ParserReset(g_parser, NULL);
1169  XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1170  expect_failure(text, XML_ERROR_NOT_STANDALONE,
1171                 "NotStandalone handler failed to reject");
1172}
1173END_TEST
1174
1175/* Test that no error is reported if our NotStandalone handler succeeds */
1176START_TEST(test_not_standalone_handler_accept) {
1177  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1178                     "<!DOCTYPE doc SYSTEM 'foo'>\n"
1179                     "<doc>&entity;</doc>";
1180  ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1181
1182  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1183  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1184  XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1185  run_ext_character_check(text, &test_data, XCS(""));
1186
1187  /* Repeat without the external entity handler */
1188  XML_ParserReset(g_parser, NULL);
1189  XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1190  run_character_check(text, XCS(""));
1191}
1192END_TEST
1193
1194START_TEST(test_wfc_no_recursive_entity_refs) {
1195  const char *text = "<!DOCTYPE doc [\n"
1196                     "  <!ENTITY entity '&#38;entity;'>\n"
1197                     "]>\n"
1198                     "<doc>&entity;</doc>";
1199
1200  expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
1201                 "Parser did not report recursive entity reference.");
1202}
1203END_TEST
1204
1205/* Test incomplete external entities are faulted */
1206START_TEST(test_ext_entity_invalid_parse) {
1207  const char *text = "<!DOCTYPE doc [\n"
1208                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1209                     "]>\n"
1210                     "<doc>&en;</doc>";
1211  const ExtFaults faults[]
1212      = {{"<", "Incomplete element declaration not faulted", NULL,
1213          XML_ERROR_UNCLOSED_TOKEN},
1214         {"<\xe2\x82", /* First two bytes of a three-byte char */
1215          "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
1216         {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
1217          XML_ERROR_PARTIAL_CHAR},
1218         {NULL, NULL, NULL, XML_ERROR_NONE}};
1219  const ExtFaults *fault = faults;
1220
1221  for (; fault->parse_text != NULL; fault++) {
1222    set_subtest("\"%s\"", fault->parse_text);
1223    XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1224    XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1225    XML_SetUserData(g_parser, (void *)fault);
1226    expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1227                   "Parser did not report external entity error");
1228    XML_ParserReset(g_parser, NULL);
1229  }
1230}
1231END_TEST
1232
1233/* Regression test for SF bug #483514. */
1234START_TEST(test_dtd_default_handling) {
1235  const char *text = "<!DOCTYPE doc [\n"
1236                     "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
1237                     "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
1238                     "<!ELEMENT doc EMPTY>\n"
1239                     "<!ATTLIST doc a CDATA #IMPLIED>\n"
1240                     "<?pi in dtd?>\n"
1241                     "<!--comment in dtd-->\n"
1242                     "]><doc/>";
1243
1244  XML_SetDefaultHandler(g_parser, accumulate_characters);
1245  XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
1246  XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
1247  XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
1248  XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
1249  XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
1250  XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
1251  XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
1252  XML_SetCommentHandler(g_parser, dummy_comment_handler);
1253  XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1254  XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1255  run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
1256}
1257END_TEST
1258
1259/* Test handling of attribute declarations */
1260START_TEST(test_dtd_attr_handling) {
1261  const char *prolog = "<!DOCTYPE doc [\n"
1262                       "<!ELEMENT doc EMPTY>\n";
1263  AttTest attr_data[]
1264      = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
1265          "]>"
1266          "<doc a='two'/>",
1267          XCS("doc"), XCS("a"),
1268          XCS("(one|two|three)"), /* Extraneous spaces will be removed */
1269          NULL, XML_TRUE},
1270         {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
1271          "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
1272          "]>"
1273          "<doc/>",
1274          XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
1275         {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
1276          "]>"
1277          "<doc/>",
1278          XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
1279         {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
1280          "]>"
1281          "<doc/>",
1282          XCS("doc"), XCS("a"), XCS("CDATA"),
1283#ifdef XML_UNICODE
1284          XCS("\x06f2"),
1285#else
1286          XCS("\xdb\xb2"),
1287#endif
1288          XML_FALSE},
1289         {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
1290  AttTest *test;
1291
1292  for (test = attr_data; test->definition != NULL; test++) {
1293    set_subtest("%s", test->definition);
1294    XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
1295    XML_SetUserData(g_parser, test);
1296    if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
1297                                XML_FALSE)
1298        == XML_STATUS_ERROR)
1299      xml_failure(g_parser);
1300    if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
1301                                (int)strlen(test->definition), XML_TRUE)
1302        == XML_STATUS_ERROR)
1303      xml_failure(g_parser);
1304    XML_ParserReset(g_parser, NULL);
1305  }
1306}
1307END_TEST
1308
1309/* See related SF bug #673791.
1310   When namespace processing is enabled, setting the namespace URI for
1311   a prefix is not allowed; this test ensures that it *is* allowed
1312   when namespace processing is not enabled.
1313   (See Namespaces in XML, section 2.)
1314*/
1315START_TEST(test_empty_ns_without_namespaces) {
1316  const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
1317                     "  <e xmlns:prefix=''/>\n"
1318                     "</doc>";
1319
1320  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1321      == XML_STATUS_ERROR)
1322    xml_failure(g_parser);
1323}
1324END_TEST
1325
1326/* Regression test for SF bug #824420.
1327   Checks that an xmlns:prefix attribute set in an attribute's default
1328   value isn't misinterpreted.
1329*/
1330START_TEST(test_ns_in_attribute_default_without_namespaces) {
1331  const char *text = "<!DOCTYPE e:element [\n"
1332                     "  <!ATTLIST e:element\n"
1333                     "    xmlns:e CDATA 'http://example.org/'>\n"
1334                     "      ]>\n"
1335                     "<e:element/>";
1336
1337  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1338      == XML_STATUS_ERROR)
1339    xml_failure(g_parser);
1340}
1341END_TEST
1342
1343/* Regression test for SF bug #1515266: missing check of stopped
1344   parser in doContext() 'for' loop. */
1345START_TEST(test_stop_parser_between_char_data_calls) {
1346  /* The sample data must be big enough that there are two calls to
1347     the character data handler from within the inner "for" loop of
1348     the XML_TOK_DATA_CHARS case in doContent(), and the character
1349     handler must stop the parser and clear the character data
1350     handler.
1351  */
1352  const char *text = long_character_data_text;
1353
1354  XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1355  g_resumable = XML_FALSE;
1356  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1357      != XML_STATUS_ERROR)
1358    xml_failure(g_parser);
1359  if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
1360    xml_failure(g_parser);
1361}
1362END_TEST
1363
1364/* Regression test for SF bug #1515266: missing check of stopped
1365   parser in doContext() 'for' loop. */
1366START_TEST(test_suspend_parser_between_char_data_calls) {
1367  /* The sample data must be big enough that there are two calls to
1368     the character data handler from within the inner "for" loop of
1369     the XML_TOK_DATA_CHARS case in doContent(), and the character
1370     handler must stop the parser and clear the character data
1371     handler.
1372  */
1373  const char *text = long_character_data_text;
1374
1375  XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1376  g_resumable = XML_TRUE;
1377  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1378      != XML_STATUS_SUSPENDED)
1379    xml_failure(g_parser);
1380  if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1381    xml_failure(g_parser);
1382  /* Try parsing directly */
1383  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1384      != XML_STATUS_ERROR)
1385    fail("Attempt to continue parse while suspended not faulted");
1386  if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
1387    fail("Suspended parse not faulted with correct error");
1388}
1389END_TEST
1390
1391/* Test repeated calls to XML_StopParser are handled correctly */
1392START_TEST(test_repeated_stop_parser_between_char_data_calls) {
1393  const char *text = long_character_data_text;
1394
1395  XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1396  g_resumable = XML_FALSE;
1397  g_abortable = XML_FALSE;
1398  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1399      != XML_STATUS_ERROR)
1400    fail("Failed to double-stop parser");
1401
1402  XML_ParserReset(g_parser, NULL);
1403  XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1404  g_resumable = XML_TRUE;
1405  g_abortable = XML_FALSE;
1406  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1407      != XML_STATUS_SUSPENDED)
1408    fail("Failed to double-suspend parser");
1409
1410  XML_ParserReset(g_parser, NULL);
1411  XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1412  g_resumable = XML_TRUE;
1413  g_abortable = XML_TRUE;
1414  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1415      != XML_STATUS_ERROR)
1416    fail("Failed to suspend-abort parser");
1417}
1418END_TEST
1419
1420START_TEST(test_good_cdata_ascii) {
1421  const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1422  const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
1423
1424  CharData storage;
1425  CharData_Init(&storage);
1426  XML_SetUserData(g_parser, &storage);
1427  XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1428  /* Add start and end handlers for coverage */
1429  XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1430  XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1431
1432  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1433      == XML_STATUS_ERROR)
1434    xml_failure(g_parser);
1435  CharData_CheckXMLChars(&storage, expected);
1436
1437  /* Try again, this time with a default handler */
1438  XML_ParserReset(g_parser, NULL);
1439  CharData_Init(&storage);
1440  XML_SetUserData(g_parser, &storage);
1441  XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1442  XML_SetDefaultHandler(g_parser, dummy_default_handler);
1443
1444  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1445      == XML_STATUS_ERROR)
1446    xml_failure(g_parser);
1447  CharData_CheckXMLChars(&storage, expected);
1448}
1449END_TEST
1450
1451START_TEST(test_good_cdata_utf16) {
1452  /* Test data is:
1453   *   <?xml version='1.0' encoding='utf-16'?>
1454   *   <a><![CDATA[hello]]></a>
1455   */
1456  const char text[]
1457      = "\0<\0?\0x\0m\0l\0"
1458        " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1459        " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1460        "1\0"
1461        "6\0'"
1462        "\0?\0>\0\n"
1463        "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1464  const XML_Char *expected = XCS("hello");
1465
1466  CharData storage;
1467  CharData_Init(&storage);
1468  XML_SetUserData(g_parser, &storage);
1469  XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1470
1471  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1472      == XML_STATUS_ERROR)
1473    xml_failure(g_parser);
1474  CharData_CheckXMLChars(&storage, expected);
1475}
1476END_TEST
1477
1478START_TEST(test_good_cdata_utf16_le) {
1479  /* Test data is:
1480   *   <?xml version='1.0' encoding='utf-16'?>
1481   *   <a><![CDATA[hello]]></a>
1482   */
1483  const char text[]
1484      = "<\0?\0x\0m\0l\0"
1485        " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1486        " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1487        "1\0"
1488        "6\0'"
1489        "\0?\0>\0\n"
1490        "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
1491  const XML_Char *expected = XCS("hello");
1492
1493  CharData storage;
1494  CharData_Init(&storage);
1495  XML_SetUserData(g_parser, &storage);
1496  XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1497
1498  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1499      == XML_STATUS_ERROR)
1500    xml_failure(g_parser);
1501  CharData_CheckXMLChars(&storage, expected);
1502}
1503END_TEST
1504
1505/* Test UTF16 conversion of a long cdata string */
1506
1507/* 16 characters: handy macro to reduce visual clutter */
1508#define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
1509
1510START_TEST(test_long_cdata_utf16) {
1511  /* Test data is:
1512   * <?xlm version='1.0' encoding='utf-16'?>
1513   * <a><![CDATA[
1514   * ABCDEFGHIJKLMNOP
1515   * ]]></a>
1516   */
1517  const char text[]
1518      = "\0<\0?\0x\0m\0l\0 "
1519        "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
1520        "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
1521        "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1522      /* 64 characters per line */
1523      /* clang-format off */
1524        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1525        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1526        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1527        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1528        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1529        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1530        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1531        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1532        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1533        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1534        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1535        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1536        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1537        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1538        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1539        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1540        A_TO_P_IN_UTF16
1541        /* clang-format on */
1542        "\0]\0]\0>\0<\0/\0a\0>";
1543  const XML_Char *expected =
1544      /* clang-format off */
1545        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1546        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1547        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1548        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1549        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1550        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1551        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1552        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1553        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1554        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1555        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1556        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1557        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1558        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1559        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1560        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1561        XCS("ABCDEFGHIJKLMNOP");
1562  /* clang-format on */
1563  CharData storage;
1564  void *buffer;
1565
1566  CharData_Init(&storage);
1567  XML_SetUserData(g_parser, &storage);
1568  XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1569  buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
1570  if (buffer == NULL)
1571    fail("Could not allocate parse buffer");
1572  assert(buffer != NULL);
1573  memcpy(buffer, text, sizeof(text) - 1);
1574  if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1575    xml_failure(g_parser);
1576  CharData_CheckXMLChars(&storage, expected);
1577}
1578END_TEST
1579
1580/* Test handling of multiple unit UTF-16 characters */
1581START_TEST(test_multichar_cdata_utf16) {
1582  /* Test data is:
1583   *   <?xml version='1.0' encoding='utf-16'?>
1584   *   <a><![CDATA[{MINIM}{CROTCHET}]]></a>
1585   *
1586   * where {MINIM} is U+1d15e (a minim or half-note)
1587   *   UTF-16: 0xd834 0xdd5e
1588   *   UTF-8:  0xf0 0x9d 0x85 0x9e
1589   * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
1590   *   UTF-16: 0xd834 0xdd5f
1591   *   UTF-8:  0xf0 0x9d 0x85 0x9f
1592   */
1593  const char text[] = "\0<\0?\0x\0m\0l\0"
1594                      " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1595                      " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1596                      "1\0"
1597                      "6\0'"
1598                      "\0?\0>\0\n"
1599                      "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1600                      "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
1601                      "\0]\0]\0>\0<\0/\0a\0>";
1602#ifdef XML_UNICODE
1603  const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
1604#else
1605  const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
1606#endif
1607  CharData storage;
1608
1609  CharData_Init(&storage);
1610  XML_SetUserData(g_parser, &storage);
1611  XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1612
1613  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1614      == XML_STATUS_ERROR)
1615    xml_failure(g_parser);
1616  CharData_CheckXMLChars(&storage, expected);
1617}
1618END_TEST
1619
1620/* Test that an element name with a UTF-16 surrogate pair is rejected */
1621START_TEST(test_utf16_bad_surrogate_pair) {
1622  /* Test data is:
1623   *   <?xml version='1.0' encoding='utf-16'?>
1624   *   <a><![CDATA[{BADLINB}]]></a>
1625   *
1626   * where {BADLINB} is U+10000 (the first Linear B character)
1627   * with the UTF-16 surrogate pair in the wrong order, i.e.
1628   *   0xdc00 0xd800
1629   */
1630  const char text[] = "\0<\0?\0x\0m\0l\0"
1631                      " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1632                      " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1633                      "1\0"
1634                      "6\0'"
1635                      "\0?\0>\0\n"
1636                      "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1637                      "\xdc\x00\xd8\x00"
1638                      "\0]\0]\0>\0<\0/\0a\0>";
1639
1640  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1641      != XML_STATUS_ERROR)
1642    fail("Reversed UTF-16 surrogate pair not faulted");
1643  if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
1644    xml_failure(g_parser);
1645}
1646END_TEST
1647
1648START_TEST(test_bad_cdata) {
1649  struct CaseData {
1650    const char *text;
1651    enum XML_Error expectedError;
1652  };
1653
1654  struct CaseData cases[]
1655      = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
1656         {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1657         {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1658         {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1659         {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1660         {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1661         {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1662         {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1663
1664         {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1665         {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1666         {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1667
1668         {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1669         {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN},  /* ?! */
1670         {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1671         {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1672         {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1673         {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1674         {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1675
1676         {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1677         {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1678         {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
1679
1680  size_t i = 0;
1681  for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1682    set_subtest("%s", cases[i].text);
1683    const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1684        g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
1685    const enum XML_Error actualError = XML_GetErrorCode(g_parser);
1686
1687    assert(actualStatus == XML_STATUS_ERROR);
1688
1689    if (actualError != cases[i].expectedError) {
1690      char message[100];
1691      snprintf(message, sizeof(message),
1692               "Expected error %d but got error %d for case %u: \"%s\"\n",
1693               cases[i].expectedError, actualError, (unsigned int)i + 1,
1694               cases[i].text);
1695      fail(message);
1696    }
1697
1698    XML_ParserReset(g_parser, NULL);
1699  }
1700}
1701END_TEST
1702
1703/* Test failures in UTF-16 CDATA */
1704START_TEST(test_bad_cdata_utf16) {
1705  struct CaseData {
1706    size_t text_bytes;
1707    const char *text;
1708    enum XML_Error expected_error;
1709  };
1710
1711  const char prolog[] = "\0<\0?\0x\0m\0l\0"
1712                        " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1713                        " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1714                        "1\0"
1715                        "6\0'"
1716                        "\0?\0>\0\n"
1717                        "\0<\0a\0>";
1718  struct CaseData cases[] = {
1719      {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
1720      {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
1721      {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
1722      {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
1723      {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
1724      {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
1725      {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
1726      {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
1727      {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
1728      {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
1729      {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
1730      {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
1731      {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1732      {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
1733      {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
1734      {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
1735      {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1736      {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1737      {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
1738      {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
1739      /* Now add a four-byte UTF-16 character */
1740      {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
1741       XML_ERROR_UNCLOSED_CDATA_SECTION},
1742      {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
1743      {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
1744       XML_ERROR_PARTIAL_CHAR},
1745      {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
1746       XML_ERROR_UNCLOSED_CDATA_SECTION}};
1747  size_t i;
1748
1749  for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1750    set_subtest("case %lu", (long unsigned)(i + 1));
1751    enum XML_Status actual_status;
1752    enum XML_Error actual_error;
1753
1754    if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
1755                                XML_FALSE)
1756        == XML_STATUS_ERROR)
1757      xml_failure(g_parser);
1758    actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
1759                                            (int)cases[i].text_bytes, XML_TRUE);
1760    assert(actual_status == XML_STATUS_ERROR);
1761    actual_error = XML_GetErrorCode(g_parser);
1762    if (actual_error != cases[i].expected_error) {
1763      char message[1024];
1764
1765      snprintf(message, sizeof(message),
1766               "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
1767               ") for case %lu\n",
1768               cases[i].expected_error,
1769               XML_ErrorString(cases[i].expected_error), actual_error,
1770               XML_ErrorString(actual_error), (long unsigned)(i + 1));
1771      fail(message);
1772    }
1773    XML_ParserReset(g_parser, NULL);
1774  }
1775}
1776END_TEST
1777
1778/* Test stopping the parser in cdata handler */
1779START_TEST(test_stop_parser_between_cdata_calls) {
1780  const char *text = long_cdata_text;
1781
1782  XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1783  g_resumable = XML_FALSE;
1784  expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
1785}
1786END_TEST
1787
1788/* Test suspending the parser in cdata handler */
1789START_TEST(test_suspend_parser_between_cdata_calls) {
1790  const char *text = long_cdata_text;
1791  enum XML_Status result;
1792
1793  XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1794  g_resumable = XML_TRUE;
1795  result = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
1796  if (result != XML_STATUS_SUSPENDED) {
1797    if (result == XML_STATUS_ERROR)
1798      xml_failure(g_parser);
1799    fail("Parse not suspended in CDATA handler");
1800  }
1801  if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1802    xml_failure(g_parser);
1803}
1804END_TEST
1805
1806/* Test memory allocation functions */
1807START_TEST(test_memory_allocation) {
1808  char *buffer = (char *)XML_MemMalloc(g_parser, 256);
1809  char *p;
1810
1811  if (buffer == NULL) {
1812    fail("Allocation failed");
1813  } else {
1814    /* Try writing to memory; some OSes try to cheat! */
1815    buffer[0] = 'T';
1816    buffer[1] = 'E';
1817    buffer[2] = 'S';
1818    buffer[3] = 'T';
1819    buffer[4] = '\0';
1820    if (strcmp(buffer, "TEST") != 0) {
1821      fail("Memory not writable");
1822    } else {
1823      p = (char *)XML_MemRealloc(g_parser, buffer, 512);
1824      if (p == NULL) {
1825        fail("Reallocation failed");
1826      } else {
1827        /* Write again, just to be sure */
1828        buffer = p;
1829        buffer[0] = 'V';
1830        if (strcmp(buffer, "VEST") != 0) {
1831          fail("Reallocated memory not writable");
1832        }
1833      }
1834    }
1835    XML_MemFree(g_parser, buffer);
1836  }
1837}
1838END_TEST
1839
1840/* Test XML_DefaultCurrent() passes handling on correctly */
1841START_TEST(test_default_current) {
1842  const char *text = "<doc>hell]</doc>";
1843  const char *entity_text = "<!DOCTYPE doc [\n"
1844                            "<!ENTITY entity '&#37;'>\n"
1845                            "]>\n"
1846                            "<doc>&entity;</doc>";
1847
1848  set_subtest("with defaulting");
1849  {
1850    struct handler_record_list storage;
1851    storage.count = 0;
1852    XML_SetDefaultHandler(g_parser, record_default_handler);
1853    XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1854    XML_SetUserData(g_parser, &storage);
1855    if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1856        == XML_STATUS_ERROR)
1857      xml_failure(g_parser);
1858    int i = 0;
1859    assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1860    // we should have gotten one or more cdata callbacks, totaling 5 chars
1861    int cdata_len_remaining = 5;
1862    while (cdata_len_remaining > 0) {
1863      const struct handler_record_entry *c_entry
1864          = handler_record_get(&storage, i++);
1865      assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
1866      assert_true(c_entry->arg > 0);
1867      assert_true(c_entry->arg <= cdata_len_remaining);
1868      cdata_len_remaining -= c_entry->arg;
1869      // default handler must follow, with the exact same len argument.
1870      assert_record_handler_called(&storage, i++, "record_default_handler",
1871                                   c_entry->arg);
1872    }
1873    assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1874    assert_true(storage.count == i);
1875  }
1876
1877  /* Again, without the defaulting */
1878  set_subtest("no defaulting");
1879  {
1880    struct handler_record_list storage;
1881    storage.count = 0;
1882    XML_ParserReset(g_parser, NULL);
1883    XML_SetDefaultHandler(g_parser, record_default_handler);
1884    XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
1885    XML_SetUserData(g_parser, &storage);
1886    if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1887        == XML_STATUS_ERROR)
1888      xml_failure(g_parser);
1889    int i = 0;
1890    assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1891    // we should have gotten one or more cdata callbacks, totaling 5 chars
1892    int cdata_len_remaining = 5;
1893    while (cdata_len_remaining > 0) {
1894      const struct handler_record_entry *c_entry
1895          = handler_record_get(&storage, i++);
1896      assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
1897      assert_true(c_entry->arg > 0);
1898      assert_true(c_entry->arg <= cdata_len_remaining);
1899      cdata_len_remaining -= c_entry->arg;
1900    }
1901    assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1902    assert_true(storage.count == i);
1903  }
1904
1905  /* Now with an internal entity to complicate matters */
1906  set_subtest("with internal entity");
1907  {
1908    struct handler_record_list storage;
1909    storage.count = 0;
1910    XML_ParserReset(g_parser, NULL);
1911    XML_SetDefaultHandler(g_parser, record_default_handler);
1912    XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1913    XML_SetUserData(g_parser, &storage);
1914    if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1915                                XML_TRUE)
1916        == XML_STATUS_ERROR)
1917      xml_failure(g_parser);
1918    /* The default handler suppresses the entity */
1919    assert_record_handler_called(&storage, 0, "record_default_handler", 9);
1920    assert_record_handler_called(&storage, 1, "record_default_handler", 1);
1921    assert_record_handler_called(&storage, 2, "record_default_handler", 3);
1922    assert_record_handler_called(&storage, 3, "record_default_handler", 1);
1923    assert_record_handler_called(&storage, 4, "record_default_handler", 1);
1924    assert_record_handler_called(&storage, 5, "record_default_handler", 1);
1925    assert_record_handler_called(&storage, 6, "record_default_handler", 8);
1926    assert_record_handler_called(&storage, 7, "record_default_handler", 1);
1927    assert_record_handler_called(&storage, 8, "record_default_handler", 6);
1928    assert_record_handler_called(&storage, 9, "record_default_handler", 1);
1929    assert_record_handler_called(&storage, 10, "record_default_handler", 7);
1930    assert_record_handler_called(&storage, 11, "record_default_handler", 1);
1931    assert_record_handler_called(&storage, 12, "record_default_handler", 1);
1932    assert_record_handler_called(&storage, 13, "record_default_handler", 1);
1933    assert_record_handler_called(&storage, 14, "record_default_handler", 1);
1934    assert_record_handler_called(&storage, 15, "record_default_handler", 1);
1935    assert_record_handler_called(&storage, 16, "record_default_handler", 5);
1936    assert_record_handler_called(&storage, 17, "record_default_handler", 8);
1937    assert_record_handler_called(&storage, 18, "record_default_handler", 6);
1938    assert_true(storage.count == 19);
1939  }
1940
1941  /* Again, with a skip handler */
1942  set_subtest("with skip handler");
1943  {
1944    struct handler_record_list storage;
1945    storage.count = 0;
1946    XML_ParserReset(g_parser, NULL);
1947    XML_SetDefaultHandler(g_parser, record_default_handler);
1948    XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1949    XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
1950    XML_SetUserData(g_parser, &storage);
1951    if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1952                                XML_TRUE)
1953        == XML_STATUS_ERROR)
1954      xml_failure(g_parser);
1955    /* The default handler suppresses the entity */
1956    assert_record_handler_called(&storage, 0, "record_default_handler", 9);
1957    assert_record_handler_called(&storage, 1, "record_default_handler", 1);
1958    assert_record_handler_called(&storage, 2, "record_default_handler", 3);
1959    assert_record_handler_called(&storage, 3, "record_default_handler", 1);
1960    assert_record_handler_called(&storage, 4, "record_default_handler", 1);
1961    assert_record_handler_called(&storage, 5, "record_default_handler", 1);
1962    assert_record_handler_called(&storage, 6, "record_default_handler", 8);
1963    assert_record_handler_called(&storage, 7, "record_default_handler", 1);
1964    assert_record_handler_called(&storage, 8, "record_default_handler", 6);
1965    assert_record_handler_called(&storage, 9, "record_default_handler", 1);
1966    assert_record_handler_called(&storage, 10, "record_default_handler", 7);
1967    assert_record_handler_called(&storage, 11, "record_default_handler", 1);
1968    assert_record_handler_called(&storage, 12, "record_default_handler", 1);
1969    assert_record_handler_called(&storage, 13, "record_default_handler", 1);
1970    assert_record_handler_called(&storage, 14, "record_default_handler", 1);
1971    assert_record_handler_called(&storage, 15, "record_default_handler", 1);
1972    assert_record_handler_called(&storage, 16, "record_default_handler", 5);
1973    assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
1974    assert_record_handler_called(&storage, 18, "record_default_handler", 6);
1975    assert_true(storage.count == 19);
1976  }
1977
1978  /* This time, allow the entity through */
1979  set_subtest("allow entity");
1980  {
1981    struct handler_record_list storage;
1982    storage.count = 0;
1983    XML_ParserReset(g_parser, NULL);
1984    XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
1985    XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1986    XML_SetUserData(g_parser, &storage);
1987    if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1988                                XML_TRUE)
1989        == XML_STATUS_ERROR)
1990      xml_failure(g_parser);
1991    assert_record_handler_called(&storage, 0, "record_default_handler", 9);
1992    assert_record_handler_called(&storage, 1, "record_default_handler", 1);
1993    assert_record_handler_called(&storage, 2, "record_default_handler", 3);
1994    assert_record_handler_called(&storage, 3, "record_default_handler", 1);
1995    assert_record_handler_called(&storage, 4, "record_default_handler", 1);
1996    assert_record_handler_called(&storage, 5, "record_default_handler", 1);
1997    assert_record_handler_called(&storage, 6, "record_default_handler", 8);
1998    assert_record_handler_called(&storage, 7, "record_default_handler", 1);
1999    assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2000    assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2001    assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2002    assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2003    assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2004    assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2005    assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2006    assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2007    assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2008    assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
2009    assert_record_handler_called(&storage, 18, "record_default_handler", 1);
2010    assert_record_handler_called(&storage, 19, "record_default_handler", 6);
2011    assert_true(storage.count == 20);
2012  }
2013
2014  /* Finally, without passing the cdata to the default handler */
2015  set_subtest("not passing cdata");
2016  {
2017    struct handler_record_list storage;
2018    storage.count = 0;
2019    XML_ParserReset(g_parser, NULL);
2020    XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2021    XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2022    XML_SetUserData(g_parser, &storage);
2023    if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2024                                XML_TRUE)
2025        == XML_STATUS_ERROR)
2026      xml_failure(g_parser);
2027    assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2028    assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2029    assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2030    assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2031    assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2032    assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2033    assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2034    assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2035    assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2036    assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2037    assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2038    assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2039    assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2040    assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2041    assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2042    assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2043    assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2044    assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
2045                                 1);
2046    assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2047    assert_true(storage.count == 19);
2048  }
2049}
2050END_TEST
2051
2052/* Test DTD element parsing code paths */
2053START_TEST(test_dtd_elements) {
2054  const char *text = "<!DOCTYPE doc [\n"
2055                     "<!ELEMENT doc (chapter)>\n"
2056                     "<!ELEMENT chapter (#PCDATA)>\n"
2057                     "]>\n"
2058                     "<doc><chapter>Wombats are go</chapter></doc>";
2059
2060  XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
2061  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2062      == XML_STATUS_ERROR)
2063    xml_failure(g_parser);
2064}
2065END_TEST
2066
2067static void XMLCALL
2068element_decl_check_model(void *userData, const XML_Char *name,
2069                         XML_Content *model) {
2070  UNUSED_P(userData);
2071  uint32_t errorFlags = 0;
2072
2073  /* Expected model array structure is this:
2074   * [0] (type 6, quant 0)
2075   *   [1] (type 5, quant 0)
2076   *     [3] (type 4, quant 0, name "bar")
2077   *     [4] (type 4, quant 0, name "foo")
2078   *     [5] (type 4, quant 3, name "xyz")
2079   *   [2] (type 4, quant 2, name "zebra")
2080   */
2081  errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2082  errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2083
2084  if (model != NULL) {
2085    errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2086    errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2087    errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2088    errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2089    errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2090
2091    errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2092    errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2093    errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2094    errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2095    errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2096
2097    errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2098    errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2099    errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2100    errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2101    errorFlags
2102        |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2103
2104    errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2105    errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2106    errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2107    errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2108    errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2109
2110    errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2111    errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2112    errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2113    errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2114    errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2115
2116    errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2117    errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2118    errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2119    errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2120    errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2121  }
2122
2123  XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
2124  XML_FreeContentModel(g_parser, model);
2125}
2126
2127START_TEST(test_dtd_elements_nesting) {
2128  // Payload inspired by a test in Perl's XML::Parser
2129  const char *text = "<!DOCTYPE foo [\n"
2130                     "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2131                     "]>\n"
2132                     "<foo/>";
2133
2134  XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2135
2136  XML_SetElementDeclHandler(g_parser, element_decl_check_model);
2137  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2138      == XML_STATUS_ERROR)
2139    xml_failure(g_parser);
2140
2141  if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
2142    fail("Element declaration model regression detected");
2143}
2144END_TEST
2145
2146/* Test foreign DTD handling */
2147START_TEST(test_set_foreign_dtd) {
2148  const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2149  const char *text2 = "<doc>&entity;</doc>";
2150  ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2151
2152  /* Check hash salt is passed through too */
2153  XML_SetHashSalt(g_parser, 0x12345678);
2154  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2155  XML_SetUserData(g_parser, &test_data);
2156  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2157  /* Add a default handler to exercise more code paths */
2158  XML_SetDefaultHandler(g_parser, dummy_default_handler);
2159  if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2160    fail("Could not set foreign DTD");
2161  if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2162      == XML_STATUS_ERROR)
2163    xml_failure(g_parser);
2164
2165  /* Ensure that trying to set the DTD after parsing has started
2166   * is faulted, even if it's the same setting.
2167   */
2168  if (XML_UseForeignDTD(g_parser, XML_TRUE)
2169      != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2170    fail("Failed to reject late foreign DTD setting");
2171  /* Ditto for the hash salt */
2172  if (XML_SetHashSalt(g_parser, 0x23456789))
2173    fail("Failed to reject late hash salt change");
2174
2175  /* Now finish the parse */
2176  if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2177      == XML_STATUS_ERROR)
2178    xml_failure(g_parser);
2179}
2180END_TEST
2181
2182/* Test foreign DTD handling with a failing NotStandalone handler */
2183START_TEST(test_foreign_dtd_not_standalone) {
2184  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2185                     "<doc>&entity;</doc>";
2186  ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2187
2188  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2189  XML_SetUserData(g_parser, &test_data);
2190  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2191  XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
2192  if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2193    fail("Could not set foreign DTD");
2194  expect_failure(text, XML_ERROR_NOT_STANDALONE,
2195                 "NotStandalonehandler failed to reject");
2196}
2197END_TEST
2198
2199/* Test invalid character in a foreign DTD is faulted */
2200START_TEST(test_invalid_foreign_dtd) {
2201  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2202                     "<doc>&entity;</doc>";
2203  ExtFaults test_data
2204      = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2205
2206  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2207  XML_SetUserData(g_parser, &test_data);
2208  XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
2209  XML_UseForeignDTD(g_parser, XML_TRUE);
2210  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2211                 "Bad DTD should not have been accepted");
2212}
2213END_TEST
2214
2215/* Test foreign DTD use with a doctype */
2216START_TEST(test_foreign_dtd_with_doctype) {
2217  const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2218                      "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2219  const char *text2 = "<doc>&entity;</doc>";
2220  ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2221
2222  /* Check hash salt is passed through too */
2223  XML_SetHashSalt(g_parser, 0x12345678);
2224  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2225  XML_SetUserData(g_parser, &test_data);
2226  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2227  /* Add a default handler to exercise more code paths */
2228  XML_SetDefaultHandler(g_parser, dummy_default_handler);
2229  if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2230    fail("Could not set foreign DTD");
2231  if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2232      == XML_STATUS_ERROR)
2233    xml_failure(g_parser);
2234
2235  /* Ensure that trying to set the DTD after parsing has started
2236   * is faulted, even if it's the same setting.
2237   */
2238  if (XML_UseForeignDTD(g_parser, XML_TRUE)
2239      != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2240    fail("Failed to reject late foreign DTD setting");
2241  /* Ditto for the hash salt */
2242  if (XML_SetHashSalt(g_parser, 0x23456789))
2243    fail("Failed to reject late hash salt change");
2244
2245  /* Now finish the parse */
2246  if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2247      == XML_STATUS_ERROR)
2248    xml_failure(g_parser);
2249}
2250END_TEST
2251
2252/* Test XML_UseForeignDTD with no external subset present */
2253START_TEST(test_foreign_dtd_without_external_subset) {
2254  const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
2255                     "<doc>&foo;</doc>";
2256
2257  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2258  XML_SetUserData(g_parser, NULL);
2259  XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2260  XML_UseForeignDTD(g_parser, XML_TRUE);
2261  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2262      == XML_STATUS_ERROR)
2263    xml_failure(g_parser);
2264}
2265END_TEST
2266
2267START_TEST(test_empty_foreign_dtd) {
2268  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2269                     "<doc>&entity;</doc>";
2270
2271  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2272  XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2273  XML_UseForeignDTD(g_parser, XML_TRUE);
2274  expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
2275                 "Undefined entity not faulted");
2276}
2277END_TEST
2278
2279/* Test XML Base is set and unset appropriately */
2280START_TEST(test_set_base) {
2281  const XML_Char *old_base;
2282  const XML_Char *new_base = XCS("/local/file/name.xml");
2283
2284  old_base = XML_GetBase(g_parser);
2285  if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
2286    fail("Unable to set base");
2287  if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
2288    fail("Base setting not correct");
2289  if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
2290    fail("Unable to NULL base");
2291  if (XML_GetBase(g_parser) != NULL)
2292    fail("Base setting not nulled");
2293  XML_SetBase(g_parser, old_base);
2294}
2295END_TEST
2296
2297/* Test attribute counts, indexing, etc */
2298START_TEST(test_attributes) {
2299  const char *text = "<!DOCTYPE doc [\n"
2300                     "<!ELEMENT doc (tag)>\n"
2301                     "<!ATTLIST doc id ID #REQUIRED>\n"
2302                     "]>"
2303                     "<doc a='1' id='one' b='2'>"
2304                     "<tag c='3'/>"
2305                     "</doc>";
2306  AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2307                         {XCS("b"), XCS("2")},
2308                         {XCS("id"), XCS("one")},
2309                         {NULL, NULL}};
2310  AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2311  ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL},
2312                        {XCS("tag"), 1, NULL, NULL},
2313                        {NULL, 0, NULL, NULL}};
2314  info[0].attributes = doc_info;
2315  info[1].attributes = tag_info;
2316
2317  XML_SetStartElementHandler(g_parser, counting_start_element_handler);
2318  XML_SetUserData(g_parser, info);
2319  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2320      == XML_STATUS_ERROR)
2321    xml_failure(g_parser);
2322}
2323END_TEST
2324
2325/* Test reset works correctly in the middle of processing an internal
2326 * entity.  Exercises some obscure code in XML_ParserReset().
2327 */
2328START_TEST(test_reset_in_entity) {
2329  const char *text = "<!DOCTYPE doc [\n"
2330                     "<!ENTITY wombat 'wom'>\n"
2331                     "<!ENTITY entity 'hi &wom; there'>\n"
2332                     "]>\n"
2333                     "<doc>&entity;</doc>";
2334  XML_ParsingStatus status;
2335
2336  g_resumable = XML_TRUE;
2337  XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2338  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2339      == XML_STATUS_ERROR)
2340    xml_failure(g_parser);
2341  XML_GetParsingStatus(g_parser, &status);
2342  if (status.parsing != XML_SUSPENDED)
2343    fail("Parsing status not SUSPENDED");
2344  XML_ParserReset(g_parser, NULL);
2345  XML_GetParsingStatus(g_parser, &status);
2346  if (status.parsing != XML_INITIALIZED)
2347    fail("Parsing status doesn't reset to INITIALIZED");
2348}
2349END_TEST
2350
2351/* Test that resume correctly passes through parse errors */
2352START_TEST(test_resume_invalid_parse) {
2353  const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2354
2355  g_resumable = XML_TRUE;
2356  XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2357  if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2358      == XML_STATUS_ERROR)
2359    xml_failure(g_parser);
2360  if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
2361    fail("Resumed invalid parse not faulted");
2362  if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
2363    fail("Invalid parse not correctly faulted");
2364}
2365END_TEST
2366
2367/* Test that re-suspended parses are correctly passed through */
2368START_TEST(test_resume_resuspended) {
2369  const char *text = "<doc>Hello<meep/>world</doc>";
2370
2371  g_resumable = XML_TRUE;
2372  XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2373  if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2374      == XML_STATUS_ERROR)
2375    xml_failure(g_parser);
2376  g_resumable = XML_TRUE;
2377  XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2378  if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
2379    fail("Resumption not suspended");
2380  /* This one should succeed and finish up */
2381  if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
2382    xml_failure(g_parser);
2383}
2384END_TEST
2385
2386/* Test that CDATA shows up correctly through a default handler */
2387START_TEST(test_cdata_default) {
2388  const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2389  const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2390  CharData storage;
2391
2392  CharData_Init(&storage);
2393  XML_SetUserData(g_parser, &storage);
2394  XML_SetDefaultHandler(g_parser, accumulate_characters);
2395
2396  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2397      == XML_STATUS_ERROR)
2398    xml_failure(g_parser);
2399  CharData_CheckXMLChars(&storage, expected);
2400}
2401END_TEST
2402
2403/* Test resetting a subordinate parser does exactly nothing */
2404START_TEST(test_subordinate_reset) {
2405  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2406                     "<!DOCTYPE doc SYSTEM 'foo'>\n"
2407                     "<doc>&entity;</doc>";
2408
2409  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2410  XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
2411  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2412      == XML_STATUS_ERROR)
2413    xml_failure(g_parser);
2414}
2415END_TEST
2416
2417/* Test suspending a subordinate parser */
2418START_TEST(test_subordinate_suspend) {
2419  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2420                     "<!DOCTYPE doc SYSTEM 'foo'>\n"
2421                     "<doc>&entity;</doc>";
2422
2423  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2424  XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
2425  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2426      == XML_STATUS_ERROR)
2427    xml_failure(g_parser);
2428}
2429END_TEST
2430
2431/* Test suspending a subordinate parser from an XML declaration */
2432/* Increases code coverage of the tests */
2433
2434START_TEST(test_subordinate_xdecl_suspend) {
2435  const char *text
2436      = "<!DOCTYPE doc [\n"
2437        "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2438        "]>\n"
2439        "<doc>&entity;</doc>";
2440
2441  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2442  XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2443  g_resumable = XML_TRUE;
2444  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2445      == XML_STATUS_ERROR)
2446    xml_failure(g_parser);
2447}
2448END_TEST
2449
2450START_TEST(test_subordinate_xdecl_abort) {
2451  const char *text
2452      = "<!DOCTYPE doc [\n"
2453        "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2454        "]>\n"
2455        "<doc>&entity;</doc>";
2456
2457  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2458  XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2459  g_resumable = XML_FALSE;
2460  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2461      == XML_STATUS_ERROR)
2462    xml_failure(g_parser);
2463}
2464END_TEST
2465
2466/* Test external entity fault handling with suspension */
2467START_TEST(test_ext_entity_invalid_suspended_parse) {
2468  const char *text = "<!DOCTYPE doc [\n"
2469                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2470                     "]>\n"
2471                     "<doc>&en;</doc>";
2472  ExtFaults faults[]
2473      = {{"<?xml version='1.0' encoding='us-ascii'?><",
2474          "Incomplete element declaration not faulted", NULL,
2475          XML_ERROR_UNCLOSED_TOKEN},
2476         {/* First two bytes of a three-byte char */
2477          "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2478          "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2479         {NULL, NULL, NULL, XML_ERROR_NONE}};
2480  ExtFaults *fault;
2481
2482  for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2483    set_subtest("%s", fault->parse_text);
2484    XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2485    XML_SetExternalEntityRefHandler(g_parser,
2486                                    external_entity_suspending_faulter);
2487    XML_SetUserData(g_parser, fault);
2488    expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2489                   "Parser did not report external entity error");
2490    XML_ParserReset(g_parser, NULL);
2491  }
2492}
2493END_TEST
2494
2495/* Test setting an explicit encoding */
2496START_TEST(test_explicit_encoding) {
2497  const char *text1 = "<doc>Hello ";
2498  const char *text2 = " World</doc>";
2499
2500  /* Just check that we can set the encoding to NULL before starting */
2501  if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2502    fail("Failed to initialise encoding to NULL");
2503  /* Say we are UTF-8 */
2504  if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2505    fail("Failed to set explicit encoding");
2506  if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2507      == XML_STATUS_ERROR)
2508    xml_failure(g_parser);
2509  /* Try to switch encodings mid-parse */
2510  if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2511    fail("Allowed encoding change");
2512  if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2513      == XML_STATUS_ERROR)
2514    xml_failure(g_parser);
2515  /* Try now the parse is over */
2516  if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2517    fail("Failed to unset encoding");
2518}
2519END_TEST
2520
2521/* Test handling of trailing CR (rather than newline) */
2522START_TEST(test_trailing_cr) {
2523  const char *text = "<doc>\r";
2524  int found_cr;
2525
2526  /* Try with a character handler, for code coverage */
2527  XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
2528  XML_SetUserData(g_parser, &found_cr);
2529  found_cr = 0;
2530  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2531      == XML_STATUS_OK)
2532    fail("Failed to fault unclosed doc");
2533  if (found_cr == 0)
2534    fail("Did not catch the carriage return");
2535  XML_ParserReset(g_parser, NULL);
2536
2537  /* Now with a default handler instead */
2538  XML_SetDefaultHandler(g_parser, cr_cdata_handler);
2539  XML_SetUserData(g_parser, &found_cr);
2540  found_cr = 0;
2541  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2542      == XML_STATUS_OK)
2543    fail("Failed to fault unclosed doc");
2544  if (found_cr == 0)
2545    fail("Did not catch default carriage return");
2546}
2547END_TEST
2548
2549/* Test trailing CR in an external entity parse */
2550START_TEST(test_ext_entity_trailing_cr) {
2551  const char *text = "<!DOCTYPE doc [\n"
2552                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2553                     "]>\n"
2554                     "<doc>&en;</doc>";
2555  int found_cr;
2556
2557  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2558  XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
2559  XML_SetUserData(g_parser, &found_cr);
2560  found_cr = 0;
2561  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2562      != XML_STATUS_OK)
2563    xml_failure(g_parser);
2564  if (found_cr == 0)
2565    fail("No carriage return found");
2566  XML_ParserReset(g_parser, NULL);
2567
2568  /* Try again with a different trailing CR */
2569  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2570  XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
2571  XML_SetUserData(g_parser, &found_cr);
2572  found_cr = 0;
2573  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2574      != XML_STATUS_OK)
2575    xml_failure(g_parser);
2576  if (found_cr == 0)
2577    fail("No carriage return found");
2578}
2579END_TEST
2580
2581/* Test handling of trailing square bracket */
2582START_TEST(test_trailing_rsqb) {
2583  const char *text8 = "<doc>]";
2584  const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
2585  int found_rsqb;
2586  int text8_len = (int)strlen(text8);
2587
2588  XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2589  XML_SetUserData(g_parser, &found_rsqb);
2590  found_rsqb = 0;
2591  if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
2592      == XML_STATUS_OK)
2593    fail("Failed to fault unclosed doc");
2594  if (found_rsqb == 0)
2595    fail("Did not catch the right square bracket");
2596
2597  /* Try again with a different encoding */
2598  XML_ParserReset(g_parser, NULL);
2599  XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2600  XML_SetUserData(g_parser, &found_rsqb);
2601  found_rsqb = 0;
2602  if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2603                              XML_TRUE)
2604      == XML_STATUS_OK)
2605    fail("Failed to fault unclosed doc");
2606  if (found_rsqb == 0)
2607    fail("Did not catch the right square bracket");
2608
2609  /* And finally with a default handler */
2610  XML_ParserReset(g_parser, NULL);
2611  XML_SetDefaultHandler(g_parser, rsqb_handler);
2612  XML_SetUserData(g_parser, &found_rsqb);
2613  found_rsqb = 0;
2614  if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2615                              XML_TRUE)
2616      == XML_STATUS_OK)
2617    fail("Failed to fault unclosed doc");
2618  if (found_rsqb == 0)
2619    fail("Did not catch the right square bracket");
2620}
2621END_TEST
2622
2623/* Test trailing right square bracket in an external entity parse */
2624START_TEST(test_ext_entity_trailing_rsqb) {
2625  const char *text = "<!DOCTYPE doc [\n"
2626                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2627                     "]>\n"
2628                     "<doc>&en;</doc>";
2629  int found_rsqb;
2630
2631  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2632  XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
2633  XML_SetUserData(g_parser, &found_rsqb);
2634  found_rsqb = 0;
2635  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2636      != XML_STATUS_OK)
2637    xml_failure(g_parser);
2638  if (found_rsqb == 0)
2639    fail("No right square bracket found");
2640}
2641END_TEST
2642
2643/* Test CDATA handling in an external entity */
2644START_TEST(test_ext_entity_good_cdata) {
2645  const char *text = "<!DOCTYPE doc [\n"
2646                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2647                     "]>\n"
2648                     "<doc>&en;</doc>";
2649
2650  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2651  XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
2652  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2653      != XML_STATUS_OK)
2654    xml_failure(g_parser);
2655}
2656END_TEST
2657
2658/* Test user parameter settings */
2659START_TEST(test_user_parameters) {
2660  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2661                     "<!-- Primary parse -->\n"
2662                     "<!DOCTYPE doc SYSTEM 'foo'>\n"
2663                     "<doc>&entity;";
2664  const char *epilog = "<!-- Back to primary parser -->\n"
2665                       "</doc>";
2666
2667  g_comment_count = 0;
2668  g_skip_count = 0;
2669  g_xdecl_count = 0;
2670  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2671  XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
2672  XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
2673  XML_SetCommentHandler(g_parser, data_check_comment_handler);
2674  XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
2675  XML_UseParserAsHandlerArg(g_parser);
2676  XML_SetUserData(g_parser, (void *)1);
2677  g_handler_data = g_parser;
2678  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2679      == XML_STATUS_ERROR)
2680    xml_failure(g_parser);
2681  /* Ensure we can't change policy mid-parse */
2682  if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
2683    fail("Changed param entity parsing policy while parsing");
2684  if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
2685      == XML_STATUS_ERROR)
2686    xml_failure(g_parser);
2687  if (g_comment_count != 3)
2688    fail("Comment handler not invoked enough times");
2689  if (g_skip_count != 1)
2690    fail("Skip handler not invoked enough times");
2691  if (g_xdecl_count != 1)
2692    fail("XML declaration handler not invoked");
2693}
2694END_TEST
2695
2696/* Test that an explicit external entity handler argument replaces
2697 * the parser as the first argument.
2698 *
2699 * We do not call the first parameter to the external entity handler
2700 * 'parser' for once, since the first time the handler is called it
2701 * will actually be a text string.  We need to be able to access the
2702 * global 'parser' variable to create our external entity parser from,
2703 * since there are code paths we need to ensure get executed.
2704 */
2705START_TEST(test_ext_entity_ref_parameter) {
2706  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2707                     "<!DOCTYPE doc SYSTEM 'foo'>\n"
2708                     "<doc>&entity;</doc>";
2709
2710  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2711  XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2712  /* Set a handler arg that is not NULL and not parser (which is
2713   * what NULL would cause to be passed.
2714   */
2715  XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
2716  g_handler_data = text;
2717  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2718      == XML_STATUS_ERROR)
2719    xml_failure(g_parser);
2720
2721  /* Now try again with unset args */
2722  XML_ParserReset(g_parser, NULL);
2723  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2724  XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2725  XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
2726  g_handler_data = g_parser;
2727  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2728      == XML_STATUS_ERROR)
2729    xml_failure(g_parser);
2730}
2731END_TEST
2732
2733/* Test the parsing of an empty string */
2734START_TEST(test_empty_parse) {
2735  const char *text = "<doc></doc>";
2736  const char *partial = "<doc>";
2737
2738  if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
2739    fail("Parsing empty string faulted");
2740  if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2741    fail("Parsing final empty string not faulted");
2742  if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
2743    fail("Parsing final empty string faulted for wrong reason");
2744
2745  /* Now try with valid text before the empty end */
2746  XML_ParserReset(g_parser, NULL);
2747  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2748      == XML_STATUS_ERROR)
2749    xml_failure(g_parser);
2750  if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
2751    fail("Parsing final empty string faulted");
2752
2753  /* Now try with invalid text before the empty end */
2754  XML_ParserReset(g_parser, NULL);
2755  if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
2756                              XML_FALSE)
2757      == XML_STATUS_ERROR)
2758    xml_failure(g_parser);
2759  if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2760    fail("Parsing final incomplete empty string not faulted");
2761}
2762END_TEST
2763
2764/* Test odd corners of the XML_GetBuffer interface */
2765static enum XML_Status
2766get_feature(enum XML_FeatureEnum feature_id, long *presult) {
2767  const XML_Feature *feature = XML_GetFeatureList();
2768
2769  if (feature == NULL)
2770    return XML_STATUS_ERROR;
2771  for (; feature->feature != XML_FEATURE_END; feature++) {
2772    if (feature->feature == feature_id) {
2773      *presult = feature->value;
2774      return XML_STATUS_OK;
2775    }
2776  }
2777  return XML_STATUS_ERROR;
2778}
2779
2780/* Test odd corners of the XML_GetBuffer interface */
2781START_TEST(test_get_buffer_1) {
2782  const char *text = get_buffer_test_text;
2783  void *buffer;
2784  long context_bytes;
2785
2786  /* Attempt to allocate a negative length buffer */
2787  if (XML_GetBuffer(g_parser, -12) != NULL)
2788    fail("Negative length buffer not failed");
2789
2790  /* Now get a small buffer and extend it past valid length */
2791  buffer = XML_GetBuffer(g_parser, 1536);
2792  if (buffer == NULL)
2793    fail("1.5K buffer failed");
2794  assert(buffer != NULL);
2795  memcpy(buffer, text, strlen(text));
2796  if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2797      == XML_STATUS_ERROR)
2798    xml_failure(g_parser);
2799  if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
2800    fail("INT_MAX buffer not failed");
2801
2802  /* Now try extending it a more reasonable but still too large
2803   * amount.  The allocator in XML_GetBuffer() doubles the buffer
2804   * size until it exceeds the requested amount or INT_MAX.  If it
2805   * exceeds INT_MAX, it rejects the request, so we want a request
2806   * between INT_MAX and INT_MAX/2.  A gap of 1K seems comfortable,
2807   * with an extra byte just to ensure that the request is off any
2808   * boundary.  The request will be inflated internally by
2809   * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
2810   * request.
2811   */
2812  if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
2813    context_bytes = 0;
2814  if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
2815    fail("INT_MAX- buffer not failed");
2816
2817  /* Now try extending it a carefully crafted amount */
2818  if (XML_GetBuffer(g_parser, 1000) == NULL)
2819    fail("1000 buffer failed");
2820}
2821END_TEST
2822
2823/* Test more corners of the XML_GetBuffer interface */
2824START_TEST(test_get_buffer_2) {
2825  const char *text = get_buffer_test_text;
2826  void *buffer;
2827
2828  /* Now get a decent buffer */
2829  buffer = XML_GetBuffer(g_parser, 1536);
2830  if (buffer == NULL)
2831    fail("1.5K buffer failed");
2832  assert(buffer != NULL);
2833  memcpy(buffer, text, strlen(text));
2834  if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2835      == XML_STATUS_ERROR)
2836    xml_failure(g_parser);
2837
2838  /* Extend it, to catch a different code path */
2839  if (XML_GetBuffer(g_parser, 1024) == NULL)
2840    fail("1024 buffer failed");
2841}
2842END_TEST
2843
/* Regression test for the signed integer overflow of CVE-2022-23852.
 * Only built when XML_CONTEXT_BYTES is greater than zero.
 */
#if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow) {
  const char *const text = "\n";
  const int expectedKeepValue = (int)strlen(text);
  XML_Parser parser = XML_ParserCreate(NULL);
  enum XML_Status status;

  assert(parser != NULL);

  // Once this parse call returns, variable "keep" in XML_GetBuffer
  // will hold the value expectedKeepValue
  status = _XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text),
                                   XML_FALSE /* isFinal */);
  if (status == XML_STATUS_ERROR)
    xml_failure(parser);

  // The subtraction comes first, so the request itself stays in range
  assert(expectedKeepValue > 0);
  if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL)
    fail("enlarging buffer not failed");

  XML_ParserFree(parser);
}
END_TEST
#endif // XML_CONTEXT_BYTES > 0
2868
2869START_TEST(test_buffer_can_grow_to_max) {
2870  const char *const prefixes[] = {
2871      "",
2872      "<",
2873      "<x a='",
2874      "<doc><x a='",
2875      "<document><x a='",
2876      "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
2877      "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
2878      "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
2879      "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
2880      "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
2881  const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
2882  int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
2883#if defined(__MINGW32__) && ! defined(__MINGW64__)
2884  // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
2885  // Can we make a big allocation?
2886  void *big = malloc(maxbuf);
2887  if (! big) {
2888    // The big allocation failed. Let's be a little lenient.
2889    maxbuf = maxbuf / 2;
2890  }
2891  free(big);
2892#endif
2893
2894  for (int i = 0; i < num_prefixes; ++i) {
2895    set_subtest("\"%s\"", prefixes[i]);
2896    XML_Parser parser = XML_ParserCreate(NULL);
2897    const int prefix_len = (int)strlen(prefixes[i]);
2898    const enum XML_Status s
2899        = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
2900    if (s != XML_STATUS_OK)
2901      xml_failure(parser);
2902
2903    // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
2904    // subtracting the whole prefix is easiest, and close enough.
2905    assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
2906    // The limit should be consistent; no prefix should allow us to
2907    // reach above the max buffer size.
2908    assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
2909    XML_ParserFree(parser);
2910  }
2911}
2912END_TEST
2913
2914START_TEST(test_getbuffer_allocates_on_zero_len) {
2915  for (int first_len = 1; first_len >= 0; first_len--) {
2916    set_subtest("with len=%d first", first_len);
2917    XML_Parser parser = XML_ParserCreate(NULL);
2918    assert_true(parser != NULL);
2919    assert_true(XML_GetBuffer(parser, first_len) != NULL);
2920    assert_true(XML_GetBuffer(parser, 0) != NULL);
2921    if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
2922      xml_failure(parser);
2923    XML_ParserFree(parser);
2924  }
2925}
2926END_TEST
2927
2928/* Test position information macros */
2929START_TEST(test_byte_info_at_end) {
2930  const char *text = "<doc></doc>";
2931
2932  if (XML_GetCurrentByteIndex(g_parser) != -1
2933      || XML_GetCurrentByteCount(g_parser) != 0)
2934    fail("Byte index/count incorrect at start of parse");
2935  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2936      == XML_STATUS_ERROR)
2937    xml_failure(g_parser);
2938  /* At end, the count will be zero and the index the end of string */
2939  if (XML_GetCurrentByteCount(g_parser) != 0)
2940    fail("Terminal byte count incorrect");
2941  if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
2942    fail("Terminal byte index incorrect");
2943}
2944END_TEST
2945
2946/* Test position information from errors */
2947#define PRE_ERROR_STR "<doc></"
2948#define POST_ERROR_STR "wombat></doc>"
2949START_TEST(test_byte_info_at_error) {
2950  const char *text = PRE_ERROR_STR POST_ERROR_STR;
2951
2952  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2953      == XML_STATUS_OK)
2954    fail("Syntax error not faulted");
2955  if (XML_GetCurrentByteCount(g_parser) != 0)
2956    fail("Error byte count incorrect");
2957  if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
2958    fail("Error byte index incorrect");
2959}
2960END_TEST
2961#undef PRE_ERROR_STR
2962#undef POST_ERROR_STR
2963
2964/* Test position information in handler */
2965#define START_ELEMENT "<e>"
2966#define CDATA_TEXT "Hello"
2967#define END_ELEMENT "</e>"
2968START_TEST(test_byte_info_at_cdata) {
2969  const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
2970  int offset, size;
2971  ByteTestData data;
2972
2973  /* Check initial context is empty */
2974  if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
2975    fail("Unexpected context at start of parse");
2976
2977  data.start_element_len = (int)strlen(START_ELEMENT);
2978  data.cdata_len = (int)strlen(CDATA_TEXT);
2979  data.total_string_len = (int)strlen(text);
2980  XML_SetCharacterDataHandler(g_parser, byte_character_handler);
2981  XML_SetUserData(g_parser, &data);
2982  if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
2983    xml_failure(g_parser);
2984}
2985END_TEST
2986#undef START_ELEMENT
2987#undef CDATA_TEXT
2988#undef END_ELEMENT
2989
2990/* Test predefined entities are correctly recognised */
2991START_TEST(test_predefined_entities) {
2992  const char *text = "<doc>&lt;&gt;&amp;&quot;&apos;</doc>";
2993  const XML_Char *expected = XCS("<doc>&lt;&gt;&amp;&quot;&apos;</doc>");
2994  const XML_Char *result = XCS("<>&\"'");
2995  CharData storage;
2996
2997  XML_SetDefaultHandler(g_parser, accumulate_characters);
2998  /* run_character_check uses XML_SetCharacterDataHandler(), which
2999   * unfortunately heads off a code path that we need to exercise.
3000   */
3001  CharData_Init(&storage);
3002  XML_SetUserData(g_parser, &storage);
3003  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3004      == XML_STATUS_ERROR)
3005    xml_failure(g_parser);
3006  /* The default handler doesn't translate the entities */
3007  CharData_CheckXMLChars(&storage, expected);
3008
3009  /* Now try again and check the translation */
3010  XML_ParserReset(g_parser, NULL);
3011  run_character_check(text, result);
3012}
3013END_TEST
3014
3015/* Regression test that an invalid tag in an external parameter
3016 * reference in an external DTD is correctly faulted.
3017 *
3018 * Only a few specific tags are legal in DTDs ignoring comments and
3019 * processing instructions, all of which begin with an exclamation
3020 * mark.  "<el/>" is not one of them, so the parser should raise an
3021 * error on encountering it.
3022 */
3023START_TEST(test_invalid_tag_in_dtd) {
3024  const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3025                     "<doc></doc>\n";
3026
3027  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3028  XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
3029  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3030                 "Invalid tag IN DTD external param not rejected");
3031}
3032END_TEST
3033
3034/* Test entities not quite the predefined ones are not mis-recognised */
3035START_TEST(test_not_predefined_entities) {
3036  const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3037                        "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3038  int i = 0;
3039
3040  while (text[i] != NULL) {
3041    expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
3042                   "Undefined entity not rejected");
3043    XML_ParserReset(g_parser, NULL);
3044    i++;
3045  }
3046}
3047END_TEST
3048
3049/* Test conditional inclusion (IGNORE) */
3050START_TEST(test_ignore_section) {
3051  const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3052                     "<doc><e>&entity;</e></doc>";
3053  const XML_Char *expected
3054      = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
3055  CharData storage;
3056
3057  CharData_Init(&storage);
3058  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3059  XML_SetUserData(g_parser, &storage);
3060  XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
3061  XML_SetDefaultHandler(g_parser, accumulate_characters);
3062  XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3063  XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3064  XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3065  XML_SetStartElementHandler(g_parser, dummy_start_element);
3066  XML_SetEndElementHandler(g_parser, dummy_end_element);
3067  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3068      == XML_STATUS_ERROR)
3069    xml_failure(g_parser);
3070  CharData_CheckXMLChars(&storage, expected);
3071}
3072END_TEST
3073
3074START_TEST(test_ignore_section_utf16) {
3075  const char text[] =
3076      /* <!DOCTYPE d SYSTEM 's'> */
3077      "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3078      "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3079      /* <d><e>&en;</e></d> */
3080      "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3081  const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3082  CharData storage;
3083
3084  CharData_Init(&storage);
3085  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3086  XML_SetUserData(g_parser, &storage);
3087  XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
3088  XML_SetDefaultHandler(g_parser, accumulate_characters);
3089  XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3090  XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3091  XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3092  XML_SetStartElementHandler(g_parser, dummy_start_element);
3093  XML_SetEndElementHandler(g_parser, dummy_end_element);
3094  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3095      == XML_STATUS_ERROR)
3096    xml_failure(g_parser);
3097  CharData_CheckXMLChars(&storage, expected);
3098}
3099END_TEST
3100
3101START_TEST(test_ignore_section_utf16_be) {
3102  const char text[] =
3103      /* <!DOCTYPE d SYSTEM 's'> */
3104      "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3105      "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3106      /* <d><e>&en;</e></d> */
3107      "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3108  const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3109  CharData storage;
3110
3111  CharData_Init(&storage);
3112  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3113  XML_SetUserData(g_parser, &storage);
3114  XML_SetExternalEntityRefHandler(g_parser,
3115                                  external_entity_load_ignore_utf16_be);
3116  XML_SetDefaultHandler(g_parser, accumulate_characters);
3117  XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3118  XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3119  XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3120  XML_SetStartElementHandler(g_parser, dummy_start_element);
3121  XML_SetEndElementHandler(g_parser, dummy_end_element);
3122  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3123      == XML_STATUS_ERROR)
3124    xml_failure(g_parser);
3125  CharData_CheckXMLChars(&storage, expected);
3126}
3127END_TEST
3128
3129/* Test mis-formatted conditional exclusion */
3130START_TEST(test_bad_ignore_section) {
3131  const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3132                     "<doc><e>&entity;</e></doc>";
3133  ExtFaults faults[]
3134      = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3135          XML_ERROR_SYNTAX},
3136         {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3137          XML_ERROR_INVALID_TOKEN},
3138         {/* FIrst two bytes of a three-byte char */
3139          "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3140          XML_ERROR_PARTIAL_CHAR},
3141         {NULL, NULL, NULL, XML_ERROR_NONE}};
3142  ExtFaults *fault;
3143
3144  for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3145    set_subtest("%s", fault->parse_text);
3146    XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3147    XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3148    XML_SetUserData(g_parser, fault);
3149    expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3150                   "Incomplete IGNORE section not failed");
3151    XML_ParserReset(g_parser, NULL);
3152  }
3153}
3154END_TEST
3155
/* User data shared between test_external_bom_consumed and
 * external_bom_checker.
 */
struct bom_testdata {
  /* Text that external_bom_checker parses for the '004-2.ent' entity */
  const char *external;
  /* Number of leading bytes of `external` passed in the first, non-final
   * parse call; the remainder is passed in a second, final call.
   */
  int split;
  /* Set to XML_TRUE by external_bom_checker when it handles '004-2.ent' */
  XML_Bool nested_callback_happened;
};
3161
3162static int XMLCALL
3163external_bom_checker(XML_Parser parser, const XML_Char *context,
3164                     const XML_Char *base, const XML_Char *systemId,
3165                     const XML_Char *publicId) {
3166  const char *text;
3167  UNUSED_P(base);
3168  UNUSED_P(systemId);
3169  UNUSED_P(publicId);
3170
3171  XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
3172  if (ext_parser == NULL)
3173    fail("Could not create external entity parser");
3174
3175  if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3176    struct bom_testdata *const testdata
3177        = (struct bom_testdata *)XML_GetUserData(parser);
3178    const char *const external = testdata->external;
3179    const int split = testdata->split;
3180    testdata->nested_callback_happened = XML_TRUE;
3181
3182    if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
3183        != XML_STATUS_OK) {
3184      xml_failure(ext_parser);
3185    }
3186    text = external + split; // the parse below will continue where we left off.
3187  } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3188    text = "<!ELEMENT doc EMPTY>\n"
3189           "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3190           "<!ENTITY % e2 '%e1;'>\n";
3191  } else {
3192    fail("unknown systemId");
3193  }
3194
3195  if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
3196      != XML_STATUS_OK)
3197    xml_failure(ext_parser);
3198
3199  XML_ParserFree(ext_parser);
3200  return XML_STATUS_OK;
3201}
3202
3203/* regression test: BOM should be consumed when followed by a partial token. */
3204START_TEST(test_external_bom_consumed) {
3205  const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3206                           "<doc></doc>\n";
3207  const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3208  const int len = (int)strlen(external);
3209  for (int split = 0; split <= len; ++split) {
3210    set_subtest("split at byte %d", split);
3211
3212    struct bom_testdata testdata;
3213    testdata.external = external;
3214    testdata.split = split;
3215    testdata.nested_callback_happened = XML_FALSE;
3216
3217    XML_Parser parser = XML_ParserCreate(NULL);
3218    if (parser == NULL) {
3219      fail("Couldn't create parser");
3220    }
3221    XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3222    XML_SetExternalEntityRefHandler(parser, external_bom_checker);
3223    XML_SetUserData(parser, &testdata);
3224    if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3225        == XML_STATUS_ERROR)
3226      xml_failure(parser);
3227    if (! testdata.nested_callback_happened) {
3228      fail("ref handler not called");
3229    }
3230    XML_ParserFree(parser);
3231  }
3232}
3233END_TEST
3234
3235/* Test recursive parsing */
3236START_TEST(test_external_entity_values) {
3237  const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3238                     "<doc></doc>\n";
3239  ExtFaults data_004_2[] = {
3240      {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3241      {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3242       XML_ERROR_INVALID_TOKEN},
3243      {"'wombat", "Unterminated string not faulted", NULL,
3244       XML_ERROR_UNCLOSED_TOKEN},
3245      {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3246       XML_ERROR_PARTIAL_CHAR},
3247      {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3248      {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3249       XML_ERROR_XML_DECL},
3250      {/* UTF-8 BOM */
3251       "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3252       XML_ERROR_NONE},
3253      {"<?xml version='1.0' encoding='utf-8'?>\n$",
3254       "Invalid token after text declaration not faulted", NULL,
3255       XML_ERROR_INVALID_TOKEN},
3256      {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3257       "Unterminated string after text decl not faulted", NULL,
3258       XML_ERROR_UNCLOSED_TOKEN},
3259      {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3260       "Partial UTF-8 character after text decl not faulted", NULL,
3261       XML_ERROR_PARTIAL_CHAR},
3262      {"%e1;", "Recursive parameter entity not faulted", NULL,
3263       XML_ERROR_RECURSIVE_ENTITY_REF},
3264      {NULL, NULL, NULL, XML_ERROR_NONE}};
3265  int i;
3266
3267  for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3268    set_subtest("%s", data_004_2[i].parse_text);
3269    XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3270    XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
3271    XML_SetUserData(g_parser, &data_004_2[i]);
3272    if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3273        == XML_STATUS_ERROR)
3274      xml_failure(g_parser);
3275    XML_ParserReset(g_parser, NULL);
3276  }
3277}
3278END_TEST
3279
3280/* Test the recursive parse interacts with a not standalone handler */
3281START_TEST(test_ext_entity_not_standalone) {
3282  const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3283                     "<doc></doc>";
3284
3285  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3286  XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
3287  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3288                 "Standalone rejection not caught");
3289}
3290END_TEST
3291
3292START_TEST(test_ext_entity_value_abort) {
3293  const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3294                     "<doc></doc>\n";
3295
3296  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3297  XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
3298  g_resumable = XML_FALSE;
3299  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3300      == XML_STATUS_ERROR)
3301    xml_failure(g_parser);
3302}
3303END_TEST
3304
3305START_TEST(test_bad_public_doctype) {
3306  const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3307                     "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3308                     "<doc></doc>";
3309
3310  /* Setting a handler provokes a particular code path */
3311  XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
3312                            dummy_end_doctype_handler);
3313  expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3314}
3315END_TEST
3316
3317/* Test based on ibm/valid/P32/ibm32v04.xml */
3318START_TEST(test_attribute_enum_value) {
3319  const char *text = "<?xml version='1.0' standalone='no'?>\n"
3320                     "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3321                     "<animal>This is a \n    <a/>  \n\nyellow tiger</animal>";
3322  ExtTest dtd_data
3323      = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3324         "<!ELEMENT a EMPTY>\n"
3325         "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3326         NULL, NULL};
3327  const XML_Char *expected = XCS("This is a \n      \n\nyellow tiger");
3328
3329  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3330  XML_SetUserData(g_parser, &dtd_data);
3331  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3332  /* An attribute list handler provokes a different code path */
3333  XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
3334  run_ext_character_check(text, &dtd_data, expected);
3335}
3336END_TEST
3337
3338/* Slightly bizarrely, the library seems to silently ignore entity
3339 * definitions for predefined entities, even when they are wrong.  The
3340 * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3341 * to happen, so this is currently treated as acceptable.
3342 */
3343START_TEST(test_predefined_entity_redefinition) {
3344  const char *text = "<!DOCTYPE doc [\n"
3345                     "<!ENTITY apos 'foo'>\n"
3346                     "]>\n"
3347                     "<doc>&apos;</doc>";
3348  run_character_check(text, XCS("'"));
3349}
3350END_TEST
3351
3352/* Test that the parser stops processing the DTD after an unresolved
3353 * parameter entity is encountered.
3354 */
3355START_TEST(test_dtd_stop_processing) {
3356  const char *text = "<!DOCTYPE doc [\n"
3357                     "%foo;\n"
3358                     "<!ENTITY bar 'bas'>\n"
3359                     "]><doc/>";
3360
3361  XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
3362  init_dummy_handlers();
3363  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3364      == XML_STATUS_ERROR)
3365    xml_failure(g_parser);
3366  if (get_dummy_handler_flags() != 0)
3367    fail("DTD processing still going after undefined PE");
3368}
3369END_TEST
3370
3371/* Test public notations with no system ID */
3372START_TEST(test_public_notation_no_sysid) {
3373  const char *text = "<!DOCTYPE doc [\n"
3374                     "<!NOTATION note PUBLIC 'foo'>\n"
3375                     "<!ELEMENT doc EMPTY>\n"
3376                     "]>\n<doc/>";
3377
3378  init_dummy_handlers();
3379  XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
3380  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3381      == XML_STATUS_ERROR)
3382    xml_failure(g_parser);
3383  if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
3384    fail("Notation declaration handler not called");
3385}
3386END_TEST
3387
3388START_TEST(test_nested_groups) {
3389  const char *text
3390      = "<!DOCTYPE doc [\n"
3391        "<!ELEMENT doc "
3392        /* Sixteen elements per line */
3393        "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3394        "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3395        "))))))))))))))))))))))))))))))))>\n"
3396        "<!ELEMENT e EMPTY>"
3397        "]>\n"
3398        "<doc><e/></doc>";
3399  CharData storage;
3400
3401  CharData_Init(&storage);
3402  XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3403  XML_SetStartElementHandler(g_parser, record_element_start_handler);
3404  XML_SetUserData(g_parser, &storage);
3405  init_dummy_handlers();
3406  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3407      == XML_STATUS_ERROR)
3408    xml_failure(g_parser);
3409  CharData_CheckXMLChars(&storage, XCS("doce"));
3410  if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3411    fail("Element handler not fired");
3412}
3413END_TEST
3414
3415START_TEST(test_group_choice) {
3416  const char *text = "<!DOCTYPE doc [\n"
3417                     "<!ELEMENT doc (a|b|c)+>\n"
3418                     "<!ELEMENT a EMPTY>\n"
3419                     "<!ELEMENT b (#PCDATA)>\n"
3420                     "<!ELEMENT c ANY>\n"
3421                     "]>\n"
3422                     "<doc>\n"
3423                     "<a/>\n"
3424                     "<b attr='foo'>This is a foo</b>\n"
3425                     "<c></c>\n"
3426                     "</doc>\n";
3427
3428  XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3429  init_dummy_handlers();
3430  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3431      == XML_STATUS_ERROR)
3432    xml_failure(g_parser);
3433  if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3434    fail("Element handler flag not raised");
3435}
3436END_TEST
3437
3438START_TEST(test_standalone_parameter_entity) {
3439  const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3440                     "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3441                     "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3442                     "%entity;\n"
3443                     "]>\n"
3444                     "<doc></doc>";
3445  char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3446
3447  XML_SetUserData(g_parser, dtd_data);
3448  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3449  XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
3450  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3451      == XML_STATUS_ERROR)
3452    xml_failure(g_parser);
3453}
3454END_TEST
3455
3456/* Test skipping of parameter entity in an external DTD */
3457/* Derived from ibm/invalid/P69/ibm69i01.xml */
3458START_TEST(test_skipped_parameter_entity) {
3459  const char *text = "<?xml version='1.0'?>\n"
3460                     "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3461                     "<!ELEMENT root (#PCDATA|a)* >\n"
3462                     "]>\n"
3463                     "<root></root>";
3464  ExtTest dtd_data = {"%pe2;", NULL, NULL};
3465
3466  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3467  XML_SetUserData(g_parser, &dtd_data);
3468  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3469  XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
3470  init_dummy_handlers();
3471  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3472      == XML_STATUS_ERROR)
3473    xml_failure(g_parser);
3474  if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
3475    fail("Skip handler not executed");
3476}
3477END_TEST
3478
3479/* Test recursive parameter entity definition rejected in external DTD */
3480START_TEST(test_recursive_external_parameter_entity) {
3481  const char *text = "<?xml version='1.0'?>\n"
3482                     "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3483                     "<!ELEMENT root (#PCDATA|a)* >\n"
3484                     "]>\n"
3485                     "<root></root>";
3486  ExtFaults dtd_data = {"<!ENTITY % pe2 '&#37;pe2;'>\n%pe2;",
3487                        "Recursive external parameter entity not faulted", NULL,
3488                        XML_ERROR_RECURSIVE_ENTITY_REF};
3489
3490  XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3491  XML_SetUserData(g_parser, &dtd_data);
3492  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3493  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3494                 "Recursive external parameter not spotted");
3495}
3496END_TEST
3497
3498/* Test undefined parameter entity in external entity handler */
3499START_TEST(test_undefined_ext_entity_in_external_dtd) {
3500  const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3501                     "<doc></doc>\n";
3502
3503  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3504  XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3505  XML_SetUserData(g_parser, NULL);
3506  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3507      == XML_STATUS_ERROR)
3508    xml_failure(g_parser);
3509
3510  /* Now repeat without the external entity ref handler invoking
3511   * another copy of itself.
3512   */
3513  XML_ParserReset(g_parser, NULL);
3514  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3515  XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3516  XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
3517  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3518      == XML_STATUS_ERROR)
3519    xml_failure(g_parser);
3520}
3521END_TEST
3522
3523/* Test suspending the parse on receiving an XML declaration works */
3524START_TEST(test_suspend_xdecl) {
3525  const char *text = long_character_data_text;
3526
3527  XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
3528  XML_SetUserData(g_parser, g_parser);
3529  g_resumable = XML_TRUE;
3530  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3531      != XML_STATUS_SUSPENDED)
3532    xml_failure(g_parser);
3533  if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
3534    xml_failure(g_parser);
3535  /* Attempt to start a new parse while suspended */
3536  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3537      != XML_STATUS_ERROR)
3538    fail("Attempt to parse while suspended not faulted");
3539  if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
3540    fail("Suspended parse not faulted with correct error");
3541}
3542END_TEST
3543
3544/* Test aborting the parse in an epilog works */
3545START_TEST(test_abort_epilog) {
3546  const char *text = "<doc></doc>\n\r\n";
3547  XML_Char trigger_char = XCS('\r');
3548
3549  XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3550  XML_SetUserData(g_parser, &trigger_char);
3551  g_resumable = XML_FALSE;
3552  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3553      != XML_STATUS_ERROR)
3554    fail("Abort not triggered");
3555  if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
3556    xml_failure(g_parser);
3557}
3558END_TEST
3559
3560/* Test a different code path for abort in the epilog */
3561START_TEST(test_abort_epilog_2) {
3562  const char *text = "<doc></doc>\n";
3563  XML_Char trigger_char = XCS('\n');
3564
3565  XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3566  XML_SetUserData(g_parser, &trigger_char);
3567  g_resumable = XML_FALSE;
3568  expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
3569}
3570END_TEST
3571
3572/* Test suspension from the epilog */
3573START_TEST(test_suspend_epilog) {
3574  const char *text = "<doc></doc>\n";
3575  XML_Char trigger_char = XCS('\n');
3576
3577  XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3578  XML_SetUserData(g_parser, &trigger_char);
3579  g_resumable = XML_TRUE;
3580  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3581      != XML_STATUS_SUSPENDED)
3582    xml_failure(g_parser);
3583}
3584END_TEST
3585
3586START_TEST(test_suspend_in_sole_empty_tag) {
3587  const char *text = "<doc/>";
3588  enum XML_Status rc;
3589
3590  XML_SetEndElementHandler(g_parser, suspending_end_handler);
3591  XML_SetUserData(g_parser, g_parser);
3592  rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
3593  if (rc == XML_STATUS_ERROR)
3594    xml_failure(g_parser);
3595  else if (rc != XML_STATUS_SUSPENDED)
3596    fail("Suspend not triggered");
3597  rc = XML_ResumeParser(g_parser);
3598  if (rc == XML_STATUS_ERROR)
3599    xml_failure(g_parser);
3600  else if (rc != XML_STATUS_OK)
3601    fail("Resume failed");
3602}
3603END_TEST
3604
3605START_TEST(test_unfinished_epilog) {
3606  const char *text = "<doc></doc><";
3607
3608  expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
3609                 "Incomplete epilog entry not faulted");
3610}
3611END_TEST
3612
3613START_TEST(test_partial_char_in_epilog) {
3614  const char *text = "<doc></doc>\xe2\x82";
3615
3616  /* First check that no fault is raised if the parse is not finished */
3617  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3618      == XML_STATUS_ERROR)
3619    xml_failure(g_parser);
3620  /* Now check that it is faulted once we finish */
3621  if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
3622    fail("Partial character in epilog not faulted");
3623  if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
3624    xml_failure(g_parser);
3625}
3626END_TEST
3627
3628/* Test resuming a parse suspended in entity substitution */
3629START_TEST(test_suspend_resume_internal_entity) {
3630  const char *text
3631      = "<!DOCTYPE doc [\n"
3632        "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
3633        "]>\n"
3634        "<doc>&foo;</doc>\n";
3635  const XML_Char *expected1 = XCS("Hi");
3636  const XML_Char *expected2 = XCS("HiHo");
3637  CharData storage;
3638
3639  CharData_Init(&storage);
3640  XML_SetStartElementHandler(g_parser, start_element_suspender);
3641  XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3642  XML_SetUserData(g_parser, &storage);
3643  // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3644  // we won't know exactly how much input we actually managed to give Expat.
3645  if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3646      != XML_STATUS_SUSPENDED)
3647    xml_failure(g_parser);
3648  CharData_CheckXMLChars(&storage, XCS(""));
3649  if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
3650    xml_failure(g_parser);
3651  CharData_CheckXMLChars(&storage, expected1);
3652  if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3653    xml_failure(g_parser);
3654  CharData_CheckXMLChars(&storage, expected2);
3655}
3656END_TEST
3657
3658START_TEST(test_suspend_resume_internal_entity_issue_629) {
3659  const char *const text
3660      = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
3661        "<"
3662        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3663        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3664        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3665        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3666        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3667        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3668        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3669        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3670        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3671        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3672        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3673        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3674        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3675        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3676        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3677        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3678        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3679        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3680        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3681        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3682        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3683        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3684        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3685        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3686        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3687        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3688        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3689        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3690        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3691        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3692        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3693        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3694        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3695        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3696        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3697        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3698        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3699        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3700        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3701        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3702        "/>"
3703        "</b></a>";
3704  const size_t firstChunkSizeBytes = 54;
3705
3706  XML_Parser parser = XML_ParserCreate(NULL);
3707  XML_SetUserData(parser, parser);
3708  XML_SetCommentHandler(parser, suspending_comment_handler);
3709
3710  if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
3711      != XML_STATUS_SUSPENDED)
3712    xml_failure(parser);
3713  if (XML_ResumeParser(parser) != XML_STATUS_OK)
3714    xml_failure(parser);
3715  if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
3716                              (int)(strlen(text) - firstChunkSizeBytes),
3717                              XML_TRUE)
3718      != XML_STATUS_OK)
3719    xml_failure(parser);
3720  XML_ParserFree(parser);
3721}
3722END_TEST
3723
3724/* Test syntax error is caught at parse resumption */
3725START_TEST(test_resume_entity_with_syntax_error) {
3726  const char *text = "<!DOCTYPE doc [\n"
3727                     "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
3728                     "]>\n"
3729                     "<doc>&foo;</doc>\n";
3730
3731  XML_SetStartElementHandler(g_parser, start_element_suspender);
3732  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3733      != XML_STATUS_SUSPENDED)
3734    xml_failure(g_parser);
3735  if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
3736    fail("Syntax error in entity not faulted");
3737  if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
3738    xml_failure(g_parser);
3739}
3740END_TEST
3741
3742/* Test suspending and resuming in a parameter entity substitution */
3743START_TEST(test_suspend_resume_parameter_entity) {
3744  const char *text = "<!DOCTYPE doc [\n"
3745                     "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
3746                     "%foo;\n"
3747                     "]>\n"
3748                     "<doc>Hello, world</doc>";
3749  const XML_Char *expected = XCS("Hello, world");
3750  CharData storage;
3751
3752  CharData_Init(&storage);
3753  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3754  XML_SetElementDeclHandler(g_parser, element_decl_suspender);
3755  XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3756  XML_SetUserData(g_parser, &storage);
3757  if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3758      != XML_STATUS_SUSPENDED)
3759    xml_failure(g_parser);
3760  CharData_CheckXMLChars(&storage, XCS(""));
3761  if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3762    xml_failure(g_parser);
3763  CharData_CheckXMLChars(&storage, expected);
3764}
3765END_TEST
3766
3767/* Test attempting to use parser after an error is faulted */
3768START_TEST(test_restart_on_error) {
3769  const char *text = "<$doc><doc></doc>";
3770
3771  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3772      != XML_STATUS_ERROR)
3773    fail("Invalid tag name not faulted");
3774  if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3775    xml_failure(g_parser);
3776  if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
3777    fail("Restarting invalid parse not faulted");
3778  if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3779    xml_failure(g_parser);
3780}
3781END_TEST
3782
3783/* Test that angle brackets in an attribute default value are faulted */
3784START_TEST(test_reject_lt_in_attribute_value) {
3785  const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
3786                     "<doc></doc>";
3787
3788  expect_failure(text, XML_ERROR_INVALID_TOKEN,
3789                 "Bad attribute default not faulted");
3790}
3791END_TEST
3792
3793START_TEST(test_reject_unfinished_param_in_att_value) {
3794  const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
3795                     "<doc></doc>";
3796
3797  expect_failure(text, XML_ERROR_INVALID_TOKEN,
3798                 "Bad attribute default not faulted");
3799}
3800END_TEST
3801
3802START_TEST(test_trailing_cr_in_att_value) {
3803  const char *text = "<doc a='value\r'/>";
3804
3805  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3806      == XML_STATUS_ERROR)
3807    xml_failure(g_parser);
3808}
3809END_TEST
3810
3811/* Try parsing a general entity within a parameter entity in a
3812 * standalone internal DTD.  Covers a corner case in the parser.
3813 */
3814START_TEST(test_standalone_internal_entity) {
3815  const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
3816                     "<!DOCTYPE doc [\n"
3817                     "  <!ELEMENT doc (#PCDATA)>\n"
3818                     "  <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"&ge;\">'>\n"
3819                     "  <!ENTITY ge 'AttDefaultValue'>\n"
3820                     "  %pe;\n"
3821                     "]>\n"
3822                     "<doc att2='any'/>";
3823
3824  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3825  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3826      == XML_STATUS_ERROR)
3827    xml_failure(g_parser);
3828}
3829END_TEST
3830
3831/* Test that a reference to an unknown external entity is skipped */
3832START_TEST(test_skipped_external_entity) {
3833  const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3834                     "<doc></doc>\n";
3835  ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
3836                       "<!ENTITY % e2 '%e1;'>\n",
3837                       NULL, NULL};
3838
3839  XML_SetUserData(g_parser, &test_data);
3840  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3841  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3842  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3843      == XML_STATUS_ERROR)
3844    xml_failure(g_parser);
3845}
3846END_TEST
3847
3848/* Test a different form of unknown external entity */
3849START_TEST(test_skipped_null_loaded_ext_entity) {
3850  const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3851                     "<doc />";
3852  ExtHdlrData test_data
3853      = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3854         "<!ENTITY % pe2 '%pe1;'>\n"
3855         "%pe2;\n",
3856         external_entity_null_loader};
3857
3858  XML_SetUserData(g_parser, &test_data);
3859  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3860  XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3861  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3862      == XML_STATUS_ERROR)
3863    xml_failure(g_parser);
3864}
3865END_TEST
3866
3867START_TEST(test_skipped_unloaded_ext_entity) {
3868  const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3869                     "<doc />";
3870  ExtHdlrData test_data
3871      = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3872         "<!ENTITY % pe2 '%pe1;'>\n"
3873         "%pe2;\n",
3874         NULL};
3875
3876  XML_SetUserData(g_parser, &test_data);
3877  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3878  XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3879  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3880      == XML_STATUS_ERROR)
3881    xml_failure(g_parser);
3882}
3883END_TEST
3884
3885/* Test that a parameter entity value ending with a carriage return
3886 * has it translated internally into a newline.
3887 */
3888START_TEST(test_param_entity_with_trailing_cr) {
3889#define PARAM_ENTITY_NAME "pe"
3890#define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
3891  const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3892                     "<doc/>";
3893  ExtTest test_data
3894      = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
3895         "%" PARAM_ENTITY_NAME ";\n",
3896         NULL, NULL};
3897
3898  XML_SetUserData(g_parser, &test_data);
3899  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3900  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3901  XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
3902  param_entity_match_init(XCS(PARAM_ENTITY_NAME),
3903                          XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
3904  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3905      == XML_STATUS_ERROR)
3906    xml_failure(g_parser);
3907  int entity_match_flag = get_param_entity_match_flag();
3908  if (entity_match_flag == ENTITY_MATCH_FAIL)
3909    fail("Parameter entity CR->NEWLINE conversion failed");
3910  else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
3911    fail("Parameter entity not parsed");
3912}
3913#undef PARAM_ENTITY_NAME
3914#undef PARAM_ENTITY_CORE_VALUE
3915END_TEST
3916
3917START_TEST(test_invalid_character_entity) {
3918  const char *text = "<!DOCTYPE doc [\n"
3919                     "  <!ENTITY entity '&#x110000;'>\n"
3920                     "]>\n"
3921                     "<doc>&entity;</doc>";
3922
3923  expect_failure(text, XML_ERROR_BAD_CHAR_REF,
3924                 "Out of range character reference not faulted");
3925}
3926END_TEST
3927
3928START_TEST(test_invalid_character_entity_2) {
3929  const char *text = "<!DOCTYPE doc [\n"
3930                     "  <!ENTITY entity '&#xg0;'>\n"
3931                     "]>\n"
3932                     "<doc>&entity;</doc>";
3933
3934  expect_failure(text, XML_ERROR_INVALID_TOKEN,
3935                 "Out of range character reference not faulted");
3936}
3937END_TEST
3938
3939START_TEST(test_invalid_character_entity_3) {
3940  const char text[] =
3941      /* <!DOCTYPE doc [\n */
3942      "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
3943      /* U+0E04 = KHO KHWAI
3944       * U+0E08 = CHO CHAN */
3945      /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
3946      "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
3947      "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
3948      /* ]>\n */
3949      "\0]\0>\0\n"
3950      /* <doc>&entity;</doc> */
3951      "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
3952
3953  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3954      != XML_STATUS_ERROR)
3955    fail("Invalid start of entity name not faulted");
3956  if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
3957    xml_failure(g_parser);
3958}
3959END_TEST
3960
3961START_TEST(test_invalid_character_entity_4) {
3962  const char *text = "<!DOCTYPE doc [\n"
3963                     "  <!ENTITY entity '&#1114112;'>\n" /* = &#x110000 */
3964                     "]>\n"
3965                     "<doc>&entity;</doc>";
3966
3967  expect_failure(text, XML_ERROR_BAD_CHAR_REF,
3968                 "Out of range character reference not faulted");
3969}
3970END_TEST
3971
3972/* Test that processing instructions are picked up by a default handler */
3973START_TEST(test_pi_handled_in_default) {
3974  const char *text = "<?test processing instruction?>\n<doc/>";
3975  const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
3976  CharData storage;
3977
3978  CharData_Init(&storage);
3979  XML_SetDefaultHandler(g_parser, accumulate_characters);
3980  XML_SetUserData(g_parser, &storage);
3981  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3982      == XML_STATUS_ERROR)
3983    xml_failure(g_parser);
3984  CharData_CheckXMLChars(&storage, expected);
3985}
3986END_TEST
3987
3988/* Test that comments are picked up by a default handler */
3989START_TEST(test_comment_handled_in_default) {
3990  const char *text = "<!-- This is a comment -->\n<doc/>";
3991  const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
3992  CharData storage;
3993
3994  CharData_Init(&storage);
3995  XML_SetDefaultHandler(g_parser, accumulate_characters);
3996  XML_SetUserData(g_parser, &storage);
3997  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3998      == XML_STATUS_ERROR)
3999    xml_failure(g_parser);
4000  CharData_CheckXMLChars(&storage, expected);
4001}
4002END_TEST
4003
4004/* Test PIs that look almost but not quite like XML declarations */
4005START_TEST(test_pi_yml) {
4006  const char *text = "<?yml something like data?><doc/>";
4007  const XML_Char *expected = XCS("yml: something like data\n");
4008  CharData storage;
4009
4010  CharData_Init(&storage);
4011  XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4012  XML_SetUserData(g_parser, &storage);
4013  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4014      == XML_STATUS_ERROR)
4015    xml_failure(g_parser);
4016  CharData_CheckXMLChars(&storage, expected);
4017}
4018END_TEST
4019
4020START_TEST(test_pi_xnl) {
4021  const char *text = "<?xnl nothing like data?><doc/>";
4022  const XML_Char *expected = XCS("xnl: nothing like data\n");
4023  CharData storage;
4024
4025  CharData_Init(&storage);
4026  XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4027  XML_SetUserData(g_parser, &storage);
4028  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4029      == XML_STATUS_ERROR)
4030    xml_failure(g_parser);
4031  CharData_CheckXMLChars(&storage, expected);
4032}
4033END_TEST
4034
4035START_TEST(test_pi_xmm) {
4036  const char *text = "<?xmm everything like data?><doc/>";
4037  const XML_Char *expected = XCS("xmm: everything like data\n");
4038  CharData storage;
4039
4040  CharData_Init(&storage);
4041  XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4042  XML_SetUserData(g_parser, &storage);
4043  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4044      == XML_STATUS_ERROR)
4045    xml_failure(g_parser);
4046  CharData_CheckXMLChars(&storage, expected);
4047}
4048END_TEST
4049
4050START_TEST(test_utf16_pi) {
4051  const char text[] =
4052      /* <?{KHO KHWAI}{CHO CHAN}?>
4053       * where {KHO KHWAI} = U+0E04
4054       * and   {CHO CHAN}  = U+0E08
4055       */
4056      "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4057      /* <q/> */
4058      "<\0q\0/\0>\0";
4059#ifdef XML_UNICODE
4060  const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4061#else
4062  const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4063#endif
4064  CharData storage;
4065
4066  CharData_Init(&storage);
4067  XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4068  XML_SetUserData(g_parser, &storage);
4069  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4070      == XML_STATUS_ERROR)
4071    xml_failure(g_parser);
4072  CharData_CheckXMLChars(&storage, expected);
4073}
4074END_TEST
4075
4076START_TEST(test_utf16_be_pi) {
4077  const char text[] =
4078      /* <?{KHO KHWAI}{CHO CHAN}?>
4079       * where {KHO KHWAI} = U+0E04
4080       * and   {CHO CHAN}  = U+0E08
4081       */
4082      "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4083      /* <q/> */
4084      "\0<\0q\0/\0>";
4085#ifdef XML_UNICODE
4086  const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4087#else
4088  const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4089#endif
4090  CharData storage;
4091
4092  CharData_Init(&storage);
4093  XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4094  XML_SetUserData(g_parser, &storage);
4095  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4096      == XML_STATUS_ERROR)
4097    xml_failure(g_parser);
4098  CharData_CheckXMLChars(&storage, expected);
4099}
4100END_TEST
4101
4102/* Test that comments can be picked up and translated */
4103START_TEST(test_utf16_be_comment) {
4104  const char text[] =
4105      /* <!-- Comment A --> */
4106      "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4107      /* <doc/> */
4108      "\0<\0d\0o\0c\0/\0>";
4109  const XML_Char *expected = XCS(" Comment A ");
4110  CharData storage;
4111
4112  CharData_Init(&storage);
4113  XML_SetCommentHandler(g_parser, accumulate_comment);
4114  XML_SetUserData(g_parser, &storage);
4115  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4116      == XML_STATUS_ERROR)
4117    xml_failure(g_parser);
4118  CharData_CheckXMLChars(&storage, expected);
4119}
4120END_TEST
4121
4122START_TEST(test_utf16_le_comment) {
4123  const char text[] =
4124      /* <!-- Comment B --> */
4125      "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4126      /* <doc/> */
4127      "<\0d\0o\0c\0/\0>\0";
4128  const XML_Char *expected = XCS(" Comment B ");
4129  CharData storage;
4130
4131  CharData_Init(&storage);
4132  XML_SetCommentHandler(g_parser, accumulate_comment);
4133  XML_SetUserData(g_parser, &storage);
4134  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4135      == XML_STATUS_ERROR)
4136    xml_failure(g_parser);
4137  CharData_CheckXMLChars(&storage, expected);
4138}
4139END_TEST
4140
4141/* Test that the unknown encoding handler with map entries that expect
4142 * conversion but no conversion function is faulted
4143 */
4144START_TEST(test_missing_encoding_conversion_fn) {
4145  const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4146                     "<doc>\x81</doc>";
4147
4148  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4149  /* MiscEncodingHandler sets up an encoding with every top-bit-set
4150   * character introducing a two-byte sequence.  For this, it
4151   * requires a convert function.  The above function call doesn't
4152   * pass one through, so when BadEncodingHandler actually gets
4153   * called it should supply an invalid encoding.
4154   */
4155  expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4156                 "Encoding with missing convert() not faulted");
4157}
4158END_TEST
4159
4160START_TEST(test_failing_encoding_conversion_fn) {
4161  const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4162                     "<doc>\x81</doc>";
4163
4164  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4165  /* BadEncodingHandler sets up an encoding with every top-bit-set
4166   * character introducing a two-byte sequence.  For this, it
4167   * requires a convert function.  The above function call passes
4168   * one that insists all possible sequences are invalid anyway.
4169   */
4170  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4171                 "Encoding with failing convert() not faulted");
4172}
4173END_TEST
4174
4175/* Test unknown encoding conversions */
4176START_TEST(test_unknown_encoding_success) {
4177  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4178                     /* Equivalent to <eoc>Hello, world</eoc> */
4179                     "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4180
4181  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4182  run_character_check(text, XCS("Hello, world"));
4183}
4184END_TEST
4185
4186/* Test bad name character in unknown encoding */
4187START_TEST(test_unknown_encoding_bad_name) {
4188  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4189                     "<\xff\x64oc>Hello, world</\xff\x64oc>";
4190
4191  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4192  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4193                 "Bad name start in unknown encoding not faulted");
4194}
4195END_TEST
4196
4197/* Test bad mid-name character in unknown encoding */
4198START_TEST(test_unknown_encoding_bad_name_2) {
4199  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4200                     "<d\xffoc>Hello, world</d\xffoc>";
4201
4202  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4203  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4204                 "Bad name in unknown encoding not faulted");
4205}
4206END_TEST
4207
4208/* Test element name that is long enough to fill the conversion buffer
4209 * in an unknown encoding, finishing with an encoded character.
4210 */
4211START_TEST(test_unknown_encoding_long_name_1) {
4212  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4213                     "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4214                     "Hi"
4215                     "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4216  const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4217  CharData storage;
4218
4219  CharData_Init(&storage);
4220  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4221  XML_SetStartElementHandler(g_parser, record_element_start_handler);
4222  XML_SetUserData(g_parser, &storage);
4223  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4224      == XML_STATUS_ERROR)
4225    xml_failure(g_parser);
4226  CharData_CheckXMLChars(&storage, expected);
4227}
4228END_TEST
4229
/* Test element name that is long enough to fill the conversion buffer
 * in an unknown encoding, finishing with a simple character.
 */
4233START_TEST(test_unknown_encoding_long_name_2) {
4234  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4235                     "<abcdefghabcdefghabcdefghijklmnop>"
4236                     "Hi"
4237                     "</abcdefghabcdefghabcdefghijklmnop>";
4238  const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4239  CharData storage;
4240
4241  CharData_Init(&storage);
4242  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4243  XML_SetStartElementHandler(g_parser, record_element_start_handler);
4244  XML_SetUserData(g_parser, &storage);
4245  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4246      == XML_STATUS_ERROR)
4247    xml_failure(g_parser);
4248  CharData_CheckXMLChars(&storage, expected);
4249}
4250END_TEST
4251
4252START_TEST(test_invalid_unknown_encoding) {
4253  const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4254                     "<doc>Hello world</doc>";
4255
4256  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4257  expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4258                 "Invalid unknown encoding not faulted");
4259}
4260END_TEST
4261
4262START_TEST(test_unknown_ascii_encoding_ok) {
4263  const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4264                     "<doc>Hello, world</doc>";
4265
4266  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4267  run_character_check(text, XCS("Hello, world"));
4268}
4269END_TEST
4270
4271START_TEST(test_unknown_ascii_encoding_fail) {
4272  const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4273                     "<doc>Hello, \x80 world</doc>";
4274
4275  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4276  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4277                 "Invalid character not faulted");
4278}
4279END_TEST
4280
4281START_TEST(test_unknown_encoding_invalid_length) {
4282  const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4283                     "<doc>Hello, world</doc>";
4284
4285  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4286  expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4287                 "Invalid unknown encoding not faulted");
4288}
4289END_TEST
4290
4291START_TEST(test_unknown_encoding_invalid_topbit) {
4292  const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4293                     "<doc>Hello, world</doc>";
4294
4295  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4296  expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4297                 "Invalid unknown encoding not faulted");
4298}
4299END_TEST
4300
4301START_TEST(test_unknown_encoding_invalid_surrogate) {
4302  const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4303                     "<doc>Hello, \x82 world</doc>";
4304
4305  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4306  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4307                 "Invalid unknown encoding not faulted");
4308}
4309END_TEST
4310
4311START_TEST(test_unknown_encoding_invalid_high) {
4312  const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4313                     "<doc>Hello, world</doc>";
4314
4315  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4316  expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4317                 "Invalid unknown encoding not faulted");
4318}
4319END_TEST
4320
4321START_TEST(test_unknown_encoding_invalid_attr_value) {
4322  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4323                     "<doc attr='\xff\x30'/>";
4324
4325  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4326  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4327                 "Invalid attribute valid not faulted");
4328}
4329END_TEST
4330
/* Test that an external entity parser given an explicit Latin-1
 * encoding does not let a leading UTF-16 BOM select UTF-16: the BOM
 * bytes must be decoded as ordinary Latin-1 characters.
 */
4335START_TEST(test_ext_entity_latin1_utf16le_bom) {
4336  const char *text = "<!DOCTYPE doc [\n"
4337                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4338                     "]>\n"
4339                     "<doc>&en;</doc>";
4340  ExtTest2 test_data
4341      = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4342         /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4343          *   0x4c = L and 0x20 is a space
4344          */
4345         "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4346#ifdef XML_UNICODE
4347  const XML_Char *expected = XCS("\x00ff\x00feL ");
4348#else
4349  /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4350  const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4351#endif
4352  CharData storage;
4353
4354  CharData_Init(&storage);
4355  test_data.storage = &storage;
4356  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4357  XML_SetUserData(g_parser, &test_data);
4358  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4359  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4360      == XML_STATUS_ERROR)
4361    xml_failure(g_parser);
4362  CharData_CheckXMLChars(&storage, expected);
4363}
4364END_TEST
4365
4366START_TEST(test_ext_entity_latin1_utf16be_bom) {
4367  const char *text = "<!DOCTYPE doc [\n"
4368                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4369                     "]>\n"
4370                     "<doc>&en;</doc>";
4371  ExtTest2 test_data
4372      = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4373         /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4374          *   0x4c = L and 0x20 is a space
4375          */
4376         "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4377#ifdef XML_UNICODE
4378  const XML_Char *expected = XCS("\x00fe\x00ff L");
4379#else
4380  /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4381  const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4382#endif
4383  CharData storage;
4384
4385  CharData_Init(&storage);
4386  test_data.storage = &storage;
4387  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4388  XML_SetUserData(g_parser, &test_data);
4389  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4390  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4391      == XML_STATUS_ERROR)
4392    xml_failure(g_parser);
4393  CharData_CheckXMLChars(&storage, expected);
4394}
4395END_TEST
4396
4397/* Parsing the full buffer rather than a byte at a time makes a
4398 * difference to the encoding scanning code, so repeat the above tests
4399 * without breaking them down by byte.
4400 */
4401START_TEST(test_ext_entity_latin1_utf16le_bom2) {
4402  const char *text = "<!DOCTYPE doc [\n"
4403                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4404                     "]>\n"
4405                     "<doc>&en;</doc>";
4406  ExtTest2 test_data
4407      = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4408         /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4409          *   0x4c = L and 0x20 is a space
4410          */
4411         "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4412#ifdef XML_UNICODE
4413  const XML_Char *expected = XCS("\x00ff\x00feL ");
4414#else
4415  /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4416  const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4417#endif
4418  CharData storage;
4419
4420  CharData_Init(&storage);
4421  test_data.storage = &storage;
4422  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4423  XML_SetUserData(g_parser, &test_data);
4424  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4425  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4426      == XML_STATUS_ERROR)
4427    xml_failure(g_parser);
4428  CharData_CheckXMLChars(&storage, expected);
4429}
4430END_TEST
4431
4432START_TEST(test_ext_entity_latin1_utf16be_bom2) {
4433  const char *text = "<!DOCTYPE doc [\n"
4434                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4435                     "]>\n"
4436                     "<doc>&en;</doc>";
4437  ExtTest2 test_data
4438      = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4439         /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4440          *   0x4c = L and 0x20 is a space
4441          */
4442         "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4443#ifdef XML_UNICODE
4444  const XML_Char *expected = XCS("\x00fe\x00ff L");
4445#else
4446  /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4447  const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
4448#endif
4449  CharData storage;
4450
4451  CharData_Init(&storage);
4452  test_data.storage = &storage;
4453  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4454  XML_SetUserData(g_parser, &test_data);
4455  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4456  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4457      == XML_STATUS_ERROR)
4458    xml_failure(g_parser);
4459  CharData_CheckXMLChars(&storage, expected);
4460}
4461END_TEST
4462
4463/* Test little-endian UTF-16 given an explicit big-endian encoding */
4464START_TEST(test_ext_entity_utf16_be) {
4465  const char *text = "<!DOCTYPE doc [\n"
4466                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4467                     "]>\n"
4468                     "<doc>&en;</doc>";
4469  ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
4470#ifdef XML_UNICODE
4471  const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4472#else
4473  const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4474                                 "\xe6\x94\x80"   /* U+6500 */
4475                                 "\xe2\xbc\x80"   /* U+2F00 */
4476                                 "\xe3\xb8\x80"); /* U+3E00 */
4477#endif
4478  CharData storage;
4479
4480  CharData_Init(&storage);
4481  test_data.storage = &storage;
4482  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4483  XML_SetUserData(g_parser, &test_data);
4484  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4485  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4486      == XML_STATUS_ERROR)
4487    xml_failure(g_parser);
4488  CharData_CheckXMLChars(&storage, expected);
4489}
4490END_TEST
4491
4492/* Test big-endian UTF-16 given an explicit little-endian encoding */
4493START_TEST(test_ext_entity_utf16_le) {
4494  const char *text = "<!DOCTYPE doc [\n"
4495                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4496                     "]>\n"
4497                     "<doc>&en;</doc>";
4498  ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
4499#ifdef XML_UNICODE
4500  const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4501#else
4502  const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4503                                 "\xe6\x94\x80"   /* U+6500 */
4504                                 "\xe2\xbc\x80"   /* U+2F00 */
4505                                 "\xe3\xb8\x80"); /* U+3E00 */
4506#endif
4507  CharData storage;
4508
4509  CharData_Init(&storage);
4510  test_data.storage = &storage;
4511  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4512  XML_SetUserData(g_parser, &test_data);
4513  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4514  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4515      == XML_STATUS_ERROR)
4516    xml_failure(g_parser);
4517  CharData_CheckXMLChars(&storage, expected);
4518}
4519END_TEST
4520
4521/* Test little-endian UTF-16 given no explicit encoding.
4522 * The existing default encoding (UTF-8) is assumed to hold without a
4523 * BOM to contradict it, so the entity value will in fact provoke an
4524 * error because 0x00 is not a valid XML character.  We parse the
4525 * whole buffer in one go rather than feeding it in byte by byte to
4526 * exercise different code paths in the initial scanning routines.
4527 */
4528START_TEST(test_ext_entity_utf16_unknown) {
4529  const char *text = "<!DOCTYPE doc [\n"
4530                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4531                     "]>\n"
4532                     "<doc>&en;</doc>";
4533  ExtFaults2 test_data
4534      = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
4535         XML_ERROR_INVALID_TOKEN};
4536
4537  XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
4538  XML_SetUserData(g_parser, &test_data);
4539  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4540                 "Invalid character should not have been accepted");
4541}
4542END_TEST
4543
4544/* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
4545START_TEST(test_ext_entity_utf8_non_bom) {
4546  const char *text = "<!DOCTYPE doc [\n"
4547                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4548                     "]>\n"
4549                     "<doc>&en;</doc>";
4550  ExtTest2 test_data
4551      = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
4552         3, NULL, NULL};
4553#ifdef XML_UNICODE
4554  const XML_Char *expected = XCS("\xfec0");
4555#else
4556  const XML_Char *expected = XCS("\xef\xbb\x80");
4557#endif
4558  CharData storage;
4559
4560  CharData_Init(&storage);
4561  test_data.storage = &storage;
4562  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4563  XML_SetUserData(g_parser, &test_data);
4564  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4565  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4566      == XML_STATUS_ERROR)
4567    xml_failure(g_parser);
4568  CharData_CheckXMLChars(&storage, expected);
4569}
4570END_TEST
4571
4572/* Test that UTF-8 in a CDATA section is correctly passed through */
4573START_TEST(test_utf8_in_cdata_section) {
4574  const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
4575#ifdef XML_UNICODE
4576  const XML_Char *expected = XCS("one \x00e9 two");
4577#else
4578  const XML_Char *expected = XCS("one \xc3\xa9 two");
4579#endif
4580
4581  run_character_check(text, expected);
4582}
4583END_TEST
4584
4585/* Test that little-endian UTF-16 in a CDATA section is handled */
4586START_TEST(test_utf8_in_cdata_section_2) {
4587  const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
4588#ifdef XML_UNICODE
4589  const XML_Char *expected = XCS("\x00e9]\x00e9two");
4590#else
4591  const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
4592#endif
4593
4594  run_character_check(text, expected);
4595}
4596END_TEST
4597
4598START_TEST(test_utf8_in_start_tags) {
4599  struct test_case {
4600    bool goodName;
4601    bool goodNameStart;
4602    const char *tagName;
4603  };
4604
4605  // The idea with the tests below is this:
4606  // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
4607  // go to isNever and are hence not a concern.
4608  //
4609  // We start with a character that is a valid name character
4610  // (or even name-start character, see XML 1.0r4 spec) and then we flip
4611  // single bits at places where (1) the result leaves the UTF-8 encoding space
4612  // and (2) we stay in the same n-byte sequence family.
4613  //
4614  // The flipped bits are highlighted in angle brackets in comments,
4615  // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
4616  // the most significant bit to 1 to leave UTF-8 encoding space.
4617  struct test_case cases[] = {
4618      // 1-byte UTF-8: [0xxx xxxx]
4619      {true, true, "\x3A"},   // [0011 1010] = ASCII colon ':'
4620      {false, false, "\xBA"}, // [<1>011 1010]
4621      {true, false, "\x39"},  // [0011 1001] = ASCII nine '9'
4622      {false, false, "\xB9"}, // [<1>011 1001]
4623
4624      // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
4625      {true, true, "\xDB\xA5"},   // [1101 1011] [1010 0101] =
4626                                  // Arabic small waw U+06E5
4627      {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
4628      {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
4629      {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
4630      {true, false, "\xCC\x81"},  // [1100 1100] [1000 0001] =
4631                                  // combining char U+0301
4632      {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
4633      {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
4634      {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
4635
4636      // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
4637      {true, true, "\xE0\xA4\x85"},   // [1110 0000] [1010 0100] [1000 0101] =
4638                                      // Devanagari Letter A U+0905
4639      {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
4640      {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
4641      {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
4642      {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
4643      {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
4644      {true, false, "\xE0\xA4\x81"},  // [1110 0000] [1010 0100] [1000 0001] =
4645                                      // combining char U+0901
4646      {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
4647      {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
4648      {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
4649      {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
4650      {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
4651  };
4652  const bool atNameStart[] = {true, false};
4653
4654  size_t i = 0;
4655  char doc[1024];
4656  size_t failCount = 0;
4657
4658  // we need all the bytes to be parsed, but we don't want the errors that can
4659  // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
4660  if (g_reparseDeferralEnabledDefault) {
4661    return;
4662  }
4663
4664  for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
4665    size_t j = 0;
4666    for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
4667      const bool expectedSuccess
4668          = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
4669      snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
4670               cases[i].tagName);
4671      XML_Parser parser = XML_ParserCreate(NULL);
4672
4673      const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
4674          parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
4675
4676      bool success = true;
4677      if ((status == XML_STATUS_OK) != expectedSuccess) {
4678        success = false;
4679      }
4680      if ((status == XML_STATUS_ERROR)
4681          && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
4682        success = false;
4683      }
4684
4685      if (! success) {
4686        fprintf(
4687            stderr,
4688            "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
4689            (unsigned)i + 1u, atNameStart[j] ? "    " : "not ",
4690            (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
4691        failCount++;
4692      }
4693
4694      XML_ParserFree(parser);
4695    }
4696  }
4697
4698  if (failCount > 0) {
4699    fail("UTF-8 regression detected");
4700  }
4701}
4702END_TEST
4703
4704/* Test trailing spaces in elements are accepted */
4705START_TEST(test_trailing_spaces_in_elements) {
4706  const char *text = "<doc   >Hi</doc >";
4707  const XML_Char *expected = XCS("doc/doc");
4708  CharData storage;
4709
4710  CharData_Init(&storage);
4711  XML_SetElementHandler(g_parser, record_element_start_handler,
4712                        record_element_end_handler);
4713  XML_SetUserData(g_parser, &storage);
4714  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4715      == XML_STATUS_ERROR)
4716    xml_failure(g_parser);
4717  CharData_CheckXMLChars(&storage, expected);
4718}
4719END_TEST
4720
4721START_TEST(test_utf16_attribute) {
4722  const char text[] =
4723      /* <d {KHO KHWAI}{CHO CHAN}='a'/>
4724       * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4725       * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4726       */
4727      "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
4728  const XML_Char *expected = XCS("a");
4729  CharData storage;
4730
4731  CharData_Init(&storage);
4732  XML_SetStartElementHandler(g_parser, accumulate_attribute);
4733  XML_SetUserData(g_parser, &storage);
4734  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4735      == XML_STATUS_ERROR)
4736    xml_failure(g_parser);
4737  CharData_CheckXMLChars(&storage, expected);
4738}
4739END_TEST
4740
4741START_TEST(test_utf16_second_attr) {
4742  /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
4743   * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4744   * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4745   */
4746  const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
4747                      "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
4748  const XML_Char *expected = XCS("1");
4749  CharData storage;
4750
4751  CharData_Init(&storage);
4752  XML_SetStartElementHandler(g_parser, accumulate_attribute);
4753  XML_SetUserData(g_parser, &storage);
4754  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4755      == XML_STATUS_ERROR)
4756    xml_failure(g_parser);
4757  CharData_CheckXMLChars(&storage, expected);
4758}
4759END_TEST
4760
4761START_TEST(test_attr_after_solidus) {
4762  const char *text = "<doc attr1='a' / attr2='b'>";
4763
4764  expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
4765}
4766END_TEST
4767
4768START_TEST(test_utf16_pe) {
4769  /* <!DOCTYPE doc [
4770   * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
4771   * %{KHO KHWAI}{CHO CHAN};
4772   * ]>
4773   * <doc></doc>
4774   *
4775   * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4776   * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4777   */
4778  const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4779                      "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
4780                      "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
4781                      "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
4782                      "\0%\x0e\x04\x0e\x08\0;\0\n"
4783                      "\0]\0>\0\n"
4784                      "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
4785#ifdef XML_UNICODE
4786  const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
4787#else
4788  const XML_Char *expected
4789      = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
4790#endif
4791  CharData storage;
4792
4793  CharData_Init(&storage);
4794  XML_SetUserData(g_parser, &storage);
4795  XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
4796  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4797      == XML_STATUS_ERROR)
4798    xml_failure(g_parser);
4799  CharData_CheckXMLChars(&storage, expected);
4800}
4801END_TEST
4802
4803/* Test that duff attribute description keywords are rejected */
4804START_TEST(test_bad_attr_desc_keyword) {
4805  const char *text = "<!DOCTYPE doc [\n"
4806                     "  <!ATTLIST doc attr CDATA #!IMPLIED>\n"
4807                     "]>\n"
4808                     "<doc />";
4809
4810  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4811                 "Bad keyword !IMPLIED not faulted");
4812}
4813END_TEST
4814
4815/* Test that an invalid attribute description keyword consisting of
4816 * UTF-16 characters with their top bytes non-zero are correctly
4817 * faulted
4818 */
4819START_TEST(test_bad_attr_desc_keyword_utf16) {
4820  /* <!DOCTYPE d [
4821   * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
4822   * ]><d/>
4823   *
4824   * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4825   * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4826   */
4827  const char text[]
4828      = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
4829        "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
4830        "\0#\x0e\x04\x0e\x08\0>\0\n"
4831        "\0]\0>\0<\0d\0/\0>";
4832
4833  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4834      != XML_STATUS_ERROR)
4835    fail("Invalid UTF16 attribute keyword not faulted");
4836  if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4837    xml_failure(g_parser);
4838}
4839END_TEST
4840
4841/* Test that invalid syntax in a <!DOCTYPE> is rejected.  Do this
4842 * using prefix-encoding (see above) to trigger specific code paths
4843 */
4844START_TEST(test_bad_doctype) {
4845  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4846                     "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
4847
4848  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4849  expect_failure(text, XML_ERROR_SYNTAX,
4850                 "Invalid bytes in DOCTYPE not faulted");
4851}
4852END_TEST
4853
4854START_TEST(test_bad_doctype_utf8) {
4855  const char *text = "<!DOCTYPE \xDB\x25"
4856                     "doc><doc/>"; // [1101 1011] [<0>010 0101]
4857  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4858                 "Invalid UTF-8 in DOCTYPE not faulted");
4859}
4860END_TEST
4861
4862START_TEST(test_bad_doctype_utf16) {
4863  const char text[] =
4864      /* <!DOCTYPE doc [ \x06f2 ]><doc/>
4865       *
4866       * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
4867       * (name character) but not a valid letter (name start character)
4868       */
4869      "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
4870      "\x06\xf2"
4871      "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
4872
4873  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4874      != XML_STATUS_ERROR)
4875    fail("Invalid bytes in DOCTYPE not faulted");
4876  if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4877    xml_failure(g_parser);
4878}
4879END_TEST
4880
4881START_TEST(test_bad_doctype_plus) {
4882  const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
4883                     "<1+>&foo;</1+>";
4884
4885  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4886                 "'+' in document name not faulted");
4887}
4888END_TEST
4889
4890START_TEST(test_bad_doctype_star) {
4891  const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
4892                     "<1*>&foo;</1*>";
4893
4894  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4895                 "'*' in document name not faulted");
4896}
4897END_TEST
4898
4899START_TEST(test_bad_doctype_query) {
4900  const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
4901                     "<1?>&foo;</1?>";
4902
4903  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4904                 "'?' in document name not faulted");
4905}
4906END_TEST
4907
4908START_TEST(test_unknown_encoding_bad_ignore) {
4909  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
4910                     "<!DOCTYPE doc SYSTEM 'foo'>"
4911                     "<doc><e>&entity;</e></doc>";
4912  ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
4913                     "Invalid character not faulted", XCS("prefix-conv"),
4914                     XML_ERROR_INVALID_TOKEN};
4915
4916  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4917  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4918  XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
4919  XML_SetUserData(g_parser, &fault);
4920  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4921                 "Bad IGNORE section with unknown encoding not failed");
4922}
4923END_TEST
4924
4925START_TEST(test_entity_in_utf16_be_attr) {
4926  const char text[] =
4927      /* <e a='&#228; &#x00E4;'></e> */
4928      "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
4929      "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
4930#ifdef XML_UNICODE
4931  const XML_Char *expected = XCS("\x00e4 \x00e4");
4932#else
4933  const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
4934#endif
4935  CharData storage;
4936
4937  CharData_Init(&storage);
4938  XML_SetUserData(g_parser, &storage);
4939  XML_SetStartElementHandler(g_parser, accumulate_attribute);
4940  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4941      == XML_STATUS_ERROR)
4942    xml_failure(g_parser);
4943  CharData_CheckXMLChars(&storage, expected);
4944}
4945END_TEST
4946
4947START_TEST(test_entity_in_utf16_le_attr) {
4948  const char text[] =
4949      /* <e a='&#228; &#x00E4;'></e> */
4950      "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
4951      "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
4952#ifdef XML_UNICODE
4953  const XML_Char *expected = XCS("\x00e4 \x00e4");
4954#else
4955  const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
4956#endif
4957  CharData storage;
4958
4959  CharData_Init(&storage);
4960  XML_SetUserData(g_parser, &storage);
4961  XML_SetStartElementHandler(g_parser, accumulate_attribute);
4962  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4963      == XML_STATUS_ERROR)
4964    xml_failure(g_parser);
4965  CharData_CheckXMLChars(&storage, expected);
4966}
4967END_TEST
4968
4969START_TEST(test_entity_public_utf16_be) {
4970  const char text[] =
4971      /* <!DOCTYPE d [ */
4972      "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
4973      /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
4974      "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
4975      "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
4976      /* %e; */
4977      "\0%\0e\0;\0\n"
4978      /* ]> */
4979      "\0]\0>\0\n"
4980      /* <d>&j;</d> */
4981      "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
4982  ExtTest2 test_data
4983      = {/* <!ENTITY j 'baz'> */
4984         "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
4985  const XML_Char *expected = XCS("baz");
4986  CharData storage;
4987
4988  CharData_Init(&storage);
4989  test_data.storage = &storage;
4990  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4991  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4992  XML_SetUserData(g_parser, &test_data);
4993  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4994  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4995      == XML_STATUS_ERROR)
4996    xml_failure(g_parser);
4997  CharData_CheckXMLChars(&storage, expected);
4998}
4999END_TEST
5000
5001START_TEST(test_entity_public_utf16_le) {
5002  const char text[] =
5003      /* <!DOCTYPE d [ */
5004      "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5005      /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5006      "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5007      "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5008      /* %e; */
5009      "%\0e\0;\0\n\0"
5010      /* ]> */
5011      "]\0>\0\n\0"
5012      /* <d>&j;</d> */
5013      "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5014  ExtTest2 test_data
5015      = {/* <!ENTITY j 'baz'> */
5016         "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5017  const XML_Char *expected = XCS("baz");
5018  CharData storage;
5019
5020  CharData_Init(&storage);
5021  test_data.storage = &storage;
5022  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5023  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5024  XML_SetUserData(g_parser, &test_data);
5025  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5026  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5027      == XML_STATUS_ERROR)
5028    xml_failure(g_parser);
5029  CharData_CheckXMLChars(&storage, expected);
5030}
5031END_TEST
5032
5033/* Test that a doctype with neither an internal nor external subset is
5034 * faulted
5035 */
5036START_TEST(test_short_doctype) {
5037  const char *text = "<!DOCTYPE doc></doc>";
5038  expect_failure(text, XML_ERROR_INVALID_TOKEN,
5039                 "DOCTYPE without subset not rejected");
5040}
5041END_TEST
5042
5043START_TEST(test_short_doctype_2) {
5044  const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5045  expect_failure(text, XML_ERROR_SYNTAX,
5046                 "DOCTYPE without Public ID not rejected");
5047}
5048END_TEST
5049
5050START_TEST(test_short_doctype_3) {
5051  const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5052  expect_failure(text, XML_ERROR_SYNTAX,
5053                 "DOCTYPE without System ID not rejected");
5054}
5055END_TEST
5056
5057START_TEST(test_long_doctype) {
5058  const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5059  expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5060}
5061END_TEST
5062
5063START_TEST(test_bad_entity) {
5064  const char *text = "<!DOCTYPE doc [\n"
5065                     "  <!ENTITY foo PUBLIC>\n"
5066                     "]>\n"
5067                     "<doc/>";
5068  expect_failure(text, XML_ERROR_SYNTAX,
5069                 "ENTITY without Public ID is not rejected");
5070}
5071END_TEST
5072
5073/* Test unquoted value is faulted */
5074START_TEST(test_bad_entity_2) {
5075  const char *text = "<!DOCTYPE doc [\n"
5076                     "  <!ENTITY % foo bar>\n"
5077                     "]>\n"
5078                     "<doc/>";
5079  expect_failure(text, XML_ERROR_SYNTAX,
5080                 "ENTITY without Public ID is not rejected");
5081}
5082END_TEST
5083
5084START_TEST(test_bad_entity_3) {
5085  const char *text = "<!DOCTYPE doc [\n"
5086                     "  <!ENTITY % foo PUBLIC>\n"
5087                     "]>\n"
5088                     "<doc/>";
5089  expect_failure(text, XML_ERROR_SYNTAX,
5090                 "Parameter ENTITY without Public ID is not rejected");
5091}
5092END_TEST
5093
5094START_TEST(test_bad_entity_4) {
5095  const char *text = "<!DOCTYPE doc [\n"
5096                     "  <!ENTITY % foo SYSTEM>\n"
5097                     "]>\n"
5098                     "<doc/>";
5099  expect_failure(text, XML_ERROR_SYNTAX,
5100                 "Parameter ENTITY without Public ID is not rejected");
5101}
5102END_TEST
5103
5104START_TEST(test_bad_notation) {
5105  const char *text = "<!DOCTYPE doc [\n"
5106                     "  <!NOTATION n SYSTEM>\n"
5107                     "]>\n"
5108                     "<doc/>";
5109  expect_failure(text, XML_ERROR_SYNTAX,
5110                 "Notation without System ID is not rejected");
5111}
5112END_TEST
5113
5114/* Test for issue #11, wrongly suppressed default handler */
5115START_TEST(test_default_doctype_handler) {
5116  const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5117                     "  <!ENTITY foo 'bar'>\n"
5118                     "]>\n"
5119                     "<doc>&foo;</doc>";
5120  DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5121                              {XCS("'test.dtd'"), 10, XML_FALSE},
5122                              {NULL, 0, XML_FALSE}};
5123  int i;
5124
5125  XML_SetUserData(g_parser, &test_data);
5126  XML_SetDefaultHandler(g_parser, checking_default_handler);
5127  XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
5128  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5129      == XML_STATUS_ERROR)
5130    xml_failure(g_parser);
5131  for (i = 0; test_data[i].expected != NULL; i++)
5132    if (! test_data[i].seen)
5133      fail("Default handler not run for public !DOCTYPE");
5134}
5135END_TEST
5136
5137START_TEST(test_empty_element_abort) {
5138  const char *text = "<abort/>";
5139
5140  XML_SetStartElementHandler(g_parser, start_element_suspender);
5141  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5142      != XML_STATUS_ERROR)
5143    fail("Expected to error on abort");
5144}
5145END_TEST
5146
5147/* Regression test for GH issue #612: unfinished m_declAttributeType
5148 * allocation in ->m_tempPool can corrupt following allocation.
5149 */
5150START_TEST(test_pool_integrity_with_unfinished_attr) {
5151  const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5152                     "<!DOCTYPE foo [\n"
5153                     "<!ELEMENT foo ANY>\n"
5154                     "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5155                     "%entp;\n"
5156                     "]>\n"
5157                     "<a></a>\n";
5158  const XML_Char *expected = XCS("COMMENT");
5159  CharData storage;
5160
5161  CharData_Init(&storage);
5162  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5163  XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
5164  XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
5165  XML_SetCommentHandler(g_parser, accumulate_comment);
5166  XML_SetUserData(g_parser, &storage);
5167  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5168      == XML_STATUS_ERROR)
5169    xml_failure(g_parser);
5170  CharData_CheckXMLChars(&storage, expected);
5171}
5172END_TEST
5173
5174START_TEST(test_nested_entity_suspend) {
5175  const char *const text = "<!DOCTYPE a [\n"
5176                           "  <!ENTITY e1 '<!--e1-->'>\n"
5177                           "  <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5178                           "  <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5179                           "]>\n"
5180                           "<a><!--start-->&e3;<!--end--></a>";
5181  const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5182      XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5183  CharData storage;
5184  CharData_Init(&storage);
5185  XML_Parser parser = XML_ParserCreate(NULL);
5186  ParserPlusStorage parserPlusStorage = {parser, &storage};
5187
5188  XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5189  XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
5190  XML_SetUserData(parser, &parserPlusStorage);
5191
5192  enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5193  while (status == XML_STATUS_SUSPENDED) {
5194    status = XML_ResumeParser(parser);
5195  }
5196  if (status != XML_STATUS_OK)
5197    xml_failure(parser);
5198
5199  CharData_CheckXMLChars(&storage, expected);
5200  XML_ParserFree(parser);
5201}
5202END_TEST
5203
5204/* Regression test for quadratic parsing on large tokens */
5205START_TEST(test_big_tokens_take_linear_time) {
5206  const char *const too_slow_failure_message
5207      = "Compared to the baseline runtime of the first test, this test has a "
5208        "slowdown of more than <max_slowdown>. "
5209        "Please keep increasing the value by 1 until it reliably passes the "
5210        "test on your hardware and open a bug sharing that number with us. "
5211        "Thanks in advance!";
5212  const struct {
5213    const char *pre;
5214    const char *post;
5215  } text[] = {
5216      {"<a>", "</a>"},                      // assumed good, used as baseline
5217      {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
5218      {"<c attr='", "'></c>"},              // big attribute, used to be O(N��)
5219      {"<d><!-- ", " --></d>"},             // long comment, used to be O(N��)
5220      {"<e><", "/></e>"},                   // big elem name, used to be O(N��)
5221  };
5222  const int num_cases = sizeof(text) / sizeof(text[0]);
5223  // For the test we need a <max_slowdown> value that is:
5224  // (1) big enough that the test passes reliably (avoiding flaky tests), and
5225  // (2) small enough that the test actually catches regressions.
5226  const int max_slowdown = 15;
5227  char aaaaaa[4096];
5228  const int fillsize = (int)sizeof(aaaaaa);
5229  const int fillcount = 100;
5230
5231  memset(aaaaaa, 'a', fillsize);
5232
5233  if (! g_reparseDeferralEnabledDefault) {
5234    return; // heuristic is disabled; we would get O(n^2) and fail.
5235  }
5236#if ! defined(__linux__)
5237  if (CLOCKS_PER_SEC < 100000) {
5238    // Skip this test if clock() doesn't have reasonably good resolution.
5239    // This workaround is primarily targeting Windows and FreeBSD, since
5240    // XSI requires the value to be 1.000.000 (10x the condition here), and
5241    // we want to be very sure that at least one platform in CI can catch
5242    // regressions (through a failing test).
5243    return;
5244  }
5245#endif
5246
5247  clock_t baseline = 0;
5248  for (int i = 0; i < num_cases; ++i) {
5249    XML_Parser parser = XML_ParserCreate(NULL);
5250    assert_true(parser != NULL);
5251    enum XML_Status status;
5252    set_subtest("max_slowdown=%d text=\"%saaaaaa%s\"", max_slowdown,
5253                text[i].pre, text[i].post);
5254    const clock_t start = clock();
5255
5256    // parse the start text
5257    status = _XML_Parse_SINGLE_BYTES(parser, text[i].pre,
5258                                     (int)strlen(text[i].pre), XML_FALSE);
5259    if (status != XML_STATUS_OK) {
5260      xml_failure(parser);
5261    }
5262    // parse lots of 'a', failing the test early if it takes too long
5263    for (int f = 0; f < fillcount; ++f) {
5264      status = _XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE);
5265      if (status != XML_STATUS_OK) {
5266        xml_failure(parser);
5267      }
5268      // i == 0 means we're still calculating the baseline value
5269      if (i > 0) {
5270        const clock_t now = clock();
5271        const clock_t clocks_so_far = now - start;
5272        const int slowdown = clocks_so_far / baseline;
5273        if (slowdown >= max_slowdown) {
5274          fprintf(
5275              stderr,
5276              "fill#%d: clocks_so_far=%d baseline=%d slowdown=%d max_slowdown=%d\n",
5277              f, (int)clocks_so_far, (int)baseline, slowdown, max_slowdown);
5278          fail(too_slow_failure_message);
5279        }
5280      }
5281    }
5282    // parse the end text
5283    status = _XML_Parse_SINGLE_BYTES(parser, text[i].post,
5284                                     (int)strlen(text[i].post), XML_TRUE);
5285    if (status != XML_STATUS_OK) {
5286      xml_failure(parser);
5287    }
5288
5289    // how long did it take in total?
5290    const clock_t end = clock();
5291    const clock_t taken = end - start;
5292    if (i == 0) {
5293      assert_true(taken > 0); // just to make sure we don't div-by-0 later
5294      baseline = taken;
5295    }
5296    const int slowdown = taken / baseline;
5297    if (slowdown >= max_slowdown) {
5298      fprintf(stderr, "taken=%d baseline=%d slowdown=%d max_slowdown=%d\n",
5299              (int)taken, (int)baseline, slowdown, max_slowdown);
5300      fail(too_slow_failure_message);
5301    }
5302
5303    XML_ParserFree(parser);
5304  }
5305}
5306END_TEST
5307
5308START_TEST(test_set_reparse_deferral) {
5309  const char *const pre = "<d>";
5310  const char *const start = "<x attr='";
5311  const char *const end = "'></x>";
5312  char eeeeee[100];
5313  const int fillsize = (int)sizeof(eeeeee);
5314  memset(eeeeee, 'e', fillsize);
5315
5316  for (int enabled = 0; enabled <= 1; enabled += 1) {
5317    set_subtest("deferral=%d", enabled);
5318
5319    XML_Parser parser = XML_ParserCreate(NULL);
5320    assert_true(parser != NULL);
5321    assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5322    // pre-grow the buffer to avoid reparsing due to almost-fullness
5323    assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5324
5325    CharData storage;
5326    CharData_Init(&storage);
5327    XML_SetUserData(parser, &storage);
5328    XML_SetStartElementHandler(parser, start_element_event_handler);
5329
5330    enum XML_Status status;
5331    // parse the start text
5332    status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5333    if (status != XML_STATUS_OK) {
5334      xml_failure(parser);
5335    }
5336    CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5337
5338    // ..and the start of the token
5339    status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5340    if (status != XML_STATUS_OK) {
5341      xml_failure(parser);
5342    }
5343    CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
5344
5345    // try to parse lots of 'e', but the token isn't finished
5346    for (int c = 0; c < 100; ++c) {
5347      status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5348      if (status != XML_STATUS_OK) {
5349        xml_failure(parser);
5350      }
5351    }
5352    CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5353
5354    // end the <x> token.
5355    status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5356    if (status != XML_STATUS_OK) {
5357      xml_failure(parser);
5358    }
5359
5360    if (enabled) {
5361      // In general, we may need to push more data to trigger a reparse attempt,
5362      // but in this test, the data is constructed to always require it.
5363      CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
5364      // 2x the token length should suffice; the +1 covers the start and end.
5365      for (int c = 0; c < 101; ++c) {
5366        status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5367        if (status != XML_STATUS_OK) {
5368          xml_failure(parser);
5369        }
5370      }
5371    }
5372    CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
5373
5374    XML_ParserFree(parser);
5375  }
5376}
5377END_TEST
5378
5379struct element_decl_data {
5380  XML_Parser parser;
5381  int count;
5382};
5383
5384static void
5385element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
5386  UNUSED_P(name);
5387  struct element_decl_data *testdata = (struct element_decl_data *)userData;
5388  testdata->count += 1;
5389  XML_FreeContentModel(testdata->parser, model);
5390}
5391
5392static int
5393external_inherited_parser(XML_Parser p, const XML_Char *context,
5394                          const XML_Char *base, const XML_Char *systemId,
5395                          const XML_Char *publicId) {
5396  UNUSED_P(base);
5397  UNUSED_P(systemId);
5398  UNUSED_P(publicId);
5399  const char *const pre = "<!ELEMENT document ANY>\n";
5400  const char *const start = "<!ELEMENT ";
5401  const char *const end = " ANY>\n";
5402  const char *const post = "<!ELEMENT xyz ANY>\n";
5403  const int enabled = *(int *)XML_GetUserData(p);
5404  char eeeeee[100];
5405  char spaces[100];
5406  const int fillsize = (int)sizeof(eeeeee);
5407  assert_true(fillsize == (int)sizeof(spaces));
5408  memset(eeeeee, 'e', fillsize);
5409  memset(spaces, ' ', fillsize);
5410
5411  XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
5412  assert_true(parser != NULL);
5413  // pre-grow the buffer to avoid reparsing due to almost-fullness
5414  assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5415
5416  struct element_decl_data testdata;
5417  testdata.parser = parser;
5418  testdata.count = 0;
5419  XML_SetUserData(parser, &testdata);
5420  XML_SetElementDeclHandler(parser, element_decl_counter);
5421
5422  enum XML_Status status;
5423  // parse the initial text
5424  status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5425  if (status != XML_STATUS_OK) {
5426    xml_failure(parser);
5427  }
5428  assert_true(testdata.count == 1); // first element should be done
5429
5430  // ..and the start of the big token
5431  status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5432  if (status != XML_STATUS_OK) {
5433    xml_failure(parser);
5434  }
5435  assert_true(testdata.count == 1); // still just the first one
5436
5437  // try to parse lots of 'e', but the token isn't finished
5438  for (int c = 0; c < 100; ++c) {
5439    status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5440    if (status != XML_STATUS_OK) {
5441      xml_failure(parser);
5442    }
5443  }
5444  assert_true(testdata.count == 1); // *still* just the first one
5445
5446  // end the big token.
5447  status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5448  if (status != XML_STATUS_OK) {
5449    xml_failure(parser);
5450  }
5451
5452  if (enabled) {
5453    // In general, we may need to push more data to trigger a reparse attempt,
5454    // but in this test, the data is constructed to always require it.
5455    assert_true(testdata.count == 1); // or the test is incorrect
5456    // 2x the token length should suffice; the +1 covers the start and end.
5457    for (int c = 0; c < 101; ++c) {
5458      status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
5459      if (status != XML_STATUS_OK) {
5460        xml_failure(parser);
5461      }
5462    }
5463  }
5464  assert_true(testdata.count == 2); // the big token should be done
5465
5466  // parse the final text
5467  status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
5468  if (status != XML_STATUS_OK) {
5469    xml_failure(parser);
5470  }
5471  assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
5472
5473  XML_ParserFree(parser);
5474  return XML_STATUS_OK;
5475}
5476
5477START_TEST(test_reparse_deferral_is_inherited) {
5478  const char *const text
5479      = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
5480  for (int enabled = 0; enabled <= 1; ++enabled) {
5481    set_subtest("deferral=%d", enabled);
5482
5483    XML_Parser parser = XML_ParserCreate(NULL);
5484    assert_true(parser != NULL);
5485    XML_SetUserData(parser, (void *)&enabled);
5486    XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5487    // this handler creates a sub-parser and checks that its deferral behavior
5488    // is what we expected, based on the value of `enabled` (in userdata).
5489    XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
5490    assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5491    if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
5492      xml_failure(parser);
5493
5494    XML_ParserFree(parser);
5495  }
5496}
5497END_TEST
5498
5499START_TEST(test_set_reparse_deferral_on_null_parser) {
5500  assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
5501  assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
5502  assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
5503  assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
5504  assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
5505              == XML_FALSE);
5506  assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
5507              == XML_FALSE);
5508}
5509END_TEST
5510
5511START_TEST(test_set_reparse_deferral_on_the_fly) {
5512  const char *const pre = "<d><x attr='";
5513  const char *const end = "'></x>";
5514  char iiiiii[100];
5515  const int fillsize = (int)sizeof(iiiiii);
5516  memset(iiiiii, 'i', fillsize);
5517
5518  XML_Parser parser = XML_ParserCreate(NULL);
5519  assert_true(parser != NULL);
5520  assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
5521
5522  CharData storage;
5523  CharData_Init(&storage);
5524  XML_SetUserData(parser, &storage);
5525  XML_SetStartElementHandler(parser, start_element_event_handler);
5526
5527  enum XML_Status status;
5528  // parse the start text
5529  status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5530  if (status != XML_STATUS_OK) {
5531    xml_failure(parser);
5532  }
5533  CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5534
5535  // try to parse some 'i', but the token isn't finished
5536  status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
5537  if (status != XML_STATUS_OK) {
5538    xml_failure(parser);
5539  }
5540  CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5541
5542  // end the <x> token.
5543  status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5544  if (status != XML_STATUS_OK) {
5545    xml_failure(parser);
5546  }
5547  CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
5548
5549  // now change the heuristic setting and add *no* data
5550  assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
5551  // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
5552  status = XML_Parse(parser, "", 0, XML_FALSE);
5553  if (status != XML_STATUS_OK) {
5554    xml_failure(parser);
5555  }
5556  CharData_CheckXMLChars(&storage, XCS("dx"));
5557
5558  XML_ParserFree(parser);
5559}
5560END_TEST
5561
5562START_TEST(test_set_bad_reparse_option) {
5563  XML_Parser parser = XML_ParserCreate(NULL);
5564  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
5565  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
5566  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
5567  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
5568  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
5569  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
5570  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
5571  assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
5572  assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
5573  XML_ParserFree(parser);
5574}
5575END_TEST
5576
/* Allocation statistics gathered by counting_realloc():
 * the sum of all requested sizes, and the largest single request. */
static size_t g_totalAlloc = 0;
static size_t g_biggestAlloc = 0;

/* realloc() wrapper that records the requested size in the statistics above
 * before forwarding the request.  The size is recorded whether or not the
 * underlying realloc() succeeds.
 */
static void *
counting_realloc(void *ptr, size_t size) {
  if (g_biggestAlloc < size) {
    g_biggestAlloc = size;
  }
  g_totalAlloc += size;
  return realloc(ptr, size);
}

/* malloc() counterpart of counting_realloc(): a fresh allocation is simply a
 * reallocation of "no block at all", so the bookkeeping lives in one place.
 */
static void *
counting_malloc(size_t size) {
  void *const no_previous_block = NULL;
  return counting_realloc(no_previous_block, size);
}
5593
5594START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
5595  if (g_chunkSize != 0) {
5596    // this test does not use SINGLE_BYTES, because it depends on very precise
5597    // buffer fills.
5598    return;
5599  }
5600  if (! g_reparseDeferralEnabledDefault) {
5601    return; // this test is irrelevant when the deferral heuristic is disabled.
5602  }
5603
5604  const int document_length = 65536;
5605  char *const document = (char *)malloc(document_length);
5606
5607  const XML_Memory_Handling_Suite memfuncs = {
5608      counting_malloc,
5609      counting_realloc,
5610      free,
5611  };
5612
5613  const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
5614  const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
5615  const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
5616
5617  for (const int *leading = leading_list; *leading >= 0; leading++) {
5618    for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
5619      for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
5620        set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
5621                    *fillsize);
5622        // start by checking that the test looks reasonably valid
5623        assert_true(*leading + *bigtoken <= document_length);
5624
5625        // put 'x' everywhere; some will be overwritten by elements.
5626        memset(document, 'x', document_length);
5627        // maybe add an initial tag
5628        if (*leading) {
5629          assert_true(*leading >= 3); // or the test case is invalid
5630          memcpy(document, "<a>", 3);
5631        }
5632        // add the large token
5633        document[*leading + 0] = '<';
5634        document[*leading + 1] = 'b';
5635        memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
5636        document[*leading + *bigtoken - 1] = '>';
5637
5638        // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
5639        const int expected_elem_total = 1 + (*leading ? 1 : 0);
5640
5641        XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
5642        assert_true(parser != NULL);
5643
5644        CharData storage;
5645        CharData_Init(&storage);
5646        XML_SetUserData(parser, &storage);
5647        XML_SetStartElementHandler(parser, start_element_event_handler);
5648
5649        g_biggestAlloc = 0;
5650        g_totalAlloc = 0;
5651        int offset = 0;
5652        // fill data until the big token is covered (but not necessarily parsed)
5653        while (offset < *leading + *bigtoken) {
5654          assert_true(offset + *fillsize <= document_length);
5655          const enum XML_Status status
5656              = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5657          if (status != XML_STATUS_OK) {
5658            xml_failure(parser);
5659          }
5660          offset += *fillsize;
5661        }
5662        // Now, check that we've had a buffer allocation that could fit the
5663        // context bytes and our big token. In order to detect a special case,
5664        // we need to know how many bytes of our big token were included in the
5665        // first push that contained _any_ bytes of the big token:
5666        const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
5667        if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
5668          // Special case: we aren't saving any context, and the whole big token
5669          // was covered by a single fill, so Expat may have parsed directly
5670          // from our input pointer, without allocating an internal buffer.
5671        } else if (*leading < XML_CONTEXT_BYTES) {
5672          assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
5673        } else {
5674          assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
5675        }
5676        // fill data until the big token is actually parsed
5677        while (storage.count < expected_elem_total) {
5678          const size_t alloc_before = g_totalAlloc;
5679          assert_true(offset + *fillsize <= document_length);
5680          const enum XML_Status status
5681              = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5682          if (status != XML_STATUS_OK) {
5683            xml_failure(parser);
5684          }
5685          offset += *fillsize;
5686          // since all the bytes of the big token are already in the buffer,
5687          // the bufsize ceiling should make us finish its parsing without any
5688          // further buffer allocations. We assume that there will be no other
5689          // large allocations in this test.
5690          assert_true(g_totalAlloc - alloc_before < 4096);
5691        }
5692        // test-the-test: was our alloc even called?
5693        assert_true(g_totalAlloc > 0);
5694        // test-the-test: there shouldn't be any extra start elements
5695        assert_true(storage.count == expected_elem_total);
5696
5697        XML_ParserFree(parser);
5698      }
5699    }
5700  }
5701  free(document);
5702}
5703END_TEST
5704
5705START_TEST(test_varying_buffer_fills) {
5706  const int KiB = 1024;
5707  const int MiB = 1024 * KiB;
5708  const int document_length = 16 * MiB;
5709  const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB
5710
5711  if (g_chunkSize != 0) {
5712    return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
5713  }
5714
5715  char *const document = (char *)malloc(document_length);
5716  assert_true(document != NULL);
5717  memset(document, 'x', document_length);
5718  document[0] = '<';
5719  document[1] = 't';
5720  memset(&document[2], ' ', big - 2); // a very spacy token
5721  document[big - 1] = '>';
5722
5723  // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
5724  // When reparse deferral is enabled, the final (negated) value is the expected
5725  // maximum number of bytes scanned in parse attempts.
5726  const int testcases[][30] = {
5727      {8 * MiB, -8 * MiB},
5728      {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
5729      // zero-size fills shouldn't trigger the bypass
5730      {4 * MiB, 0, 4 * MiB, -12 * MiB},
5731      {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
5732      {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
5733      // try to hit the buffer ceiling only once (at the end)
5734      {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
5735      // try to hit the same buffer ceiling multiple times
5736      {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},
5737
5738      // try to hit every ceiling, by always landing 1K shy of the buffer size
5739      {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
5740       128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},
5741
5742      // try to avoid every ceiling, by always landing 1B past the buffer size
5743      // the normal 2x heuristic threshold still forces parse attempts.
5744      {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
5745       2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
5746       8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
5747       32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
5748       128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
5749       512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
5750       2 * MiB, 4 * MiB,     // will attempt 8MiB + 1 ==> total 10M + 682K + 7
5751       -(10 * MiB + 682 * KiB + 7)},
5752      // try to avoid every ceiling again, except on our last fill.
5753      {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
5754       2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
5755       8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
5756       32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
5757       128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
5758       512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
5759       2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
5760       -(10 * MiB + 682 * KiB + 6)},
5761
5762      // try to hit ceilings on the way multiple times
5763      {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
5764       512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
5765       1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1,   // 4 MiB buffer
5766       2 * MiB + 1, 1 * MiB, 512 * KiB,                    // 8 MiB buffer
5767       // we'll make a parse attempt at every parse call
5768       -(45 * MiB + 12)},
5769  };
5770  const int testcount = sizeof(testcases) / sizeof(testcases[0]);
5771  for (int test_i = 0; test_i < testcount; test_i++) {
5772    const int *fillsize = testcases[test_i];
5773    set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
5774                fillsize[2], fillsize[3]);
5775    XML_Parser parser = XML_ParserCreate(NULL);
5776    assert_true(parser != NULL);
5777    g_parseAttempts = 0;
5778
5779    CharData storage;
5780    CharData_Init(&storage);
5781    XML_SetUserData(parser, &storage);
5782    XML_SetStartElementHandler(parser, start_element_event_handler);
5783
5784    int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
5785    int scanned_bytes = 0;   // sum of (buffered bytes at each actual parse)
5786    int offset = 0;
5787    while (*fillsize >= 0) {
5788      assert_true(offset + *fillsize <= document_length); // or test is invalid
5789      const unsigned attempts_before = g_parseAttempts;
5790      const enum XML_Status status
5791          = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5792      if (status != XML_STATUS_OK) {
5793        xml_failure(parser);
5794      }
5795      offset += *fillsize;
5796      fillsize++;
5797      assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
5798      worstcase_bytes += offset; // we might've tried to parse all pending bytes
5799      if (g_parseAttempts != attempts_before) {
5800        assert_true(g_parseAttempts == attempts_before + 1); // max 1/XML_Parse
5801        assert_true(offset <= INT_MAX - scanned_bytes);      // avoid overflow
5802        scanned_bytes += offset; // we *did* try to parse all pending bytes
5803      }
5804    }
5805    assert_true(storage.count == 1); // the big token should've been parsed
5806    assert_true(scanned_bytes > 0);  // test-the-test: does our counter work?
5807    if (g_reparseDeferralEnabledDefault) {
5808      // heuristic is enabled; some XML_Parse calls may have deferred reparsing
5809      const int max_bytes_scanned = -*fillsize;
5810      if (scanned_bytes > max_bytes_scanned) {
5811        fprintf(stderr,
5812                "bytes scanned in parse attempts: actual=%d limit=%d \n",
5813                scanned_bytes, max_bytes_scanned);
5814        fail("too many bytes scanned in parse attempts");
5815      }
5816      assert_true(scanned_bytes <= worstcase_bytes);
5817    } else {
5818      // heuristic is disabled; every XML_Parse() will have reparsed
5819      assert_true(scanned_bytes == worstcase_bytes);
5820    }
5821
5822    XML_ParserFree(parser);
5823  }
5824  free(document);
5825}
5826END_TEST
5827
5828void
5829make_basic_test_case(Suite *s) {
5830  TCase *tc_basic = tcase_create("basic tests");
5831
5832  suite_add_tcase(s, tc_basic);
5833  tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
5834
5835  tcase_add_test(tc_basic, test_nul_byte);
5836  tcase_add_test(tc_basic, test_u0000_char);
5837  tcase_add_test(tc_basic, test_siphash_self);
5838  tcase_add_test(tc_basic, test_siphash_spec);
5839  tcase_add_test(tc_basic, test_bom_utf8);
5840  tcase_add_test(tc_basic, test_bom_utf16_be);
5841  tcase_add_test(tc_basic, test_bom_utf16_le);
5842  tcase_add_test(tc_basic, test_nobom_utf16_le);
5843  tcase_add_test(tc_basic, test_hash_collision);
5844  tcase_add_test(tc_basic, test_illegal_utf8);
5845  tcase_add_test(tc_basic, test_utf8_auto_align);
5846  tcase_add_test(tc_basic, test_utf16);
5847  tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
5848  tcase_add_test(tc_basic, test_not_utf16);
5849  tcase_add_test(tc_basic, test_bad_encoding);
5850  tcase_add_test(tc_basic, test_latin1_umlauts);
5851  tcase_add_test(tc_basic, test_long_utf8_character);
5852  tcase_add_test(tc_basic, test_long_latin1_attribute);
5853  tcase_add_test(tc_basic, test_long_ascii_attribute);
5854  /* Regression test for SF bug #491986. */
5855  tcase_add_test(tc_basic, test_danish_latin1);
5856  /* Regression test for SF bug #514281. */
5857  tcase_add_test(tc_basic, test_french_charref_hexidecimal);
5858  tcase_add_test(tc_basic, test_french_charref_decimal);
5859  tcase_add_test(tc_basic, test_french_latin1);
5860  tcase_add_test(tc_basic, test_french_utf8);
5861  tcase_add_test(tc_basic, test_utf8_false_rejection);
5862  tcase_add_test(tc_basic, test_line_number_after_parse);
5863  tcase_add_test(tc_basic, test_column_number_after_parse);
5864  tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
5865  tcase_add_test(tc_basic, test_line_number_after_error);
5866  tcase_add_test(tc_basic, test_column_number_after_error);
5867  tcase_add_test(tc_basic, test_really_long_lines);
5868  tcase_add_test(tc_basic, test_really_long_encoded_lines);
5869  tcase_add_test(tc_basic, test_end_element_events);
5870  tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
5871  tcase_add_test(tc_basic, test_attr_whitespace_normalization);
5872  tcase_add_test(tc_basic, test_xmldecl_misplaced);
5873  tcase_add_test(tc_basic, test_xmldecl_invalid);
5874  tcase_add_test(tc_basic, test_xmldecl_missing_attr);
5875  tcase_add_test(tc_basic, test_xmldecl_missing_value);
5876  tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
5877  tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
5878  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
5879  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
5880  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
5881  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
5882  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
5883  tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
5884  tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
5885  tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
5886  tcase_add_test(tc_basic,
5887                 test_wfc_undeclared_entity_with_external_subset_standalone);
5888  tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
5889  tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
5890  tcase_add_test(tc_basic, test_not_standalone_handler_reject);
5891  tcase_add_test(tc_basic, test_not_standalone_handler_accept);
5892  tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
5893  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
5894  tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
5895  tcase_add_test(tc_basic, test_dtd_attr_handling);
5896  tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
5897  tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
5898  tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
5899  tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
5900  tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
5901  tcase_add_test(tc_basic, test_good_cdata_ascii);
5902  tcase_add_test(tc_basic, test_good_cdata_utf16);
5903  tcase_add_test(tc_basic, test_good_cdata_utf16_le);
5904  tcase_add_test(tc_basic, test_long_cdata_utf16);
5905  tcase_add_test(tc_basic, test_multichar_cdata_utf16);
5906  tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
5907  tcase_add_test(tc_basic, test_bad_cdata);
5908  tcase_add_test(tc_basic, test_bad_cdata_utf16);
5909  tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
5910  tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
5911  tcase_add_test(tc_basic, test_memory_allocation);
5912  tcase_add_test__if_xml_ge(tc_basic, test_default_current);
5913  tcase_add_test(tc_basic, test_dtd_elements);
5914  tcase_add_test(tc_basic, test_dtd_elements_nesting);
5915  tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
5916  tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
5917  tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
5918  tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
5919  tcase_add_test__ifdef_xml_dtd(tc_basic,
5920                                test_foreign_dtd_without_external_subset);
5921  tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
5922  tcase_add_test(tc_basic, test_set_base);
5923  tcase_add_test(tc_basic, test_attributes);
5924  tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
5925  tcase_add_test(tc_basic, test_resume_invalid_parse);
5926  tcase_add_test(tc_basic, test_resume_resuspended);
5927  tcase_add_test(tc_basic, test_cdata_default);
5928  tcase_add_test(tc_basic, test_subordinate_reset);
5929  tcase_add_test(tc_basic, test_subordinate_suspend);
5930  tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
5931  tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
5932  tcase_add_test__ifdef_xml_dtd(tc_basic,
5933                                test_ext_entity_invalid_suspended_parse);
5934  tcase_add_test(tc_basic, test_explicit_encoding);
5935  tcase_add_test(tc_basic, test_trailing_cr);
5936  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
5937  tcase_add_test(tc_basic, test_trailing_rsqb);
5938  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
5939  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
5940  tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
5941  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
5942  tcase_add_test(tc_basic, test_empty_parse);
5943  tcase_add_test(tc_basic, test_get_buffer_1);
5944  tcase_add_test(tc_basic, test_get_buffer_2);
5945#if XML_CONTEXT_BYTES > 0
5946  tcase_add_test(tc_basic, test_get_buffer_3_overflow);
5947#endif
5948  tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
5949  tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
5950  tcase_add_test(tc_basic, test_byte_info_at_end);
5951  tcase_add_test(tc_basic, test_byte_info_at_error);
5952  tcase_add_test(tc_basic, test_byte_info_at_cdata);
5953  tcase_add_test(tc_basic, test_predefined_entities);
5954  tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
5955  tcase_add_test(tc_basic, test_not_predefined_entities);
5956  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
5957  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
5958  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
5959  tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
5960  tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
5961  tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
5962  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
5963  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
5964  tcase_add_test(tc_basic, test_bad_public_doctype);
5965  tcase_add_test(tc_basic, test_attribute_enum_value);
5966  tcase_add_test(tc_basic, test_predefined_entity_redefinition);
5967  tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
5968  tcase_add_test(tc_basic, test_public_notation_no_sysid);
5969  tcase_add_test(tc_basic, test_nested_groups);
5970  tcase_add_test(tc_basic, test_group_choice);
5971  tcase_add_test(tc_basic, test_standalone_parameter_entity);
5972  tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
5973  tcase_add_test__ifdef_xml_dtd(tc_basic,
5974                                test_recursive_external_parameter_entity);
5975  tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
5976  tcase_add_test(tc_basic, test_suspend_xdecl);
5977  tcase_add_test(tc_basic, test_abort_epilog);
5978  tcase_add_test(tc_basic, test_abort_epilog_2);
5979  tcase_add_test(tc_basic, test_suspend_epilog);
5980  tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
5981  tcase_add_test(tc_basic, test_unfinished_epilog);
5982  tcase_add_test(tc_basic, test_partial_char_in_epilog);
5983  tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
5984  tcase_add_test__ifdef_xml_dtd(tc_basic,
5985                                test_suspend_resume_internal_entity_issue_629);
5986  tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
5987  tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
5988  tcase_add_test(tc_basic, test_restart_on_error);
5989  tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
5990  tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
5991  tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
5992  tcase_add_test(tc_basic, test_standalone_internal_entity);
5993  tcase_add_test(tc_basic, test_skipped_external_entity);
5994  tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
5995  tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
5996  tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
5997  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
5998  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
5999  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
6000  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
6001  tcase_add_test(tc_basic, test_pi_handled_in_default);
6002  tcase_add_test(tc_basic, test_comment_handled_in_default);
6003  tcase_add_test(tc_basic, test_pi_yml);
6004  tcase_add_test(tc_basic, test_pi_xnl);
6005  tcase_add_test(tc_basic, test_pi_xmm);
6006  tcase_add_test(tc_basic, test_utf16_pi);
6007  tcase_add_test(tc_basic, test_utf16_be_pi);
6008  tcase_add_test(tc_basic, test_utf16_be_comment);
6009  tcase_add_test(tc_basic, test_utf16_le_comment);
6010  tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
6011  tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
6012  tcase_add_test(tc_basic, test_unknown_encoding_success);
6013  tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
6014  tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
6015  tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
6016  tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
6017  tcase_add_test(tc_basic, test_invalid_unknown_encoding);
6018  tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
6019  tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
6020  tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
6021  tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
6022  tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
6023  tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
6024  tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
6025  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
6026  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
6027  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
6028  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
6029  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
6030  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
6031  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
6032  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
6033  tcase_add_test(tc_basic, test_utf8_in_cdata_section);
6034  tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
6035  tcase_add_test(tc_basic, test_utf8_in_start_tags);
6036  tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
6037  tcase_add_test(tc_basic, test_utf16_attribute);
6038  tcase_add_test(tc_basic, test_utf16_second_attr);
6039  tcase_add_test(tc_basic, test_attr_after_solidus);
6040  tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
6041  tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
6042  tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
6043  tcase_add_test(tc_basic, test_bad_doctype);
6044  tcase_add_test(tc_basic, test_bad_doctype_utf8);
6045  tcase_add_test(tc_basic, test_bad_doctype_utf16);
6046  tcase_add_test(tc_basic, test_bad_doctype_plus);
6047  tcase_add_test(tc_basic, test_bad_doctype_star);
6048  tcase_add_test(tc_basic, test_bad_doctype_query);
6049  tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
6050  tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
6051  tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
6052  tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
6053  tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
6054  tcase_add_test(tc_basic, test_short_doctype);
6055  tcase_add_test(tc_basic, test_short_doctype_2);
6056  tcase_add_test(tc_basic, test_short_doctype_3);
6057  tcase_add_test(tc_basic, test_long_doctype);
6058  tcase_add_test(tc_basic, test_bad_entity);
6059  tcase_add_test(tc_basic, test_bad_entity_2);
6060  tcase_add_test(tc_basic, test_bad_entity_3);
6061  tcase_add_test(tc_basic, test_bad_entity_4);
6062  tcase_add_test(tc_basic, test_bad_notation);
6063  tcase_add_test(tc_basic, test_default_doctype_handler);
6064  tcase_add_test(tc_basic, test_empty_element_abort);
6065  tcase_add_test__ifdef_xml_dtd(tc_basic,
6066                                test_pool_integrity_with_unfinished_attr);
6067  tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
6068  tcase_add_test(tc_basic, test_big_tokens_take_linear_time);
6069  tcase_add_test(tc_basic, test_set_reparse_deferral);
6070  tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
6071  tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
6072  tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
6073  tcase_add_test(tc_basic, test_set_bad_reparse_option);
6074  tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
6075  tcase_add_test(tc_basic, test_varying_buffer_fills);
6076}
6077