1/* Tests in the "basic" test case for the Expat test suite
2                            __  __            _
3                         ___\ \/ /_ __   __ _| |_
4                        / _ \\  /| '_ \ / _` | __|
5                       |  __//  \| |_) | (_| | |_
6                        \___/_/\_\ .__/ \__,_|\__|
7                                 |_| XML parser
8
9   Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10   Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
11   Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12   Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13   Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
14   Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15   Copyright (c) 2017      Joe Orton <jorton@redhat.com>
   Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
17   Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
18   Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
19   Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
20   Copyright (c) 2021      Donghee Na <donghee.na@python.org>
21   Copyright (c) 2023-2024 Sony Corporation / Snild Dolkow <snild@sony.com>
22   Licensed under the MIT license:
23
24   Permission is  hereby granted,  free of charge,  to any  person obtaining
25   a  copy  of  this  software   and  associated  documentation  files  (the
26   "Software"),  to  deal in  the  Software  without restriction,  including
27   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
28   distribute, sublicense, and/or sell copies of the Software, and to permit
29   persons  to whom  the Software  is  furnished to  do so,  subject to  the
30   following conditions:
31
32   The above copyright  notice and this permission notice  shall be included
33   in all copies or substantial portions of the Software.
34
35   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
36   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
37   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
38   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
39   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
40   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
41   USE OR OTHER DEALINGS IN THE SOFTWARE.
42*/
43
44#if defined(NDEBUG)
45#  undef NDEBUG /* because test suite relies on assert(...) at the moment */
46#endif
47
48#include <assert.h>
49
50#include <stdio.h>
51#include <string.h>
52#include <time.h>
53
54#if ! defined(__cplusplus)
55#  include <stdbool.h>
56#endif
57
58#include "expat_config.h"
59
60#include "expat.h"
61#include "internal.h"
62#include "minicheck.h"
63#include "structdata.h"
64#include "common.h"
65#include "dummy.h"
66#include "handlers.h"
67#include "siphash.h"
68#include "basic_tests.h"
69
70static void
71basic_setup(void) {
72  g_parser = XML_ParserCreate(NULL);
73  if (g_parser == NULL)
74    fail("Parser not created.");
75}
76
77/*
78 * Character & encoding tests.
79 */
80
81START_TEST(test_nul_byte) {
82  char text[] = "<doc>\0</doc>";
83
84  /* test that a NUL byte (in US-ASCII data) is an error */
85  if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
86      == XML_STATUS_OK)
87    fail("Parser did not report error on NUL-byte.");
88  if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
89    xml_failure(g_parser);
90}
91END_TEST
92
93START_TEST(test_u0000_char) {
94  /* test that a NUL byte (in US-ASCII data) is an error */
95  expect_failure("<doc>&#0;</doc>", XML_ERROR_BAD_CHAR_REF,
96                 "Parser did not report error on NUL-byte.");
97}
98END_TEST
99
100START_TEST(test_siphash_self) {
101  if (! sip24_valid())
102    fail("SipHash self-test failed");
103}
104END_TEST
105
106START_TEST(test_siphash_spec) {
107  /* https://131002.net/siphash/siphash.pdf (page 19, "Test values") */
108  const char message[] = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
109                         "\x0a\x0b\x0c\x0d\x0e";
110  const size_t len = sizeof(message) - 1;
111  const uint64_t expected = SIP_ULL(0xa129ca61U, 0x49be45e5U);
112  struct siphash state;
113  struct sipkey key;
114
115  sip_tokey(&key, "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09"
116                  "\x0a\x0b\x0c\x0d\x0e\x0f");
117  sip24_init(&state, &key);
118
119  /* Cover spread across calls */
120  sip24_update(&state, message, 4);
121  sip24_update(&state, message + 4, len - 4);
122
123  /* Cover null length */
124  sip24_update(&state, message, 0);
125
126  if (sip24_final(&state) != expected)
127    fail("sip24_final failed spec test\n");
128
129  /* Cover wrapper */
130  if (siphash24(message, len, &key) != expected)
131    fail("siphash24 failed spec test\n");
132}
133END_TEST
134
135START_TEST(test_bom_utf8) {
136  /* This test is really just making sure we don't core on a UTF-8 BOM. */
137  const char *text = "\357\273\277<e/>";
138
139  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
140      == XML_STATUS_ERROR)
141    xml_failure(g_parser);
142}
143END_TEST
144
145START_TEST(test_bom_utf16_be) {
146  char text[] = "\376\377\0<\0e\0/\0>";
147
148  if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
149      == XML_STATUS_ERROR)
150    xml_failure(g_parser);
151}
152END_TEST
153
154START_TEST(test_bom_utf16_le) {
155  char text[] = "\377\376<\0e\0/\0>\0";
156
157  if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
158      == XML_STATUS_ERROR)
159    xml_failure(g_parser);
160}
161END_TEST
162
163START_TEST(test_nobom_utf16_le) {
164  char text[] = " \0<\0e\0/\0>\0";
165
166  if (g_chunkSize == 1) {
167    // TODO: with just the first byte, we can't tell the difference between
168    // UTF-16-LE and UTF-8. Avoid the failure for now.
169    return;
170  }
171
172  if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
173      == XML_STATUS_ERROR)
174    xml_failure(g_parser);
175}
176END_TEST
177
178START_TEST(test_hash_collision) {
179  /* For full coverage of the lookup routine, we need to ensure a
180   * hash collision even though we can only tell that we have one
181   * through breakpoint debugging or coverage statistics.  The
182   * following will cause a hash collision on machines with a 64-bit
183   * long type; others will have to experiment.  The full coverage
184   * tests invoked from qa.sh usually provide a hash collision, but
185   * not always.  This is an attempt to provide insurance.
186   */
187#define COLLIDING_HASH_SALT (unsigned long)SIP_ULL(0xffffffffU, 0xff99fc90U)
188  const char *text
189      = "<doc>\n"
190        "<a1/><a2/><a3/><a4/><a5/><a6/><a7/><a8/>\n"
191        "<b1></b1><b2 attr='foo'>This is a foo</b2><b3></b3><b4></b4>\n"
192        "<b5></b5><b6></b6><b7></b7><b8></b8>\n"
193        "<c1/><c2/><c3/><c4/><c5/><c6/><c7/><c8/>\n"
194        "<d1/><d2/><d3/><d4/><d5/><d6/><d7/>\n"
195        "<d8>This triggers the table growth and collides with b2</d8>\n"
196        "</doc>\n";
197
198  XML_SetHashSalt(g_parser, COLLIDING_HASH_SALT);
199  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
200      == XML_STATUS_ERROR)
201    xml_failure(g_parser);
202}
203END_TEST
204#undef COLLIDING_HASH_SALT
205
206/* Regression test for SF bug #491986. */
207START_TEST(test_danish_latin1) {
208  const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
209                     "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
210#ifdef XML_UNICODE
211  const XML_Char *expected
212      = XCS("J\x00f8rgen \x00e6\x00f8\x00e5\x00c6\x00d8\x00c5");
213#else
214  const XML_Char *expected
215      = XCS("J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
216#endif
217  run_character_check(text, expected);
218}
219END_TEST
220
221/* Regression test for SF bug #514281. */
222START_TEST(test_french_charref_hexidecimal) {
223  const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
224                     "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
225#ifdef XML_UNICODE
226  const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
227#else
228  const XML_Char *expected
229      = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
230#endif
231  run_character_check(text, expected);
232}
233END_TEST
234
235START_TEST(test_french_charref_decimal) {
236  const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
237                     "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
238#ifdef XML_UNICODE
239  const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
240#else
241  const XML_Char *expected
242      = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
243#endif
244  run_character_check(text, expected);
245}
246END_TEST
247
248START_TEST(test_french_latin1) {
249  const char *text = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
250                     "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
251#ifdef XML_UNICODE
252  const XML_Char *expected = XCS("\x00e9\x00e8\x00e0\x00e7\x00ea\x00c8");
253#else
254  const XML_Char *expected
255      = XCS("\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
256#endif
257  run_character_check(text, expected);
258}
259END_TEST
260
261START_TEST(test_french_utf8) {
262  const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
263                     "<doc>\xC3\xA9</doc>";
264#ifdef XML_UNICODE
265  const XML_Char *expected = XCS("\x00e9");
266#else
267  const XML_Char *expected = XCS("\xC3\xA9");
268#endif
269  run_character_check(text, expected);
270}
271END_TEST
272
273/* Regression test for SF bug #600479.
274   XXX There should be a test that exercises all legal XML Unicode
275   characters as PCDATA and attribute value content, and XML Name
276   characters as part of element and attribute names.
277*/
278START_TEST(test_utf8_false_rejection) {
279  const char *text = "<doc>\xEF\xBA\xBF</doc>";
280#ifdef XML_UNICODE
281  const XML_Char *expected = XCS("\xfebf");
282#else
283  const XML_Char *expected = XCS("\xEF\xBA\xBF");
284#endif
285  run_character_check(text, expected);
286}
287END_TEST
288
289/* Regression test for SF bug #477667.
290   This test assures that any 8-bit character followed by a 7-bit
291   character will not be mistakenly interpreted as a valid UTF-8
292   sequence.
293*/
294START_TEST(test_illegal_utf8) {
295  char text[100];
296  int i;
297
298  for (i = 128; i <= 255; ++i) {
299    snprintf(text, sizeof(text), "<e>%ccd</e>", i);
300    if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
301        == XML_STATUS_OK) {
302      snprintf(text, sizeof(text),
303               "expected token error for '%c' (ordinal %d) in UTF-8 text", i,
304               i);
305      fail(text);
306    } else if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
307      xml_failure(g_parser);
308    /* Reset the parser since we use the same parser repeatedly. */
309    XML_ParserReset(g_parser, NULL);
310  }
311}
312END_TEST
313
/* Sample UTF-8 bytes for building test inputs.  These are example byte
 * values (the largest value of each kind), not bit masks: */
#define UTF8_LEAD_1 "\x7f" /* 0b01111111: single-byte (7-bit) character */
#define UTF8_LEAD_2 "\xdf" /* 0b11011111: lead byte of a 2-byte sequence */
#define UTF8_LEAD_3 "\xef" /* 0b11101111: lead byte of a 3-byte sequence */
#define UTF8_LEAD_4 "\xf7" /* 0b11110111: lead byte of a 4-byte sequence */
#define UTF8_FOLLOW "\xbf" /* 0b10111111: continuation (follow-up) byte */
320
321START_TEST(test_utf8_auto_align) {
322  struct TestCase {
323    ptrdiff_t expectedMovementInChars;
324    const char *input;
325  };
326
327  struct TestCase cases[] = {
328      {00, ""},
329
330      {00, UTF8_LEAD_1},
331
332      {-1, UTF8_LEAD_2},
333      {00, UTF8_LEAD_2 UTF8_FOLLOW},
334
335      {-1, UTF8_LEAD_3},
336      {-2, UTF8_LEAD_3 UTF8_FOLLOW},
337      {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
338
339      {-1, UTF8_LEAD_4},
340      {-2, UTF8_LEAD_4 UTF8_FOLLOW},
341      {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
342      {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
343  };
344
345  size_t i = 0;
346  bool success = true;
347  for (; i < sizeof(cases) / sizeof(*cases); i++) {
348    const char *fromLim = cases[i].input + strlen(cases[i].input);
349    const char *const fromLimInitially = fromLim;
350    ptrdiff_t actualMovementInChars;
351
352    _INTERNAL_trim_to_complete_utf8_characters(cases[i].input, &fromLim);
353
354    actualMovementInChars = (fromLim - fromLimInitially);
355    if (actualMovementInChars != cases[i].expectedMovementInChars) {
356      size_t j = 0;
357      success = false;
358      printf("[-] UTF-8 case %2u: Expected movement by %2d chars"
359             ", actually moved by %2d chars: \"",
360             (unsigned)(i + 1), (int)cases[i].expectedMovementInChars,
361             (int)actualMovementInChars);
362      for (; j < strlen(cases[i].input); j++) {
363        printf("\\x%02x", (unsigned char)cases[i].input[j]);
364      }
365      printf("\"\n");
366    }
367  }
368
369  if (! success) {
370    fail("UTF-8 auto-alignment is not bullet-proof\n");
371  }
372}
373END_TEST
374
375START_TEST(test_utf16) {
376  /* <?xml version="1.0" encoding="UTF-16"?>
377   *  <doc a='123'>some {A} text</doc>
378   *
379   * where {A} is U+FF21, FULLWIDTH LATIN CAPITAL LETTER A
380   */
381  char text[]
382      = "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
383        "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
384        "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
385        "\000'\000?\000>\000\n"
386        "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'\000>"
387        "\000s\000o\000m\000e\000 \xff\x21\000 \000t\000e\000x\000t\000"
388        "<\000/\000d\000o\000c\000>";
389#ifdef XML_UNICODE
390  const XML_Char *expected = XCS("some \xff21 text");
391#else
392  const XML_Char *expected = XCS("some \357\274\241 text");
393#endif
394  CharData storage;
395
396  CharData_Init(&storage);
397  XML_SetUserData(g_parser, &storage);
398  XML_SetCharacterDataHandler(g_parser, accumulate_characters);
399  if (_XML_Parse_SINGLE_BYTES(g_parser, text, sizeof(text) - 1, XML_TRUE)
400      == XML_STATUS_ERROR)
401    xml_failure(g_parser);
402  CharData_CheckXMLChars(&storage, expected);
403}
404END_TEST
405
406START_TEST(test_utf16_le_epilog_newline) {
407  unsigned int first_chunk_bytes = 17;
408  char text[] = "\xFF\xFE"                  /* BOM */
409                "<\000e\000/\000>\000"      /* document element */
410                "\r\000\n\000\r\000\n\000"; /* epilog */
411
412  if (first_chunk_bytes >= sizeof(text) - 1)
413    fail("bad value of first_chunk_bytes");
414  if (_XML_Parse_SINGLE_BYTES(g_parser, text, first_chunk_bytes, XML_FALSE)
415      == XML_STATUS_ERROR)
416    xml_failure(g_parser);
417  else {
418    enum XML_Status rc;
419    rc = _XML_Parse_SINGLE_BYTES(g_parser, text + first_chunk_bytes,
420                                 sizeof(text) - first_chunk_bytes - 1,
421                                 XML_TRUE);
422    if (rc == XML_STATUS_ERROR)
423      xml_failure(g_parser);
424  }
425}
426END_TEST
427
428/* Test that an outright lie in the encoding is faulted */
429START_TEST(test_not_utf16) {
430  const char *text = "<?xml version='1.0' encoding='utf-16'?>"
431                     "<doc>Hi</doc>";
432
433  /* Use a handler to provoke the appropriate code paths */
434  XML_SetXmlDeclHandler(g_parser, dummy_xdecl_handler);
435  expect_failure(text, XML_ERROR_INCORRECT_ENCODING,
436                 "UTF-16 declared in UTF-8 not faulted");
437}
438END_TEST
439
440/* Test that an unknown encoding is rejected */
441START_TEST(test_bad_encoding) {
442  const char *text = "<doc>Hi</doc>";
443
444  if (! XML_SetEncoding(g_parser, XCS("unknown-encoding")))
445    fail("XML_SetEncoding failed");
446  expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
447                 "Unknown encoding not faulted");
448}
449END_TEST
450
451/* Regression test for SF bug #481609, #774028. */
452START_TEST(test_latin1_umlauts) {
453  const char *text
454      = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
455        "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
456        "  >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
457#ifdef XML_UNICODE
458  /* Expected results in UTF-16 */
459  const XML_Char *expected = XCS("\x00e4 \x00f6 \x00fc ")
460      XCS("\x00e4 \x00f6 \x00fc ") XCS("\x00e4 \x00f6 \x00fc >");
461#else
462  /* Expected results in UTF-8 */
463  const XML_Char *expected = XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ")
464      XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC ") XCS("\xC3\xA4 \xC3\xB6 \xC3\xBC >");
465#endif
466
467  run_character_check(text, expected);
468  XML_ParserReset(g_parser, NULL);
469  run_attribute_check(text, expected);
470  /* Repeat with a default handler */
471  XML_ParserReset(g_parser, NULL);
472  XML_SetDefaultHandler(g_parser, dummy_default_handler);
473  run_character_check(text, expected);
474  XML_ParserReset(g_parser, NULL);
475  XML_SetDefaultHandler(g_parser, dummy_default_handler);
476  run_attribute_check(text, expected);
477}
478END_TEST
479
480/* Test that an element name with a 4-byte UTF-8 character is rejected */
481START_TEST(test_long_utf8_character) {
482  const char *text
483      = "<?xml version='1.0' encoding='utf-8'?>\n"
484        /* 0xf0 0x90 0x80 0x80 = U+10000, the first Linear B character */
485        "<do\xf0\x90\x80\x80/>";
486  expect_failure(text, XML_ERROR_INVALID_TOKEN,
487                 "4-byte UTF-8 character in element name not faulted");
488}
489END_TEST
490
491/* Test that a long latin-1 attribute (too long to convert in one go)
492 * is correctly converted
493 */
494START_TEST(test_long_latin1_attribute) {
495  const char *text
496      = "<?xml version='1.0' encoding='iso-8859-1'?>\n"
497        "<doc att='"
498        /* 64 characters per line */
499        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
500        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
501        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
502        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
503        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
504        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
505        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
506        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
507        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
508        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
509        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
510        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
511        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
512        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
513        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
514        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO"
515        /* Last character splits across a buffer boundary */
516        "\xe4'>\n</doc>";
517
518  const XML_Char *expected =
519      /* 64 characters per line */
520      /* clang-format off */
521        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
522        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
523        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
524        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
525        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
526        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
527        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
528        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
529        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
530        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
531        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
532        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
533        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
534        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
535        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
536        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNO")
537  /* clang-format on */
538#ifdef XML_UNICODE
539                                                  XCS("\x00e4");
540#else
541                                                  XCS("\xc3\xa4");
542#endif
543
544  run_attribute_check(text, expected);
545}
546END_TEST
547
548/* Test that a long ASCII attribute (too long to convert in one go)
549 * is correctly converted
550 */
551START_TEST(test_long_ascii_attribute) {
552  const char *text
553      = "<?xml version='1.0' encoding='us-ascii'?>\n"
554        "<doc att='"
555        /* 64 characters per line */
556        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
557        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
558        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
559        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
560        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
561        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
562        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
563        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
564        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
565        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
566        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
567        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
568        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
569        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
570        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
571        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
572        "01234'>\n</doc>";
573  const XML_Char *expected =
574      /* 64 characters per line */
575      /* clang-format off */
576        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
577        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
578        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
579        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
580        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
581        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
582        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
583        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
584        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
585        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
586        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
587        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
588        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
589        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
590        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
591        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
592        XCS("01234");
593  /* clang-format on */
594
595  run_attribute_check(text, expected);
596}
597END_TEST
598
599/* Regression test #1 for SF bug #653180. */
600START_TEST(test_line_number_after_parse) {
601  const char *text = "<tag>\n"
602                     "\n"
603                     "\n</tag>";
604  XML_Size lineno;
605
606  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
607      == XML_STATUS_ERROR)
608    xml_failure(g_parser);
609  lineno = XML_GetCurrentLineNumber(g_parser);
610  if (lineno != 4) {
611    char buffer[100];
612    snprintf(buffer, sizeof(buffer),
613             "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
614    fail(buffer);
615  }
616}
617END_TEST
618
619/* Regression test #2 for SF bug #653180. */
620START_TEST(test_column_number_after_parse) {
621  const char *text = "<tag></tag>";
622  XML_Size colno;
623
624  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
625      == XML_STATUS_ERROR)
626    xml_failure(g_parser);
627  colno = XML_GetCurrentColumnNumber(g_parser);
628  if (colno != 11) {
629    char buffer[100];
630    snprintf(buffer, sizeof(buffer),
631             "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
632    fail(buffer);
633  }
634}
635END_TEST
636
637/* Regression test #3 for SF bug #653180. */
638START_TEST(test_line_and_column_numbers_inside_handlers) {
639  const char *text = "<a>\n"      /* Unix end-of-line */
640                     "  <b>\r\n"  /* Windows end-of-line */
641                     "    <c/>\r" /* Mac OS end-of-line */
642                     "  </b>\n"
643                     "  <d>\n"
644                     "    <f/>\n"
645                     "  </d>\n"
646                     "</a>";
647  const StructDataEntry expected[]
648      = {{XCS("a"), 0, 1, STRUCT_START_TAG}, {XCS("b"), 2, 2, STRUCT_START_TAG},
649         {XCS("c"), 4, 3, STRUCT_START_TAG}, {XCS("c"), 8, 3, STRUCT_END_TAG},
650         {XCS("b"), 2, 4, STRUCT_END_TAG},   {XCS("d"), 2, 5, STRUCT_START_TAG},
651         {XCS("f"), 4, 6, STRUCT_START_TAG}, {XCS("f"), 8, 6, STRUCT_END_TAG},
652         {XCS("d"), 2, 7, STRUCT_END_TAG},   {XCS("a"), 0, 8, STRUCT_END_TAG}};
653  const int expected_count = sizeof(expected) / sizeof(StructDataEntry);
654  StructData storage;
655
656  StructData_Init(&storage);
657  XML_SetUserData(g_parser, &storage);
658  XML_SetStartElementHandler(g_parser, start_element_event_handler2);
659  XML_SetEndElementHandler(g_parser, end_element_event_handler2);
660  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
661      == XML_STATUS_ERROR)
662    xml_failure(g_parser);
663
664  StructData_CheckItems(&storage, expected, expected_count);
665  StructData_Dispose(&storage);
666}
667END_TEST
668
669/* Regression test #4 for SF bug #653180. */
670START_TEST(test_line_number_after_error) {
671  const char *text = "<a>\n"
672                     "  <b>\n"
673                     "  </a>"; /* missing </b> */
674  XML_Size lineno;
675  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
676      != XML_STATUS_ERROR)
677    fail("Expected a parse error");
678
679  lineno = XML_GetCurrentLineNumber(g_parser);
680  if (lineno != 3) {
681    char buffer[100];
682    snprintf(buffer, sizeof(buffer),
683             "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
684    fail(buffer);
685  }
686}
687END_TEST
688
689/* Regression test #5 for SF bug #653180. */
690START_TEST(test_column_number_after_error) {
691  const char *text = "<a>\n"
692                     "  <b>\n"
693                     "  </a>"; /* missing </b> */
694  XML_Size colno;
695  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
696      != XML_STATUS_ERROR)
697    fail("Expected a parse error");
698
699  colno = XML_GetCurrentColumnNumber(g_parser);
700  if (colno != 4) {
701    char buffer[100];
702    snprintf(buffer, sizeof(buffer),
703             "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
704    fail(buffer);
705  }
706}
707END_TEST
708
709/* Regression test for SF bug #478332. */
710START_TEST(test_really_long_lines) {
711  /* This parses an input line longer than INIT_DATA_BUF_SIZE
712     characters long (defined to be 1024 in xmlparse.c).  We take a
713     really cheesy approach to building the input buffer, because
714     this avoids writing bugs in buffer-filling code.
715  */
716  const char *text
717      = "<e>"
718        /* 64 chars */
719        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
720        /* until we have at least 1024 characters on the line: */
721        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
722        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
723        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
724        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
725        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
726        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
727        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
728        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
729        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
730        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
731        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
732        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
733        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
734        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
735        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
736        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
737        "</e>";
738  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
739      == XML_STATUS_ERROR)
740    xml_failure(g_parser);
741}
742END_TEST
743
744/* Test cdata processing across a buffer boundary */
745START_TEST(test_really_long_encoded_lines) {
746  /* As above, except that we want to provoke an output buffer
747   * overflow with a non-trivial encoding.  For this we need to pass
748   * the whole cdata in one go, not byte-by-byte.
749   */
750  void *buffer;
751  const char *text
752      = "<?xml version='1.0' encoding='iso-8859-1'?>"
753        "<e>"
754        /* 64 chars */
755        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
756        /* until we have at least 1024 characters on the line: */
757        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
758        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
759        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
760        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
761        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
762        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
763        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
764        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
765        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
766        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
767        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
768        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
769        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
770        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
771        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
772        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
773        "</e>";
774  int parse_len = (int)strlen(text);
775
776  /* Need a cdata handler to provoke the code path we want to test */
777  XML_SetCharacterDataHandler(g_parser, dummy_cdata_handler);
778  buffer = XML_GetBuffer(g_parser, parse_len);
779  if (buffer == NULL)
780    fail("Could not allocate parse buffer");
781  assert(buffer != NULL);
782  memcpy(buffer, text, parse_len);
783  if (XML_ParseBuffer(g_parser, parse_len, XML_TRUE) == XML_STATUS_ERROR)
784    xml_failure(g_parser);
785}
786END_TEST
787
788/*
789 * Element event tests.
790 */
791
792START_TEST(test_end_element_events) {
793  const char *text = "<a><b><c/></b><d><f/></d></a>";
794  const XML_Char *expected = XCS("/c/b/f/d/a");
795  CharData storage;
796
797  CharData_Init(&storage);
798  XML_SetUserData(g_parser, &storage);
799  XML_SetEndElementHandler(g_parser, end_element_event_handler);
800  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
801      == XML_STATUS_ERROR)
802    xml_failure(g_parser);
803  CharData_CheckXMLChars(&storage, expected);
804}
805END_TEST
806
807/*
808 * Attribute tests.
809 */
810
811/* Helper used by the following tests; this checks any "attr" and "refs"
812   attributes to make sure whitespace has been normalized.
813
814   Return true if whitespace has been normalized in a string, using
815   the rules for attribute value normalization.  The 'is_cdata' flag
816   is needed since CDATA attributes don't need to have multiple
817   whitespace characters collapsed to a single space, while other
818   attribute data types do.  (Section 3.3.3 of the recommendation.)
819*/
820static int
821is_whitespace_normalized(const XML_Char *s, int is_cdata) {
822  int blanks = 0;
823  int at_start = 1;
824  while (*s) {
825    if (*s == XCS(' '))
826      ++blanks;
827    else if (*s == XCS('\t') || *s == XCS('\n') || *s == XCS('\r'))
828      return 0;
829    else {
830      if (at_start) {
831        at_start = 0;
832        if (blanks && ! is_cdata)
833          /* illegal leading blanks */
834          return 0;
835      } else if (blanks > 1 && ! is_cdata)
836        return 0;
837      blanks = 0;
838    }
839    ++s;
840  }
841  if (blanks && ! is_cdata)
842    return 0;
843  return 1;
844}
845
846/* Check the attribute whitespace checker: */
847START_TEST(test_helper_is_whitespace_normalized) {
848  assert(is_whitespace_normalized(XCS("abc"), 0));
849  assert(is_whitespace_normalized(XCS("abc"), 1));
850  assert(is_whitespace_normalized(XCS("abc def ghi"), 0));
851  assert(is_whitespace_normalized(XCS("abc def ghi"), 1));
852  assert(! is_whitespace_normalized(XCS(" abc def ghi"), 0));
853  assert(is_whitespace_normalized(XCS(" abc def ghi"), 1));
854  assert(! is_whitespace_normalized(XCS("abc  def ghi"), 0));
855  assert(is_whitespace_normalized(XCS("abc  def ghi"), 1));
856  assert(! is_whitespace_normalized(XCS("abc def ghi "), 0));
857  assert(is_whitespace_normalized(XCS("abc def ghi "), 1));
858  assert(! is_whitespace_normalized(XCS(" "), 0));
859  assert(is_whitespace_normalized(XCS(" "), 1));
860  assert(! is_whitespace_normalized(XCS("\t"), 0));
861  assert(! is_whitespace_normalized(XCS("\t"), 1));
862  assert(! is_whitespace_normalized(XCS("\n"), 0));
863  assert(! is_whitespace_normalized(XCS("\n"), 1));
864  assert(! is_whitespace_normalized(XCS("\r"), 0));
865  assert(! is_whitespace_normalized(XCS("\r"), 1));
866  assert(! is_whitespace_normalized(XCS("abc\t def"), 1));
867}
868END_TEST
869
870static void XMLCALL
871check_attr_contains_normalized_whitespace(void *userData, const XML_Char *name,
872                                          const XML_Char **atts) {
873  int i;
874  UNUSED_P(userData);
875  UNUSED_P(name);
876  for (i = 0; atts[i] != NULL; i += 2) {
877    const XML_Char *attrname = atts[i];
878    const XML_Char *value = atts[i + 1];
879    if (xcstrcmp(XCS("attr"), attrname) == 0
880        || xcstrcmp(XCS("ents"), attrname) == 0
881        || xcstrcmp(XCS("refs"), attrname) == 0) {
882      if (! is_whitespace_normalized(value, 0)) {
883        char buffer[256];
884        snprintf(buffer, sizeof(buffer),
885                 "attribute value not normalized: %" XML_FMT_STR
886                 "='%" XML_FMT_STR "'",
887                 attrname, value);
888        fail(buffer);
889      }
890    }
891  }
892}
893
894START_TEST(test_attr_whitespace_normalization) {
895  const char *text
896      = "<!DOCTYPE doc [\n"
897        "  <!ATTLIST doc\n"
898        "            attr NMTOKENS #REQUIRED\n"
899        "            ents ENTITIES #REQUIRED\n"
900        "            refs IDREFS   #REQUIRED>\n"
901        "]>\n"
902        "<doc attr='    a  b c\t\td\te\t' refs=' id-1   \t  id-2\t\t'  \n"
903        "     ents=' ent-1   \t\r\n"
904        "            ent-2  ' >\n"
905        "  <e id='id-1'/>\n"
906        "  <e id='id-2'/>\n"
907        "</doc>";
908
909  XML_SetStartElementHandler(g_parser,
910                             check_attr_contains_normalized_whitespace);
911  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
912      == XML_STATUS_ERROR)
913    xml_failure(g_parser);
914}
915END_TEST
916
917/*
918 * XML declaration tests.
919 */
920
921START_TEST(test_xmldecl_misplaced) {
922  expect_failure("\n"
923                 "<?xml version='1.0'?>\n"
924                 "<a/>",
925                 XML_ERROR_MISPLACED_XML_PI,
926                 "failed to report misplaced XML declaration");
927}
928END_TEST
929
930START_TEST(test_xmldecl_invalid) {
931  expect_failure("<?xml version='1.0' \xc3\xa7?>\n<doc/>", XML_ERROR_XML_DECL,
932                 "Failed to report invalid XML declaration");
933}
934END_TEST
935
936START_TEST(test_xmldecl_missing_attr) {
937  expect_failure("<?xml ='1.0'?>\n<doc/>\n", XML_ERROR_XML_DECL,
938                 "Failed to report missing XML declaration attribute");
939}
940END_TEST
941
942START_TEST(test_xmldecl_missing_value) {
943  expect_failure("<?xml version='1.0' encoding='us-ascii' standalone?>\n"
944                 "<doc/>",
945                 XML_ERROR_XML_DECL,
946                 "Failed to report missing attribute value");
947}
948END_TEST
949
950/* Regression test for SF bug #584832. */
951START_TEST(test_unknown_encoding_internal_entity) {
952  const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
953                     "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
954                     "<test a='&foo;'/>";
955
956  XML_SetUnknownEncodingHandler(g_parser, UnknownEncodingHandler, NULL);
957  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
958      == XML_STATUS_ERROR)
959    xml_failure(g_parser);
960}
961END_TEST
962
963/* Test unrecognised encoding handler */
964START_TEST(test_unrecognised_encoding_internal_entity) {
965  const char *text = "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
966                     "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
967                     "<test a='&foo;'/>";
968
969  XML_SetUnknownEncodingHandler(g_parser, UnrecognisedEncodingHandler, NULL);
970  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
971      != XML_STATUS_ERROR)
972    fail("Unrecognised encoding not rejected");
973}
974END_TEST
975
976/* Regression test for SF bug #620106. */
977START_TEST(test_ext_entity_set_encoding) {
978  const char *text = "<!DOCTYPE doc [\n"
979                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
980                     "]>\n"
981                     "<doc>&en;</doc>";
982  ExtTest test_data
983      = {/* This text says it's an unsupported encoding, but it's really
984            UTF-8, which we tell Expat using XML_SetEncoding().
985         */
986         "<?xml encoding='iso-8859-3'?>\xC3\xA9", XCS("utf-8"), NULL};
987#ifdef XML_UNICODE
988  const XML_Char *expected = XCS("\x00e9");
989#else
990  const XML_Char *expected = XCS("\xc3\xa9");
991#endif
992
993  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
994  run_ext_character_check(text, &test_data, expected);
995}
996END_TEST
997
998/* Test external entities with no handler */
999START_TEST(test_ext_entity_no_handler) {
1000  const char *text = "<!DOCTYPE doc [\n"
1001                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1002                     "]>\n"
1003                     "<doc>&en;</doc>";
1004
1005  XML_SetDefaultHandler(g_parser, dummy_default_handler);
1006  run_character_check(text, XCS(""));
1007}
1008END_TEST
1009
1010/* Test UTF-8 BOM is accepted */
1011START_TEST(test_ext_entity_set_bom) {
1012  const char *text = "<!DOCTYPE doc [\n"
1013                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1014                     "]>\n"
1015                     "<doc>&en;</doc>";
1016  ExtTest test_data = {"\xEF\xBB\xBF" /* BOM */
1017                       "<?xml encoding='iso-8859-3'?>"
1018                       "\xC3\xA9",
1019                       XCS("utf-8"), NULL};
1020#ifdef XML_UNICODE
1021  const XML_Char *expected = XCS("\x00e9");
1022#else
1023  const XML_Char *expected = XCS("\xc3\xa9");
1024#endif
1025
1026  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1027  run_ext_character_check(text, &test_data, expected);
1028}
1029END_TEST
1030
1031/* Test that bad encodings are faulted */
1032START_TEST(test_ext_entity_bad_encoding) {
1033  const char *text = "<!DOCTYPE doc [\n"
1034                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1035                     "]>\n"
1036                     "<doc>&en;</doc>";
1037  ExtFaults fault
1038      = {"<?xml encoding='iso-8859-3'?>u", "Unsupported encoding not faulted",
1039         XCS("unknown"), XML_ERROR_UNKNOWN_ENCODING};
1040
1041  XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1042  XML_SetUserData(g_parser, &fault);
1043  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1044                 "Bad encoding should not have been accepted");
1045}
1046END_TEST
1047
1048/* Try handing an invalid encoding to an external entity parser */
1049START_TEST(test_ext_entity_bad_encoding_2) {
1050  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1051                     "<!DOCTYPE doc SYSTEM 'foo'>\n"
1052                     "<doc>&entity;</doc>";
1053  ExtFaults fault
1054      = {"<!ELEMENT doc (#PCDATA)*>", "Unknown encoding not faulted",
1055         XCS("unknown-encoding"), XML_ERROR_UNKNOWN_ENCODING};
1056
1057  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1058  XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1059  XML_SetUserData(g_parser, &fault);
1060  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1061                 "Bad encoding not faulted in external entity handler");
1062}
1063END_TEST
1064
1065/* Test that no error is reported for unknown entities if we don't
1066   read an external subset.  This was fixed in Expat 1.95.5.
1067*/
1068START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
1069  const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
1070                     "<doc>&entity;</doc>";
1071
1072  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1073      == XML_STATUS_ERROR)
1074    xml_failure(g_parser);
1075}
1076END_TEST
1077
1078/* Test that an error is reported for unknown entities if we don't
1079   have an external subset.
1080*/
1081START_TEST(test_wfc_undeclared_entity_no_external_subset) {
1082  expect_failure("<doc>&entity;</doc>", XML_ERROR_UNDEFINED_ENTITY,
1083                 "Parser did not report undefined entity w/out a DTD.");
1084}
1085END_TEST
1086
1087/* Test that an error is reported for unknown entities if we don't
1088   read an external subset, but have been declared standalone.
1089*/
1090START_TEST(test_wfc_undeclared_entity_standalone) {
1091  const char *text
1092      = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1093        "<!DOCTYPE doc SYSTEM 'foo'>\n"
1094        "<doc>&entity;</doc>";
1095
1096  expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1097                 "Parser did not report undefined entity (standalone).");
1098}
1099END_TEST
1100
1101/* Test that an error is reported for unknown entities if we have read
1102   an external subset, and standalone is true.
1103*/
1104START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
1105  const char *text
1106      = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1107        "<!DOCTYPE doc SYSTEM 'foo'>\n"
1108        "<doc>&entity;</doc>";
1109  ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1110
1111  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1112  XML_SetUserData(g_parser, &test_data);
1113  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1114  expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1115                 "Parser did not report undefined entity (external DTD).");
1116}
1117END_TEST
1118
1119/* Test that external entity handling is not done if the parsing flag
1120 * is set to UNLESS_STANDALONE
1121 */
1122START_TEST(test_entity_with_external_subset_unless_standalone) {
1123  const char *text
1124      = "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
1125        "<!DOCTYPE doc SYSTEM 'foo'>\n"
1126        "<doc>&entity;</doc>";
1127  ExtTest test_data = {"<!ENTITY entity 'bar'>", NULL, NULL};
1128
1129  XML_SetParamEntityParsing(g_parser,
1130                            XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1131  XML_SetUserData(g_parser, &test_data);
1132  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1133  expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
1134                 "Parser did not report undefined entity");
1135}
1136END_TEST
1137
1138/* Test that no error is reported for unknown entities if we have read
1139   an external subset, and standalone is false.
1140*/
1141START_TEST(test_wfc_undeclared_entity_with_external_subset) {
1142  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1143                     "<!DOCTYPE doc SYSTEM 'foo'>\n"
1144                     "<doc>&entity;</doc>";
1145  ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1146
1147  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1148  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1149  run_ext_character_check(text, &test_data, XCS(""));
1150}
1151END_TEST
1152
1153/* Test that an error is reported if our NotStandalone handler fails */
1154START_TEST(test_not_standalone_handler_reject) {
1155  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1156                     "<!DOCTYPE doc SYSTEM 'foo'>\n"
1157                     "<doc>&entity;</doc>";
1158  ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1159
1160  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1161  XML_SetUserData(g_parser, &test_data);
1162  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1163  XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1164  expect_failure(text, XML_ERROR_NOT_STANDALONE,
1165                 "NotStandalone handler failed to reject");
1166
1167  /* Try again but without external entity handling */
1168  XML_ParserReset(g_parser, NULL);
1169  XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
1170  expect_failure(text, XML_ERROR_NOT_STANDALONE,
1171                 "NotStandalone handler failed to reject");
1172}
1173END_TEST
1174
1175/* Test that no error is reported if our NotStandalone handler succeeds */
1176START_TEST(test_not_standalone_handler_accept) {
1177  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
1178                     "<!DOCTYPE doc SYSTEM 'foo'>\n"
1179                     "<doc>&entity;</doc>";
1180  ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
1181
1182  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1183  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
1184  XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1185  run_ext_character_check(text, &test_data, XCS(""));
1186
1187  /* Repeat without the external entity handler */
1188  XML_ParserReset(g_parser, NULL);
1189  XML_SetNotStandaloneHandler(g_parser, accept_not_standalone_handler);
1190  run_character_check(text, XCS(""));
1191}
1192END_TEST
1193
1194START_TEST(test_wfc_no_recursive_entity_refs) {
1195  const char *text = "<!DOCTYPE doc [\n"
1196                     "  <!ENTITY entity '&#38;entity;'>\n"
1197                     "]>\n"
1198                     "<doc>&entity;</doc>";
1199
1200  expect_failure(text, XML_ERROR_RECURSIVE_ENTITY_REF,
1201                 "Parser did not report recursive entity reference.");
1202}
1203END_TEST
1204
1205START_TEST(test_recursive_external_parameter_entity_2) {
1206  struct TestCase {
1207    const char *doc;
1208    enum XML_Status expectedStatus;
1209  };
1210
1211  struct TestCase cases[] = {
1212      {"<!ENTITY % p1 '%p1;'>", XML_STATUS_ERROR},
1213      {"<!ENTITY % p1 '%p1;'>"
1214       "<!ENTITY % p1 'first declaration wins'>",
1215       XML_STATUS_ERROR},
1216      {"<!ENTITY % p1 'first declaration wins'>"
1217       "<!ENTITY % p1 '%p1;'>",
1218       XML_STATUS_OK},
1219      {"<!ENTITY % p1 '&#37;p1;'>", XML_STATUS_OK},
1220  };
1221
1222  for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
1223    const char *const doc = cases[i].doc;
1224    const enum XML_Status expectedStatus = cases[i].expectedStatus;
1225    set_subtest("%s", doc);
1226
1227    XML_Parser parser = XML_ParserCreate(NULL);
1228    assert_true(parser != NULL);
1229
1230    XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, NULL, NULL);
1231    assert_true(ext_parser != NULL);
1232
1233    const enum XML_Status actualStatus
1234        = _XML_Parse_SINGLE_BYTES(ext_parser, doc, (int)strlen(doc), XML_TRUE);
1235
1236    assert_true(actualStatus == expectedStatus);
1237    if (actualStatus != XML_STATUS_OK) {
1238      assert_true(XML_GetErrorCode(ext_parser)
1239                  == XML_ERROR_RECURSIVE_ENTITY_REF);
1240    }
1241
1242    XML_ParserFree(ext_parser);
1243    XML_ParserFree(parser);
1244  }
1245}
1246END_TEST
1247
1248/* Test incomplete external entities are faulted */
1249START_TEST(test_ext_entity_invalid_parse) {
1250  const char *text = "<!DOCTYPE doc [\n"
1251                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
1252                     "]>\n"
1253                     "<doc>&en;</doc>";
1254  const ExtFaults faults[]
1255      = {{"<", "Incomplete element declaration not faulted", NULL,
1256          XML_ERROR_UNCLOSED_TOKEN},
1257         {"<\xe2\x82", /* First two bytes of a three-byte char */
1258          "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
1259         {"<tag>\xe2\x82", "Incomplete character in CDATA not faulted", NULL,
1260          XML_ERROR_PARTIAL_CHAR},
1261         {NULL, NULL, NULL, XML_ERROR_NONE}};
1262  const ExtFaults *fault = faults;
1263
1264  for (; fault->parse_text != NULL; fault++) {
1265    set_subtest("\"%s\"", fault->parse_text);
1266    XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1267    XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
1268    XML_SetUserData(g_parser, (void *)fault);
1269    expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
1270                   "Parser did not report external entity error");
1271    XML_ParserReset(g_parser, NULL);
1272  }
1273}
1274END_TEST
1275
1276/* Regression test for SF bug #483514. */
1277START_TEST(test_dtd_default_handling) {
1278  const char *text = "<!DOCTYPE doc [\n"
1279                     "<!ENTITY e SYSTEM 'http://example.org/e'>\n"
1280                     "<!NOTATION n SYSTEM 'http://example.org/n'>\n"
1281                     "<!ELEMENT doc EMPTY>\n"
1282                     "<!ATTLIST doc a CDATA #IMPLIED>\n"
1283                     "<?pi in dtd?>\n"
1284                     "<!--comment in dtd-->\n"
1285                     "]><doc/>";
1286
1287  XML_SetDefaultHandler(g_parser, accumulate_characters);
1288  XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
1289  XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
1290  XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
1291  XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
1292  XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
1293  XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
1294  XML_SetProcessingInstructionHandler(g_parser, dummy_pi_handler);
1295  XML_SetCommentHandler(g_parser, dummy_comment_handler);
1296  XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1297  XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1298  run_character_check(text, XCS("\n\n\n\n\n\n\n<doc/>"));
1299}
1300END_TEST
1301
1302/* Test handling of attribute declarations */
1303START_TEST(test_dtd_attr_handling) {
1304  const char *prolog = "<!DOCTYPE doc [\n"
1305                       "<!ELEMENT doc EMPTY>\n";
1306  AttTest attr_data[]
1307      = {{"<!ATTLIST doc a ( one | two | three ) #REQUIRED>\n"
1308          "]>"
1309          "<doc a='two'/>",
1310          XCS("doc"), XCS("a"),
1311          XCS("(one|two|three)"), /* Extraneous spaces will be removed */
1312          NULL, XML_TRUE},
1313         {"<!NOTATION foo SYSTEM 'http://example.org/foo'>\n"
1314          "<!ATTLIST doc a NOTATION (foo) #IMPLIED>\n"
1315          "]>"
1316          "<doc/>",
1317          XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), NULL, XML_FALSE},
1318         {"<!ATTLIST doc a NOTATION (foo) 'bar'>\n"
1319          "]>"
1320          "<doc/>",
1321          XCS("doc"), XCS("a"), XCS("NOTATION(foo)"), XCS("bar"), XML_FALSE},
1322         {"<!ATTLIST doc a CDATA '\xdb\xb2'>\n"
1323          "]>"
1324          "<doc/>",
1325          XCS("doc"), XCS("a"), XCS("CDATA"),
1326#ifdef XML_UNICODE
1327          XCS("\x06f2"),
1328#else
1329          XCS("\xdb\xb2"),
1330#endif
1331          XML_FALSE},
1332         {NULL, NULL, NULL, NULL, NULL, XML_FALSE}};
1333  AttTest *test;
1334
1335  for (test = attr_data; test->definition != NULL; test++) {
1336    set_subtest("%s", test->definition);
1337    XML_SetAttlistDeclHandler(g_parser, verify_attlist_decl_handler);
1338    XML_SetUserData(g_parser, test);
1339    if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)strlen(prolog),
1340                                XML_FALSE)
1341        == XML_STATUS_ERROR)
1342      xml_failure(g_parser);
1343    if (_XML_Parse_SINGLE_BYTES(g_parser, test->definition,
1344                                (int)strlen(test->definition), XML_TRUE)
1345        == XML_STATUS_ERROR)
1346      xml_failure(g_parser);
1347    XML_ParserReset(g_parser, NULL);
1348  }
1349}
1350END_TEST
1351
1352/* See related SF bug #673791.
1353   When namespace processing is enabled, setting the namespace URI for
1354   a prefix is not allowed; this test ensures that it *is* allowed
1355   when namespace processing is not enabled.
1356   (See Namespaces in XML, section 2.)
1357*/
1358START_TEST(test_empty_ns_without_namespaces) {
1359  const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
1360                     "  <e xmlns:prefix=''/>\n"
1361                     "</doc>";
1362
1363  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1364      == XML_STATUS_ERROR)
1365    xml_failure(g_parser);
1366}
1367END_TEST
1368
1369/* Regression test for SF bug #824420.
1370   Checks that an xmlns:prefix attribute set in an attribute's default
1371   value isn't misinterpreted.
1372*/
1373START_TEST(test_ns_in_attribute_default_without_namespaces) {
1374  const char *text = "<!DOCTYPE e:element [\n"
1375                     "  <!ATTLIST e:element\n"
1376                     "    xmlns:e CDATA 'http://example.org/'>\n"
1377                     "      ]>\n"
1378                     "<e:element/>";
1379
1380  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1381      == XML_STATUS_ERROR)
1382    xml_failure(g_parser);
1383}
1384END_TEST
1385
1386/* Regression test for SF bug #1515266: missing check of stopped
1387   parser in doContext() 'for' loop. */
1388START_TEST(test_stop_parser_between_char_data_calls) {
1389  /* The sample data must be big enough that there are two calls to
1390     the character data handler from within the inner "for" loop of
1391     the XML_TOK_DATA_CHARS case in doContent(), and the character
1392     handler must stop the parser and clear the character data
1393     handler.
1394  */
1395  const char *text = long_character_data_text;
1396
1397  XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1398  g_resumable = XML_FALSE;
1399  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1400      != XML_STATUS_ERROR)
1401    xml_failure(g_parser);
1402  if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
1403    xml_failure(g_parser);
1404}
1405END_TEST
1406
1407/* Regression test for SF bug #1515266: missing check of stopped
1408   parser in doContext() 'for' loop. */
1409START_TEST(test_suspend_parser_between_char_data_calls) {
1410  /* The sample data must be big enough that there are two calls to
1411     the character data handler from within the inner "for" loop of
1412     the XML_TOK_DATA_CHARS case in doContent(), and the character
1413     handler must stop the parser and clear the character data
1414     handler.
1415  */
1416  const char *text = long_character_data_text;
1417
1418  XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1419  g_resumable = XML_TRUE;
1420  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1421      != XML_STATUS_SUSPENDED)
1422    xml_failure(g_parser);
1423  if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1424    xml_failure(g_parser);
1425  /* Try parsing directly */
1426  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1427      != XML_STATUS_ERROR)
1428    fail("Attempt to continue parse while suspended not faulted");
1429  if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
1430    fail("Suspended parse not faulted with correct error");
1431}
1432END_TEST
1433
1434/* Test repeated calls to XML_StopParser are handled correctly */
1435START_TEST(test_repeated_stop_parser_between_char_data_calls) {
1436  const char *text = long_character_data_text;
1437
1438  XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1439  g_resumable = XML_FALSE;
1440  g_abortable = XML_FALSE;
1441  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1442      != XML_STATUS_ERROR)
1443    fail("Failed to double-stop parser");
1444
1445  XML_ParserReset(g_parser, NULL);
1446  XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1447  g_resumable = XML_TRUE;
1448  g_abortable = XML_FALSE;
1449  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1450      != XML_STATUS_SUSPENDED)
1451    fail("Failed to double-suspend parser");
1452
1453  XML_ParserReset(g_parser, NULL);
1454  XML_SetCharacterDataHandler(g_parser, parser_stop_character_handler);
1455  g_resumable = XML_TRUE;
1456  g_abortable = XML_TRUE;
1457  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1458      != XML_STATUS_ERROR)
1459    fail("Failed to suspend-abort parser");
1460}
1461END_TEST
1462
1463START_TEST(test_good_cdata_ascii) {
1464  const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1465  const XML_Char *expected = XCS("<greeting>Hello, world!</greeting>");
1466
1467  CharData storage;
1468  CharData_Init(&storage);
1469  XML_SetUserData(g_parser, &storage);
1470  XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1471  /* Add start and end handlers for coverage */
1472  XML_SetStartCdataSectionHandler(g_parser, dummy_start_cdata_handler);
1473  XML_SetEndCdataSectionHandler(g_parser, dummy_end_cdata_handler);
1474
1475  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1476      == XML_STATUS_ERROR)
1477    xml_failure(g_parser);
1478  CharData_CheckXMLChars(&storage, expected);
1479
1480  /* Try again, this time with a default handler */
1481  XML_ParserReset(g_parser, NULL);
1482  CharData_Init(&storage);
1483  XML_SetUserData(g_parser, &storage);
1484  XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1485  XML_SetDefaultHandler(g_parser, dummy_default_handler);
1486
1487  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1488      == XML_STATUS_ERROR)
1489    xml_failure(g_parser);
1490  CharData_CheckXMLChars(&storage, expected);
1491}
1492END_TEST
1493
1494START_TEST(test_good_cdata_utf16) {
1495  /* Test data is:
1496   *   <?xml version='1.0' encoding='utf-16'?>
1497   *   <a><![CDATA[hello]]></a>
1498   */
1499  const char text[]
1500      = "\0<\0?\0x\0m\0l\0"
1501        " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1502        " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1503        "1\0"
1504        "6\0'"
1505        "\0?\0>\0\n"
1506        "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1507  const XML_Char *expected = XCS("hello");
1508
1509  CharData storage;
1510  CharData_Init(&storage);
1511  XML_SetUserData(g_parser, &storage);
1512  XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1513
1514  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1515      == XML_STATUS_ERROR)
1516    xml_failure(g_parser);
1517  CharData_CheckXMLChars(&storage, expected);
1518}
1519END_TEST
1520
1521START_TEST(test_good_cdata_utf16_le) {
1522  /* Test data is:
1523   *   <?xml version='1.0' encoding='utf-16'?>
1524   *   <a><![CDATA[hello]]></a>
1525   */
1526  const char text[]
1527      = "<\0?\0x\0m\0l\0"
1528        " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1529        " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1530        "1\0"
1531        "6\0'"
1532        "\0?\0>\0\n"
1533        "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>\0";
1534  const XML_Char *expected = XCS("hello");
1535
1536  CharData storage;
1537  CharData_Init(&storage);
1538  XML_SetUserData(g_parser, &storage);
1539  XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1540
1541  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1542      == XML_STATUS_ERROR)
1543    xml_failure(g_parser);
1544  CharData_CheckXMLChars(&storage, expected);
1545}
1546END_TEST
1547
1548/* Test UTF16 conversion of a long cdata string */
1549
/* 16 characters ("A" through "P"), each written as a zero byte followed
   by the letter: handy macro to reduce visual clutter */
#define A_TO_P_IN_UTF16 "\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"
1552
1553START_TEST(test_long_cdata_utf16) {
1554  /* Test data is:
1555   * <?xlm version='1.0' encoding='utf-16'?>
1556   * <a><![CDATA[
1557   * ABCDEFGHIJKLMNOP
1558   * ]]></a>
1559   */
1560  const char text[]
1561      = "\0<\0?\0x\0m\0l\0 "
1562        "\0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0 "
1563        "\0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0\x31\0\x36\0'\0?\0>"
1564        "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1565      /* 64 characters per line */
1566      /* clang-format off */
1567        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1568        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1569        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1570        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1571        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1572        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1573        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1574        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1575        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1576        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1577        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1578        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1579        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1580        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1581        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1582        A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16  A_TO_P_IN_UTF16
1583        A_TO_P_IN_UTF16
1584        /* clang-format on */
1585        "\0]\0]\0>\0<\0/\0a\0>";
1586  const XML_Char *expected =
1587      /* clang-format off */
1588        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1589        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1590        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1591        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1592        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1593        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1594        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1595        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1596        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1597        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1598        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1599        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1600        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1601        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1602        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1603        XCS("ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP")
1604        XCS("ABCDEFGHIJKLMNOP");
1605  /* clang-format on */
1606  CharData storage;
1607  void *buffer;
1608
1609  CharData_Init(&storage);
1610  XML_SetUserData(g_parser, &storage);
1611  XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1612  buffer = XML_GetBuffer(g_parser, sizeof(text) - 1);
1613  if (buffer == NULL)
1614    fail("Could not allocate parse buffer");
1615  assert(buffer != NULL);
1616  memcpy(buffer, text, sizeof(text) - 1);
1617  if (XML_ParseBuffer(g_parser, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1618    xml_failure(g_parser);
1619  CharData_CheckXMLChars(&storage, expected);
1620}
1621END_TEST
1622
1623/* Test handling of multiple unit UTF-16 characters */
1624START_TEST(test_multichar_cdata_utf16) {
1625  /* Test data is:
1626   *   <?xml version='1.0' encoding='utf-16'?>
1627   *   <a><![CDATA[{MINIM}{CROTCHET}]]></a>
1628   *
1629   * where {MINIM} is U+1d15e (a minim or half-note)
1630   *   UTF-16: 0xd834 0xdd5e
1631   *   UTF-8:  0xf0 0x9d 0x85 0x9e
1632   * and {CROTCHET} is U+1d15f (a crotchet or quarter-note)
1633   *   UTF-16: 0xd834 0xdd5f
1634   *   UTF-8:  0xf0 0x9d 0x85 0x9f
1635   */
1636  const char text[] = "\0<\0?\0x\0m\0l\0"
1637                      " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1638                      " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1639                      "1\0"
1640                      "6\0'"
1641                      "\0?\0>\0\n"
1642                      "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1643                      "\xd8\x34\xdd\x5e\xd8\x34\xdd\x5f"
1644                      "\0]\0]\0>\0<\0/\0a\0>";
1645#ifdef XML_UNICODE
1646  const XML_Char *expected = XCS("\xd834\xdd5e\xd834\xdd5f");
1647#else
1648  const XML_Char *expected = XCS("\xf0\x9d\x85\x9e\xf0\x9d\x85\x9f");
1649#endif
1650  CharData storage;
1651
1652  CharData_Init(&storage);
1653  XML_SetUserData(g_parser, &storage);
1654  XML_SetCharacterDataHandler(g_parser, accumulate_characters);
1655
1656  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1657      == XML_STATUS_ERROR)
1658    xml_failure(g_parser);
1659  CharData_CheckXMLChars(&storage, expected);
1660}
1661END_TEST
1662
/* Test that a reversed UTF-16 surrogate pair in a CDATA section is rejected */
1664START_TEST(test_utf16_bad_surrogate_pair) {
1665  /* Test data is:
1666   *   <?xml version='1.0' encoding='utf-16'?>
1667   *   <a><![CDATA[{BADLINB}]]></a>
1668   *
1669   * where {BADLINB} is U+10000 (the first Linear B character)
1670   * with the UTF-16 surrogate pair in the wrong order, i.e.
1671   *   0xdc00 0xd800
1672   */
1673  const char text[] = "\0<\0?\0x\0m\0l\0"
1674                      " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1675                      " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1676                      "1\0"
1677                      "6\0'"
1678                      "\0?\0>\0\n"
1679                      "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0["
1680                      "\xdc\x00\xd8\x00"
1681                      "\0]\0]\0>\0<\0/\0a\0>";
1682
1683  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
1684      != XML_STATUS_ERROR)
1685    fail("Reversed UTF-16 surrogate pair not faulted");
1686  if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
1687    xml_failure(g_parser);
1688}
1689END_TEST
1690
1691START_TEST(test_bad_cdata) {
1692  struct CaseData {
1693    const char *text;
1694    enum XML_Error expectedError;
1695  };
1696
1697  struct CaseData cases[]
1698      = {{"<a><", XML_ERROR_UNCLOSED_TOKEN},
1699         {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1700         {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1701         {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1702         {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1703         {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1704         {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1705         {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1706
1707         {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1708         {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1709         {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1710
1711         {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1712         {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN},  /* ?! */
1713         {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1714         {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1715         {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1716         {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1717         {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1718
1719         {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1720         {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1721         {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}};
1722
1723  size_t i = 0;
1724  for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1725    set_subtest("%s", cases[i].text);
1726    const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1727        g_parser, cases[i].text, (int)strlen(cases[i].text), XML_TRUE);
1728    const enum XML_Error actualError = XML_GetErrorCode(g_parser);
1729
1730    assert(actualStatus == XML_STATUS_ERROR);
1731
1732    if (actualError != cases[i].expectedError) {
1733      char message[100];
1734      snprintf(message, sizeof(message),
1735               "Expected error %d but got error %d for case %u: \"%s\"\n",
1736               cases[i].expectedError, actualError, (unsigned int)i + 1,
1737               cases[i].text);
1738      fail(message);
1739    }
1740
1741    XML_ParserReset(g_parser, NULL);
1742  }
1743}
1744END_TEST
1745
1746/* Test failures in UTF-16 CDATA */
1747START_TEST(test_bad_cdata_utf16) {
1748  struct CaseData {
1749    size_t text_bytes;
1750    const char *text;
1751    enum XML_Error expected_error;
1752  };
1753
1754  const char prolog[] = "\0<\0?\0x\0m\0l\0"
1755                        " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1756                        " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0"
1757                        "1\0"
1758                        "6\0'"
1759                        "\0?\0>\0\n"
1760                        "\0<\0a\0>";
1761  struct CaseData cases[] = {
1762      {1, "\0", XML_ERROR_UNCLOSED_TOKEN},
1763      {2, "\0<", XML_ERROR_UNCLOSED_TOKEN},
1764      {3, "\0<\0", XML_ERROR_UNCLOSED_TOKEN},
1765      {4, "\0<\0!", XML_ERROR_UNCLOSED_TOKEN},
1766      {5, "\0<\0!\0", XML_ERROR_UNCLOSED_TOKEN},
1767      {6, "\0<\0!\0[", XML_ERROR_UNCLOSED_TOKEN},
1768      {7, "\0<\0!\0[\0", XML_ERROR_UNCLOSED_TOKEN},
1769      {8, "\0<\0!\0[\0C", XML_ERROR_UNCLOSED_TOKEN},
1770      {9, "\0<\0!\0[\0C\0", XML_ERROR_UNCLOSED_TOKEN},
1771      {10, "\0<\0!\0[\0C\0D", XML_ERROR_UNCLOSED_TOKEN},
1772      {11, "\0<\0!\0[\0C\0D\0", XML_ERROR_UNCLOSED_TOKEN},
1773      {12, "\0<\0!\0[\0C\0D\0A", XML_ERROR_UNCLOSED_TOKEN},
1774      {13, "\0<\0!\0[\0C\0D\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1775      {14, "\0<\0!\0[\0C\0D\0A\0T", XML_ERROR_UNCLOSED_TOKEN},
1776      {15, "\0<\0!\0[\0C\0D\0A\0T\0", XML_ERROR_UNCLOSED_TOKEN},
1777      {16, "\0<\0!\0[\0C\0D\0A\0T\0A", XML_ERROR_UNCLOSED_TOKEN},
1778      {17, "\0<\0!\0[\0C\0D\0A\0T\0A\0", XML_ERROR_UNCLOSED_TOKEN},
1779      {18, "\0<\0!\0[\0C\0D\0A\0T\0A\0[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1780      {19, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0", XML_ERROR_UNCLOSED_CDATA_SECTION},
1781      {20, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z", XML_ERROR_UNCLOSED_CDATA_SECTION},
1782      /* Now add a four-byte UTF-16 character */
1783      {21, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8",
1784       XML_ERROR_UNCLOSED_CDATA_SECTION},
1785      {22, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34", XML_ERROR_PARTIAL_CHAR},
1786      {23, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd",
1787       XML_ERROR_PARTIAL_CHAR},
1788      {24, "\0<\0!\0[\0C\0D\0A\0T\0A\0[\0Z\xd8\x34\xdd\x5e",
1789       XML_ERROR_UNCLOSED_CDATA_SECTION}};
1790  size_t i;
1791
1792  for (i = 0; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1793    set_subtest("case %lu", (long unsigned)(i + 1));
1794    enum XML_Status actual_status;
1795    enum XML_Error actual_error;
1796
1797    if (_XML_Parse_SINGLE_BYTES(g_parser, prolog, (int)sizeof(prolog) - 1,
1798                                XML_FALSE)
1799        == XML_STATUS_ERROR)
1800      xml_failure(g_parser);
1801    actual_status = _XML_Parse_SINGLE_BYTES(g_parser, cases[i].text,
1802                                            (int)cases[i].text_bytes, XML_TRUE);
1803    assert(actual_status == XML_STATUS_ERROR);
1804    actual_error = XML_GetErrorCode(g_parser);
1805    if (actual_error != cases[i].expected_error) {
1806      char message[1024];
1807
1808      snprintf(message, sizeof(message),
1809               "Expected error %d (%" XML_FMT_STR "), got %d (%" XML_FMT_STR
1810               ") for case %lu\n",
1811               cases[i].expected_error,
1812               XML_ErrorString(cases[i].expected_error), actual_error,
1813               XML_ErrorString(actual_error), (long unsigned)(i + 1));
1814      fail(message);
1815    }
1816    XML_ParserReset(g_parser, NULL);
1817  }
1818}
1819END_TEST
1820
1821/* Test stopping the parser in cdata handler */
1822START_TEST(test_stop_parser_between_cdata_calls) {
1823  const char *text = long_cdata_text;
1824
1825  XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1826  g_resumable = XML_FALSE;
1827  expect_failure(text, XML_ERROR_ABORTED, "Parse not aborted in CDATA handler");
1828}
1829END_TEST
1830
1831/* Test suspending the parser in cdata handler */
1832START_TEST(test_suspend_parser_between_cdata_calls) {
1833  const char *text = long_cdata_text;
1834  enum XML_Status result;
1835
1836  XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
1837  g_resumable = XML_TRUE;
1838  result = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
1839  if (result != XML_STATUS_SUSPENDED) {
1840    if (result == XML_STATUS_ERROR)
1841      xml_failure(g_parser);
1842    fail("Parse not suspended in CDATA handler");
1843  }
1844  if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
1845    xml_failure(g_parser);
1846}
1847END_TEST
1848
1849/* Test memory allocation functions */
1850START_TEST(test_memory_allocation) {
1851  char *buffer = (char *)XML_MemMalloc(g_parser, 256);
1852  char *p;
1853
1854  if (buffer == NULL) {
1855    fail("Allocation failed");
1856  } else {
1857    /* Try writing to memory; some OSes try to cheat! */
1858    buffer[0] = 'T';
1859    buffer[1] = 'E';
1860    buffer[2] = 'S';
1861    buffer[3] = 'T';
1862    buffer[4] = '\0';
1863    if (strcmp(buffer, "TEST") != 0) {
1864      fail("Memory not writable");
1865    } else {
1866      p = (char *)XML_MemRealloc(g_parser, buffer, 512);
1867      if (p == NULL) {
1868        fail("Reallocation failed");
1869      } else {
1870        /* Write again, just to be sure */
1871        buffer = p;
1872        buffer[0] = 'V';
1873        if (strcmp(buffer, "VEST") != 0) {
1874          fail("Reallocated memory not writable");
1875        }
1876      }
1877    }
1878    XML_MemFree(g_parser, buffer);
1879  }
1880}
1881END_TEST
1882
1883/* Test XML_DefaultCurrent() passes handling on correctly */
1884START_TEST(test_default_current) {
1885  const char *text = "<doc>hell]</doc>";
1886  const char *entity_text = "<!DOCTYPE doc [\n"
1887                            "<!ENTITY entity '&#37;'>\n"
1888                            "]>\n"
1889                            "<doc>&entity;</doc>";
1890
1891  set_subtest("with defaulting");
1892  {
1893    struct handler_record_list storage;
1894    storage.count = 0;
1895    XML_SetDefaultHandler(g_parser, record_default_handler);
1896    XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1897    XML_SetUserData(g_parser, &storage);
1898    if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1899        == XML_STATUS_ERROR)
1900      xml_failure(g_parser);
1901    int i = 0;
1902    assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1903    // we should have gotten one or more cdata callbacks, totaling 5 chars
1904    int cdata_len_remaining = 5;
1905    while (cdata_len_remaining > 0) {
1906      const struct handler_record_entry *c_entry
1907          = handler_record_get(&storage, i++);
1908      assert_true(strcmp(c_entry->name, "record_cdata_handler") == 0);
1909      assert_true(c_entry->arg > 0);
1910      assert_true(c_entry->arg <= cdata_len_remaining);
1911      cdata_len_remaining -= c_entry->arg;
1912      // default handler must follow, with the exact same len argument.
1913      assert_record_handler_called(&storage, i++, "record_default_handler",
1914                                   c_entry->arg);
1915    }
1916    assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1917    assert_true(storage.count == i);
1918  }
1919
1920  /* Again, without the defaulting */
1921  set_subtest("no defaulting");
1922  {
1923    struct handler_record_list storage;
1924    storage.count = 0;
1925    XML_ParserReset(g_parser, NULL);
1926    XML_SetDefaultHandler(g_parser, record_default_handler);
1927    XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
1928    XML_SetUserData(g_parser, &storage);
1929    if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
1930        == XML_STATUS_ERROR)
1931      xml_failure(g_parser);
1932    int i = 0;
1933    assert_record_handler_called(&storage, i++, "record_default_handler", 5);
1934    // we should have gotten one or more cdata callbacks, totaling 5 chars
1935    int cdata_len_remaining = 5;
1936    while (cdata_len_remaining > 0) {
1937      const struct handler_record_entry *c_entry
1938          = handler_record_get(&storage, i++);
1939      assert_true(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
1940      assert_true(c_entry->arg > 0);
1941      assert_true(c_entry->arg <= cdata_len_remaining);
1942      cdata_len_remaining -= c_entry->arg;
1943    }
1944    assert_record_handler_called(&storage, i++, "record_default_handler", 6);
1945    assert_true(storage.count == i);
1946  }
1947
1948  /* Now with an internal entity to complicate matters */
1949  set_subtest("with internal entity");
1950  {
1951    struct handler_record_list storage;
1952    storage.count = 0;
1953    XML_ParserReset(g_parser, NULL);
1954    XML_SetDefaultHandler(g_parser, record_default_handler);
1955    XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1956    XML_SetUserData(g_parser, &storage);
1957    if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1958                                XML_TRUE)
1959        == XML_STATUS_ERROR)
1960      xml_failure(g_parser);
1961    /* The default handler suppresses the entity */
1962    assert_record_handler_called(&storage, 0, "record_default_handler", 9);
1963    assert_record_handler_called(&storage, 1, "record_default_handler", 1);
1964    assert_record_handler_called(&storage, 2, "record_default_handler", 3);
1965    assert_record_handler_called(&storage, 3, "record_default_handler", 1);
1966    assert_record_handler_called(&storage, 4, "record_default_handler", 1);
1967    assert_record_handler_called(&storage, 5, "record_default_handler", 1);
1968    assert_record_handler_called(&storage, 6, "record_default_handler", 8);
1969    assert_record_handler_called(&storage, 7, "record_default_handler", 1);
1970    assert_record_handler_called(&storage, 8, "record_default_handler", 6);
1971    assert_record_handler_called(&storage, 9, "record_default_handler", 1);
1972    assert_record_handler_called(&storage, 10, "record_default_handler", 7);
1973    assert_record_handler_called(&storage, 11, "record_default_handler", 1);
1974    assert_record_handler_called(&storage, 12, "record_default_handler", 1);
1975    assert_record_handler_called(&storage, 13, "record_default_handler", 1);
1976    assert_record_handler_called(&storage, 14, "record_default_handler", 1);
1977    assert_record_handler_called(&storage, 15, "record_default_handler", 1);
1978    assert_record_handler_called(&storage, 16, "record_default_handler", 5);
1979    assert_record_handler_called(&storage, 17, "record_default_handler", 8);
1980    assert_record_handler_called(&storage, 18, "record_default_handler", 6);
1981    assert_true(storage.count == 19);
1982  }
1983
1984  /* Again, with a skip handler */
1985  set_subtest("with skip handler");
1986  {
1987    struct handler_record_list storage;
1988    storage.count = 0;
1989    XML_ParserReset(g_parser, NULL);
1990    XML_SetDefaultHandler(g_parser, record_default_handler);
1991    XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
1992    XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
1993    XML_SetUserData(g_parser, &storage);
1994    if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
1995                                XML_TRUE)
1996        == XML_STATUS_ERROR)
1997      xml_failure(g_parser);
1998    /* The default handler suppresses the entity */
1999    assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2000    assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2001    assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2002    assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2003    assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2004    assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2005    assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2006    assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2007    assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2008    assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2009    assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2010    assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2011    assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2012    assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2013    assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2014    assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2015    assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2016    assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
2017    assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2018    assert_true(storage.count == 19);
2019  }
2020
2021  /* This time, allow the entity through */
2022  set_subtest("allow entity");
2023  {
2024    struct handler_record_list storage;
2025    storage.count = 0;
2026    XML_ParserReset(g_parser, NULL);
2027    XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2028    XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
2029    XML_SetUserData(g_parser, &storage);
2030    if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2031                                XML_TRUE)
2032        == XML_STATUS_ERROR)
2033      xml_failure(g_parser);
2034    assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2035    assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2036    assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2037    assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2038    assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2039    assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2040    assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2041    assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2042    assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2043    assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2044    assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2045    assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2046    assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2047    assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2048    assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2049    assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2050    assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2051    assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
2052    assert_record_handler_called(&storage, 18, "record_default_handler", 1);
2053    assert_record_handler_called(&storage, 19, "record_default_handler", 6);
2054    assert_true(storage.count == 20);
2055  }
2056
2057  /* Finally, without passing the cdata to the default handler */
2058  set_subtest("not passing cdata");
2059  {
2060    struct handler_record_list storage;
2061    storage.count = 0;
2062    XML_ParserReset(g_parser, NULL);
2063    XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
2064    XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
2065    XML_SetUserData(g_parser, &storage);
2066    if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
2067                                XML_TRUE)
2068        == XML_STATUS_ERROR)
2069      xml_failure(g_parser);
2070    assert_record_handler_called(&storage, 0, "record_default_handler", 9);
2071    assert_record_handler_called(&storage, 1, "record_default_handler", 1);
2072    assert_record_handler_called(&storage, 2, "record_default_handler", 3);
2073    assert_record_handler_called(&storage, 3, "record_default_handler", 1);
2074    assert_record_handler_called(&storage, 4, "record_default_handler", 1);
2075    assert_record_handler_called(&storage, 5, "record_default_handler", 1);
2076    assert_record_handler_called(&storage, 6, "record_default_handler", 8);
2077    assert_record_handler_called(&storage, 7, "record_default_handler", 1);
2078    assert_record_handler_called(&storage, 8, "record_default_handler", 6);
2079    assert_record_handler_called(&storage, 9, "record_default_handler", 1);
2080    assert_record_handler_called(&storage, 10, "record_default_handler", 7);
2081    assert_record_handler_called(&storage, 11, "record_default_handler", 1);
2082    assert_record_handler_called(&storage, 12, "record_default_handler", 1);
2083    assert_record_handler_called(&storage, 13, "record_default_handler", 1);
2084    assert_record_handler_called(&storage, 14, "record_default_handler", 1);
2085    assert_record_handler_called(&storage, 15, "record_default_handler", 1);
2086    assert_record_handler_called(&storage, 16, "record_default_handler", 5);
2087    assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
2088                                 1);
2089    assert_record_handler_called(&storage, 18, "record_default_handler", 6);
2090    assert_true(storage.count == 19);
2091  }
2092}
2093END_TEST
2094
2095/* Test DTD element parsing code paths */
2096START_TEST(test_dtd_elements) {
2097  const char *text = "<!DOCTYPE doc [\n"
2098                     "<!ELEMENT doc (chapter)>\n"
2099                     "<!ELEMENT chapter (#PCDATA)>\n"
2100                     "]>\n"
2101                     "<doc><chapter>Wombats are go</chapter></doc>";
2102
2103  XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
2104  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2105      == XML_STATUS_ERROR)
2106    xml_failure(g_parser);
2107}
2108END_TEST
2109
2110static void XMLCALL
2111element_decl_check_model(void *userData, const XML_Char *name,
2112                         XML_Content *model) {
2113  UNUSED_P(userData);
2114  uint32_t errorFlags = 0;
2115
2116  /* Expected model array structure is this:
2117   * [0] (type 6, quant 0)
2118   *   [1] (type 5, quant 0)
2119   *     [3] (type 4, quant 0, name "bar")
2120   *     [4] (type 4, quant 0, name "foo")
2121   *     [5] (type 4, quant 3, name "xyz")
2122   *   [2] (type 4, quant 2, name "zebra")
2123   */
2124  errorFlags |= ((xcstrcmp(name, XCS("junk")) == 0) ? 0 : (1u << 0));
2125  errorFlags |= ((model != NULL) ? 0 : (1u << 1));
2126
2127  if (model != NULL) {
2128    errorFlags |= ((model[0].type == XML_CTYPE_SEQ) ? 0 : (1u << 2));
2129    errorFlags |= ((model[0].quant == XML_CQUANT_NONE) ? 0 : (1u << 3));
2130    errorFlags |= ((model[0].numchildren == 2) ? 0 : (1u << 4));
2131    errorFlags |= ((model[0].children == &model[1]) ? 0 : (1u << 5));
2132    errorFlags |= ((model[0].name == NULL) ? 0 : (1u << 6));
2133
2134    errorFlags |= ((model[1].type == XML_CTYPE_CHOICE) ? 0 : (1u << 7));
2135    errorFlags |= ((model[1].quant == XML_CQUANT_NONE) ? 0 : (1u << 8));
2136    errorFlags |= ((model[1].numchildren == 3) ? 0 : (1u << 9));
2137    errorFlags |= ((model[1].children == &model[3]) ? 0 : (1u << 10));
2138    errorFlags |= ((model[1].name == NULL) ? 0 : (1u << 11));
2139
2140    errorFlags |= ((model[2].type == XML_CTYPE_NAME) ? 0 : (1u << 12));
2141    errorFlags |= ((model[2].quant == XML_CQUANT_REP) ? 0 : (1u << 13));
2142    errorFlags |= ((model[2].numchildren == 0) ? 0 : (1u << 14));
2143    errorFlags |= ((model[2].children == NULL) ? 0 : (1u << 15));
2144    errorFlags
2145        |= ((xcstrcmp(model[2].name, XCS("zebra")) == 0) ? 0 : (1u << 16));
2146
2147    errorFlags |= ((model[3].type == XML_CTYPE_NAME) ? 0 : (1u << 17));
2148    errorFlags |= ((model[3].quant == XML_CQUANT_NONE) ? 0 : (1u << 18));
2149    errorFlags |= ((model[3].numchildren == 0) ? 0 : (1u << 19));
2150    errorFlags |= ((model[3].children == NULL) ? 0 : (1u << 20));
2151    errorFlags |= ((xcstrcmp(model[3].name, XCS("bar")) == 0) ? 0 : (1u << 21));
2152
2153    errorFlags |= ((model[4].type == XML_CTYPE_NAME) ? 0 : (1u << 22));
2154    errorFlags |= ((model[4].quant == XML_CQUANT_NONE) ? 0 : (1u << 23));
2155    errorFlags |= ((model[4].numchildren == 0) ? 0 : (1u << 24));
2156    errorFlags |= ((model[4].children == NULL) ? 0 : (1u << 25));
2157    errorFlags |= ((xcstrcmp(model[4].name, XCS("foo")) == 0) ? 0 : (1u << 26));
2158
2159    errorFlags |= ((model[5].type == XML_CTYPE_NAME) ? 0 : (1u << 27));
2160    errorFlags |= ((model[5].quant == XML_CQUANT_PLUS) ? 0 : (1u << 28));
2161    errorFlags |= ((model[5].numchildren == 0) ? 0 : (1u << 29));
2162    errorFlags |= ((model[5].children == NULL) ? 0 : (1u << 30));
2163    errorFlags |= ((xcstrcmp(model[5].name, XCS("xyz")) == 0) ? 0 : (1u << 31));
2164  }
2165
2166  XML_SetUserData(g_parser, (void *)(uintptr_t)errorFlags);
2167  XML_FreeContentModel(g_parser, model);
2168}
2169
2170START_TEST(test_dtd_elements_nesting) {
2171  // Payload inspired by a test in Perl's XML::Parser
2172  const char *text = "<!DOCTYPE foo [\n"
2173                     "<!ELEMENT junk ((bar|foo|xyz+), zebra*)>\n"
2174                     "]>\n"
2175                     "<foo/>";
2176
2177  XML_SetUserData(g_parser, (void *)(uintptr_t)-1);
2178
2179  XML_SetElementDeclHandler(g_parser, element_decl_check_model);
2180  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2181      == XML_STATUS_ERROR)
2182    xml_failure(g_parser);
2183
2184  if ((uint32_t)(uintptr_t)XML_GetUserData(g_parser) != 0)
2185    fail("Element declaration model regression detected");
2186}
2187END_TEST
2188
2189/* Test foreign DTD handling */
2190START_TEST(test_set_foreign_dtd) {
2191  const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n";
2192  const char *text2 = "<doc>&entity;</doc>";
2193  ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2194
2195  /* Check hash salt is passed through too */
2196  XML_SetHashSalt(g_parser, 0x12345678);
2197  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2198  XML_SetUserData(g_parser, &test_data);
2199  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2200  /* Add a default handler to exercise more code paths */
2201  XML_SetDefaultHandler(g_parser, dummy_default_handler);
2202  if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2203    fail("Could not set foreign DTD");
2204  if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2205      == XML_STATUS_ERROR)
2206    xml_failure(g_parser);
2207
2208  /* Ensure that trying to set the DTD after parsing has started
2209   * is faulted, even if it's the same setting.
2210   */
2211  if (XML_UseForeignDTD(g_parser, XML_TRUE)
2212      != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2213    fail("Failed to reject late foreign DTD setting");
2214  /* Ditto for the hash salt */
2215  if (XML_SetHashSalt(g_parser, 0x23456789))
2216    fail("Failed to reject late hash salt change");
2217
2218  /* Now finish the parse */
2219  if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2220      == XML_STATUS_ERROR)
2221    xml_failure(g_parser);
2222}
2223END_TEST
2224
2225/* Test foreign DTD handling with a failing NotStandalone handler */
2226START_TEST(test_foreign_dtd_not_standalone) {
2227  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2228                     "<doc>&entity;</doc>";
2229  ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2230
2231  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2232  XML_SetUserData(g_parser, &test_data);
2233  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2234  XML_SetNotStandaloneHandler(g_parser, reject_not_standalone_handler);
2235  if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2236    fail("Could not set foreign DTD");
2237  expect_failure(text, XML_ERROR_NOT_STANDALONE,
2238                 "NotStandalonehandler failed to reject");
2239}
2240END_TEST
2241
2242/* Test invalid character in a foreign DTD is faulted */
2243START_TEST(test_invalid_foreign_dtd) {
2244  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2245                     "<doc>&entity;</doc>";
2246  ExtFaults test_data
2247      = {"$", "Dollar not faulted", NULL, XML_ERROR_INVALID_TOKEN};
2248
2249  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2250  XML_SetUserData(g_parser, &test_data);
2251  XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
2252  XML_UseForeignDTD(g_parser, XML_TRUE);
2253  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2254                 "Bad DTD should not have been accepted");
2255}
2256END_TEST
2257
2258/* Test foreign DTD use with a doctype */
2259START_TEST(test_foreign_dtd_with_doctype) {
2260  const char *text1 = "<?xml version='1.0' encoding='us-ascii'?>\n"
2261                      "<!DOCTYPE doc [<!ENTITY entity 'hello world'>]>\n";
2262  const char *text2 = "<doc>&entity;</doc>";
2263  ExtTest test_data = {"<!ELEMENT doc (#PCDATA)*>", NULL, NULL};
2264
2265  /* Check hash salt is passed through too */
2266  XML_SetHashSalt(g_parser, 0x12345678);
2267  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2268  XML_SetUserData(g_parser, &test_data);
2269  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
2270  /* Add a default handler to exercise more code paths */
2271  XML_SetDefaultHandler(g_parser, dummy_default_handler);
2272  if (XML_UseForeignDTD(g_parser, XML_TRUE) != XML_ERROR_NONE)
2273    fail("Could not set foreign DTD");
2274  if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2275      == XML_STATUS_ERROR)
2276    xml_failure(g_parser);
2277
2278  /* Ensure that trying to set the DTD after parsing has started
2279   * is faulted, even if it's the same setting.
2280   */
2281  if (XML_UseForeignDTD(g_parser, XML_TRUE)
2282      != XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING)
2283    fail("Failed to reject late foreign DTD setting");
2284  /* Ditto for the hash salt */
2285  if (XML_SetHashSalt(g_parser, 0x23456789))
2286    fail("Failed to reject late hash salt change");
2287
2288  /* Now finish the parse */
2289  if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2290      == XML_STATUS_ERROR)
2291    xml_failure(g_parser);
2292}
2293END_TEST
2294
2295/* Test XML_UseForeignDTD with no external subset present */
2296START_TEST(test_foreign_dtd_without_external_subset) {
2297  const char *text = "<!DOCTYPE doc [<!ENTITY foo 'bar'>]>\n"
2298                     "<doc>&foo;</doc>";
2299
2300  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2301  XML_SetUserData(g_parser, NULL);
2302  XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2303  XML_UseForeignDTD(g_parser, XML_TRUE);
2304  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2305      == XML_STATUS_ERROR)
2306    xml_failure(g_parser);
2307}
2308END_TEST
2309
2310START_TEST(test_empty_foreign_dtd) {
2311  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2312                     "<doc>&entity;</doc>";
2313
2314  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2315  XML_SetExternalEntityRefHandler(g_parser, external_entity_null_loader);
2316  XML_UseForeignDTD(g_parser, XML_TRUE);
2317  expect_failure(text, XML_ERROR_UNDEFINED_ENTITY,
2318                 "Undefined entity not faulted");
2319}
2320END_TEST
2321
2322/* Test XML Base is set and unset appropriately */
2323START_TEST(test_set_base) {
2324  const XML_Char *old_base;
2325  const XML_Char *new_base = XCS("/local/file/name.xml");
2326
2327  old_base = XML_GetBase(g_parser);
2328  if (XML_SetBase(g_parser, new_base) != XML_STATUS_OK)
2329    fail("Unable to set base");
2330  if (xcstrcmp(XML_GetBase(g_parser), new_base) != 0)
2331    fail("Base setting not correct");
2332  if (XML_SetBase(g_parser, NULL) != XML_STATUS_OK)
2333    fail("Unable to NULL base");
2334  if (XML_GetBase(g_parser) != NULL)
2335    fail("Base setting not nulled");
2336  XML_SetBase(g_parser, old_base);
2337}
2338END_TEST
2339
2340/* Test attribute counts, indexing, etc */
2341START_TEST(test_attributes) {
2342  const char *text = "<!DOCTYPE doc [\n"
2343                     "<!ELEMENT doc (tag)>\n"
2344                     "<!ATTLIST doc id ID #REQUIRED>\n"
2345                     "]>"
2346                     "<doc a='1' id='one' b='2'>"
2347                     "<tag c='3'/>"
2348                     "</doc>";
2349  AttrInfo doc_info[] = {{XCS("a"), XCS("1")},
2350                         {XCS("b"), XCS("2")},
2351                         {XCS("id"), XCS("one")},
2352                         {NULL, NULL}};
2353  AttrInfo tag_info[] = {{XCS("c"), XCS("3")}, {NULL, NULL}};
2354  ElementInfo info[] = {{XCS("doc"), 3, XCS("id"), NULL},
2355                        {XCS("tag"), 1, NULL, NULL},
2356                        {NULL, 0, NULL, NULL}};
2357  info[0].attributes = doc_info;
2358  info[1].attributes = tag_info;
2359
2360  XML_SetStartElementHandler(g_parser, counting_start_element_handler);
2361  XML_SetUserData(g_parser, info);
2362  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2363      == XML_STATUS_ERROR)
2364    xml_failure(g_parser);
2365}
2366END_TEST
2367
2368/* Test reset works correctly in the middle of processing an internal
2369 * entity.  Exercises some obscure code in XML_ParserReset().
2370 */
2371START_TEST(test_reset_in_entity) {
2372  const char *text = "<!DOCTYPE doc [\n"
2373                     "<!ENTITY wombat 'wom'>\n"
2374                     "<!ENTITY entity 'hi &wom; there'>\n"
2375                     "]>\n"
2376                     "<doc>&entity;</doc>";
2377  XML_ParsingStatus status;
2378
2379  g_resumable = XML_TRUE;
2380  XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2381  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2382      == XML_STATUS_ERROR)
2383    xml_failure(g_parser);
2384  XML_GetParsingStatus(g_parser, &status);
2385  if (status.parsing != XML_SUSPENDED)
2386    fail("Parsing status not SUSPENDED");
2387  XML_ParserReset(g_parser, NULL);
2388  XML_GetParsingStatus(g_parser, &status);
2389  if (status.parsing != XML_INITIALIZED)
2390    fail("Parsing status doesn't reset to INITIALIZED");
2391}
2392END_TEST
2393
2394/* Test that resume correctly passes through parse errors */
2395START_TEST(test_resume_invalid_parse) {
2396  const char *text = "<doc>Hello</doc"; /* Missing closing wedge */
2397
2398  g_resumable = XML_TRUE;
2399  XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2400  if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2401      == XML_STATUS_ERROR)
2402    xml_failure(g_parser);
2403  if (XML_ResumeParser(g_parser) == XML_STATUS_OK)
2404    fail("Resumed invalid parse not faulted");
2405  if (XML_GetErrorCode(g_parser) != XML_ERROR_UNCLOSED_TOKEN)
2406    fail("Invalid parse not correctly faulted");
2407}
2408END_TEST
2409
2410/* Test that re-suspended parses are correctly passed through */
2411START_TEST(test_resume_resuspended) {
2412  const char *text = "<doc>Hello<meep/>world</doc>";
2413
2414  g_resumable = XML_TRUE;
2415  XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2416  if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
2417      == XML_STATUS_ERROR)
2418    xml_failure(g_parser);
2419  g_resumable = XML_TRUE;
2420  XML_SetCharacterDataHandler(g_parser, clearing_aborting_character_handler);
2421  if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
2422    fail("Resumption not suspended");
2423  /* This one should succeed and finish up */
2424  if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
2425    xml_failure(g_parser);
2426}
2427END_TEST
2428
2429/* Test that CDATA shows up correctly through a default handler */
2430START_TEST(test_cdata_default) {
2431  const char *text = "<doc><![CDATA[Hello\nworld]]></doc>";
2432  const XML_Char *expected = XCS("<doc><![CDATA[Hello\nworld]]></doc>");
2433  CharData storage;
2434
2435  CharData_Init(&storage);
2436  XML_SetUserData(g_parser, &storage);
2437  XML_SetDefaultHandler(g_parser, accumulate_characters);
2438
2439  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2440      == XML_STATUS_ERROR)
2441    xml_failure(g_parser);
2442  CharData_CheckXMLChars(&storage, expected);
2443}
2444END_TEST
2445
2446/* Test resetting a subordinate parser does exactly nothing */
2447START_TEST(test_subordinate_reset) {
2448  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2449                     "<!DOCTYPE doc SYSTEM 'foo'>\n"
2450                     "<doc>&entity;</doc>";
2451
2452  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2453  XML_SetExternalEntityRefHandler(g_parser, external_entity_resetter);
2454  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2455      == XML_STATUS_ERROR)
2456    xml_failure(g_parser);
2457}
2458END_TEST
2459
2460/* Test suspending a subordinate parser */
2461START_TEST(test_subordinate_suspend) {
2462  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2463                     "<!DOCTYPE doc SYSTEM 'foo'>\n"
2464                     "<doc>&entity;</doc>";
2465
2466  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2467  XML_SetExternalEntityRefHandler(g_parser, external_entity_suspender);
2468  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2469      == XML_STATUS_ERROR)
2470    xml_failure(g_parser);
2471}
2472END_TEST
2473
2474/* Test suspending a subordinate parser from an XML declaration */
2475/* Increases code coverage of the tests */
2476
2477START_TEST(test_subordinate_xdecl_suspend) {
2478  const char *text
2479      = "<!DOCTYPE doc [\n"
2480        "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2481        "]>\n"
2482        "<doc>&entity;</doc>";
2483
2484  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2485  XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2486  g_resumable = XML_TRUE;
2487  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2488      == XML_STATUS_ERROR)
2489    xml_failure(g_parser);
2490}
2491END_TEST
2492
2493START_TEST(test_subordinate_xdecl_abort) {
2494  const char *text
2495      = "<!DOCTYPE doc [\n"
2496        "  <!ENTITY entity SYSTEM 'http://example.org/dummy.ent'>\n"
2497        "]>\n"
2498        "<doc>&entity;</doc>";
2499
2500  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2501  XML_SetExternalEntityRefHandler(g_parser, external_entity_suspend_xmldecl);
2502  g_resumable = XML_FALSE;
2503  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2504      == XML_STATUS_ERROR)
2505    xml_failure(g_parser);
2506}
2507END_TEST
2508
2509/* Test external entity fault handling with suspension */
2510START_TEST(test_ext_entity_invalid_suspended_parse) {
2511  const char *text = "<!DOCTYPE doc [\n"
2512                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2513                     "]>\n"
2514                     "<doc>&en;</doc>";
2515  ExtFaults faults[]
2516      = {{"<?xml version='1.0' encoding='us-ascii'?><",
2517          "Incomplete element declaration not faulted", NULL,
2518          XML_ERROR_UNCLOSED_TOKEN},
2519         {/* First two bytes of a three-byte char */
2520          "<?xml version='1.0' encoding='utf-8'?>\xe2\x82",
2521          "Incomplete character not faulted", NULL, XML_ERROR_PARTIAL_CHAR},
2522         {NULL, NULL, NULL, XML_ERROR_NONE}};
2523  ExtFaults *fault;
2524
2525  for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
2526    set_subtest("%s", fault->parse_text);
2527    XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2528    XML_SetExternalEntityRefHandler(g_parser,
2529                                    external_entity_suspending_faulter);
2530    XML_SetUserData(g_parser, fault);
2531    expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
2532                   "Parser did not report external entity error");
2533    XML_ParserReset(g_parser, NULL);
2534  }
2535}
2536END_TEST
2537
2538/* Test setting an explicit encoding */
2539START_TEST(test_explicit_encoding) {
2540  const char *text1 = "<doc>Hello ";
2541  const char *text2 = " World</doc>";
2542
2543  /* Just check that we can set the encoding to NULL before starting */
2544  if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2545    fail("Failed to initialise encoding to NULL");
2546  /* Say we are UTF-8 */
2547  if (XML_SetEncoding(g_parser, XCS("utf-8")) != XML_STATUS_OK)
2548    fail("Failed to set explicit encoding");
2549  if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
2550      == XML_STATUS_ERROR)
2551    xml_failure(g_parser);
2552  /* Try to switch encodings mid-parse */
2553  if (XML_SetEncoding(g_parser, XCS("us-ascii")) != XML_STATUS_ERROR)
2554    fail("Allowed encoding change");
2555  if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
2556      == XML_STATUS_ERROR)
2557    xml_failure(g_parser);
2558  /* Try now the parse is over */
2559  if (XML_SetEncoding(g_parser, NULL) != XML_STATUS_OK)
2560    fail("Failed to unset encoding");
2561}
2562END_TEST
2563
2564/* Test handling of trailing CR (rather than newline) */
2565START_TEST(test_trailing_cr) {
2566  const char *text = "<doc>\r";
2567  int found_cr;
2568
2569  /* Try with a character handler, for code coverage */
2570  XML_SetCharacterDataHandler(g_parser, cr_cdata_handler);
2571  XML_SetUserData(g_parser, &found_cr);
2572  found_cr = 0;
2573  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2574      == XML_STATUS_OK)
2575    fail("Failed to fault unclosed doc");
2576  if (found_cr == 0)
2577    fail("Did not catch the carriage return");
2578  XML_ParserReset(g_parser, NULL);
2579
2580  /* Now with a default handler instead */
2581  XML_SetDefaultHandler(g_parser, cr_cdata_handler);
2582  XML_SetUserData(g_parser, &found_cr);
2583  found_cr = 0;
2584  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2585      == XML_STATUS_OK)
2586    fail("Failed to fault unclosed doc");
2587  if (found_cr == 0)
2588    fail("Did not catch default carriage return");
2589}
2590END_TEST
2591
2592/* Test trailing CR in an external entity parse */
2593START_TEST(test_ext_entity_trailing_cr) {
2594  const char *text = "<!DOCTYPE doc [\n"
2595                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2596                     "]>\n"
2597                     "<doc>&en;</doc>";
2598  int found_cr;
2599
2600  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2601  XML_SetExternalEntityRefHandler(g_parser, external_entity_cr_catcher);
2602  XML_SetUserData(g_parser, &found_cr);
2603  found_cr = 0;
2604  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2605      != XML_STATUS_OK)
2606    xml_failure(g_parser);
2607  if (found_cr == 0)
2608    fail("No carriage return found");
2609  XML_ParserReset(g_parser, NULL);
2610
2611  /* Try again with a different trailing CR */
2612  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2613  XML_SetExternalEntityRefHandler(g_parser, external_entity_bad_cr_catcher);
2614  XML_SetUserData(g_parser, &found_cr);
2615  found_cr = 0;
2616  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2617      != XML_STATUS_OK)
2618    xml_failure(g_parser);
2619  if (found_cr == 0)
2620    fail("No carriage return found");
2621}
2622END_TEST
2623
2624/* Test handling of trailing square bracket */
2625START_TEST(test_trailing_rsqb) {
2626  const char *text8 = "<doc>]";
2627  const char text16[] = "\xFF\xFE<\000d\000o\000c\000>\000]\000";
2628  int found_rsqb;
2629  int text8_len = (int)strlen(text8);
2630
2631  XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2632  XML_SetUserData(g_parser, &found_rsqb);
2633  found_rsqb = 0;
2634  if (_XML_Parse_SINGLE_BYTES(g_parser, text8, text8_len, XML_TRUE)
2635      == XML_STATUS_OK)
2636    fail("Failed to fault unclosed doc");
2637  if (found_rsqb == 0)
2638    fail("Did not catch the right square bracket");
2639
2640  /* Try again with a different encoding */
2641  XML_ParserReset(g_parser, NULL);
2642  XML_SetCharacterDataHandler(g_parser, rsqb_handler);
2643  XML_SetUserData(g_parser, &found_rsqb);
2644  found_rsqb = 0;
2645  if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2646                              XML_TRUE)
2647      == XML_STATUS_OK)
2648    fail("Failed to fault unclosed doc");
2649  if (found_rsqb == 0)
2650    fail("Did not catch the right square bracket");
2651
2652  /* And finally with a default handler */
2653  XML_ParserReset(g_parser, NULL);
2654  XML_SetDefaultHandler(g_parser, rsqb_handler);
2655  XML_SetUserData(g_parser, &found_rsqb);
2656  found_rsqb = 0;
2657  if (_XML_Parse_SINGLE_BYTES(g_parser, text16, (int)sizeof(text16) - 1,
2658                              XML_TRUE)
2659      == XML_STATUS_OK)
2660    fail("Failed to fault unclosed doc");
2661  if (found_rsqb == 0)
2662    fail("Did not catch the right square bracket");
2663}
2664END_TEST
2665
2666/* Test trailing right square bracket in an external entity parse */
2667START_TEST(test_ext_entity_trailing_rsqb) {
2668  const char *text = "<!DOCTYPE doc [\n"
2669                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2670                     "]>\n"
2671                     "<doc>&en;</doc>";
2672  int found_rsqb;
2673
2674  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2675  XML_SetExternalEntityRefHandler(g_parser, external_entity_rsqb_catcher);
2676  XML_SetUserData(g_parser, &found_rsqb);
2677  found_rsqb = 0;
2678  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2679      != XML_STATUS_OK)
2680    xml_failure(g_parser);
2681  if (found_rsqb == 0)
2682    fail("No right square bracket found");
2683}
2684END_TEST
2685
2686/* Test CDATA handling in an external entity */
2687START_TEST(test_ext_entity_good_cdata) {
2688  const char *text = "<!DOCTYPE doc [\n"
2689                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
2690                     "]>\n"
2691                     "<doc>&en;</doc>";
2692
2693  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2694  XML_SetExternalEntityRefHandler(g_parser, external_entity_good_cdata_ascii);
2695  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2696      != XML_STATUS_OK)
2697    xml_failure(g_parser);
2698}
2699END_TEST
2700
2701/* Test user parameter settings */
2702START_TEST(test_user_parameters) {
2703  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2704                     "<!-- Primary parse -->\n"
2705                     "<!DOCTYPE doc SYSTEM 'foo'>\n"
2706                     "<doc>&entity;";
2707  const char *epilog = "<!-- Back to primary parser -->\n"
2708                       "</doc>";
2709
2710  g_comment_count = 0;
2711  g_skip_count = 0;
2712  g_xdecl_count = 0;
2713  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2714  XML_SetXmlDeclHandler(g_parser, xml_decl_handler);
2715  XML_SetExternalEntityRefHandler(g_parser, external_entity_param_checker);
2716  XML_SetCommentHandler(g_parser, data_check_comment_handler);
2717  XML_SetSkippedEntityHandler(g_parser, param_check_skip_handler);
2718  XML_UseParserAsHandlerArg(g_parser);
2719  XML_SetUserData(g_parser, (void *)1);
2720  g_handler_data = g_parser;
2721  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2722      == XML_STATUS_ERROR)
2723    xml_failure(g_parser);
2724  /* Ensure we can't change policy mid-parse */
2725  if (XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_NEVER))
2726    fail("Changed param entity parsing policy while parsing");
2727  if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
2728      == XML_STATUS_ERROR)
2729    xml_failure(g_parser);
2730  if (g_comment_count != 3)
2731    fail("Comment handler not invoked enough times");
2732  if (g_skip_count != 1)
2733    fail("Skip handler not invoked enough times");
2734  if (g_xdecl_count != 1)
2735    fail("XML declaration handler not invoked");
2736}
2737END_TEST
2738
2739/* Test that an explicit external entity handler argument replaces
2740 * the parser as the first argument.
2741 *
2742 * We do not call the first parameter to the external entity handler
2743 * 'parser' for once, since the first time the handler is called it
2744 * will actually be a text string.  We need to be able to access the
2745 * global 'parser' variable to create our external entity parser from,
2746 * since there are code paths we need to ensure get executed.
2747 */
2748START_TEST(test_ext_entity_ref_parameter) {
2749  const char *text = "<?xml version='1.0' encoding='us-ascii'?>\n"
2750                     "<!DOCTYPE doc SYSTEM 'foo'>\n"
2751                     "<doc>&entity;</doc>";
2752
2753  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2754  XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2755  /* Set a handler arg that is not NULL and not parser (which is
2756   * what NULL would cause to be passed.
2757   */
2758  XML_SetExternalEntityRefHandlerArg(g_parser, (void *)text);
2759  g_handler_data = text;
2760  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2761      == XML_STATUS_ERROR)
2762    xml_failure(g_parser);
2763
2764  /* Now try again with unset args */
2765  XML_ParserReset(g_parser, NULL);
2766  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
2767  XML_SetExternalEntityRefHandler(g_parser, external_entity_ref_param_checker);
2768  XML_SetExternalEntityRefHandlerArg(g_parser, NULL);
2769  g_handler_data = g_parser;
2770  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2771      == XML_STATUS_ERROR)
2772    xml_failure(g_parser);
2773}
2774END_TEST
2775
2776/* Test the parsing of an empty string */
2777START_TEST(test_empty_parse) {
2778  const char *text = "<doc></doc>";
2779  const char *partial = "<doc>";
2780
2781  if (XML_Parse(g_parser, NULL, 0, XML_FALSE) == XML_STATUS_ERROR)
2782    fail("Parsing empty string faulted");
2783  if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2784    fail("Parsing final empty string not faulted");
2785  if (XML_GetErrorCode(g_parser) != XML_ERROR_NO_ELEMENTS)
2786    fail("Parsing final empty string faulted for wrong reason");
2787
2788  /* Now try with valid text before the empty end */
2789  XML_ParserReset(g_parser, NULL);
2790  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
2791      == XML_STATUS_ERROR)
2792    xml_failure(g_parser);
2793  if (XML_Parse(g_parser, NULL, 0, XML_TRUE) == XML_STATUS_ERROR)
2794    fail("Parsing final empty string faulted");
2795
2796  /* Now try with invalid text before the empty end */
2797  XML_ParserReset(g_parser, NULL);
2798  if (_XML_Parse_SINGLE_BYTES(g_parser, partial, (int)strlen(partial),
2799                              XML_FALSE)
2800      == XML_STATUS_ERROR)
2801    xml_failure(g_parser);
2802  if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
2803    fail("Parsing final incomplete empty string not faulted");
2804}
2805END_TEST
2806
2807/* Test odd corners of the XML_GetBuffer interface */
2808static enum XML_Status
2809get_feature(enum XML_FeatureEnum feature_id, long *presult) {
2810  const XML_Feature *feature = XML_GetFeatureList();
2811
2812  if (feature == NULL)
2813    return XML_STATUS_ERROR;
2814  for (; feature->feature != XML_FEATURE_END; feature++) {
2815    if (feature->feature == feature_id) {
2816      *presult = feature->value;
2817      return XML_STATUS_OK;
2818    }
2819  }
2820  return XML_STATUS_ERROR;
2821}
2822
2823/* Test odd corners of the XML_GetBuffer interface */
2824START_TEST(test_get_buffer_1) {
2825  const char *text = get_buffer_test_text;
2826  void *buffer;
2827  long context_bytes;
2828
2829  /* Attempt to allocate a negative length buffer */
2830  if (XML_GetBuffer(g_parser, -12) != NULL)
2831    fail("Negative length buffer not failed");
2832
2833  /* Now get a small buffer and extend it past valid length */
2834  buffer = XML_GetBuffer(g_parser, 1536);
2835  if (buffer == NULL)
2836    fail("1.5K buffer failed");
2837  assert(buffer != NULL);
2838  memcpy(buffer, text, strlen(text));
2839  if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2840      == XML_STATUS_ERROR)
2841    xml_failure(g_parser);
2842  if (XML_GetBuffer(g_parser, INT_MAX) != NULL)
2843    fail("INT_MAX buffer not failed");
2844
2845  /* Now try extending it a more reasonable but still too large
2846   * amount.  The allocator in XML_GetBuffer() doubles the buffer
2847   * size until it exceeds the requested amount or INT_MAX.  If it
2848   * exceeds INT_MAX, it rejects the request, so we want a request
2849   * between INT_MAX and INT_MAX/2.  A gap of 1K seems comfortable,
2850   * with an extra byte just to ensure that the request is off any
2851   * boundary.  The request will be inflated internally by
2852   * XML_CONTEXT_BYTES (if >=1), so we subtract that from our
2853   * request.
2854   */
2855  if (get_feature(XML_FEATURE_CONTEXT_BYTES, &context_bytes) != XML_STATUS_OK)
2856    context_bytes = 0;
2857  if (XML_GetBuffer(g_parser, INT_MAX - (context_bytes + 1025)) != NULL)
2858    fail("INT_MAX- buffer not failed");
2859
2860  /* Now try extending it a carefully crafted amount */
2861  if (XML_GetBuffer(g_parser, 1000) == NULL)
2862    fail("1000 buffer failed");
2863}
2864END_TEST
2865
2866/* Test more corners of the XML_GetBuffer interface */
2867START_TEST(test_get_buffer_2) {
2868  const char *text = get_buffer_test_text;
2869  void *buffer;
2870
2871  /* Now get a decent buffer */
2872  buffer = XML_GetBuffer(g_parser, 1536);
2873  if (buffer == NULL)
2874    fail("1.5K buffer failed");
2875  assert(buffer != NULL);
2876  memcpy(buffer, text, strlen(text));
2877  if (XML_ParseBuffer(g_parser, (int)strlen(text), XML_FALSE)
2878      == XML_STATUS_ERROR)
2879    xml_failure(g_parser);
2880
2881  /* Extend it, to catch a different code path */
2882  if (XML_GetBuffer(g_parser, 1024) == NULL)
2883    fail("1024 buffer failed");
2884}
2885END_TEST
2886
/* Regression test for the signed integer overflow of CVE-2022-23852.
 * Only built when XML_CONTEXT_BYTES is greater than zero.
 */
#if XML_CONTEXT_BYTES > 0
START_TEST(test_get_buffer_3_overflow) {
  const char *const text = "\n";
  const int text_len = (int)strlen(text);
  // Value that the variable "keep" in XML_GetBuffer will have once the
  // text has been parsed
  const int expectedKeepValue = text_len;

  XML_Parser parser = XML_ParserCreate(NULL);
  assert(parser != NULL);

  if (_XML_Parse_SINGLE_BYTES(parser, text, text_len, XML_FALSE /* isFinal */)
      == XML_STATUS_ERROR) {
    xml_failure(parser);
  }

  // A request this large on top of the kept bytes must be rejected
  assert(expectedKeepValue > 0);
  if (XML_GetBuffer(parser, INT_MAX - expectedKeepValue + 1) != NULL) {
    fail("enlarging buffer not failed");
  }

  XML_ParserFree(parser);
}
END_TEST
#endif // XML_CONTEXT_BYTES > 0
2911
2912START_TEST(test_buffer_can_grow_to_max) {
2913  const char *const prefixes[] = {
2914      "",
2915      "<",
2916      "<x a='",
2917      "<doc><x a='",
2918      "<document><x a='",
2919      "<averylongelementnamesuchthatitwillhopefullystretchacrossmultiplelinesand"
2920      "lookprettyridiculousitsalsoveryhardtoreadandifyouredoingitihavetowonderif"
2921      "youreallydonthaveanythingbettertodoofcourseiguessicouldveputsomethingbadin"
2922      "herebutipromisethatididntheybtwhowgreatarespacesandpunctuationforhelping"
2923      "withreadabilityprettygreatithinkanywaysthisisprobablylongenoughbye><x a='"};
2924  const int num_prefixes = sizeof(prefixes) / sizeof(prefixes[0]);
2925  int maxbuf = INT_MAX / 2 + (INT_MAX & 1); // round up without overflow
2926#if defined(__MINGW32__) && ! defined(__MINGW64__)
2927  // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB
2928  // Can we make a big allocation?
2929  void *big = malloc(maxbuf);
2930  if (! big) {
2931    // The big allocation failed. Let's be a little lenient.
2932    maxbuf = maxbuf / 2;
2933  }
2934  free(big);
2935#endif
2936
2937  for (int i = 0; i < num_prefixes; ++i) {
2938    set_subtest("\"%s\"", prefixes[i]);
2939    XML_Parser parser = XML_ParserCreate(NULL);
2940    const int prefix_len = (int)strlen(prefixes[i]);
2941    const enum XML_Status s
2942        = _XML_Parse_SINGLE_BYTES(parser, prefixes[i], prefix_len, XML_FALSE);
2943    if (s != XML_STATUS_OK)
2944      xml_failure(parser);
2945
2946    // XML_CONTEXT_BYTES of the prefix may remain in the buffer;
2947    // subtracting the whole prefix is easiest, and close enough.
2948    assert_true(XML_GetBuffer(parser, maxbuf - prefix_len) != NULL);
2949    // The limit should be consistent; no prefix should allow us to
2950    // reach above the max buffer size.
2951    assert_true(XML_GetBuffer(parser, maxbuf + 1) == NULL);
2952    XML_ParserFree(parser);
2953  }
2954}
2955END_TEST
2956
2957START_TEST(test_getbuffer_allocates_on_zero_len) {
2958  for (int first_len = 1; first_len >= 0; first_len--) {
2959    set_subtest("with len=%d first", first_len);
2960    XML_Parser parser = XML_ParserCreate(NULL);
2961    assert_true(parser != NULL);
2962    assert_true(XML_GetBuffer(parser, first_len) != NULL);
2963    assert_true(XML_GetBuffer(parser, 0) != NULL);
2964    if (XML_ParseBuffer(parser, 0, XML_FALSE) != XML_STATUS_OK)
2965      xml_failure(parser);
2966    XML_ParserFree(parser);
2967  }
2968}
2969END_TEST
2970
2971/* Test position information macros */
2972START_TEST(test_byte_info_at_end) {
2973  const char *text = "<doc></doc>";
2974
2975  if (XML_GetCurrentByteIndex(g_parser) != -1
2976      || XML_GetCurrentByteCount(g_parser) != 0)
2977    fail("Byte index/count incorrect at start of parse");
2978  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2979      == XML_STATUS_ERROR)
2980    xml_failure(g_parser);
2981  /* At end, the count will be zero and the index the end of string */
2982  if (XML_GetCurrentByteCount(g_parser) != 0)
2983    fail("Terminal byte count incorrect");
2984  if (XML_GetCurrentByteIndex(g_parser) != (XML_Index)strlen(text))
2985    fail("Terminal byte index incorrect");
2986}
2987END_TEST
2988
2989/* Test position information from errors */
2990#define PRE_ERROR_STR "<doc></"
2991#define POST_ERROR_STR "wombat></doc>"
2992START_TEST(test_byte_info_at_error) {
2993  const char *text = PRE_ERROR_STR POST_ERROR_STR;
2994
2995  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
2996      == XML_STATUS_OK)
2997    fail("Syntax error not faulted");
2998  if (XML_GetCurrentByteCount(g_parser) != 0)
2999    fail("Error byte count incorrect");
3000  if (XML_GetCurrentByteIndex(g_parser) != strlen(PRE_ERROR_STR))
3001    fail("Error byte index incorrect");
3002}
3003END_TEST
3004#undef PRE_ERROR_STR
3005#undef POST_ERROR_STR
3006
3007/* Test position information in handler */
3008#define START_ELEMENT "<e>"
3009#define CDATA_TEXT "Hello"
3010#define END_ELEMENT "</e>"
3011START_TEST(test_byte_info_at_cdata) {
3012  const char *text = START_ELEMENT CDATA_TEXT END_ELEMENT;
3013  int offset, size;
3014  ByteTestData data;
3015
3016  /* Check initial context is empty */
3017  if (XML_GetInputContext(g_parser, &offset, &size) != NULL)
3018    fail("Unexpected context at start of parse");
3019
3020  data.start_element_len = (int)strlen(START_ELEMENT);
3021  data.cdata_len = (int)strlen(CDATA_TEXT);
3022  data.total_string_len = (int)strlen(text);
3023  XML_SetCharacterDataHandler(g_parser, byte_character_handler);
3024  XML_SetUserData(g_parser, &data);
3025  if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
3026    xml_failure(g_parser);
3027}
3028END_TEST
3029#undef START_ELEMENT
3030#undef CDATA_TEXT
3031#undef END_ELEMENT
3032
3033/* Test predefined entities are correctly recognised */
3034START_TEST(test_predefined_entities) {
3035  const char *text = "<doc>&lt;&gt;&amp;&quot;&apos;</doc>";
3036  const XML_Char *expected = XCS("<doc>&lt;&gt;&amp;&quot;&apos;</doc>");
3037  const XML_Char *result = XCS("<>&\"'");
3038  CharData storage;
3039
3040  XML_SetDefaultHandler(g_parser, accumulate_characters);
3041  /* run_character_check uses XML_SetCharacterDataHandler(), which
3042   * unfortunately heads off a code path that we need to exercise.
3043   */
3044  CharData_Init(&storage);
3045  XML_SetUserData(g_parser, &storage);
3046  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3047      == XML_STATUS_ERROR)
3048    xml_failure(g_parser);
3049  /* The default handler doesn't translate the entities */
3050  CharData_CheckXMLChars(&storage, expected);
3051
3052  /* Now try again and check the translation */
3053  XML_ParserReset(g_parser, NULL);
3054  run_character_check(text, result);
3055}
3056END_TEST
3057
3058/* Regression test that an invalid tag in an external parameter
3059 * reference in an external DTD is correctly faulted.
3060 *
3061 * Only a few specific tags are legal in DTDs ignoring comments and
3062 * processing instructions, all of which begin with an exclamation
3063 * mark.  "<el/>" is not one of them, so the parser should raise an
3064 * error on encountering it.
3065 */
3066START_TEST(test_invalid_tag_in_dtd) {
3067  const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3068                     "<doc></doc>\n";
3069
3070  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3071  XML_SetExternalEntityRefHandler(g_parser, external_entity_param);
3072  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3073                 "Invalid tag IN DTD external param not rejected");
3074}
3075END_TEST
3076
3077/* Test entities not quite the predefined ones are not mis-recognised */
3078START_TEST(test_not_predefined_entities) {
3079  const char *text[] = {"<doc>&pt;</doc>", "<doc>&amo;</doc>",
3080                        "<doc>&quid;</doc>", "<doc>&apod;</doc>", NULL};
3081  int i = 0;
3082
3083  while (text[i] != NULL) {
3084    expect_failure(text[i], XML_ERROR_UNDEFINED_ENTITY,
3085                   "Undefined entity not rejected");
3086    XML_ParserReset(g_parser, NULL);
3087    i++;
3088  }
3089}
3090END_TEST
3091
3092/* Test conditional inclusion (IGNORE) */
3093START_TEST(test_ignore_section) {
3094  const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3095                     "<doc><e>&entity;</e></doc>";
3096  const XML_Char *expected
3097      = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&entity;");
3098  CharData storage;
3099
3100  CharData_Init(&storage);
3101  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3102  XML_SetUserData(g_parser, &storage);
3103  XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore);
3104  XML_SetDefaultHandler(g_parser, accumulate_characters);
3105  XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3106  XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3107  XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3108  XML_SetStartElementHandler(g_parser, dummy_start_element);
3109  XML_SetEndElementHandler(g_parser, dummy_end_element);
3110  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3111      == XML_STATUS_ERROR)
3112    xml_failure(g_parser);
3113  CharData_CheckXMLChars(&storage, expected);
3114}
3115END_TEST
3116
3117START_TEST(test_ignore_section_utf16) {
3118  const char text[] =
3119      /* <!DOCTYPE d SYSTEM 's'> */
3120      "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3121      "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n\0"
3122      /* <d><e>&en;</e></d> */
3123      "<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>\0";
3124  const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3125  CharData storage;
3126
3127  CharData_Init(&storage);
3128  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3129  XML_SetUserData(g_parser, &storage);
3130  XML_SetExternalEntityRefHandler(g_parser, external_entity_load_ignore_utf16);
3131  XML_SetDefaultHandler(g_parser, accumulate_characters);
3132  XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3133  XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3134  XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3135  XML_SetStartElementHandler(g_parser, dummy_start_element);
3136  XML_SetEndElementHandler(g_parser, dummy_end_element);
3137  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3138      == XML_STATUS_ERROR)
3139    xml_failure(g_parser);
3140  CharData_CheckXMLChars(&storage, expected);
3141}
3142END_TEST
3143
3144START_TEST(test_ignore_section_utf16_be) {
3145  const char text[] =
3146      /* <!DOCTYPE d SYSTEM 's'> */
3147      "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 "
3148      "\0S\0Y\0S\0T\0E\0M\0 \0'\0s\0'\0>\0\n"
3149      /* <d><e>&en;</e></d> */
3150      "\0<\0d\0>\0<\0e\0>\0&\0e\0n\0;\0<\0/\0e\0>\0<\0/\0d\0>";
3151  const XML_Char *expected = XCS("<![IGNORE[<!ELEMENT e (#PCDATA)*>]]>\n&en;");
3152  CharData storage;
3153
3154  CharData_Init(&storage);
3155  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3156  XML_SetUserData(g_parser, &storage);
3157  XML_SetExternalEntityRefHandler(g_parser,
3158                                  external_entity_load_ignore_utf16_be);
3159  XML_SetDefaultHandler(g_parser, accumulate_characters);
3160  XML_SetStartDoctypeDeclHandler(g_parser, dummy_start_doctype_handler);
3161  XML_SetEndDoctypeDeclHandler(g_parser, dummy_end_doctype_handler);
3162  XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3163  XML_SetStartElementHandler(g_parser, dummy_start_element);
3164  XML_SetEndElementHandler(g_parser, dummy_end_element);
3165  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3166      == XML_STATUS_ERROR)
3167    xml_failure(g_parser);
3168  CharData_CheckXMLChars(&storage, expected);
3169}
3170END_TEST
3171
3172/* Test mis-formatted conditional exclusion */
3173START_TEST(test_bad_ignore_section) {
3174  const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3175                     "<doc><e>&entity;</e></doc>";
3176  ExtFaults faults[]
3177      = {{"<![IGNORE[<!ELEM", "Broken-off declaration not faulted", NULL,
3178          XML_ERROR_SYNTAX},
3179         {"<![IGNORE[\x01]]>", "Invalid XML character not faulted", NULL,
3180          XML_ERROR_INVALID_TOKEN},
3181         {/* FIrst two bytes of a three-byte char */
3182          "<![IGNORE[\xe2\x82", "Partial XML character not faulted", NULL,
3183          XML_ERROR_PARTIAL_CHAR},
3184         {NULL, NULL, NULL, XML_ERROR_NONE}};
3185  ExtFaults *fault;
3186
3187  for (fault = &faults[0]; fault->parse_text != NULL; fault++) {
3188    set_subtest("%s", fault->parse_text);
3189    XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3190    XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3191    XML_SetUserData(g_parser, fault);
3192    expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3193                   "Incomplete IGNORE section not failed");
3194    XML_ParserReset(g_parser, NULL);
3195  }
3196}
3197END_TEST
3198
/* State shared between test_external_bom_consumed and its external
 * entity reference handler, external_bom_checker, via the parser's
 * user data.
 */
struct bom_testdata {
  /* Text served by the handler for system ID '004-2.ent' */
  const char *external;
  /* Number of leading bytes of `external` fed in the first, non-final
   * parse call; the remainder is fed in a second, final call
   */
  int split;
  /* Set to XML_TRUE by the handler when it serves '004-2.ent' */
  XML_Bool nested_callback_happened;
};
3204
3205static int XMLCALL
3206external_bom_checker(XML_Parser parser, const XML_Char *context,
3207                     const XML_Char *base, const XML_Char *systemId,
3208                     const XML_Char *publicId) {
3209  const char *text;
3210  UNUSED_P(base);
3211  UNUSED_P(systemId);
3212  UNUSED_P(publicId);
3213
3214  XML_Parser ext_parser = XML_ExternalEntityParserCreate(parser, context, NULL);
3215  if (ext_parser == NULL)
3216    fail("Could not create external entity parser");
3217
3218  if (! xcstrcmp(systemId, XCS("004-2.ent"))) {
3219    struct bom_testdata *const testdata
3220        = (struct bom_testdata *)XML_GetUserData(parser);
3221    const char *const external = testdata->external;
3222    const int split = testdata->split;
3223    testdata->nested_callback_happened = XML_TRUE;
3224
3225    if (_XML_Parse_SINGLE_BYTES(ext_parser, external, split, XML_FALSE)
3226        != XML_STATUS_OK) {
3227      xml_failure(ext_parser);
3228    }
3229    text = external + split; // the parse below will continue where we left off.
3230  } else if (! xcstrcmp(systemId, XCS("004-1.ent"))) {
3231    text = "<!ELEMENT doc EMPTY>\n"
3232           "<!ENTITY % e1 SYSTEM '004-2.ent'>\n"
3233           "<!ENTITY % e2 '%e1;'>\n";
3234  } else {
3235    fail("unknown systemId");
3236  }
3237
3238  if (_XML_Parse_SINGLE_BYTES(ext_parser, text, (int)strlen(text), XML_TRUE)
3239      != XML_STATUS_OK)
3240    xml_failure(ext_parser);
3241
3242  XML_ParserFree(ext_parser);
3243  return XML_STATUS_OK;
3244}
3245
3246/* regression test: BOM should be consumed when followed by a partial token. */
3247START_TEST(test_external_bom_consumed) {
3248  const char *const text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3249                           "<doc></doc>\n";
3250  const char *const external = "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>";
3251  const int len = (int)strlen(external);
3252  for (int split = 0; split <= len; ++split) {
3253    set_subtest("split at byte %d", split);
3254
3255    struct bom_testdata testdata;
3256    testdata.external = external;
3257    testdata.split = split;
3258    testdata.nested_callback_happened = XML_FALSE;
3259
3260    XML_Parser parser = XML_ParserCreate(NULL);
3261    if (parser == NULL) {
3262      fail("Couldn't create parser");
3263    }
3264    XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3265    XML_SetExternalEntityRefHandler(parser, external_bom_checker);
3266    XML_SetUserData(parser, &testdata);
3267    if (_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE)
3268        == XML_STATUS_ERROR)
3269      xml_failure(parser);
3270    if (! testdata.nested_callback_happened) {
3271      fail("ref handler not called");
3272    }
3273    XML_ParserFree(parser);
3274  }
3275}
3276END_TEST
3277
3278/* Test recursive parsing */
3279START_TEST(test_external_entity_values) {
3280  const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3281                     "<doc></doc>\n";
3282  ExtFaults data_004_2[] = {
3283      {"<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL, XML_ERROR_NONE},
3284      {"<!ATTLIST $doc a1 CDATA 'value'>", "Invalid token not faulted", NULL,
3285       XML_ERROR_INVALID_TOKEN},
3286      {"'wombat", "Unterminated string not faulted", NULL,
3287       XML_ERROR_UNCLOSED_TOKEN},
3288      {"\xe2\x82", "Partial UTF-8 character not faulted", NULL,
3289       XML_ERROR_PARTIAL_CHAR},
3290      {"<?xml version='1.0' encoding='utf-8'?>\n", NULL, NULL, XML_ERROR_NONE},
3291      {"<?xml?>", "Malformed XML declaration not faulted", NULL,
3292       XML_ERROR_XML_DECL},
3293      {/* UTF-8 BOM */
3294       "\xEF\xBB\xBF<!ATTLIST doc a1 CDATA 'value'>", NULL, NULL,
3295       XML_ERROR_NONE},
3296      {"<?xml version='1.0' encoding='utf-8'?>\n$",
3297       "Invalid token after text declaration not faulted", NULL,
3298       XML_ERROR_INVALID_TOKEN},
3299      {"<?xml version='1.0' encoding='utf-8'?>\n'wombat",
3300       "Unterminated string after text decl not faulted", NULL,
3301       XML_ERROR_UNCLOSED_TOKEN},
3302      {"<?xml version='1.0' encoding='utf-8'?>\n\xe2\x82",
3303       "Partial UTF-8 character after text decl not faulted", NULL,
3304       XML_ERROR_PARTIAL_CHAR},
3305      {"%e1;", "Recursive parameter entity not faulted", NULL,
3306       XML_ERROR_RECURSIVE_ENTITY_REF},
3307      {NULL, NULL, NULL, XML_ERROR_NONE}};
3308  int i;
3309
3310  for (i = 0; data_004_2[i].parse_text != NULL; i++) {
3311    set_subtest("%s", data_004_2[i].parse_text);
3312    XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3313    XML_SetExternalEntityRefHandler(g_parser, external_entity_valuer);
3314    XML_SetUserData(g_parser, &data_004_2[i]);
3315    if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3316        == XML_STATUS_ERROR)
3317      xml_failure(g_parser);
3318    XML_ParserReset(g_parser, NULL);
3319  }
3320}
3321END_TEST
3322
3323/* Test the recursive parse interacts with a not standalone handler */
3324START_TEST(test_ext_entity_not_standalone) {
3325  const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3326                     "<doc></doc>";
3327
3328  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3329  XML_SetExternalEntityRefHandler(g_parser, external_entity_not_standalone);
3330  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3331                 "Standalone rejection not caught");
3332}
3333END_TEST
3334
3335START_TEST(test_ext_entity_value_abort) {
3336  const char *text = "<!DOCTYPE doc SYSTEM '004-1.ent'>\n"
3337                     "<doc></doc>\n";
3338
3339  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3340  XML_SetExternalEntityRefHandler(g_parser, external_entity_value_aborter);
3341  g_resumable = XML_FALSE;
3342  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3343      == XML_STATUS_ERROR)
3344    xml_failure(g_parser);
3345}
3346END_TEST
3347
3348START_TEST(test_bad_public_doctype) {
3349  const char *text = "<?xml version='1.0' encoding='utf-8'?>\n"
3350                     "<!DOCTYPE doc PUBLIC '{BadName}' 'test'>\n"
3351                     "<doc></doc>";
3352
3353  /* Setting a handler provokes a particular code path */
3354  XML_SetDoctypeDeclHandler(g_parser, dummy_start_doctype_handler,
3355                            dummy_end_doctype_handler);
3356  expect_failure(text, XML_ERROR_PUBLICID, "Bad Public ID not failed");
3357}
3358END_TEST
3359
3360/* Test based on ibm/valid/P32/ibm32v04.xml */
3361START_TEST(test_attribute_enum_value) {
3362  const char *text = "<?xml version='1.0' standalone='no'?>\n"
3363                     "<!DOCTYPE animal SYSTEM 'test.dtd'>\n"
3364                     "<animal>This is a \n    <a/>  \n\nyellow tiger</animal>";
3365  ExtTest dtd_data
3366      = {"<!ELEMENT animal (#PCDATA|a)*>\n"
3367         "<!ELEMENT a EMPTY>\n"
3368         "<!ATTLIST animal xml:space (default|preserve) 'preserve'>",
3369         NULL, NULL};
3370  const XML_Char *expected = XCS("This is a \n      \n\nyellow tiger");
3371
3372  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3373  XML_SetUserData(g_parser, &dtd_data);
3374  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3375  /* An attribute list handler provokes a different code path */
3376  XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
3377  run_ext_character_check(text, &dtd_data, expected);
3378}
3379END_TEST
3380
3381/* Slightly bizarrely, the library seems to silently ignore entity
3382 * definitions for predefined entities, even when they are wrong.  The
3383 * language of the XML 1.0 spec is somewhat unhelpful as to what ought
3384 * to happen, so this is currently treated as acceptable.
3385 */
3386START_TEST(test_predefined_entity_redefinition) {
3387  const char *text = "<!DOCTYPE doc [\n"
3388                     "<!ENTITY apos 'foo'>\n"
3389                     "]>\n"
3390                     "<doc>&apos;</doc>";
3391  run_character_check(text, XCS("'"));
3392}
3393END_TEST
3394
3395/* Test that the parser stops processing the DTD after an unresolved
3396 * parameter entity is encountered.
3397 */
3398START_TEST(test_dtd_stop_processing) {
3399  const char *text = "<!DOCTYPE doc [\n"
3400                     "%foo;\n"
3401                     "<!ENTITY bar 'bas'>\n"
3402                     "]><doc/>";
3403
3404  XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
3405  init_dummy_handlers();
3406  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3407      == XML_STATUS_ERROR)
3408    xml_failure(g_parser);
3409  if (get_dummy_handler_flags() != 0)
3410    fail("DTD processing still going after undefined PE");
3411}
3412END_TEST
3413
3414/* Test public notations with no system ID */
3415START_TEST(test_public_notation_no_sysid) {
3416  const char *text = "<!DOCTYPE doc [\n"
3417                     "<!NOTATION note PUBLIC 'foo'>\n"
3418                     "<!ELEMENT doc EMPTY>\n"
3419                     "]>\n<doc/>";
3420
3421  init_dummy_handlers();
3422  XML_SetNotationDeclHandler(g_parser, dummy_notation_decl_handler);
3423  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3424      == XML_STATUS_ERROR)
3425    xml_failure(g_parser);
3426  if (get_dummy_handler_flags() != DUMMY_NOTATION_DECL_HANDLER_FLAG)
3427    fail("Notation declaration handler not called");
3428}
3429END_TEST
3430
3431START_TEST(test_nested_groups) {
3432  const char *text
3433      = "<!DOCTYPE doc [\n"
3434        "<!ELEMENT doc "
3435        /* Sixteen elements per line */
3436        "(e,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,"
3437        "(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?,(e?"
3438        "))))))))))))))))))))))))))))))))>\n"
3439        "<!ELEMENT e EMPTY>"
3440        "]>\n"
3441        "<doc><e/></doc>";
3442  CharData storage;
3443
3444  CharData_Init(&storage);
3445  XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3446  XML_SetStartElementHandler(g_parser, record_element_start_handler);
3447  XML_SetUserData(g_parser, &storage);
3448  init_dummy_handlers();
3449  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3450      == XML_STATUS_ERROR)
3451    xml_failure(g_parser);
3452  CharData_CheckXMLChars(&storage, XCS("doce"));
3453  if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3454    fail("Element handler not fired");
3455}
3456END_TEST
3457
3458START_TEST(test_group_choice) {
3459  const char *text = "<!DOCTYPE doc [\n"
3460                     "<!ELEMENT doc (a|b|c)+>\n"
3461                     "<!ELEMENT a EMPTY>\n"
3462                     "<!ELEMENT b (#PCDATA)>\n"
3463                     "<!ELEMENT c ANY>\n"
3464                     "]>\n"
3465                     "<doc>\n"
3466                     "<a/>\n"
3467                     "<b attr='foo'>This is a foo</b>\n"
3468                     "<c></c>\n"
3469                     "</doc>\n";
3470
3471  XML_SetElementDeclHandler(g_parser, dummy_element_decl_handler);
3472  init_dummy_handlers();
3473  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3474      == XML_STATUS_ERROR)
3475    xml_failure(g_parser);
3476  if (get_dummy_handler_flags() != DUMMY_ELEMENT_DECL_HANDLER_FLAG)
3477    fail("Element handler flag not raised");
3478}
3479END_TEST
3480
3481START_TEST(test_standalone_parameter_entity) {
3482  const char *text = "<?xml version='1.0' standalone='yes'?>\n"
3483                     "<!DOCTYPE doc SYSTEM 'http://example.org/' [\n"
3484                     "<!ENTITY % entity '<!ELEMENT doc (#PCDATA)>'>\n"
3485                     "%entity;\n"
3486                     "]>\n"
3487                     "<doc></doc>";
3488  char dtd_data[] = "<!ENTITY % e1 'foo'>\n";
3489
3490  XML_SetUserData(g_parser, dtd_data);
3491  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3492  XML_SetExternalEntityRefHandler(g_parser, external_entity_public);
3493  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3494      == XML_STATUS_ERROR)
3495    xml_failure(g_parser);
3496}
3497END_TEST
3498
3499/* Test skipping of parameter entity in an external DTD */
3500/* Derived from ibm/invalid/P69/ibm69i01.xml */
3501START_TEST(test_skipped_parameter_entity) {
3502  const char *text = "<?xml version='1.0'?>\n"
3503                     "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3504                     "<!ELEMENT root (#PCDATA|a)* >\n"
3505                     "]>\n"
3506                     "<root></root>";
3507  ExtTest dtd_data = {"%pe2;", NULL, NULL};
3508
3509  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3510  XML_SetUserData(g_parser, &dtd_data);
3511  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3512  XML_SetSkippedEntityHandler(g_parser, dummy_skip_handler);
3513  init_dummy_handlers();
3514  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3515      == XML_STATUS_ERROR)
3516    xml_failure(g_parser);
3517  if (get_dummy_handler_flags() != DUMMY_SKIP_HANDLER_FLAG)
3518    fail("Skip handler not executed");
3519}
3520END_TEST
3521
3522/* Test recursive parameter entity definition rejected in external DTD */
3523START_TEST(test_recursive_external_parameter_entity) {
3524  const char *text = "<?xml version='1.0'?>\n"
3525                     "<!DOCTYPE root SYSTEM 'http://example.org/dtd.ent' [\n"
3526                     "<!ELEMENT root (#PCDATA|a)* >\n"
3527                     "]>\n"
3528                     "<root></root>";
3529  ExtFaults dtd_data = {"<!ENTITY % pe2 '&#37;pe2;'>\n%pe2;",
3530                        "Recursive external parameter entity not faulted", NULL,
3531                        XML_ERROR_RECURSIVE_ENTITY_REF};
3532
3533  XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
3534  XML_SetUserData(g_parser, &dtd_data);
3535  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3536  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
3537                 "Recursive external parameter not spotted");
3538}
3539END_TEST
3540
3541/* Test undefined parameter entity in external entity handler */
3542START_TEST(test_undefined_ext_entity_in_external_dtd) {
3543  const char *text = "<!DOCTYPE doc SYSTEM 'foo'>\n"
3544                     "<doc></doc>\n";
3545
3546  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3547  XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3548  XML_SetUserData(g_parser, NULL);
3549  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3550      == XML_STATUS_ERROR)
3551    xml_failure(g_parser);
3552
3553  /* Now repeat without the external entity ref handler invoking
3554   * another copy of itself.
3555   */
3556  XML_ParserReset(g_parser, NULL);
3557  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3558  XML_SetExternalEntityRefHandler(g_parser, external_entity_devaluer);
3559  XML_SetUserData(g_parser, g_parser); /* Any non-NULL value will do */
3560  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3561      == XML_STATUS_ERROR)
3562    xml_failure(g_parser);
3563}
3564END_TEST
3565
3566/* Test suspending the parse on receiving an XML declaration works */
3567START_TEST(test_suspend_xdecl) {
3568  const char *text = long_character_data_text;
3569
3570  XML_SetXmlDeclHandler(g_parser, entity_suspending_xdecl_handler);
3571  XML_SetUserData(g_parser, g_parser);
3572  g_resumable = XML_TRUE;
3573  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3574      != XML_STATUS_SUSPENDED)
3575    xml_failure(g_parser);
3576  if (XML_GetErrorCode(g_parser) != XML_ERROR_NONE)
3577    xml_failure(g_parser);
3578  /* Attempt to start a new parse while suspended */
3579  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3580      != XML_STATUS_ERROR)
3581    fail("Attempt to parse while suspended not faulted");
3582  if (XML_GetErrorCode(g_parser) != XML_ERROR_SUSPENDED)
3583    fail("Suspended parse not faulted with correct error");
3584}
3585END_TEST
3586
3587/* Test aborting the parse in an epilog works */
3588START_TEST(test_abort_epilog) {
3589  const char *text = "<doc></doc>\n\r\n";
3590  XML_Char trigger_char = XCS('\r');
3591
3592  XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3593  XML_SetUserData(g_parser, &trigger_char);
3594  g_resumable = XML_FALSE;
3595  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3596      != XML_STATUS_ERROR)
3597    fail("Abort not triggered");
3598  if (XML_GetErrorCode(g_parser) != XML_ERROR_ABORTED)
3599    xml_failure(g_parser);
3600}
3601END_TEST
3602
3603/* Test a different code path for abort in the epilog */
3604START_TEST(test_abort_epilog_2) {
3605  const char *text = "<doc></doc>\n";
3606  XML_Char trigger_char = XCS('\n');
3607
3608  XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3609  XML_SetUserData(g_parser, &trigger_char);
3610  g_resumable = XML_FALSE;
3611  expect_failure(text, XML_ERROR_ABORTED, "Abort not triggered");
3612}
3613END_TEST
3614
3615/* Test suspension from the epilog */
3616START_TEST(test_suspend_epilog) {
3617  const char *text = "<doc></doc>\n";
3618  XML_Char trigger_char = XCS('\n');
3619
3620  XML_SetDefaultHandler(g_parser, selective_aborting_default_handler);
3621  XML_SetUserData(g_parser, &trigger_char);
3622  g_resumable = XML_TRUE;
3623  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3624      != XML_STATUS_SUSPENDED)
3625    xml_failure(g_parser);
3626}
3627END_TEST
3628
3629START_TEST(test_suspend_in_sole_empty_tag) {
3630  const char *text = "<doc/>";
3631  enum XML_Status rc;
3632
3633  XML_SetEndElementHandler(g_parser, suspending_end_handler);
3634  XML_SetUserData(g_parser, g_parser);
3635  rc = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
3636  if (rc == XML_STATUS_ERROR)
3637    xml_failure(g_parser);
3638  else if (rc != XML_STATUS_SUSPENDED)
3639    fail("Suspend not triggered");
3640  rc = XML_ResumeParser(g_parser);
3641  if (rc == XML_STATUS_ERROR)
3642    xml_failure(g_parser);
3643  else if (rc != XML_STATUS_OK)
3644    fail("Resume failed");
3645}
3646END_TEST
3647
3648START_TEST(test_unfinished_epilog) {
3649  const char *text = "<doc></doc><";
3650
3651  expect_failure(text, XML_ERROR_UNCLOSED_TOKEN,
3652                 "Incomplete epilog entry not faulted");
3653}
3654END_TEST
3655
3656START_TEST(test_partial_char_in_epilog) {
3657  const char *text = "<doc></doc>\xe2\x82";
3658
3659  /* First check that no fault is raised if the parse is not finished */
3660  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
3661      == XML_STATUS_ERROR)
3662    xml_failure(g_parser);
3663  /* Now check that it is faulted once we finish */
3664  if (XML_ParseBuffer(g_parser, 0, XML_TRUE) != XML_STATUS_ERROR)
3665    fail("Partial character in epilog not faulted");
3666  if (XML_GetErrorCode(g_parser) != XML_ERROR_PARTIAL_CHAR)
3667    xml_failure(g_parser);
3668}
3669END_TEST
3670
3671/* Test resuming a parse suspended in entity substitution */
3672START_TEST(test_suspend_resume_internal_entity) {
3673  const char *text
3674      = "<!DOCTYPE doc [\n"
3675        "<!ENTITY foo '<suspend>Hi<suspend>Ho</suspend></suspend>'>\n"
3676        "]>\n"
3677        "<doc>&foo;</doc>\n";
3678  const XML_Char *expected1 = XCS("Hi");
3679  const XML_Char *expected2 = XCS("HiHo");
3680  CharData storage;
3681
3682  CharData_Init(&storage);
3683  XML_SetStartElementHandler(g_parser, start_element_suspender);
3684  XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3685  XML_SetUserData(g_parser, &storage);
3686  // can't use SINGLE_BYTES here, because it'll return early on suspension, and
3687  // we won't know exactly how much input we actually managed to give Expat.
3688  if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3689      != XML_STATUS_SUSPENDED)
3690    xml_failure(g_parser);
3691  CharData_CheckXMLChars(&storage, XCS(""));
3692  if (XML_ResumeParser(g_parser) != XML_STATUS_SUSPENDED)
3693    xml_failure(g_parser);
3694  CharData_CheckXMLChars(&storage, expected1);
3695  if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3696    xml_failure(g_parser);
3697  CharData_CheckXMLChars(&storage, expected2);
3698}
3699END_TEST
3700
3701START_TEST(test_suspend_resume_internal_entity_issue_629) {
3702  const char *const text
3703      = "<!DOCTYPE a [<!ENTITY e '<!--COMMENT-->a'>]><a>&e;<b>\n"
3704        "<"
3705        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3706        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3707        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3708        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3709        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3710        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3711        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3712        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3713        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3714        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3715        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3716        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3717        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3718        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3719        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3720        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3721        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3722        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3723        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3724        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3725        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3726        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3727        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3728        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3729        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3730        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3731        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3732        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3733        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3734        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3735        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3736        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3737        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3738        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3739        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3740        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3741        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3742        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3743        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3744        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
3745        "/>"
3746        "</b></a>";
3747  const size_t firstChunkSizeBytes = 54;
3748
3749  XML_Parser parser = XML_ParserCreate(NULL);
3750  XML_SetUserData(parser, parser);
3751  XML_SetCommentHandler(parser, suspending_comment_handler);
3752
3753  if (XML_Parse(parser, text, (int)firstChunkSizeBytes, XML_FALSE)
3754      != XML_STATUS_SUSPENDED)
3755    xml_failure(parser);
3756  if (XML_ResumeParser(parser) != XML_STATUS_OK)
3757    xml_failure(parser);
3758  if (_XML_Parse_SINGLE_BYTES(parser, text + firstChunkSizeBytes,
3759                              (int)(strlen(text) - firstChunkSizeBytes),
3760                              XML_TRUE)
3761      != XML_STATUS_OK)
3762    xml_failure(parser);
3763  XML_ParserFree(parser);
3764}
3765END_TEST
3766
3767/* Test syntax error is caught at parse resumption */
3768START_TEST(test_resume_entity_with_syntax_error) {
3769  const char *text = "<!DOCTYPE doc [\n"
3770                     "<!ENTITY foo '<suspend>Hi</wombat>'>\n"
3771                     "]>\n"
3772                     "<doc>&foo;</doc>\n";
3773
3774  XML_SetStartElementHandler(g_parser, start_element_suspender);
3775  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3776      != XML_STATUS_SUSPENDED)
3777    xml_failure(g_parser);
3778  if (XML_ResumeParser(g_parser) != XML_STATUS_ERROR)
3779    fail("Syntax error in entity not faulted");
3780  if (XML_GetErrorCode(g_parser) != XML_ERROR_TAG_MISMATCH)
3781    xml_failure(g_parser);
3782}
3783END_TEST
3784
3785/* Test suspending and resuming in a parameter entity substitution */
3786START_TEST(test_suspend_resume_parameter_entity) {
3787  const char *text = "<!DOCTYPE doc [\n"
3788                     "<!ENTITY % foo '<!ELEMENT doc (#PCDATA)*>'>\n"
3789                     "%foo;\n"
3790                     "]>\n"
3791                     "<doc>Hello, world</doc>";
3792  const XML_Char *expected = XCS("Hello, world");
3793  CharData storage;
3794
3795  CharData_Init(&storage);
3796  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3797  XML_SetElementDeclHandler(g_parser, element_decl_suspender);
3798  XML_SetCharacterDataHandler(g_parser, accumulate_characters);
3799  XML_SetUserData(g_parser, &storage);
3800  if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE)
3801      != XML_STATUS_SUSPENDED)
3802    xml_failure(g_parser);
3803  CharData_CheckXMLChars(&storage, XCS(""));
3804  if (XML_ResumeParser(g_parser) != XML_STATUS_OK)
3805    xml_failure(g_parser);
3806  CharData_CheckXMLChars(&storage, expected);
3807}
3808END_TEST
3809
3810/* Test attempting to use parser after an error is faulted */
3811START_TEST(test_restart_on_error) {
3812  const char *text = "<$doc><doc></doc>";
3813
3814  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3815      != XML_STATUS_ERROR)
3816    fail("Invalid tag name not faulted");
3817  if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3818    xml_failure(g_parser);
3819  if (XML_Parse(g_parser, NULL, 0, XML_TRUE) != XML_STATUS_ERROR)
3820    fail("Restarting invalid parse not faulted");
3821  if (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)
3822    xml_failure(g_parser);
3823}
3824END_TEST
3825
3826/* Test that angle brackets in an attribute default value are faulted */
3827START_TEST(test_reject_lt_in_attribute_value) {
3828  const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '<bar>'>]>\n"
3829                     "<doc></doc>";
3830
3831  expect_failure(text, XML_ERROR_INVALID_TOKEN,
3832                 "Bad attribute default not faulted");
3833}
3834END_TEST
3835
3836START_TEST(test_reject_unfinished_param_in_att_value) {
3837  const char *text = "<!DOCTYPE doc [<!ATTLIST doc a CDATA '&foo'>]>\n"
3838                     "<doc></doc>";
3839
3840  expect_failure(text, XML_ERROR_INVALID_TOKEN,
3841                 "Bad attribute default not faulted");
3842}
3843END_TEST
3844
3845START_TEST(test_trailing_cr_in_att_value) {
3846  const char *text = "<doc a='value\r'/>";
3847
3848  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3849      == XML_STATUS_ERROR)
3850    xml_failure(g_parser);
3851}
3852END_TEST
3853
3854/* Try parsing a general entity within a parameter entity in a
3855 * standalone internal DTD.  Covers a corner case in the parser.
3856 */
3857START_TEST(test_standalone_internal_entity) {
3858  const char *text = "<?xml version='1.0' standalone='yes' ?>\n"
3859                     "<!DOCTYPE doc [\n"
3860                     "  <!ELEMENT doc (#PCDATA)>\n"
3861                     "  <!ENTITY % pe '<!ATTLIST doc att2 CDATA \"&ge;\">'>\n"
3862                     "  <!ENTITY ge 'AttDefaultValue'>\n"
3863                     "  %pe;\n"
3864                     "]>\n"
3865                     "<doc att2='any'/>";
3866
3867  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3868  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3869      == XML_STATUS_ERROR)
3870    xml_failure(g_parser);
3871}
3872END_TEST
3873
3874/* Test that a reference to an unknown external entity is skipped */
3875START_TEST(test_skipped_external_entity) {
3876  const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3877                     "<doc></doc>\n";
3878  ExtTest test_data = {"<!ELEMENT doc EMPTY>\n"
3879                       "<!ENTITY % e2 '%e1;'>\n",
3880                       NULL, NULL};
3881
3882  XML_SetUserData(g_parser, &test_data);
3883  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3884  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3885  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3886      == XML_STATUS_ERROR)
3887    xml_failure(g_parser);
3888}
3889END_TEST
3890
3891/* Test a different form of unknown external entity */
3892START_TEST(test_skipped_null_loaded_ext_entity) {
3893  const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3894                     "<doc />";
3895  ExtHdlrData test_data
3896      = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3897         "<!ENTITY % pe2 '%pe1;'>\n"
3898         "%pe2;\n",
3899         external_entity_null_loader};
3900
3901  XML_SetUserData(g_parser, &test_data);
3902  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3903  XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3904  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3905      == XML_STATUS_ERROR)
3906    xml_failure(g_parser);
3907}
3908END_TEST
3909
3910START_TEST(test_skipped_unloaded_ext_entity) {
3911  const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/one.ent'>\n"
3912                     "<doc />";
3913  ExtHdlrData test_data
3914      = {"<!ENTITY % pe1 SYSTEM 'http://example.org/two.ent'>\n"
3915         "<!ENTITY % pe2 '%pe1;'>\n"
3916         "%pe2;\n",
3917         NULL};
3918
3919  XML_SetUserData(g_parser, &test_data);
3920  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3921  XML_SetExternalEntityRefHandler(g_parser, external_entity_oneshot_loader);
3922  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3923      == XML_STATUS_ERROR)
3924    xml_failure(g_parser);
3925}
3926END_TEST
3927
3928/* Test that a parameter entity value ending with a carriage return
3929 * has it translated internally into a newline.
3930 */
3931START_TEST(test_param_entity_with_trailing_cr) {
3932#define PARAM_ENTITY_NAME "pe"
3933#define PARAM_ENTITY_CORE_VALUE "<!ATTLIST doc att CDATA \"default\">"
3934  const char *text = "<!DOCTYPE doc SYSTEM 'http://example.org/'>\n"
3935                     "<doc/>";
3936  ExtTest test_data
3937      = {"<!ENTITY % " PARAM_ENTITY_NAME " '" PARAM_ENTITY_CORE_VALUE "\r'>\n"
3938         "%" PARAM_ENTITY_NAME ";\n",
3939         NULL, NULL};
3940
3941  XML_SetUserData(g_parser, &test_data);
3942  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
3943  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader);
3944  XML_SetEntityDeclHandler(g_parser, param_entity_match_handler);
3945  param_entity_match_init(XCS(PARAM_ENTITY_NAME),
3946                          XCS(PARAM_ENTITY_CORE_VALUE) XCS("\n"));
3947  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
3948      == XML_STATUS_ERROR)
3949    xml_failure(g_parser);
3950  int entity_match_flag = get_param_entity_match_flag();
3951  if (entity_match_flag == ENTITY_MATCH_FAIL)
3952    fail("Parameter entity CR->NEWLINE conversion failed");
3953  else if (entity_match_flag == ENTITY_MATCH_NOT_FOUND)
3954    fail("Parameter entity not parsed");
3955}
3956#undef PARAM_ENTITY_NAME
3957#undef PARAM_ENTITY_CORE_VALUE
3958END_TEST
3959
3960START_TEST(test_invalid_character_entity) {
3961  const char *text = "<!DOCTYPE doc [\n"
3962                     "  <!ENTITY entity '&#x110000;'>\n"
3963                     "]>\n"
3964                     "<doc>&entity;</doc>";
3965
3966  expect_failure(text, XML_ERROR_BAD_CHAR_REF,
3967                 "Out of range character reference not faulted");
3968}
3969END_TEST
3970
3971START_TEST(test_invalid_character_entity_2) {
3972  const char *text = "<!DOCTYPE doc [\n"
3973                     "  <!ENTITY entity '&#xg0;'>\n"
3974                     "]>\n"
3975                     "<doc>&entity;</doc>";
3976
3977  expect_failure(text, XML_ERROR_INVALID_TOKEN,
3978                 "Out of range character reference not faulted");
3979}
3980END_TEST
3981
3982START_TEST(test_invalid_character_entity_3) {
3983  const char text[] =
3984      /* <!DOCTYPE doc [\n */
3985      "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
3986      /* U+0E04 = KHO KHWAI
3987       * U+0E08 = CHO CHAN */
3988      /* <!ENTITY entity '&\u0e04\u0e08;'>\n */
3989      "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0e\0n\0t\0i\0t\0y\0 "
3990      "\0'\0&\x0e\x04\x0e\x08\0;\0'\0>\0\n"
3991      /* ]>\n */
3992      "\0]\0>\0\n"
3993      /* <doc>&entity;</doc> */
3994      "\0<\0d\0o\0c\0>\0&\0e\0n\0t\0i\0t\0y\0;\0<\0/\0d\0o\0c\0>";
3995
3996  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
3997      != XML_STATUS_ERROR)
3998    fail("Invalid start of entity name not faulted");
3999  if (XML_GetErrorCode(g_parser) != XML_ERROR_UNDEFINED_ENTITY)
4000    xml_failure(g_parser);
4001}
4002END_TEST
4003
4004START_TEST(test_invalid_character_entity_4) {
4005  const char *text = "<!DOCTYPE doc [\n"
4006                     "  <!ENTITY entity '&#1114112;'>\n" /* = &#x110000 */
4007                     "]>\n"
4008                     "<doc>&entity;</doc>";
4009
4010  expect_failure(text, XML_ERROR_BAD_CHAR_REF,
4011                 "Out of range character reference not faulted");
4012}
4013END_TEST
4014
4015/* Test that processing instructions are picked up by a default handler */
4016START_TEST(test_pi_handled_in_default) {
4017  const char *text = "<?test processing instruction?>\n<doc/>";
4018  const XML_Char *expected = XCS("<?test processing instruction?>\n<doc/>");
4019  CharData storage;
4020
4021  CharData_Init(&storage);
4022  XML_SetDefaultHandler(g_parser, accumulate_characters);
4023  XML_SetUserData(g_parser, &storage);
4024  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4025      == XML_STATUS_ERROR)
4026    xml_failure(g_parser);
4027  CharData_CheckXMLChars(&storage, expected);
4028}
4029END_TEST
4030
4031/* Test that comments are picked up by a default handler */
4032START_TEST(test_comment_handled_in_default) {
4033  const char *text = "<!-- This is a comment -->\n<doc/>";
4034  const XML_Char *expected = XCS("<!-- This is a comment -->\n<doc/>");
4035  CharData storage;
4036
4037  CharData_Init(&storage);
4038  XML_SetDefaultHandler(g_parser, accumulate_characters);
4039  XML_SetUserData(g_parser, &storage);
4040  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4041      == XML_STATUS_ERROR)
4042    xml_failure(g_parser);
4043  CharData_CheckXMLChars(&storage, expected);
4044}
4045END_TEST
4046
4047/* Test PIs that look almost but not quite like XML declarations */
4048START_TEST(test_pi_yml) {
4049  const char *text = "<?yml something like data?><doc/>";
4050  const XML_Char *expected = XCS("yml: something like data\n");
4051  CharData storage;
4052
4053  CharData_Init(&storage);
4054  XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4055  XML_SetUserData(g_parser, &storage);
4056  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4057      == XML_STATUS_ERROR)
4058    xml_failure(g_parser);
4059  CharData_CheckXMLChars(&storage, expected);
4060}
4061END_TEST
4062
4063START_TEST(test_pi_xnl) {
4064  const char *text = "<?xnl nothing like data?><doc/>";
4065  const XML_Char *expected = XCS("xnl: nothing like data\n");
4066  CharData storage;
4067
4068  CharData_Init(&storage);
4069  XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4070  XML_SetUserData(g_parser, &storage);
4071  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4072      == XML_STATUS_ERROR)
4073    xml_failure(g_parser);
4074  CharData_CheckXMLChars(&storage, expected);
4075}
4076END_TEST
4077
4078START_TEST(test_pi_xmm) {
4079  const char *text = "<?xmm everything like data?><doc/>";
4080  const XML_Char *expected = XCS("xmm: everything like data\n");
4081  CharData storage;
4082
4083  CharData_Init(&storage);
4084  XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4085  XML_SetUserData(g_parser, &storage);
4086  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4087      == XML_STATUS_ERROR)
4088    xml_failure(g_parser);
4089  CharData_CheckXMLChars(&storage, expected);
4090}
4091END_TEST
4092
4093START_TEST(test_utf16_pi) {
4094  const char text[] =
4095      /* <?{KHO KHWAI}{CHO CHAN}?>
4096       * where {KHO KHWAI} = U+0E04
4097       * and   {CHO CHAN}  = U+0E08
4098       */
4099      "<\0?\0\x04\x0e\x08\x0e?\0>\0"
4100      /* <q/> */
4101      "<\0q\0/\0>\0";
4102#ifdef XML_UNICODE
4103  const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4104#else
4105  const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4106#endif
4107  CharData storage;
4108
4109  CharData_Init(&storage);
4110  XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4111  XML_SetUserData(g_parser, &storage);
4112  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4113      == XML_STATUS_ERROR)
4114    xml_failure(g_parser);
4115  CharData_CheckXMLChars(&storage, expected);
4116}
4117END_TEST
4118
4119START_TEST(test_utf16_be_pi) {
4120  const char text[] =
4121      /* <?{KHO KHWAI}{CHO CHAN}?>
4122       * where {KHO KHWAI} = U+0E04
4123       * and   {CHO CHAN}  = U+0E08
4124       */
4125      "\0<\0?\x0e\x04\x0e\x08\0?\0>"
4126      /* <q/> */
4127      "\0<\0q\0/\0>";
4128#ifdef XML_UNICODE
4129  const XML_Char *expected = XCS("\x0e04\x0e08: \n");
4130#else
4131  const XML_Char *expected = XCS("\xe0\xb8\x84\xe0\xb8\x88: \n");
4132#endif
4133  CharData storage;
4134
4135  CharData_Init(&storage);
4136  XML_SetProcessingInstructionHandler(g_parser, accumulate_pi_characters);
4137  XML_SetUserData(g_parser, &storage);
4138  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4139      == XML_STATUS_ERROR)
4140    xml_failure(g_parser);
4141  CharData_CheckXMLChars(&storage, expected);
4142}
4143END_TEST
4144
4145/* Test that comments can be picked up and translated */
4146START_TEST(test_utf16_be_comment) {
4147  const char text[] =
4148      /* <!-- Comment A --> */
4149      "\0<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0A\0 \0-\0-\0>\0\n"
4150      /* <doc/> */
4151      "\0<\0d\0o\0c\0/\0>";
4152  const XML_Char *expected = XCS(" Comment A ");
4153  CharData storage;
4154
4155  CharData_Init(&storage);
4156  XML_SetCommentHandler(g_parser, accumulate_comment);
4157  XML_SetUserData(g_parser, &storage);
4158  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4159      == XML_STATUS_ERROR)
4160    xml_failure(g_parser);
4161  CharData_CheckXMLChars(&storage, expected);
4162}
4163END_TEST
4164
4165START_TEST(test_utf16_le_comment) {
4166  const char text[] =
4167      /* <!-- Comment B --> */
4168      "<\0!\0-\0-\0 \0C\0o\0m\0m\0e\0n\0t\0 \0B\0 \0-\0-\0>\0\n\0"
4169      /* <doc/> */
4170      "<\0d\0o\0c\0/\0>\0";
4171  const XML_Char *expected = XCS(" Comment B ");
4172  CharData storage;
4173
4174  CharData_Init(&storage);
4175  XML_SetCommentHandler(g_parser, accumulate_comment);
4176  XML_SetUserData(g_parser, &storage);
4177  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4178      == XML_STATUS_ERROR)
4179    xml_failure(g_parser);
4180  CharData_CheckXMLChars(&storage, expected);
4181}
4182END_TEST
4183
4184/* Test that the unknown encoding handler with map entries that expect
4185 * conversion but no conversion function is faulted
4186 */
4187START_TEST(test_missing_encoding_conversion_fn) {
4188  const char *text = "<?xml version='1.0' encoding='no-conv'?>\n"
4189                     "<doc>\x81</doc>";
4190
4191  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4192  /* MiscEncodingHandler sets up an encoding with every top-bit-set
4193   * character introducing a two-byte sequence.  For this, it
4194   * requires a convert function.  The above function call doesn't
4195   * pass one through, so when BadEncodingHandler actually gets
4196   * called it should supply an invalid encoding.
4197   */
4198  expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4199                 "Encoding with missing convert() not faulted");
4200}
4201END_TEST
4202
4203START_TEST(test_failing_encoding_conversion_fn) {
4204  const char *text = "<?xml version='1.0' encoding='failing-conv'?>\n"
4205                     "<doc>\x81</doc>";
4206
4207  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4208  /* BadEncodingHandler sets up an encoding with every top-bit-set
4209   * character introducing a two-byte sequence.  For this, it
4210   * requires a convert function.  The above function call passes
4211   * one that insists all possible sequences are invalid anyway.
4212   */
4213  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4214                 "Encoding with failing convert() not faulted");
4215}
4216END_TEST
4217
4218/* Test unknown encoding conversions */
4219START_TEST(test_unknown_encoding_success) {
4220  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4221                     /* Equivalent to <eoc>Hello, world</eoc> */
4222                     "<\x81\x64\x80oc>Hello, world</\x81\x64\x80oc>";
4223
4224  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4225  run_character_check(text, XCS("Hello, world"));
4226}
4227END_TEST
4228
4229/* Test bad name character in unknown encoding */
4230START_TEST(test_unknown_encoding_bad_name) {
4231  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4232                     "<\xff\x64oc>Hello, world</\xff\x64oc>";
4233
4234  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4235  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4236                 "Bad name start in unknown encoding not faulted");
4237}
4238END_TEST
4239
4240/* Test bad mid-name character in unknown encoding */
4241START_TEST(test_unknown_encoding_bad_name_2) {
4242  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4243                     "<d\xffoc>Hello, world</d\xffoc>";
4244
4245  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4246  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4247                 "Bad name in unknown encoding not faulted");
4248}
4249END_TEST
4250
4251/* Test element name that is long enough to fill the conversion buffer
4252 * in an unknown encoding, finishing with an encoded character.
4253 */
4254START_TEST(test_unknown_encoding_long_name_1) {
4255  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4256                     "<abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>"
4257                     "Hi"
4258                     "</abcdefghabcdefghabcdefghijkl\x80m\x80n\x80o\x80p>";
4259  const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4260  CharData storage;
4261
4262  CharData_Init(&storage);
4263  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4264  XML_SetStartElementHandler(g_parser, record_element_start_handler);
4265  XML_SetUserData(g_parser, &storage);
4266  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4267      == XML_STATUS_ERROR)
4268    xml_failure(g_parser);
4269  CharData_CheckXMLChars(&storage, expected);
4270}
4271END_TEST
4272
4273/* Test element name that is long enough to fill the conversion buffer
4274 * in an unknown encoding, finishing with an simple character.
4275 */
4276START_TEST(test_unknown_encoding_long_name_2) {
4277  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4278                     "<abcdefghabcdefghabcdefghijklmnop>"
4279                     "Hi"
4280                     "</abcdefghabcdefghabcdefghijklmnop>";
4281  const XML_Char *expected = XCS("abcdefghabcdefghabcdefghijklmnop");
4282  CharData storage;
4283
4284  CharData_Init(&storage);
4285  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4286  XML_SetStartElementHandler(g_parser, record_element_start_handler);
4287  XML_SetUserData(g_parser, &storage);
4288  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4289      == XML_STATUS_ERROR)
4290    xml_failure(g_parser);
4291  CharData_CheckXMLChars(&storage, expected);
4292}
4293END_TEST
4294
4295START_TEST(test_invalid_unknown_encoding) {
4296  const char *text = "<?xml version='1.0' encoding='invalid-9'?>\n"
4297                     "<doc>Hello world</doc>";
4298
4299  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4300  expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4301                 "Invalid unknown encoding not faulted");
4302}
4303END_TEST
4304
4305START_TEST(test_unknown_ascii_encoding_ok) {
4306  const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4307                     "<doc>Hello, world</doc>";
4308
4309  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4310  run_character_check(text, XCS("Hello, world"));
4311}
4312END_TEST
4313
4314START_TEST(test_unknown_ascii_encoding_fail) {
4315  const char *text = "<?xml version='1.0' encoding='ascii-like'?>\n"
4316                     "<doc>Hello, \x80 world</doc>";
4317
4318  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4319  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4320                 "Invalid character not faulted");
4321}
4322END_TEST
4323
4324START_TEST(test_unknown_encoding_invalid_length) {
4325  const char *text = "<?xml version='1.0' encoding='invalid-len'?>\n"
4326                     "<doc>Hello, world</doc>";
4327
4328  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4329  expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4330                 "Invalid unknown encoding not faulted");
4331}
4332END_TEST
4333
4334START_TEST(test_unknown_encoding_invalid_topbit) {
4335  const char *text = "<?xml version='1.0' encoding='invalid-a'?>\n"
4336                     "<doc>Hello, world</doc>";
4337
4338  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4339  expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4340                 "Invalid unknown encoding not faulted");
4341}
4342END_TEST
4343
4344START_TEST(test_unknown_encoding_invalid_surrogate) {
4345  const char *text = "<?xml version='1.0' encoding='invalid-surrogate'?>\n"
4346                     "<doc>Hello, \x82 world</doc>";
4347
4348  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4349  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4350                 "Invalid unknown encoding not faulted");
4351}
4352END_TEST
4353
4354START_TEST(test_unknown_encoding_invalid_high) {
4355  const char *text = "<?xml version='1.0' encoding='invalid-high'?>\n"
4356                     "<doc>Hello, world</doc>";
4357
4358  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4359  expect_failure(text, XML_ERROR_UNKNOWN_ENCODING,
4360                 "Invalid unknown encoding not faulted");
4361}
4362END_TEST
4363
4364START_TEST(test_unknown_encoding_invalid_attr_value) {
4365  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4366                     "<doc attr='\xff\x30'/>";
4367
4368  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4369  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4370                 "Invalid attribute valid not faulted");
4371}
4372END_TEST
4373
4374/* Test an external entity parser set to use latin-1 detects UTF-16
4375 * BOMs correctly.
4376 */
4377/* Test that UTF-16 BOM does not select UTF-16 given explicit encoding */
4378START_TEST(test_ext_entity_latin1_utf16le_bom) {
4379  const char *text = "<!DOCTYPE doc [\n"
4380                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4381                     "]>\n"
4382                     "<doc>&en;</doc>";
4383  ExtTest2 test_data
4384      = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4385         /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4386          *   0x4c = L and 0x20 is a space
4387          */
4388         "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4389#ifdef XML_UNICODE
4390  const XML_Char *expected = XCS("\x00ff\x00feL ");
4391#else
4392  /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4393  const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4394#endif
4395  CharData storage;
4396
4397  CharData_Init(&storage);
4398  test_data.storage = &storage;
4399  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4400  XML_SetUserData(g_parser, &test_data);
4401  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4402  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4403      == XML_STATUS_ERROR)
4404    xml_failure(g_parser);
4405  CharData_CheckXMLChars(&storage, expected);
4406}
4407END_TEST
4408
4409START_TEST(test_ext_entity_latin1_utf16be_bom) {
4410  const char *text = "<!DOCTYPE doc [\n"
4411                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4412                     "]>\n"
4413                     "<doc>&en;</doc>";
4414  ExtTest2 test_data
4415      = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4416         /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4417          *   0x4c = L and 0x20 is a space
4418          */
4419         "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4420#ifdef XML_UNICODE
4421  const XML_Char *expected = XCS("\x00fe\x00ff L");
4422#else
4423  /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4424  const XML_Char *expected = XCS("\xc3\xbe\xc3\xbf L");
4425#endif
4426  CharData storage;
4427
4428  CharData_Init(&storage);
4429  test_data.storage = &storage;
4430  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4431  XML_SetUserData(g_parser, &test_data);
4432  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4433  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4434      == XML_STATUS_ERROR)
4435    xml_failure(g_parser);
4436  CharData_CheckXMLChars(&storage, expected);
4437}
4438END_TEST
4439
4440/* Parsing the full buffer rather than a byte at a time makes a
4441 * difference to the encoding scanning code, so repeat the above tests
4442 * without breaking them down by byte.
4443 */
4444START_TEST(test_ext_entity_latin1_utf16le_bom2) {
4445  const char *text = "<!DOCTYPE doc [\n"
4446                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4447                     "]>\n"
4448                     "<doc>&en;</doc>";
4449  ExtTest2 test_data
4450      = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4451         /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4452          *   0x4c = L and 0x20 is a space
4453          */
4454         "\xff\xfe\x4c\x20", 4, XCS("iso-8859-1"), NULL};
4455#ifdef XML_UNICODE
4456  const XML_Char *expected = XCS("\x00ff\x00feL ");
4457#else
4458  /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4459  const XML_Char *expected = XCS("\xc3\xbf\xc3\xbeL ");
4460#endif
4461  CharData storage;
4462
4463  CharData_Init(&storage);
4464  test_data.storage = &storage;
4465  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4466  XML_SetUserData(g_parser, &test_data);
4467  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4468  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4469      == XML_STATUS_ERROR)
4470    xml_failure(g_parser);
4471  CharData_CheckXMLChars(&storage, expected);
4472}
4473END_TEST
4474
4475START_TEST(test_ext_entity_latin1_utf16be_bom2) {
4476  const char *text = "<!DOCTYPE doc [\n"
4477                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4478                     "]>\n"
4479                     "<doc>&en;</doc>";
4480  ExtTest2 test_data
4481      = {/* If UTF-16, 0xfeff is the BOM and 0x204c is black left bullet */
4482         /* If Latin-1, 0xff = Y-diaeresis, 0xfe = lowercase thorn,
4483          *   0x4c = L and 0x20 is a space
4484          */
4485         "\xfe\xff\x20\x4c", 4, XCS("iso-8859-1"), NULL};
4486#ifdef XML_UNICODE
4487  const XML_Char *expected = XCS("\x00fe\x00ff L");
4488#else
4489  /* In UTF-8, y-diaeresis is 0xc3 0xbf, lowercase thorn is 0xc3 0xbe */
4490  const XML_Char *expected = "\xc3\xbe\xc3\xbf L";
4491#endif
4492  CharData storage;
4493
4494  CharData_Init(&storage);
4495  test_data.storage = &storage;
4496  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4497  XML_SetUserData(g_parser, &test_data);
4498  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4499  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4500      == XML_STATUS_ERROR)
4501    xml_failure(g_parser);
4502  CharData_CheckXMLChars(&storage, expected);
4503}
4504END_TEST
4505
4506/* Test little-endian UTF-16 given an explicit big-endian encoding */
4507START_TEST(test_ext_entity_utf16_be) {
4508  const char *text = "<!DOCTYPE doc [\n"
4509                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4510                     "]>\n"
4511                     "<doc>&en;</doc>";
4512  ExtTest2 test_data = {"<\0e\0/\0>\0", 8, XCS("utf-16be"), NULL};
4513#ifdef XML_UNICODE
4514  const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4515#else
4516  const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4517                                 "\xe6\x94\x80"   /* U+6500 */
4518                                 "\xe2\xbc\x80"   /* U+2F00 */
4519                                 "\xe3\xb8\x80"); /* U+3E00 */
4520#endif
4521  CharData storage;
4522
4523  CharData_Init(&storage);
4524  test_data.storage = &storage;
4525  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4526  XML_SetUserData(g_parser, &test_data);
4527  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4528  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4529      == XML_STATUS_ERROR)
4530    xml_failure(g_parser);
4531  CharData_CheckXMLChars(&storage, expected);
4532}
4533END_TEST
4534
4535/* Test big-endian UTF-16 given an explicit little-endian encoding */
4536START_TEST(test_ext_entity_utf16_le) {
4537  const char *text = "<!DOCTYPE doc [\n"
4538                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4539                     "]>\n"
4540                     "<doc>&en;</doc>";
4541  ExtTest2 test_data = {"\0<\0e\0/\0>", 8, XCS("utf-16le"), NULL};
4542#ifdef XML_UNICODE
4543  const XML_Char *expected = XCS("\x3c00\x6500\x2f00\x3e00");
4544#else
4545  const XML_Char *expected = XCS("\xe3\xb0\x80"   /* U+3C00 */
4546                                 "\xe6\x94\x80"   /* U+6500 */
4547                                 "\xe2\xbc\x80"   /* U+2F00 */
4548                                 "\xe3\xb8\x80"); /* U+3E00 */
4549#endif
4550  CharData storage;
4551
4552  CharData_Init(&storage);
4553  test_data.storage = &storage;
4554  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4555  XML_SetUserData(g_parser, &test_data);
4556  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4557  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4558      == XML_STATUS_ERROR)
4559    xml_failure(g_parser);
4560  CharData_CheckXMLChars(&storage, expected);
4561}
4562END_TEST
4563
4564/* Test little-endian UTF-16 given no explicit encoding.
4565 * The existing default encoding (UTF-8) is assumed to hold without a
4566 * BOM to contradict it, so the entity value will in fact provoke an
4567 * error because 0x00 is not a valid XML character.  We parse the
4568 * whole buffer in one go rather than feeding it in byte by byte to
4569 * exercise different code paths in the initial scanning routines.
4570 */
4571START_TEST(test_ext_entity_utf16_unknown) {
4572  const char *text = "<!DOCTYPE doc [\n"
4573                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4574                     "]>\n"
4575                     "<doc>&en;</doc>";
4576  ExtFaults2 test_data
4577      = {"a\0b\0c\0", 6, "Invalid character in entity not faulted", NULL,
4578         XML_ERROR_INVALID_TOKEN};
4579
4580  XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter2);
4581  XML_SetUserData(g_parser, &test_data);
4582  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4583                 "Invalid character should not have been accepted");
4584}
4585END_TEST
4586
4587/* Test not-quite-UTF-8 BOM (0xEF 0xBB 0xBF) */
4588START_TEST(test_ext_entity_utf8_non_bom) {
4589  const char *text = "<!DOCTYPE doc [\n"
4590                     "  <!ENTITY en SYSTEM 'http://example.org/dummy.ent'>\n"
4591                     "]>\n"
4592                     "<doc>&en;</doc>";
4593  ExtTest2 test_data
4594      = {"\xef\xbb\x80", /* Arabic letter DAD medial form, U+FEC0 */
4595         3, NULL, NULL};
4596#ifdef XML_UNICODE
4597  const XML_Char *expected = XCS("\xfec0");
4598#else
4599  const XML_Char *expected = XCS("\xef\xbb\x80");
4600#endif
4601  CharData storage;
4602
4603  CharData_Init(&storage);
4604  test_data.storage = &storage;
4605  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
4606  XML_SetUserData(g_parser, &test_data);
4607  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
4608  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4609      == XML_STATUS_ERROR)
4610    xml_failure(g_parser);
4611  CharData_CheckXMLChars(&storage, expected);
4612}
4613END_TEST
4614
4615/* Test that UTF-8 in a CDATA section is correctly passed through */
4616START_TEST(test_utf8_in_cdata_section) {
4617  const char *text = "<doc><![CDATA[one \xc3\xa9 two]]></doc>";
4618#ifdef XML_UNICODE
4619  const XML_Char *expected = XCS("one \x00e9 two");
4620#else
4621  const XML_Char *expected = XCS("one \xc3\xa9 two");
4622#endif
4623
4624  run_character_check(text, expected);
4625}
4626END_TEST
4627
/* Test that UTF-8 in a CDATA section is passed through when a lone ']'
 * sits between multi-byte characters
 */
4629START_TEST(test_utf8_in_cdata_section_2) {
4630  const char *text = "<doc><![CDATA[\xc3\xa9]\xc3\xa9two]]></doc>";
4631#ifdef XML_UNICODE
4632  const XML_Char *expected = XCS("\x00e9]\x00e9two");
4633#else
4634  const XML_Char *expected = XCS("\xc3\xa9]\xc3\xa9two");
4635#endif
4636
4637  run_character_check(text, expected);
4638}
4639END_TEST
4640
4641START_TEST(test_utf8_in_start_tags) {
4642  struct test_case {
4643    bool goodName;
4644    bool goodNameStart;
4645    const char *tagName;
4646  };
4647
4648  // The idea with the tests below is this:
4649  // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
4650  // go to isNever and are hence not a concern.
4651  //
4652  // We start with a character that is a valid name character
4653  // (or even name-start character, see XML 1.0r4 spec) and then we flip
4654  // single bits at places where (1) the result leaves the UTF-8 encoding space
4655  // and (2) we stay in the same n-byte sequence family.
4656  //
4657  // The flipped bits are highlighted in angle brackets in comments,
4658  // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
4659  // the most significant bit to 1 to leave UTF-8 encoding space.
4660  struct test_case cases[] = {
4661      // 1-byte UTF-8: [0xxx xxxx]
4662      {true, true, "\x3A"},   // [0011 1010] = ASCII colon ':'
4663      {false, false, "\xBA"}, // [<1>011 1010]
4664      {true, false, "\x39"},  // [0011 1001] = ASCII nine '9'
4665      {false, false, "\xB9"}, // [<1>011 1001]
4666
4667      // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
4668      {true, true, "\xDB\xA5"},   // [1101 1011] [1010 0101] =
4669                                  // Arabic small waw U+06E5
4670      {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
4671      {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
4672      {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
4673      {true, false, "\xCC\x81"},  // [1100 1100] [1000 0001] =
4674                                  // combining char U+0301
4675      {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
4676      {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
4677      {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
4678
4679      // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
4680      {true, true, "\xE0\xA4\x85"},   // [1110 0000] [1010 0100] [1000 0101] =
4681                                      // Devanagari Letter A U+0905
4682      {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
4683      {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
4684      {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
4685      {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
4686      {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
4687      {true, false, "\xE0\xA4\x81"},  // [1110 0000] [1010 0100] [1000 0001] =
4688                                      // combining char U+0901
4689      {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
4690      {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
4691      {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
4692      {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
4693      {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
4694  };
4695  const bool atNameStart[] = {true, false};
4696
4697  size_t i = 0;
4698  char doc[1024];
4699  size_t failCount = 0;
4700
4701  // we need all the bytes to be parsed, but we don't want the errors that can
4702  // trigger on isFinal=XML_TRUE, so we skip the test if the heuristic is on.
4703  if (g_reparseDeferralEnabledDefault) {
4704    return;
4705  }
4706
4707  for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
4708    size_t j = 0;
4709    for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
4710      const bool expectedSuccess
4711          = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
4712      snprintf(doc, sizeof(doc), "<%s%s><!--", atNameStart[j] ? "" : "a",
4713               cases[i].tagName);
4714      XML_Parser parser = XML_ParserCreate(NULL);
4715
4716      const enum XML_Status status = _XML_Parse_SINGLE_BYTES(
4717          parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
4718
4719      bool success = true;
4720      if ((status == XML_STATUS_OK) != expectedSuccess) {
4721        success = false;
4722      }
4723      if ((status == XML_STATUS_ERROR)
4724          && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
4725        success = false;
4726      }
4727
4728      if (! success) {
4729        fprintf(
4730            stderr,
4731            "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
4732            (unsigned)i + 1u, atNameStart[j] ? "    " : "not ",
4733            (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
4734        failCount++;
4735      }
4736
4737      XML_ParserFree(parser);
4738    }
4739  }
4740
4741  if (failCount > 0) {
4742    fail("UTF-8 regression detected");
4743  }
4744}
4745END_TEST
4746
4747/* Test trailing spaces in elements are accepted */
4748START_TEST(test_trailing_spaces_in_elements) {
4749  const char *text = "<doc   >Hi</doc >";
4750  const XML_Char *expected = XCS("doc/doc");
4751  CharData storage;
4752
4753  CharData_Init(&storage);
4754  XML_SetElementHandler(g_parser, record_element_start_handler,
4755                        record_element_end_handler);
4756  XML_SetUserData(g_parser, &storage);
4757  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
4758      == XML_STATUS_ERROR)
4759    xml_failure(g_parser);
4760  CharData_CheckXMLChars(&storage, expected);
4761}
4762END_TEST
4763
4764START_TEST(test_utf16_attribute) {
4765  const char text[] =
4766      /* <d {KHO KHWAI}{CHO CHAN}='a'/>
4767       * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4768       * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4769       */
4770      "<\0d\0 \0\x04\x0e\x08\x0e=\0'\0a\0'\0/\0>\0";
4771  const XML_Char *expected = XCS("a");
4772  CharData storage;
4773
4774  CharData_Init(&storage);
4775  XML_SetStartElementHandler(g_parser, accumulate_attribute);
4776  XML_SetUserData(g_parser, &storage);
4777  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4778      == XML_STATUS_ERROR)
4779    xml_failure(g_parser);
4780  CharData_CheckXMLChars(&storage, expected);
4781}
4782END_TEST
4783
4784START_TEST(test_utf16_second_attr) {
4785  /* <d a='1' {KHO KHWAI}{CHO CHAN}='2'/>
4786   * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4787   * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4788   */
4789  const char text[] = "<\0d\0 \0a\0=\0'\0\x31\0'\0 \0"
4790                      "\x04\x0e\x08\x0e=\0'\0\x32\0'\0/\0>\0";
4791  const XML_Char *expected = XCS("1");
4792  CharData storage;
4793
4794  CharData_Init(&storage);
4795  XML_SetStartElementHandler(g_parser, accumulate_attribute);
4796  XML_SetUserData(g_parser, &storage);
4797  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4798      == XML_STATUS_ERROR)
4799    xml_failure(g_parser);
4800  CharData_CheckXMLChars(&storage, expected);
4801}
4802END_TEST
4803
4804START_TEST(test_attr_after_solidus) {
4805  const char *text = "<doc attr1='a' / attr2='b'>";
4806
4807  expect_failure(text, XML_ERROR_INVALID_TOKEN, "Misplaced / not faulted");
4808}
4809END_TEST
4810
4811START_TEST(test_utf16_pe) {
4812  /* <!DOCTYPE doc [
4813   * <!ENTITY % {KHO KHWAI}{CHO CHAN} '<!ELEMENT doc (#PCDATA)>'>
4814   * %{KHO KHWAI}{CHO CHAN};
4815   * ]>
4816   * <doc></doc>
4817   *
4818   * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4819   * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4820   */
4821  const char text[] = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0\n"
4822                      "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \x0e\x04\x0e\x08\0 "
4823                      "\0'\0<\0!\0E\0L\0E\0M\0E\0N\0T\0 "
4824                      "\0d\0o\0c\0 \0(\0#\0P\0C\0D\0A\0T\0A\0)\0>\0'\0>\0\n"
4825                      "\0%\x0e\x04\x0e\x08\0;\0\n"
4826                      "\0]\0>\0\n"
4827                      "\0<\0d\0o\0c\0>\0<\0/\0d\0o\0c\0>";
4828#ifdef XML_UNICODE
4829  const XML_Char *expected = XCS("\x0e04\x0e08=<!ELEMENT doc (#PCDATA)>\n");
4830#else
4831  const XML_Char *expected
4832      = XCS("\xe0\xb8\x84\xe0\xb8\x88=<!ELEMENT doc (#PCDATA)>\n");
4833#endif
4834  CharData storage;
4835
4836  CharData_Init(&storage);
4837  XML_SetUserData(g_parser, &storage);
4838  XML_SetEntityDeclHandler(g_parser, accumulate_entity_decl);
4839  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4840      == XML_STATUS_ERROR)
4841    xml_failure(g_parser);
4842  CharData_CheckXMLChars(&storage, expected);
4843}
4844END_TEST
4845
4846/* Test that duff attribute description keywords are rejected */
4847START_TEST(test_bad_attr_desc_keyword) {
4848  const char *text = "<!DOCTYPE doc [\n"
4849                     "  <!ATTLIST doc attr CDATA #!IMPLIED>\n"
4850                     "]>\n"
4851                     "<doc />";
4852
4853  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4854                 "Bad keyword !IMPLIED not faulted");
4855}
4856END_TEST
4857
4858/* Test that an invalid attribute description keyword consisting of
4859 * UTF-16 characters with their top bytes non-zero are correctly
4860 * faulted
4861 */
4862START_TEST(test_bad_attr_desc_keyword_utf16) {
4863  /* <!DOCTYPE d [
4864   * <!ATTLIST d a CDATA #{KHO KHWAI}{CHO CHAN}>
4865   * ]><d/>
4866   *
4867   * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
4868   * and   {CHO CHAN}  = U+0E08 = 0xe0 0xb8 0x88 in UTF-8
4869   */
4870  const char text[]
4871      = "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
4872        "\0<\0!\0A\0T\0T\0L\0I\0S\0T\0 \0d\0 \0a\0 \0C\0D\0A\0T\0A\0 "
4873        "\0#\x0e\x04\x0e\x08\0>\0\n"
4874        "\0]\0>\0<\0d\0/\0>";
4875
4876  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4877      != XML_STATUS_ERROR)
4878    fail("Invalid UTF16 attribute keyword not faulted");
4879  if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4880    xml_failure(g_parser);
4881}
4882END_TEST
4883
4884/* Test that invalid syntax in a <!DOCTYPE> is rejected.  Do this
4885 * using prefix-encoding (see above) to trigger specific code paths
4886 */
4887START_TEST(test_bad_doctype) {
4888  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
4889                     "<!DOCTYPE doc [ \x80\x44 ]><doc/>";
4890
4891  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4892  expect_failure(text, XML_ERROR_SYNTAX,
4893                 "Invalid bytes in DOCTYPE not faulted");
4894}
4895END_TEST
4896
4897START_TEST(test_bad_doctype_utf8) {
4898  const char *text = "<!DOCTYPE \xDB\x25"
4899                     "doc><doc/>"; // [1101 1011] [<0>010 0101]
4900  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4901                 "Invalid UTF-8 in DOCTYPE not faulted");
4902}
4903END_TEST
4904
4905START_TEST(test_bad_doctype_utf16) {
4906  const char text[] =
4907      /* <!DOCTYPE doc [ \x06f2 ]><doc/>
4908       *
4909       * U+06F2 = EXTENDED ARABIC-INDIC DIGIT TWO, a valid number
4910       * (name character) but not a valid letter (name start character)
4911       */
4912      "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0o\0c\0 \0[\0 "
4913      "\x06\xf2"
4914      "\0 \0]\0>\0<\0d\0o\0c\0/\0>";
4915
4916  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4917      != XML_STATUS_ERROR)
4918    fail("Invalid bytes in DOCTYPE not faulted");
4919  if (XML_GetErrorCode(g_parser) != XML_ERROR_SYNTAX)
4920    xml_failure(g_parser);
4921}
4922END_TEST
4923
4924START_TEST(test_bad_doctype_plus) {
4925  const char *text = "<!DOCTYPE 1+ [ <!ENTITY foo 'bar'> ]>\n"
4926                     "<1+>&foo;</1+>";
4927
4928  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4929                 "'+' in document name not faulted");
4930}
4931END_TEST
4932
4933START_TEST(test_bad_doctype_star) {
4934  const char *text = "<!DOCTYPE 1* [ <!ENTITY foo 'bar'> ]>\n"
4935                     "<1*>&foo;</1*>";
4936
4937  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4938                 "'*' in document name not faulted");
4939}
4940END_TEST
4941
4942START_TEST(test_bad_doctype_query) {
4943  const char *text = "<!DOCTYPE 1? [ <!ENTITY foo 'bar'> ]>\n"
4944                     "<1?>&foo;</1?>";
4945
4946  expect_failure(text, XML_ERROR_INVALID_TOKEN,
4947                 "'?' in document name not faulted");
4948}
4949END_TEST
4950
4951START_TEST(test_unknown_encoding_bad_ignore) {
4952  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>"
4953                     "<!DOCTYPE doc SYSTEM 'foo'>"
4954                     "<doc><e>&entity;</e></doc>";
4955  ExtFaults fault = {"<![IGNORE[<!ELEMENT \xffG (#PCDATA)*>]]>",
4956                     "Invalid character not faulted", XCS("prefix-conv"),
4957                     XML_ERROR_INVALID_TOKEN};
4958
4959  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
4960  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
4961  XML_SetExternalEntityRefHandler(g_parser, external_entity_faulter);
4962  XML_SetUserData(g_parser, &fault);
4963  expect_failure(text, XML_ERROR_EXTERNAL_ENTITY_HANDLING,
4964                 "Bad IGNORE section with unknown encoding not failed");
4965}
4966END_TEST
4967
4968START_TEST(test_entity_in_utf16_be_attr) {
4969  const char text[] =
4970      /* <e a='&#228; &#x00E4;'></e> */
4971      "\0<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 "
4972      "\0&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>";
4973#ifdef XML_UNICODE
4974  const XML_Char *expected = XCS("\x00e4 \x00e4");
4975#else
4976  const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
4977#endif
4978  CharData storage;
4979
4980  CharData_Init(&storage);
4981  XML_SetUserData(g_parser, &storage);
4982  XML_SetStartElementHandler(g_parser, accumulate_attribute);
4983  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
4984      == XML_STATUS_ERROR)
4985    xml_failure(g_parser);
4986  CharData_CheckXMLChars(&storage, expected);
4987}
4988END_TEST
4989
4990START_TEST(test_entity_in_utf16_le_attr) {
4991  const char text[] =
4992      /* <e a='&#228; &#x00E4;'></e> */
4993      "<\0e\0 \0a\0=\0'\0&\0#\0\x32\0\x32\0\x38\0;\0 \0"
4994      "&\0#\0x\0\x30\0\x30\0E\0\x34\0;\0'\0>\0<\0/\0e\0>\0";
4995#ifdef XML_UNICODE
4996  const XML_Char *expected = XCS("\x00e4 \x00e4");
4997#else
4998  const XML_Char *expected = XCS("\xc3\xa4 \xc3\xa4");
4999#endif
5000  CharData storage;
5001
5002  CharData_Init(&storage);
5003  XML_SetUserData(g_parser, &storage);
5004  XML_SetStartElementHandler(g_parser, accumulate_attribute);
5005  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5006      == XML_STATUS_ERROR)
5007    xml_failure(g_parser);
5008  CharData_CheckXMLChars(&storage, expected);
5009}
5010END_TEST
5011
5012START_TEST(test_entity_public_utf16_be) {
5013  const char text[] =
5014      /* <!DOCTYPE d [ */
5015      "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n"
5016      /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5017      "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 "
5018      "\0'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n"
5019      /* %e; */
5020      "\0%\0e\0;\0\n"
5021      /* ]> */
5022      "\0]\0>\0\n"
5023      /* <d>&j;</d> */
5024      "\0<\0d\0>\0&\0j\0;\0<\0/\0d\0>";
5025  ExtTest2 test_data
5026      = {/* <!ENTITY j 'baz'> */
5027         "\0<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>", 34, NULL, NULL};
5028  const XML_Char *expected = XCS("baz");
5029  CharData storage;
5030
5031  CharData_Init(&storage);
5032  test_data.storage = &storage;
5033  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5034  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5035  XML_SetUserData(g_parser, &test_data);
5036  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5037  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5038      == XML_STATUS_ERROR)
5039    xml_failure(g_parser);
5040  CharData_CheckXMLChars(&storage, expected);
5041}
5042END_TEST
5043
5044START_TEST(test_entity_public_utf16_le) {
5045  const char text[] =
5046      /* <!DOCTYPE d [ */
5047      "<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0d\0 \0[\0\n\0"
5048      /* <!ENTITY % e PUBLIC 'foo' 'bar.ent'> */
5049      "<\0!\0E\0N\0T\0I\0T\0Y\0 \0%\0 \0e\0 \0P\0U\0B\0L\0I\0C\0 \0"
5050      "'\0f\0o\0o\0'\0 \0'\0b\0a\0r\0.\0e\0n\0t\0'\0>\0\n\0"
5051      /* %e; */
5052      "%\0e\0;\0\n\0"
5053      /* ]> */
5054      "]\0>\0\n\0"
5055      /* <d>&j;</d> */
5056      "<\0d\0>\0&\0j\0;\0<\0/\0d\0>\0";
5057  ExtTest2 test_data
5058      = {/* <!ENTITY j 'baz'> */
5059         "<\0!\0E\0N\0T\0I\0T\0Y\0 \0j\0 \0'\0b\0a\0z\0'\0>\0", 34, NULL, NULL};
5060  const XML_Char *expected = XCS("baz");
5061  CharData storage;
5062
5063  CharData_Init(&storage);
5064  test_data.storage = &storage;
5065  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5066  XML_SetExternalEntityRefHandler(g_parser, external_entity_loader2);
5067  XML_SetUserData(g_parser, &test_data);
5068  XML_SetCharacterDataHandler(g_parser, ext2_accumulate_characters);
5069  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
5070      == XML_STATUS_ERROR)
5071    xml_failure(g_parser);
5072  CharData_CheckXMLChars(&storage, expected);
5073}
5074END_TEST
5075
5076/* Test that a doctype with neither an internal nor external subset is
5077 * faulted
5078 */
5079START_TEST(test_short_doctype) {
5080  const char *text = "<!DOCTYPE doc></doc>";
5081  expect_failure(text, XML_ERROR_INVALID_TOKEN,
5082                 "DOCTYPE without subset not rejected");
5083}
5084END_TEST
5085
5086START_TEST(test_short_doctype_2) {
5087  const char *text = "<!DOCTYPE doc PUBLIC></doc>";
5088  expect_failure(text, XML_ERROR_SYNTAX,
5089                 "DOCTYPE without Public ID not rejected");
5090}
5091END_TEST
5092
5093START_TEST(test_short_doctype_3) {
5094  const char *text = "<!DOCTYPE doc SYSTEM></doc>";
5095  expect_failure(text, XML_ERROR_SYNTAX,
5096                 "DOCTYPE without System ID not rejected");
5097}
5098END_TEST
5099
5100START_TEST(test_long_doctype) {
5101  const char *text = "<!DOCTYPE doc PUBLIC 'foo' 'bar' 'baz'></doc>";
5102  expect_failure(text, XML_ERROR_SYNTAX, "DOCTYPE with extra ID not rejected");
5103}
5104END_TEST
5105
5106START_TEST(test_bad_entity) {
5107  const char *text = "<!DOCTYPE doc [\n"
5108                     "  <!ENTITY foo PUBLIC>\n"
5109                     "]>\n"
5110                     "<doc/>";
5111  expect_failure(text, XML_ERROR_SYNTAX,
5112                 "ENTITY without Public ID is not rejected");
5113}
5114END_TEST
5115
5116/* Test unquoted value is faulted */
5117START_TEST(test_bad_entity_2) {
5118  const char *text = "<!DOCTYPE doc [\n"
5119                     "  <!ENTITY % foo bar>\n"
5120                     "]>\n"
5121                     "<doc/>";
5122  expect_failure(text, XML_ERROR_SYNTAX,
5123                 "ENTITY without Public ID is not rejected");
5124}
5125END_TEST
5126
5127START_TEST(test_bad_entity_3) {
5128  const char *text = "<!DOCTYPE doc [\n"
5129                     "  <!ENTITY % foo PUBLIC>\n"
5130                     "]>\n"
5131                     "<doc/>";
5132  expect_failure(text, XML_ERROR_SYNTAX,
5133                 "Parameter ENTITY without Public ID is not rejected");
5134}
5135END_TEST
5136
5137START_TEST(test_bad_entity_4) {
5138  const char *text = "<!DOCTYPE doc [\n"
5139                     "  <!ENTITY % foo SYSTEM>\n"
5140                     "]>\n"
5141                     "<doc/>";
5142  expect_failure(text, XML_ERROR_SYNTAX,
5143                 "Parameter ENTITY without Public ID is not rejected");
5144}
5145END_TEST
5146
5147START_TEST(test_bad_notation) {
5148  const char *text = "<!DOCTYPE doc [\n"
5149                     "  <!NOTATION n SYSTEM>\n"
5150                     "]>\n"
5151                     "<doc/>";
5152  expect_failure(text, XML_ERROR_SYNTAX,
5153                 "Notation without System ID is not rejected");
5154}
5155END_TEST
5156
5157/* Test for issue #11, wrongly suppressed default handler */
5158START_TEST(test_default_doctype_handler) {
5159  const char *text = "<!DOCTYPE doc PUBLIC 'pubname' 'test.dtd' [\n"
5160                     "  <!ENTITY foo 'bar'>\n"
5161                     "]>\n"
5162                     "<doc>&foo;</doc>";
5163  DefaultCheck test_data[] = {{XCS("'pubname'"), 9, XML_FALSE},
5164                              {XCS("'test.dtd'"), 10, XML_FALSE},
5165                              {NULL, 0, XML_FALSE}};
5166  int i;
5167
5168  XML_SetUserData(g_parser, &test_data);
5169  XML_SetDefaultHandler(g_parser, checking_default_handler);
5170  XML_SetEntityDeclHandler(g_parser, dummy_entity_decl_handler);
5171  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5172      == XML_STATUS_ERROR)
5173    xml_failure(g_parser);
5174  for (i = 0; test_data[i].expected != NULL; i++)
5175    if (! test_data[i].seen)
5176      fail("Default handler not run for public !DOCTYPE");
5177}
5178END_TEST
5179
5180START_TEST(test_empty_element_abort) {
5181  const char *text = "<abort/>";
5182
5183  XML_SetStartElementHandler(g_parser, start_element_suspender);
5184  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5185      != XML_STATUS_ERROR)
5186    fail("Expected to error on abort");
5187}
5188END_TEST
5189
5190/* Regression test for GH issue #612: unfinished m_declAttributeType
5191 * allocation in ->m_tempPool can corrupt following allocation.
5192 */
5193START_TEST(test_pool_integrity_with_unfinished_attr) {
5194  const char *text = "<?xml version='1.0' encoding='UTF-8'?>\n"
5195                     "<!DOCTYPE foo [\n"
5196                     "<!ELEMENT foo ANY>\n"
5197                     "<!ENTITY % entp SYSTEM \"external.dtd\">\n"
5198                     "%entp;\n"
5199                     "]>\n"
5200                     "<a></a>\n";
5201  const XML_Char *expected = XCS("COMMENT");
5202  CharData storage;
5203
5204  CharData_Init(&storage);
5205  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5206  XML_SetExternalEntityRefHandler(g_parser, external_entity_unfinished_attlist);
5207  XML_SetAttlistDeclHandler(g_parser, dummy_attlist_decl_handler);
5208  XML_SetCommentHandler(g_parser, accumulate_comment);
5209  XML_SetUserData(g_parser, &storage);
5210  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
5211      == XML_STATUS_ERROR)
5212    xml_failure(g_parser);
5213  CharData_CheckXMLChars(&storage, expected);
5214}
5215END_TEST
5216
5217START_TEST(test_nested_entity_suspend) {
5218  const char *const text = "<!DOCTYPE a [\n"
5219                           "  <!ENTITY e1 '<!--e1-->'>\n"
5220                           "  <!ENTITY e2 '<!--e2 head-->&e1;<!--e2 tail-->'>\n"
5221                           "  <!ENTITY e3 '<!--e3 head-->&e2;<!--e3 tail-->'>\n"
5222                           "]>\n"
5223                           "<a><!--start-->&e3;<!--end--></a>";
5224  const XML_Char *const expected = XCS("start") XCS("e3 head") XCS("e2 head")
5225      XCS("e1") XCS("e2 tail") XCS("e3 tail") XCS("end");
5226  CharData storage;
5227  CharData_Init(&storage);
5228  XML_Parser parser = XML_ParserCreate(NULL);
5229  ParserPlusStorage parserPlusStorage = {parser, &storage};
5230
5231  XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5232  XML_SetCommentHandler(parser, accumulate_and_suspend_comment_handler);
5233  XML_SetUserData(parser, &parserPlusStorage);
5234
5235  enum XML_Status status = XML_Parse(parser, text, (int)strlen(text), XML_TRUE);
5236  while (status == XML_STATUS_SUSPENDED) {
5237    status = XML_ResumeParser(parser);
5238  }
5239  if (status != XML_STATUS_OK)
5240    xml_failure(parser);
5241
5242  CharData_CheckXMLChars(&storage, expected);
5243  XML_ParserFree(parser);
5244}
5245END_TEST
5246
#if defined(XML_TESTING)
/* Regression test for quadratic parsing on large tokens: the number of
 * bytes scanned must stay within a fixed factor of the input size.
 */
START_TEST(test_big_tokens_scale_linearly) {
  /* Each case wraps one long run of 'a' characters in a different kind of
   * token.
   */
  const struct {
    const char *pre;
    const char *post;
  } cases[] = {
      {"<a>", "</a>"},                      // assumed good, used as baseline
      {"<b><![CDATA[ value: ", " ]]></b>"}, // CDATA, performed OK before patch
      {"<c attr='", "'></c>"},              // big attribute, used to be O(N^2)
      {"<d><!-- ", " --></d>"},             // long comment, used to be O(N^2)
      {"<e><", "/></e>"},                   // big elem name, used to be O(N^2)
  };
  const int num_cases = sizeof(cases) / sizeof(cases[0]);
  char aaaaaa[4096];
  const int fillsize = (int)sizeof(aaaaaa);
  const int fillcount = 100;
  const unsigned approx_bytes = fillsize * fillcount; // ignore pre/post.
  const unsigned max_factor = 4;
  const unsigned max_scanned = max_factor * approx_bytes;

  if (! g_reparseDeferralEnabledDefault) {
    return; // heuristic is disabled; we would get O(n^2) and fail.
  }

  memset(aaaaaa, 'a', fillsize);

  for (int i = 0; i < num_cases; ++i) {
    const char *const pre = cases[i].pre;
    const char *const post = cases[i].post;
    XML_Parser parser = XML_ParserCreate(NULL);
    assert_true(parser != NULL);
    set_subtest("text=\"%saaaaaa%s\"", pre, post);

    // Opening text; the scan counter is reset for every case.
    g_bytesScanned = 0;
    if (_XML_Parse_SINGLE_BYTES(parser, pre, (int)strlen(pre), XML_FALSE)
        != XML_STATUS_OK) {
      xml_failure(parser);
    }

    // Filler: lots of 'a', failing the test early if it takes too long.
    unsigned past_max_count = 0;
    for (int f = 0; f < fillcount; ++f) {
      if (_XML_Parse_SINGLE_BYTES(parser, aaaaaa, fillsize, XML_FALSE)
          != XML_STATUS_OK) {
        xml_failure(parser);
      }
      if (g_bytesScanned <= max_scanned) {
        continue;
      }
      // We're not done, and have already passed the limit -- the test will
      // definitely fail. This branch allows us to save time by failing early.
      const unsigned pushed = (unsigned)strlen(pre) + (f + 1) * fillsize;
      fprintf(
          stderr,
          "after %d/%d loops: pushed=%u scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
          f + 1, fillcount, pushed, g_bytesScanned,
          g_bytesScanned / (double)pushed, max_scanned, max_factor);
      past_max_count++;
      // We are failing, but allow a few log prints first. If we don't reach
      // a count of five, the test will fail after the loop instead.
      assert_true(past_max_count < 5);
    }

    // Closing text, which also finishes the document.
    if (_XML_Parse_SINGLE_BYTES(parser, post, (int)strlen(post), XML_TRUE)
        != XML_STATUS_OK) {
      xml_failure(parser);
    }

    assert_true(g_bytesScanned > approx_bytes); // or the counter isn't working
    if (g_bytesScanned > max_scanned) {
      fprintf(
          stderr,
          "after all input: scanned=%u (factor ~%.2f) max_scanned: %u (factor ~%u)\n",
          g_bytesScanned, g_bytesScanned / (double)approx_bytes, max_scanned,
          max_factor);
      fail("scanned too many bytes");
    }

    XML_ParserFree(parser);
  }
}
END_TEST
#endif
5334
5335START_TEST(test_set_reparse_deferral) {
5336  const char *const pre = "<d>";
5337  const char *const start = "<x attr='";
5338  const char *const end = "'></x>";
5339  char eeeeee[100];
5340  const int fillsize = (int)sizeof(eeeeee);
5341  memset(eeeeee, 'e', fillsize);
5342
5343  for (int enabled = 0; enabled <= 1; enabled += 1) {
5344    set_subtest("deferral=%d", enabled);
5345
5346    XML_Parser parser = XML_ParserCreate(NULL);
5347    assert_true(parser != NULL);
5348    assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5349    // pre-grow the buffer to avoid reparsing due to almost-fullness
5350    assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5351
5352    CharData storage;
5353    CharData_Init(&storage);
5354    XML_SetUserData(parser, &storage);
5355    XML_SetStartElementHandler(parser, start_element_event_handler);
5356
5357    enum XML_Status status;
5358    // parse the start text
5359    status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5360    if (status != XML_STATUS_OK) {
5361      xml_failure(parser);
5362    }
5363    CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5364
5365    // ..and the start of the token
5366    status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5367    if (status != XML_STATUS_OK) {
5368      xml_failure(parser);
5369    }
5370    CharData_CheckXMLChars(&storage, XCS("d")); // still just the first one
5371
5372    // try to parse lots of 'e', but the token isn't finished
5373    for (int c = 0; c < 100; ++c) {
5374      status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5375      if (status != XML_STATUS_OK) {
5376        xml_failure(parser);
5377      }
5378    }
5379    CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5380
5381    // end the <x> token.
5382    status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5383    if (status != XML_STATUS_OK) {
5384      xml_failure(parser);
5385    }
5386
5387    if (enabled) {
5388      // In general, we may need to push more data to trigger a reparse attempt,
5389      // but in this test, the data is constructed to always require it.
5390      CharData_CheckXMLChars(&storage, XCS("d")); // or the test is incorrect
5391      // 2x the token length should suffice; the +1 covers the start and end.
5392      for (int c = 0; c < 101; ++c) {
5393        status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5394        if (status != XML_STATUS_OK) {
5395          xml_failure(parser);
5396        }
5397      }
5398    }
5399    CharData_CheckXMLChars(&storage, XCS("dx")); // the <x> should be done
5400
5401    XML_ParserFree(parser);
5402  }
5403}
5404END_TEST
5405
/* User data for element_decl_counter(): the parser whose content models the
   handler releases, plus a running count of handler invocations. */
struct element_decl_data {
  XML_Parser parser; // handed to XML_FreeContentModel() by the handler
  int count;         // incremented once per call to the handler
};
5410
5411static void
5412element_decl_counter(void *userData, const XML_Char *name, XML_Content *model) {
5413  UNUSED_P(name);
5414  struct element_decl_data *testdata = (struct element_decl_data *)userData;
5415  testdata->count += 1;
5416  XML_FreeContentModel(testdata->parser, model);
5417}
5418
5419static int
5420external_inherited_parser(XML_Parser p, const XML_Char *context,
5421                          const XML_Char *base, const XML_Char *systemId,
5422                          const XML_Char *publicId) {
5423  UNUSED_P(base);
5424  UNUSED_P(systemId);
5425  UNUSED_P(publicId);
5426  const char *const pre = "<!ELEMENT document ANY>\n";
5427  const char *const start = "<!ELEMENT ";
5428  const char *const end = " ANY>\n";
5429  const char *const post = "<!ELEMENT xyz ANY>\n";
5430  const int enabled = *(int *)XML_GetUserData(p);
5431  char eeeeee[100];
5432  char spaces[100];
5433  const int fillsize = (int)sizeof(eeeeee);
5434  assert_true(fillsize == (int)sizeof(spaces));
5435  memset(eeeeee, 'e', fillsize);
5436  memset(spaces, ' ', fillsize);
5437
5438  XML_Parser parser = XML_ExternalEntityParserCreate(p, context, NULL);
5439  assert_true(parser != NULL);
5440  // pre-grow the buffer to avoid reparsing due to almost-fullness
5441  assert_true(XML_GetBuffer(parser, fillsize * 10103) != NULL);
5442
5443  struct element_decl_data testdata;
5444  testdata.parser = parser;
5445  testdata.count = 0;
5446  XML_SetUserData(parser, &testdata);
5447  XML_SetElementDeclHandler(parser, element_decl_counter);
5448
5449  enum XML_Status status;
5450  // parse the initial text
5451  status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5452  if (status != XML_STATUS_OK) {
5453    xml_failure(parser);
5454  }
5455  assert_true(testdata.count == 1); // first element should be done
5456
5457  // ..and the start of the big token
5458  status = XML_Parse(parser, start, (int)strlen(start), XML_FALSE);
5459  if (status != XML_STATUS_OK) {
5460    xml_failure(parser);
5461  }
5462  assert_true(testdata.count == 1); // still just the first one
5463
5464  // try to parse lots of 'e', but the token isn't finished
5465  for (int c = 0; c < 100; ++c) {
5466    status = XML_Parse(parser, eeeeee, fillsize, XML_FALSE);
5467    if (status != XML_STATUS_OK) {
5468      xml_failure(parser);
5469    }
5470  }
5471  assert_true(testdata.count == 1); // *still* just the first one
5472
5473  // end the big token.
5474  status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5475  if (status != XML_STATUS_OK) {
5476    xml_failure(parser);
5477  }
5478
5479  if (enabled) {
5480    // In general, we may need to push more data to trigger a reparse attempt,
5481    // but in this test, the data is constructed to always require it.
5482    assert_true(testdata.count == 1); // or the test is incorrect
5483    // 2x the token length should suffice; the +1 covers the start and end.
5484    for (int c = 0; c < 101; ++c) {
5485      status = XML_Parse(parser, spaces, fillsize, XML_FALSE);
5486      if (status != XML_STATUS_OK) {
5487        xml_failure(parser);
5488      }
5489    }
5490  }
5491  assert_true(testdata.count == 2); // the big token should be done
5492
5493  // parse the final text
5494  status = XML_Parse(parser, post, (int)strlen(post), XML_TRUE);
5495  if (status != XML_STATUS_OK) {
5496    xml_failure(parser);
5497  }
5498  assert_true(testdata.count == 3); // after isFinal=XML_TRUE, all must be done
5499
5500  XML_ParserFree(parser);
5501  return XML_STATUS_OK;
5502}
5503
5504START_TEST(test_reparse_deferral_is_inherited) {
5505  const char *const text
5506      = "<!DOCTYPE document SYSTEM 'something.ext'><document/>";
5507  for (int enabled = 0; enabled <= 1; ++enabled) {
5508    set_subtest("deferral=%d", enabled);
5509
5510    XML_Parser parser = XML_ParserCreate(NULL);
5511    assert_true(parser != NULL);
5512    XML_SetUserData(parser, (void *)&enabled);
5513    XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
5514    // this handler creates a sub-parser and checks that its deferral behavior
5515    // is what we expected, based on the value of `enabled` (in userdata).
5516    XML_SetExternalEntityRefHandler(parser, external_inherited_parser);
5517    assert_true(XML_SetReparseDeferralEnabled(parser, enabled));
5518    if (XML_Parse(parser, text, (int)strlen(text), XML_TRUE) != XML_STATUS_OK)
5519      xml_failure(parser);
5520
5521    XML_ParserFree(parser);
5522  }
5523}
5524END_TEST
5525
/* Asserts that XML_SetReparseDeferralEnabled() returns XML_FALSE when given a
   NULL parser, regardless of the value passed for the flag. */
START_TEST(test_set_reparse_deferral_on_null_parser) {
  // small flag values, including 0 and 1
  assert_true(XML_SetReparseDeferralEnabled(NULL, 0) == XML_FALSE);
  assert_true(XML_SetReparseDeferralEnabled(NULL, 1) == XML_FALSE);
  assert_true(XML_SetReparseDeferralEnabled(NULL, 10) == XML_FALSE);
  assert_true(XML_SetReparseDeferralEnabled(NULL, 100) == XML_FALSE);
  // the extreme int values, explicitly cast to XML_Bool
  assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MIN)
              == XML_FALSE);
  assert_true(XML_SetReparseDeferralEnabled(NULL, (XML_Bool)INT_MAX)
              == XML_FALSE);
}
END_TEST
5537
5538START_TEST(test_set_reparse_deferral_on_the_fly) {
5539  const char *const pre = "<d><x attr='";
5540  const char *const end = "'></x>";
5541  char iiiiii[100];
5542  const int fillsize = (int)sizeof(iiiiii);
5543  memset(iiiiii, 'i', fillsize);
5544
5545  XML_Parser parser = XML_ParserCreate(NULL);
5546  assert_true(parser != NULL);
5547  assert_true(XML_SetReparseDeferralEnabled(parser, XML_TRUE));
5548
5549  CharData storage;
5550  CharData_Init(&storage);
5551  XML_SetUserData(parser, &storage);
5552  XML_SetStartElementHandler(parser, start_element_event_handler);
5553
5554  enum XML_Status status;
5555  // parse the start text
5556  status = XML_Parse(parser, pre, (int)strlen(pre), XML_FALSE);
5557  if (status != XML_STATUS_OK) {
5558    xml_failure(parser);
5559  }
5560  CharData_CheckXMLChars(&storage, XCS("d")); // first element should be done
5561
5562  // try to parse some 'i', but the token isn't finished
5563  status = XML_Parse(parser, iiiiii, fillsize, XML_FALSE);
5564  if (status != XML_STATUS_OK) {
5565    xml_failure(parser);
5566  }
5567  CharData_CheckXMLChars(&storage, XCS("d")); // *still* just the first one
5568
5569  // end the <x> token.
5570  status = XML_Parse(parser, end, (int)strlen(end), XML_FALSE);
5571  if (status != XML_STATUS_OK) {
5572    xml_failure(parser);
5573  }
5574  CharData_CheckXMLChars(&storage, XCS("d")); // not yet.
5575
5576  // now change the heuristic setting and add *no* data
5577  assert_true(XML_SetReparseDeferralEnabled(parser, XML_FALSE));
5578  // we avoid isFinal=XML_TRUE, because that would force-bypass the heuristic.
5579  status = XML_Parse(parser, "", 0, XML_FALSE);
5580  if (status != XML_STATUS_OK) {
5581    xml_failure(parser);
5582  }
5583  CharData_CheckXMLChars(&storage, XCS("dx"));
5584
5585  XML_ParserFree(parser);
5586}
5587END_TEST
5588
5589START_TEST(test_set_bad_reparse_option) {
5590  XML_Parser parser = XML_ParserCreate(NULL);
5591  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 2));
5592  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 3));
5593  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 99));
5594  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 127));
5595  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 128));
5596  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 129));
5597  assert_true(XML_FALSE == XML_SetReparseDeferralEnabled(parser, 255));
5598  assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 0));
5599  assert_true(XML_TRUE == XML_SetReparseDeferralEnabled(parser, 1));
5600  XML_ParserFree(parser);
5601}
5602END_TEST
5603
/* Allocation statistics maintained by counting_realloc(): the sum of all
   requested sizes, and the largest single request seen so far. */
static size_t g_totalAlloc = 0;
static size_t g_biggestAlloc = 0;

/* realloc() wrapper that records the requested size in the statistics above
   and then delegates to the C library.  The size is recorded before the
   call, so it is counted whether or not the allocation succeeds. */
static void *
counting_realloc(void *ptr, size_t size) {
  if (g_biggestAlloc < size) {
    g_biggestAlloc = size;
  }
  g_totalAlloc += size;
  return realloc(ptr, size);
}

/* malloc() counterpart of counting_realloc(): a counted allocation that has
   no previous block to resize. */
static void *
counting_malloc(size_t size) {
  void *const no_previous_block = NULL;
  return counting_realloc(no_previous_block, size);
}
5620
5621START_TEST(test_bypass_heuristic_when_close_to_bufsize) {
5622  if (g_chunkSize != 0) {
5623    // this test does not use SINGLE_BYTES, because it depends on very precise
5624    // buffer fills.
5625    return;
5626  }
5627  if (! g_reparseDeferralEnabledDefault) {
5628    return; // this test is irrelevant when the deferral heuristic is disabled.
5629  }
5630
5631  const int document_length = 65536;
5632  char *const document = (char *)malloc(document_length);
5633
5634  const XML_Memory_Handling_Suite memfuncs = {
5635      counting_malloc,
5636      counting_realloc,
5637      free,
5638  };
5639
5640  const int leading_list[] = {0, 3, 61, 96, 400, 401, 4000, 4010, 4099, -1};
5641  const int bigtoken_list[] = {3000, 4000, 4001, 4096, 4099, 5000, 20000, -1};
5642  const int fillsize_list[] = {131, 256, 399, 400, 401, 1025, 4099, 4321, -1};
5643
5644  for (const int *leading = leading_list; *leading >= 0; leading++) {
5645    for (const int *bigtoken = bigtoken_list; *bigtoken >= 0; bigtoken++) {
5646      for (const int *fillsize = fillsize_list; *fillsize >= 0; fillsize++) {
5647        set_subtest("leading=%d bigtoken=%d fillsize=%d", *leading, *bigtoken,
5648                    *fillsize);
5649        // start by checking that the test looks reasonably valid
5650        assert_true(*leading + *bigtoken <= document_length);
5651
5652        // put 'x' everywhere; some will be overwritten by elements.
5653        memset(document, 'x', document_length);
5654        // maybe add an initial tag
5655        if (*leading) {
5656          assert_true(*leading >= 3); // or the test case is invalid
5657          memcpy(document, "<a>", 3);
5658        }
5659        // add the large token
5660        document[*leading + 0] = '<';
5661        document[*leading + 1] = 'b';
5662        memset(&document[*leading + 2], ' ', *bigtoken - 2); // a spacy token
5663        document[*leading + *bigtoken - 1] = '>';
5664
5665        // 1 for 'b', plus 1 or 0 depending on the presence of 'a'
5666        const int expected_elem_total = 1 + (*leading ? 1 : 0);
5667
5668        XML_Parser parser = XML_ParserCreate_MM(NULL, &memfuncs, NULL);
5669        assert_true(parser != NULL);
5670
5671        CharData storage;
5672        CharData_Init(&storage);
5673        XML_SetUserData(parser, &storage);
5674        XML_SetStartElementHandler(parser, start_element_event_handler);
5675
5676        g_biggestAlloc = 0;
5677        g_totalAlloc = 0;
5678        int offset = 0;
5679        // fill data until the big token is covered (but not necessarily parsed)
5680        while (offset < *leading + *bigtoken) {
5681          assert_true(offset + *fillsize <= document_length);
5682          const enum XML_Status status
5683              = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5684          if (status != XML_STATUS_OK) {
5685            xml_failure(parser);
5686          }
5687          offset += *fillsize;
5688        }
5689        // Now, check that we've had a buffer allocation that could fit the
5690        // context bytes and our big token. In order to detect a special case,
5691        // we need to know how many bytes of our big token were included in the
5692        // first push that contained _any_ bytes of the big token:
5693        const int bigtok_first_chunk_bytes = *fillsize - (*leading % *fillsize);
5694        if (bigtok_first_chunk_bytes >= *bigtoken && XML_CONTEXT_BYTES == 0) {
5695          // Special case: we aren't saving any context, and the whole big token
5696          // was covered by a single fill, so Expat may have parsed directly
5697          // from our input pointer, without allocating an internal buffer.
5698        } else if (*leading < XML_CONTEXT_BYTES) {
5699          assert_true(g_biggestAlloc >= *leading + (size_t)*bigtoken);
5700        } else {
5701          assert_true(g_biggestAlloc >= XML_CONTEXT_BYTES + (size_t)*bigtoken);
5702        }
5703        // fill data until the big token is actually parsed
5704        while (storage.count < expected_elem_total) {
5705          const size_t alloc_before = g_totalAlloc;
5706          assert_true(offset + *fillsize <= document_length);
5707          const enum XML_Status status
5708              = XML_Parse(parser, &document[offset], *fillsize, XML_FALSE);
5709          if (status != XML_STATUS_OK) {
5710            xml_failure(parser);
5711          }
5712          offset += *fillsize;
5713          // since all the bytes of the big token are already in the buffer,
5714          // the bufsize ceiling should make us finish its parsing without any
5715          // further buffer allocations. We assume that there will be no other
5716          // large allocations in this test.
5717          assert_true(g_totalAlloc - alloc_before < 4096);
5718        }
5719        // test-the-test: was our alloc even called?
5720        assert_true(g_totalAlloc > 0);
5721        // test-the-test: there shouldn't be any extra start elements
5722        assert_true(storage.count == expected_elem_total);
5723
5724        XML_ParserFree(parser);
5725      }
5726    }
5727  }
5728  free(document);
5729}
5730END_TEST
5731
#if defined(XML_TESTING)
/* Pushes one very large start tag (7654321 bytes) into the parser using
   several different sequences of fill sizes, and checks how many bytes were
   scanned in parse attempts (g_bytesScanned).  When reparse deferral is
   enabled by default, each sequence has its own upper limit; in all cases the
   count must not exceed the sum of the bytes buffered at each XML_Parse()
   call.  Skipped when g_chunkSize is non-zero. */
START_TEST(test_varying_buffer_fills) {
  const int KiB = 1024;
  const int MiB = 1024 * KiB;
  const int document_length = 16 * MiB;
  const int big = 7654321; // arbitrarily chosen between 4 and 8 MiB

  if (g_chunkSize != 0) {
    return; // this test is slow, and doesn't use _XML_Parse_SINGLE_BYTES().
  }

  // Build the document: "<t", a long run of spaces, ">", then 'x' filler.
  char *const document = (char *)malloc(document_length);
  assert_true(document != NULL);
  memset(document, 'x', document_length);
  memcpy(document, "<t", 2);
  memset(&document[2], ' ', big - 2); // a very spacy token
  document[big - 1] = '>';

  // Each testcase is a list of buffer fill sizes, terminated by a value < 0.
  // When reparse deferral is enabled, the final (negated) value is the expected
  // maximum number of bytes scanned in parse attempts.
  const int testcases[][30] = {
      {8 * MiB, -8 * MiB},
      {4 * MiB, 4 * MiB, -12 * MiB}, // try at 4MB, then 8MB = 12 MB total
      // zero-size fills shouldn't trigger the bypass
      {4 * MiB, 0, 4 * MiB, -12 * MiB},
      {4 * MiB, 0, 0, 4 * MiB, -12 * MiB},
      {4 * MiB, 0, 1 * MiB, 0, 3 * MiB, -12 * MiB},
      // try to hit the buffer ceiling only once (at the end)
      {4 * MiB, 2 * MiB, 1 * MiB, 512 * KiB, 256 * KiB, 256 * KiB, -12 * MiB},
      // try to hit the same buffer ceiling multiple times
      {4 * MiB + 1, 2 * MiB, 1 * MiB, 512 * KiB, -25 * MiB},

      // try to hit every ceiling, by always landing 1K shy of the buffer size
      {1 * KiB, 2 * KiB, 4 * KiB, 8 * KiB, 16 * KiB, 32 * KiB, 64 * KiB,
       128 * KiB, 256 * KiB, 512 * KiB, 1 * MiB, 2 * MiB, 4 * MiB, -16 * MiB},

      // try to avoid every ceiling, by always landing 1B past the buffer size
      // the normal 2x heuristic threshold still forces parse attempts.
      {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
       2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
       8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
       32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
       128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
       512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
       2 * MiB, 4 * MiB,     // will attempt 8MiB + 1 ==> total 10M + 682K + 7
       -(10 * MiB + 682 * KiB + 7)},
      // try to avoid every ceiling again, except on our last fill.
      {2 * KiB + 1,          // will attempt 2KiB + 1 ==> total 2KiB + 1
       2 * KiB, 4 * KiB,     // will attempt 8KiB + 1 ==> total 10KiB + 2
       8 * KiB, 16 * KiB,    // will attempt 32KiB + 1 ==> total 42KiB + 3
       32 * KiB, 64 * KiB,   // will attempt 128KiB + 1 ==> total 170KiB + 4
       128 * KiB, 256 * KiB, // will attempt 512KiB + 1 ==> total 682KiB + 5
       512 * KiB, 1 * MiB,   // will attempt 2MiB + 1 ==> total 2M + 682K + 6
       2 * MiB, 4 * MiB - 1, // will attempt 8MiB ==> total 10M + 682K + 6
       -(10 * MiB + 682 * KiB + 6)},

      // try to hit ceilings on the way multiple times
      {512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 1 MiB buffer
       512 * KiB + 1, 256 * KiB, 128 * KiB, 128 * KiB - 1, // 2 MiB buffer
       1 * MiB + 1, 512 * KiB, 256 * KiB, 256 * KiB - 1,   // 4 MiB buffer
       2 * MiB + 1, 1 * MiB, 512 * KiB,                    // 8 MiB buffer
       // we'll make a parse attempt at every parse call
       -(45 * MiB + 12)},
  };
  const int testcount = sizeof(testcases) / sizeof(testcases[0]);
  for (int test_i = 0; test_i < testcount; test_i++) {
    const int *fillsize = testcases[test_i];
    set_subtest("#%d {%d %d %d %d ...}", test_i, fillsize[0], fillsize[1],
                fillsize[2], fillsize[3]);
    XML_Parser parser = XML_ParserCreate(NULL);
    assert_true(parser != NULL);

    CharData storage;
    CharData_Init(&storage);
    XML_SetUserData(parser, &storage);
    XML_SetStartElementHandler(parser, start_element_event_handler);

    g_bytesScanned = 0;
    int worstcase_bytes = 0; // sum of (buffered bytes at each XML_Parse call)
    int offset = 0;
    // Walk the fill sizes up to (not including) the negative terminator;
    // afterwards, `fillsize` is left pointing at that terminator.
    for (; *fillsize >= 0; fillsize++) {
      assert_true(offset + *fillsize <= document_length); // or test is invalid
      if (XML_Parse(parser, &document[offset], *fillsize, XML_FALSE)
          != XML_STATUS_OK) {
        xml_failure(parser);
      }
      offset += *fillsize;
      assert_true(offset <= INT_MAX - worstcase_bytes); // avoid overflow
      worstcase_bytes += offset; // we might've tried to parse all pending bytes
    }
    assert_true(storage.count == 1); // the big token should've been parsed
    assert_true(g_bytesScanned > 0); // test-the-test: does our counter work?

    // The terminator holds the negated scan limit; that limit only applies
    // when the heuristic is enabled, since only then may XML_Parse calls have
    // deferred reparsing.
    const unsigned scan_limit = (unsigned)(-*fillsize);
    if (g_reparseDeferralEnabledDefault && g_bytesScanned > scan_limit) {
      fprintf(stderr, "bytes scanned in parse attempts: actual=%u limit=%u \n",
              g_bytesScanned, scan_limit);
      fail("too many bytes scanned in parse attempts");
    }
    assert_true(g_bytesScanned <= (unsigned)worstcase_bytes);

    XML_ParserFree(parser);
  }
  free(document);
}
END_TEST
#endif
5846
5847void
5848make_basic_test_case(Suite *s) {
5849  TCase *tc_basic = tcase_create("basic tests");
5850
5851  suite_add_tcase(s, tc_basic);
5852  tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
5853
5854  tcase_add_test(tc_basic, test_nul_byte);
5855  tcase_add_test(tc_basic, test_u0000_char);
5856  tcase_add_test(tc_basic, test_siphash_self);
5857  tcase_add_test(tc_basic, test_siphash_spec);
5858  tcase_add_test(tc_basic, test_bom_utf8);
5859  tcase_add_test(tc_basic, test_bom_utf16_be);
5860  tcase_add_test(tc_basic, test_bom_utf16_le);
5861  tcase_add_test(tc_basic, test_nobom_utf16_le);
5862  tcase_add_test(tc_basic, test_hash_collision);
5863  tcase_add_test(tc_basic, test_illegal_utf8);
5864  tcase_add_test(tc_basic, test_utf8_auto_align);
5865  tcase_add_test(tc_basic, test_utf16);
5866  tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
5867  tcase_add_test(tc_basic, test_not_utf16);
5868  tcase_add_test(tc_basic, test_bad_encoding);
5869  tcase_add_test(tc_basic, test_latin1_umlauts);
5870  tcase_add_test(tc_basic, test_long_utf8_character);
5871  tcase_add_test(tc_basic, test_long_latin1_attribute);
5872  tcase_add_test(tc_basic, test_long_ascii_attribute);
5873  /* Regression test for SF bug #491986. */
5874  tcase_add_test(tc_basic, test_danish_latin1);
5875  /* Regression test for SF bug #514281. */
5876  tcase_add_test(tc_basic, test_french_charref_hexidecimal);
5877  tcase_add_test(tc_basic, test_french_charref_decimal);
5878  tcase_add_test(tc_basic, test_french_latin1);
5879  tcase_add_test(tc_basic, test_french_utf8);
5880  tcase_add_test(tc_basic, test_utf8_false_rejection);
5881  tcase_add_test(tc_basic, test_line_number_after_parse);
5882  tcase_add_test(tc_basic, test_column_number_after_parse);
5883  tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
5884  tcase_add_test(tc_basic, test_line_number_after_error);
5885  tcase_add_test(tc_basic, test_column_number_after_error);
5886  tcase_add_test(tc_basic, test_really_long_lines);
5887  tcase_add_test(tc_basic, test_really_long_encoded_lines);
5888  tcase_add_test(tc_basic, test_end_element_events);
5889  tcase_add_test(tc_basic, test_helper_is_whitespace_normalized);
5890  tcase_add_test(tc_basic, test_attr_whitespace_normalization);
5891  tcase_add_test(tc_basic, test_xmldecl_misplaced);
5892  tcase_add_test(tc_basic, test_xmldecl_invalid);
5893  tcase_add_test(tc_basic, test_xmldecl_missing_attr);
5894  tcase_add_test(tc_basic, test_xmldecl_missing_value);
5895  tcase_add_test__if_xml_ge(tc_basic, test_unknown_encoding_internal_entity);
5896  tcase_add_test(tc_basic, test_unrecognised_encoding_internal_entity);
5897  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_encoding);
5898  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_no_handler);
5899  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_set_bom);
5900  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding);
5901  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_bad_encoding_2);
5902  tcase_add_test(tc_basic, test_wfc_undeclared_entity_unread_external_subset);
5903  tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
5904  tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
5905  tcase_add_test(tc_basic,
5906                 test_wfc_undeclared_entity_with_external_subset_standalone);
5907  tcase_add_test(tc_basic, test_entity_with_external_subset_unless_standalone);
5908  tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
5909  tcase_add_test(tc_basic, test_not_standalone_handler_reject);
5910  tcase_add_test(tc_basic, test_not_standalone_handler_accept);
5911  tcase_add_test__if_xml_ge(tc_basic, test_wfc_no_recursive_entity_refs);
5912  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_invalid_parse);
5913  tcase_add_test__if_xml_ge(tc_basic, test_dtd_default_handling);
5914  tcase_add_test(tc_basic, test_dtd_attr_handling);
5915  tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
5916  tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
5917  tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
5918  tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
5919  tcase_add_test(tc_basic, test_repeated_stop_parser_between_char_data_calls);
5920  tcase_add_test(tc_basic, test_good_cdata_ascii);
5921  tcase_add_test(tc_basic, test_good_cdata_utf16);
5922  tcase_add_test(tc_basic, test_good_cdata_utf16_le);
5923  tcase_add_test(tc_basic, test_long_cdata_utf16);
5924  tcase_add_test(tc_basic, test_multichar_cdata_utf16);
5925  tcase_add_test(tc_basic, test_utf16_bad_surrogate_pair);
5926  tcase_add_test(tc_basic, test_bad_cdata);
5927  tcase_add_test(tc_basic, test_bad_cdata_utf16);
5928  tcase_add_test(tc_basic, test_stop_parser_between_cdata_calls);
5929  tcase_add_test(tc_basic, test_suspend_parser_between_cdata_calls);
5930  tcase_add_test(tc_basic, test_memory_allocation);
5931  tcase_add_test__if_xml_ge(tc_basic, test_default_current);
5932  tcase_add_test(tc_basic, test_dtd_elements);
5933  tcase_add_test(tc_basic, test_dtd_elements_nesting);
5934  tcase_add_test__ifdef_xml_dtd(tc_basic, test_set_foreign_dtd);
5935  tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_not_standalone);
5936  tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_foreign_dtd);
5937  tcase_add_test__ifdef_xml_dtd(tc_basic, test_foreign_dtd_with_doctype);
5938  tcase_add_test__ifdef_xml_dtd(tc_basic,
5939                                test_foreign_dtd_without_external_subset);
5940  tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_foreign_dtd);
5941  tcase_add_test(tc_basic, test_set_base);
5942  tcase_add_test(tc_basic, test_attributes);
5943  tcase_add_test__if_xml_ge(tc_basic, test_reset_in_entity);
5944  tcase_add_test(tc_basic, test_resume_invalid_parse);
5945  tcase_add_test(tc_basic, test_resume_resuspended);
5946  tcase_add_test(tc_basic, test_cdata_default);
5947  tcase_add_test(tc_basic, test_subordinate_reset);
5948  tcase_add_test(tc_basic, test_subordinate_suspend);
5949  tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_suspend);
5950  tcase_add_test__if_xml_ge(tc_basic, test_subordinate_xdecl_abort);
5951  tcase_add_test__ifdef_xml_dtd(tc_basic,
5952                                test_ext_entity_invalid_suspended_parse);
5953  tcase_add_test(tc_basic, test_explicit_encoding);
5954  tcase_add_test(tc_basic, test_trailing_cr);
5955  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_cr);
5956  tcase_add_test(tc_basic, test_trailing_rsqb);
5957  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_trailing_rsqb);
5958  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_good_cdata);
5959  tcase_add_test__ifdef_xml_dtd(tc_basic, test_user_parameters);
5960  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_ref_parameter);
5961  tcase_add_test(tc_basic, test_empty_parse);
5962  tcase_add_test(tc_basic, test_get_buffer_1);
5963  tcase_add_test(tc_basic, test_get_buffer_2);
5964#if XML_CONTEXT_BYTES > 0
5965  tcase_add_test(tc_basic, test_get_buffer_3_overflow);
5966#endif
5967  tcase_add_test(tc_basic, test_buffer_can_grow_to_max);
5968  tcase_add_test(tc_basic, test_getbuffer_allocates_on_zero_len);
5969  tcase_add_test(tc_basic, test_byte_info_at_end);
5970  tcase_add_test(tc_basic, test_byte_info_at_error);
5971  tcase_add_test(tc_basic, test_byte_info_at_cdata);
5972  tcase_add_test(tc_basic, test_predefined_entities);
5973  tcase_add_test__ifdef_xml_dtd(tc_basic, test_invalid_tag_in_dtd);
5974  tcase_add_test(tc_basic, test_not_predefined_entities);
5975  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section);
5976  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16);
5977  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ignore_section_utf16_be);
5978  tcase_add_test__ifdef_xml_dtd(tc_basic, test_bad_ignore_section);
5979  tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_bom_consumed);
5980  tcase_add_test__ifdef_xml_dtd(tc_basic, test_external_entity_values);
5981  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_not_standalone);
5982  tcase_add_test__ifdef_xml_dtd(tc_basic, test_ext_entity_value_abort);
5983  tcase_add_test(tc_basic, test_bad_public_doctype);
5984  tcase_add_test(tc_basic, test_attribute_enum_value);
5985  tcase_add_test(tc_basic, test_predefined_entity_redefinition);
5986  tcase_add_test__ifdef_xml_dtd(tc_basic, test_dtd_stop_processing);
5987  tcase_add_test(tc_basic, test_public_notation_no_sysid);
5988  tcase_add_test(tc_basic, test_nested_groups);
5989  tcase_add_test(tc_basic, test_group_choice);
5990  tcase_add_test(tc_basic, test_standalone_parameter_entity);
5991  tcase_add_test__ifdef_xml_dtd(tc_basic, test_skipped_parameter_entity);
5992  tcase_add_test__ifdef_xml_dtd(tc_basic,
5993                                test_recursive_external_parameter_entity);
5994  tcase_add_test__ifdef_xml_dtd(tc_basic,
5995                                test_recursive_external_parameter_entity_2);
5996  tcase_add_test(tc_basic, test_undefined_ext_entity_in_external_dtd);
5997  tcase_add_test(tc_basic, test_suspend_xdecl);
5998  tcase_add_test(tc_basic, test_abort_epilog);
5999  tcase_add_test(tc_basic, test_abort_epilog_2);
6000  tcase_add_test(tc_basic, test_suspend_epilog);
6001  tcase_add_test(tc_basic, test_suspend_in_sole_empty_tag);
6002  tcase_add_test(tc_basic, test_unfinished_epilog);
6003  tcase_add_test(tc_basic, test_partial_char_in_epilog);
6004  tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_internal_entity);
6005  tcase_add_test__ifdef_xml_dtd(tc_basic,
6006                                test_suspend_resume_internal_entity_issue_629);
6007  tcase_add_test__ifdef_xml_dtd(tc_basic, test_resume_entity_with_syntax_error);
6008  tcase_add_test__ifdef_xml_dtd(tc_basic, test_suspend_resume_parameter_entity);
6009  tcase_add_test(tc_basic, test_restart_on_error);
6010  tcase_add_test(tc_basic, test_reject_lt_in_attribute_value);
6011  tcase_add_test(tc_basic, test_reject_unfinished_param_in_att_value);
6012  tcase_add_test(tc_basic, test_trailing_cr_in_att_value);
6013  tcase_add_test(tc_basic, test_standalone_internal_entity);
6014  tcase_add_test(tc_basic, test_skipped_external_entity);
6015  tcase_add_test(tc_basic, test_skipped_null_loaded_ext_entity);
6016  tcase_add_test(tc_basic, test_skipped_unloaded_ext_entity);
6017  tcase_add_test__ifdef_xml_dtd(tc_basic, test_param_entity_with_trailing_cr);
6018  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity);
6019  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_2);
6020  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_3);
6021  tcase_add_test__if_xml_ge(tc_basic, test_invalid_character_entity_4);
6022  tcase_add_test(tc_basic, test_pi_handled_in_default);
6023  tcase_add_test(tc_basic, test_comment_handled_in_default);
6024  tcase_add_test(tc_basic, test_pi_yml);
6025  tcase_add_test(tc_basic, test_pi_xnl);
6026  tcase_add_test(tc_basic, test_pi_xmm);
6027  tcase_add_test(tc_basic, test_utf16_pi);
6028  tcase_add_test(tc_basic, test_utf16_be_pi);
6029  tcase_add_test(tc_basic, test_utf16_be_comment);
6030  tcase_add_test(tc_basic, test_utf16_le_comment);
6031  tcase_add_test(tc_basic, test_missing_encoding_conversion_fn);
6032  tcase_add_test(tc_basic, test_failing_encoding_conversion_fn);
6033  tcase_add_test(tc_basic, test_unknown_encoding_success);
6034  tcase_add_test(tc_basic, test_unknown_encoding_bad_name);
6035  tcase_add_test(tc_basic, test_unknown_encoding_bad_name_2);
6036  tcase_add_test(tc_basic, test_unknown_encoding_long_name_1);
6037  tcase_add_test(tc_basic, test_unknown_encoding_long_name_2);
6038  tcase_add_test(tc_basic, test_invalid_unknown_encoding);
6039  tcase_add_test(tc_basic, test_unknown_ascii_encoding_ok);
6040  tcase_add_test(tc_basic, test_unknown_ascii_encoding_fail);
6041  tcase_add_test(tc_basic, test_unknown_encoding_invalid_length);
6042  tcase_add_test(tc_basic, test_unknown_encoding_invalid_topbit);
6043  tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate);
6044  tcase_add_test(tc_basic, test_unknown_encoding_invalid_high);
6045  tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value);
6046  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom);
6047  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom);
6048  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2);
6049  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom2);
6050  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_be);
6051  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_le);
6052  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf16_unknown);
6053  tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_utf8_non_bom);
6054  tcase_add_test(tc_basic, test_utf8_in_cdata_section);
6055  tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
6056  tcase_add_test(tc_basic, test_utf8_in_start_tags);
6057  tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
6058  tcase_add_test(tc_basic, test_utf16_attribute);
6059  tcase_add_test(tc_basic, test_utf16_second_attr);
6060  tcase_add_test(tc_basic, test_attr_after_solidus);
6061  tcase_add_test__ifdef_xml_dtd(tc_basic, test_utf16_pe);
6062  tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
6063  tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
6064  tcase_add_test(tc_basic, test_bad_doctype);
6065  tcase_add_test(tc_basic, test_bad_doctype_utf8);
6066  tcase_add_test(tc_basic, test_bad_doctype_utf16);
6067  tcase_add_test(tc_basic, test_bad_doctype_plus);
6068  tcase_add_test(tc_basic, test_bad_doctype_star);
6069  tcase_add_test(tc_basic, test_bad_doctype_query);
6070  tcase_add_test__ifdef_xml_dtd(tc_basic, test_unknown_encoding_bad_ignore);
6071  tcase_add_test(tc_basic, test_entity_in_utf16_be_attr);
6072  tcase_add_test(tc_basic, test_entity_in_utf16_le_attr);
6073  tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_be);
6074  tcase_add_test__ifdef_xml_dtd(tc_basic, test_entity_public_utf16_le);
6075  tcase_add_test(tc_basic, test_short_doctype);
6076  tcase_add_test(tc_basic, test_short_doctype_2);
6077  tcase_add_test(tc_basic, test_short_doctype_3);
6078  tcase_add_test(tc_basic, test_long_doctype);
6079  tcase_add_test(tc_basic, test_bad_entity);
6080  tcase_add_test(tc_basic, test_bad_entity_2);
6081  tcase_add_test(tc_basic, test_bad_entity_3);
6082  tcase_add_test(tc_basic, test_bad_entity_4);
6083  tcase_add_test(tc_basic, test_bad_notation);
6084  tcase_add_test(tc_basic, test_default_doctype_handler);
6085  tcase_add_test(tc_basic, test_empty_element_abort);
6086  tcase_add_test__ifdef_xml_dtd(tc_basic,
6087                                test_pool_integrity_with_unfinished_attr);
6088  tcase_add_test__if_xml_ge(tc_basic, test_nested_entity_suspend);
6089#if defined(XML_TESTING)
6090  tcase_add_test(tc_basic, test_big_tokens_scale_linearly);
6091#endif
6092  tcase_add_test(tc_basic, test_set_reparse_deferral);
6093  tcase_add_test(tc_basic, test_reparse_deferral_is_inherited);
6094  tcase_add_test(tc_basic, test_set_reparse_deferral_on_null_parser);
6095  tcase_add_test(tc_basic, test_set_reparse_deferral_on_the_fly);
6096  tcase_add_test(tc_basic, test_set_bad_reparse_option);
6097  tcase_add_test(tc_basic, test_bypass_heuristic_when_close_to_bufsize);
6098#if defined(XML_TESTING)
6099  tcase_add_test(tc_basic, test_varying_buffer_fills);
6100#endif
6101}
6102