1/* Tests in the "namespace" test case for the Expat test suite
2                            __  __            _
3                         ___\ \/ /_ __   __ _| |_
4                        / _ \\  /| '_ \ / _` | __|
5                       |  __//  \| |_) | (_| | |_
6                        \___/_/\_\ .__/ \__,_|\__|
7                                 |_| XML parser
8
9   Copyright (c) 2001-2006 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
10   Copyright (c) 2003      Greg Stein <gstein@users.sourceforge.net>
11   Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
12   Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net>
13   Copyright (c) 2016-2023 Sebastian Pipping <sebastian@pipping.org>
14   Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk>
15   Copyright (c) 2017      Joe Orton <jorton@redhat.com>
16   Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
17   Copyright (c) 2018      Marco Maggi <marco.maggi-ipsu@poste.it>
18   Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
19   Copyright (c) 2020      Tim Gates <tim.gates@iress.com>
20   Copyright (c) 2021      Donghee Na <donghee.na@python.org>
21   Copyright (c) 2023      Sony Corporation / Snild Dolkow <snild@sony.com>
22   Licensed under the MIT license:
23
24   Permission is  hereby granted,  free of charge,  to any  person obtaining
25   a  copy  of  this  software   and  associated  documentation  files  (the
26   "Software"),  to  deal in  the  Software  without restriction,  including
27   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
28   distribute, sublicense, and/or sell copies of the Software, and to permit
29   persons  to whom  the Software  is  furnished to  do so,  subject to  the
30   following conditions:
31
32   The above copyright  notice and this permission notice  shall be included
33   in all copies or substantial portions of the Software.
34
35   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
36   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
37   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
38   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
39   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
40   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
41   USE OR OTHER DEALINGS IN THE SOFTWARE.
42*/
43
44#include "expat_config.h"
45
46#include <string.h>
47
48#include "expat.h"
49#include "internal.h"
50#include "minicheck.h"
51#include "common.h"
52#include "dummy.h"
53#include "handlers.h"
54#include "ns_tests.h"
55
56static void
57namespace_setup(void) {
58  g_parser = XML_ParserCreateNS(NULL, XCS(' '));
59  if (g_parser == NULL)
60    fail("Parser not created.");
61}
62
/* Fixture teardown: the namespace tests need nothing beyond the common
 * teardown routine, so simply delegate to it.
 */
static void
namespace_teardown(void) {
  basic_teardown();
}
67
68START_TEST(test_return_ns_triplet) {
69  const char *text = "<foo:e xmlns:foo='http://example.org/' bar:a='12'\n"
70                     "       xmlns:bar='http://example.org/'>";
71  const char *epilog = "</foo:e>";
72  const XML_Char *elemstr[]
73      = {XCS("http://example.org/ e foo"), XCS("http://example.org/ a bar")};
74  XML_SetReturnNSTriplet(g_parser, XML_TRUE);
75  XML_SetUserData(g_parser, (void *)elemstr);
76  XML_SetElementHandler(g_parser, triplet_start_checker, triplet_end_checker);
77  XML_SetNamespaceDeclHandler(g_parser, dummy_start_namespace_decl_handler,
78                              dummy_end_namespace_decl_handler);
79  g_triplet_start_flag = XML_FALSE;
80  g_triplet_end_flag = XML_FALSE;
81  init_dummy_handlers();
82  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
83      == XML_STATUS_ERROR)
84    xml_failure(g_parser);
85  /* Check that unsetting "return triplets" fails while still parsing */
86  XML_SetReturnNSTriplet(g_parser, XML_FALSE);
87  if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
88      == XML_STATUS_ERROR)
89    xml_failure(g_parser);
90  if (! g_triplet_start_flag)
91    fail("triplet_start_checker not invoked");
92  if (! g_triplet_end_flag)
93    fail("triplet_end_checker not invoked");
94  if (get_dummy_handler_flags()
95      != (DUMMY_START_NS_DECL_HANDLER_FLAG | DUMMY_END_NS_DECL_HANDLER_FLAG))
96    fail("Namespace handlers not called");
97}
98END_TEST
99
100/* Test that the parsing status is correctly reset by XML_ParserReset().
101 * We use test_return_ns_triplet() for our example parse to improve
102 * coverage of tidying up code executed.
103 */
104START_TEST(test_ns_parser_reset) {
105  XML_ParsingStatus status;
106
107  XML_GetParsingStatus(g_parser, &status);
108  if (status.parsing != XML_INITIALIZED)
109    fail("parsing status doesn't start INITIALIZED");
110  test_return_ns_triplet();
111  XML_GetParsingStatus(g_parser, &status);
112  if (status.parsing != XML_FINISHED)
113    fail("parsing status doesn't end FINISHED");
114  XML_ParserReset(g_parser, NULL);
115  XML_GetParsingStatus(g_parser, &status);
116  if (status.parsing != XML_INITIALIZED)
117    fail("parsing status doesn't reset to INITIALIZED");
118}
119END_TEST
120
121static void
122run_ns_tagname_overwrite_test(const char *text, const XML_Char *result) {
123  CharData storage;
124  CharData_Init(&storage);
125  XML_SetUserData(g_parser, &storage);
126  XML_SetElementHandler(g_parser, overwrite_start_checker,
127                        overwrite_end_checker);
128  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
129      == XML_STATUS_ERROR)
130    xml_failure(g_parser);
131  CharData_CheckXMLChars(&storage, result);
132}
133
134/* Regression test for SF bug #566334. */
135START_TEST(test_ns_tagname_overwrite) {
136  const char *text = "<n:e xmlns:n='http://example.org/'>\n"
137                     "  <n:f n:attr='foo'/>\n"
138                     "  <n:g n:attr2='bar'/>\n"
139                     "</n:e>";
140  const XML_Char *result = XCS("start http://example.org/ e\n")
141      XCS("start http://example.org/ f\n")
142          XCS("attribute http://example.org/ attr\n")
143              XCS("end http://example.org/ f\n")
144                  XCS("start http://example.org/ g\n")
145                      XCS("attribute http://example.org/ attr2\n")
146                          XCS("end http://example.org/ g\n")
147                              XCS("end http://example.org/ e\n");
148  run_ns_tagname_overwrite_test(text, result);
149}
150END_TEST
151
152/* Regression test for SF bug #566334. */
153START_TEST(test_ns_tagname_overwrite_triplet) {
154  const char *text = "<n:e xmlns:n='http://example.org/'>\n"
155                     "  <n:f n:attr='foo'/>\n"
156                     "  <n:g n:attr2='bar'/>\n"
157                     "</n:e>";
158  const XML_Char *result = XCS("start http://example.org/ e n\n")
159      XCS("start http://example.org/ f n\n")
160          XCS("attribute http://example.org/ attr n\n")
161              XCS("end http://example.org/ f n\n")
162                  XCS("start http://example.org/ g n\n")
163                      XCS("attribute http://example.org/ attr2 n\n")
164                          XCS("end http://example.org/ g n\n")
165                              XCS("end http://example.org/ e n\n");
166  XML_SetReturnNSTriplet(g_parser, XML_TRUE);
167  run_ns_tagname_overwrite_test(text, result);
168}
169END_TEST
170
171/* Regression test for SF bug #620343. */
172START_TEST(test_start_ns_clears_start_element) {
173  /* This needs to use separate start/end tags; using the empty tag
174     syntax doesn't cause the problematic path through Expat to be
175     taken.
176  */
177  const char *text = "<e xmlns='http://example.org/'></e>";
178
179  XML_SetStartElementHandler(g_parser, start_element_fail);
180  XML_SetStartNamespaceDeclHandler(g_parser, start_ns_clearing_start_element);
181  XML_SetEndNamespaceDeclHandler(g_parser, dummy_end_namespace_decl_handler);
182  XML_UseParserAsHandlerArg(g_parser);
183  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
184      == XML_STATUS_ERROR)
185    xml_failure(g_parser);
186}
187END_TEST
188
189/* Regression test for SF bug #616863. */
190START_TEST(test_default_ns_from_ext_subset_and_ext_ge) {
191  const char *text = "<?xml version='1.0'?>\n"
192                     "<!DOCTYPE doc SYSTEM 'http://example.org/doc.dtd' [\n"
193                     "  <!ENTITY en SYSTEM 'http://example.org/entity.ent'>\n"
194                     "]>\n"
195                     "<doc xmlns='http://example.org/ns1'>\n"
196                     "&en;\n"
197                     "</doc>";
198
199  XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
200  XML_SetExternalEntityRefHandler(g_parser, external_entity_handler);
201  /* We actually need to set this handler to tickle this bug. */
202  XML_SetStartElementHandler(g_parser, dummy_start_element);
203  XML_SetUserData(g_parser, NULL);
204  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
205      == XML_STATUS_ERROR)
206    xml_failure(g_parser);
207}
208END_TEST
209
210/* Regression test #1 for SF bug #673791. */
211START_TEST(test_ns_prefix_with_empty_uri_1) {
212  const char *text = "<doc xmlns:prefix='http://example.org/'>\n"
213                     "  <e xmlns:prefix=''/>\n"
214                     "</doc>";
215
216  expect_failure(text, XML_ERROR_UNDECLARING_PREFIX,
217                 "Did not report re-setting namespace"
218                 " URI with prefix to ''.");
219}
220END_TEST
221
222/* Regression test #2 for SF bug #673791. */
223START_TEST(test_ns_prefix_with_empty_uri_2) {
224  const char *text = "<?xml version='1.0'?>\n"
225                     "<docelem xmlns:pre=''/>";
226
227  expect_failure(text, XML_ERROR_UNDECLARING_PREFIX,
228                 "Did not report setting namespace URI with prefix to ''.");
229}
230END_TEST
231
232/* Regression test #3 for SF bug #673791. */
233START_TEST(test_ns_prefix_with_empty_uri_3) {
234  const char *text = "<!DOCTYPE doc [\n"
235                     "  <!ELEMENT doc EMPTY>\n"
236                     "  <!ATTLIST doc\n"
237                     "    xmlns:prefix CDATA ''>\n"
238                     "]>\n"
239                     "<doc/>";
240
241  expect_failure(text, XML_ERROR_UNDECLARING_PREFIX,
242                 "Didn't report attr default setting NS w/ prefix to ''.");
243}
244END_TEST
245
246/* Regression test #4 for SF bug #673791. */
247START_TEST(test_ns_prefix_with_empty_uri_4) {
248  const char *text = "<!DOCTYPE doc [\n"
249                     "  <!ELEMENT prefix:doc EMPTY>\n"
250                     "  <!ATTLIST prefix:doc\n"
251                     "    xmlns:prefix CDATA 'http://example.org/'>\n"
252                     "]>\n"
253                     "<prefix:doc/>";
254  /* Packaged info expected by the end element handler;
255     the weird structuring lets us reuse the triplet_end_checker()
256     function also used for another test. */
257  const XML_Char *elemstr[] = {XCS("http://example.org/ doc prefix")};
258  XML_SetReturnNSTriplet(g_parser, XML_TRUE);
259  XML_SetUserData(g_parser, (void *)elemstr);
260  XML_SetEndElementHandler(g_parser, triplet_end_checker);
261  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
262      == XML_STATUS_ERROR)
263    xml_failure(g_parser);
264}
265END_TEST
266
267/* Test with non-xmlns prefix */
268START_TEST(test_ns_unbound_prefix) {
269  const char *text = "<!DOCTYPE doc [\n"
270                     "  <!ELEMENT prefix:doc EMPTY>\n"
271                     "  <!ATTLIST prefix:doc\n"
272                     "    notxmlns:prefix CDATA 'http://example.org/'>\n"
273                     "]>\n"
274                     "<prefix:doc/>";
275
276  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
277      != XML_STATUS_ERROR)
278    fail("Unbound prefix incorrectly passed");
279  if (XML_GetErrorCode(g_parser) != XML_ERROR_UNBOUND_PREFIX)
280    xml_failure(g_parser);
281}
282END_TEST
283
284START_TEST(test_ns_default_with_empty_uri) {
285  const char *text = "<doc xmlns='http://example.org/'>\n"
286                     "  <e xmlns=''/>\n"
287                     "</doc>";
288  /* Add some handlers to exercise extra code paths */
289  XML_SetStartNamespaceDeclHandler(g_parser,
290                                   dummy_start_namespace_decl_handler);
291  XML_SetEndNamespaceDeclHandler(g_parser, dummy_end_namespace_decl_handler);
292  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
293      == XML_STATUS_ERROR)
294    xml_failure(g_parser);
295}
296END_TEST
297
298/* Regression test for SF bug #692964: two prefixes for one namespace. */
299START_TEST(test_ns_duplicate_attrs_diff_prefixes) {
300  const char *text = "<doc xmlns:a='http://example.org/a'\n"
301                     "     xmlns:b='http://example.org/a'\n"
302                     "     a:a='v' b:a='v' />";
303  expect_failure(text, XML_ERROR_DUPLICATE_ATTRIBUTE,
304                 "did not report multiple attributes with same URI+name");
305}
306END_TEST
307
308START_TEST(test_ns_duplicate_hashes) {
309  /* The hash of an attribute is calculated as the hash of its URI
310   * concatenated with a space followed by its name (after the
311   * colon).  We wish to generate attributes with the same hash
312   * value modulo the attribute table size so that we can check that
313   * the attribute hash table works correctly.  The attribute hash
314   * table size will be the smallest power of two greater than the
315   * number of attributes, but at least eight.  There is
316   * unfortunately no programmatic way of getting the hash or the
317   * table size at user level, but the test code coverage percentage
318   * will drop if the hashes cease to point to the same row.
319   *
320   * The cunning plan is to have few enough attributes to have a
321   * reliable table size of 8, and have the single letter attribute
322   * names be 8 characters apart, producing a hash which will be the
323   * same modulo 8.
324   */
325  const char *text = "<doc xmlns:a='http://example.org/a'\n"
326                     "     a:a='v' a:i='w' />";
327  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
328      == XML_STATUS_ERROR)
329    xml_failure(g_parser);
330}
331END_TEST
332
333/* Regression test for SF bug #695401: unbound prefix. */
334START_TEST(test_ns_unbound_prefix_on_attribute) {
335  const char *text = "<doc a:attr=''/>";
336  expect_failure(text, XML_ERROR_UNBOUND_PREFIX,
337                 "did not report unbound prefix on attribute");
338}
339END_TEST
340
341/* Regression test for SF bug #695401: unbound prefix. */
342START_TEST(test_ns_unbound_prefix_on_element) {
343  const char *text = "<a:doc/>";
344  expect_failure(text, XML_ERROR_UNBOUND_PREFIX,
345                 "did not report unbound prefix on element");
346}
347END_TEST
348
349/* Test that long element names with namespaces are handled correctly */
350START_TEST(test_ns_long_element) {
351  const char *text
352      = "<foo:thisisalongenoughelementnametotriggerareallocation\n"
353        " xmlns:foo='http://example.org/' bar:a='12'\n"
354        " xmlns:bar='http://example.org/'>"
355        "</foo:thisisalongenoughelementnametotriggerareallocation>";
356  const XML_Char *elemstr[]
357      = {XCS("http://example.org/")
358             XCS(" thisisalongenoughelementnametotriggerareallocation foo"),
359         XCS("http://example.org/ a bar")};
360
361  XML_SetReturnNSTriplet(g_parser, XML_TRUE);
362  XML_SetUserData(g_parser, (void *)elemstr);
363  XML_SetElementHandler(g_parser, triplet_start_checker, triplet_end_checker);
364  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
365      == XML_STATUS_ERROR)
366    xml_failure(g_parser);
367}
368END_TEST
369
370/* Test mixed population of prefixed and unprefixed attributes */
371START_TEST(test_ns_mixed_prefix_atts) {
372  const char *text = "<e a='12' bar:b='13'\n"
373                     " xmlns:bar='http://example.org/'>"
374                     "</e>";
375
376  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
377      == XML_STATUS_ERROR)
378    xml_failure(g_parser);
379}
380END_TEST
381
382/* Test having a long namespaced element name inside a short one.
383 * This exercises some internal buffer reallocation that is shared
384 * across elements with the same namespace URI.
385 */
386START_TEST(test_ns_extend_uri_buffer) {
387  const char *text = "<foo:e xmlns:foo='http://example.org/'>"
388                     " <foo:thisisalongenoughnametotriggerallocationaction"
389                     "   foo:a='12' />"
390                     "</foo:e>";
391  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
392      == XML_STATUS_ERROR)
393    xml_failure(g_parser);
394}
395END_TEST
396
397/* Test that xmlns is correctly rejected as an attribute in the xmlns
398 * namespace, but not in other namespaces
399 */
400START_TEST(test_ns_reserved_attributes) {
401  const char *text1
402      = "<foo:e xmlns:foo='http://example.org/' xmlns:xmlns='12' />";
403  const char *text2
404      = "<foo:e xmlns:foo='http://example.org/' foo:xmlns='12' />";
405  expect_failure(text1, XML_ERROR_RESERVED_PREFIX_XMLNS,
406                 "xmlns not rejected as an attribute");
407  XML_ParserReset(g_parser, NULL);
408  if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
409      == XML_STATUS_ERROR)
410    xml_failure(g_parser);
411}
412END_TEST
413
414/* Test more reserved attributes */
415START_TEST(test_ns_reserved_attributes_2) {
416  const char *text1 = "<foo:e xmlns:foo='http://example.org/'"
417                      "  xmlns:xml='http://example.org/' />";
418  const char *text2
419      = "<foo:e xmlns:foo='http://www.w3.org/XML/1998/namespace' />";
420  const char *text3 = "<foo:e xmlns:foo='http://www.w3.org/2000/xmlns/' />";
421
422  expect_failure(text1, XML_ERROR_RESERVED_PREFIX_XML,
423                 "xml not rejected as an attribute");
424  XML_ParserReset(g_parser, NULL);
425  expect_failure(text2, XML_ERROR_RESERVED_NAMESPACE_URI,
426                 "Use of w3.org URL not faulted");
427  XML_ParserReset(g_parser, NULL);
428  expect_failure(text3, XML_ERROR_RESERVED_NAMESPACE_URI,
429                 "Use of w3.org xmlns URL not faulted");
430}
431END_TEST
432
433/* Test string pool handling of namespace names of 2048 characters */
434/* Exercises a particular string pool growth path */
435START_TEST(test_ns_extremely_long_prefix) {
436  /* C99 compilers are only required to support 4095-character
437   * strings, so the following needs to be split in two to be safe
438   * for all compilers.
439   */
440  const char *text1
441      = "<doc "
442        /* 64 character on each line */
443        /* ...gives a total length of 2048 */
444        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
445        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
446        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
447        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
448        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
449        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
450        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
451        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
452        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
453        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
454        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
455        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
456        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
457        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
458        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
459        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
460        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
461        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
462        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
463        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
464        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
465        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
466        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
467        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
468        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
469        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
470        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
471        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
472        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
473        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
474        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
475        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
476        ":a='12'";
477  const char *text2
478      = " xmlns:"
479        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
480        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
481        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
482        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
483        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
484        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
485        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
486        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
487        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
488        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
489        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
490        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
491        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
492        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
493        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
494        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
495        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
496        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
497        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
498        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
499        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
500        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
501        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
502        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
503        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
504        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
505        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
506        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
507        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
508        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
509        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
510        "ABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOPABCDEFGHIJKLMNOP"
511        "='foo'\n>"
512        "</doc>";
513
514  if (_XML_Parse_SINGLE_BYTES(g_parser, text1, (int)strlen(text1), XML_FALSE)
515      == XML_STATUS_ERROR)
516    xml_failure(g_parser);
517  if (_XML_Parse_SINGLE_BYTES(g_parser, text2, (int)strlen(text2), XML_TRUE)
518      == XML_STATUS_ERROR)
519    xml_failure(g_parser);
520}
521END_TEST
522
523/* Test unknown encoding handlers in namespace setup */
524START_TEST(test_ns_unknown_encoding_success) {
525  const char *text = "<?xml version='1.0' encoding='prefix-conv'?>\n"
526                     "<foo:e xmlns:foo='http://example.org/'>Hi</foo:e>";
527
528  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
529  run_character_check(text, XCS("Hi"));
530}
531END_TEST
532
533/* Test that too many colons are rejected */
534START_TEST(test_ns_double_colon) {
535  const char *text = "<foo:e xmlns:foo='http://example.org/' foo:a:b='bar' />";
536  const enum XML_Status status
537      = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
538#ifdef XML_NS
539  if ((status == XML_STATUS_OK)
540      || (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)) {
541    fail("Double colon in attribute name not faulted"
542         " (despite active namespace support)");
543  }
544#else
545  if (status != XML_STATUS_OK) {
546    fail("Double colon in attribute name faulted"
547         " (despite inactive namespace support");
548  }
549#endif
550}
551END_TEST
552
553START_TEST(test_ns_double_colon_element) {
554  const char *text = "<foo:bar:e xmlns:foo='http://example.org/' />";
555  const enum XML_Status status
556      = _XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE);
557#ifdef XML_NS
558  if ((status == XML_STATUS_OK)
559      || (XML_GetErrorCode(g_parser) != XML_ERROR_INVALID_TOKEN)) {
560    fail("Double colon in element name not faulted"
561         " (despite active namespace support)");
562  }
563#else
564  if (status != XML_STATUS_OK) {
565    fail("Double colon in element name faulted"
566         " (despite inactive namespace support");
567  }
568#endif
569}
570END_TEST
571
572/* Test that non-name characters after a colon are rejected */
573START_TEST(test_ns_bad_attr_leafname) {
574  const char *text = "<foo:e xmlns:foo='http://example.org/' foo:?ar='baz' />";
575
576  expect_failure(text, XML_ERROR_INVALID_TOKEN,
577                 "Invalid character in leafname not faulted");
578}
579END_TEST
580
581START_TEST(test_ns_bad_element_leafname) {
582  const char *text = "<foo:?oc xmlns:foo='http://example.org/' />";
583
584  expect_failure(text, XML_ERROR_INVALID_TOKEN,
585                 "Invalid character in element leafname not faulted");
586}
587END_TEST
588
589/* Test high-byte-set UTF-16 characters are valid in a leafname */
590START_TEST(test_ns_utf16_leafname) {
591  const char text[] =
592      /* <n:e xmlns:n='URI' n:{KHO KHWAI}='a' />
593       * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
594       */
595      "<\0n\0:\0e\0 \0x\0m\0l\0n\0s\0:\0n\0=\0'\0U\0R\0I\0'\0 \0"
596      "n\0:\0\x04\x0e=\0'\0a\0'\0 \0/\0>\0";
597  const XML_Char *expected = XCS("a");
598  CharData storage;
599
600  CharData_Init(&storage);
601  XML_SetStartElementHandler(g_parser, accumulate_attribute);
602  XML_SetUserData(g_parser, &storage);
603  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
604      == XML_STATUS_ERROR)
605    xml_failure(g_parser);
606  CharData_CheckXMLChars(&storage, expected);
607}
608END_TEST
609
610START_TEST(test_ns_utf16_element_leafname) {
611  const char text[] =
612      /* <n:{KHO KHWAI} xmlns:n='URI'/>
613       * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
614       */
615      "\0<\0n\0:\x0e\x04\0 \0x\0m\0l\0n\0s\0:\0n\0=\0'\0U\0R\0I\0'\0/\0>";
616#ifdef XML_UNICODE
617  const XML_Char *expected = XCS("URI \x0e04");
618#else
619  const XML_Char *expected = XCS("URI \xe0\xb8\x84");
620#endif
621  CharData storage;
622
623  CharData_Init(&storage);
624  XML_SetStartElementHandler(g_parser, start_element_event_handler);
625  XML_SetUserData(g_parser, &storage);
626  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
627      == XML_STATUS_ERROR)
628    xml_failure(g_parser);
629  CharData_CheckXMLChars(&storage, expected);
630}
631END_TEST
632
633START_TEST(test_ns_utf16_doctype) {
634  const char text[] =
635      /* <!DOCTYPE foo:{KHO KHWAI} [ <!ENTITY bar 'baz'> ]>\n
636       * where {KHO KHWAI} = U+0E04 = 0xe0 0xb8 0x84 in UTF-8
637       */
638      "\0<\0!\0D\0O\0C\0T\0Y\0P\0E\0 \0f\0o\0o\0:\x0e\x04\0 "
639      "\0[\0 \0<\0!\0E\0N\0T\0I\0T\0Y\0 \0b\0a\0r\0 \0'\0b\0a\0z\0'\0>\0 "
640      "\0]\0>\0\n"
641      /* <foo:{KHO KHWAI} xmlns:foo='URI'>&bar;</foo:{KHO KHWAI}> */
642      "\0<\0f\0o\0o\0:\x0e\x04\0 "
643      "\0x\0m\0l\0n\0s\0:\0f\0o\0o\0=\0'\0U\0R\0I\0'\0>"
644      "\0&\0b\0a\0r\0;"
645      "\0<\0/\0f\0o\0o\0:\x0e\x04\0>";
646#ifdef XML_UNICODE
647  const XML_Char *expected = XCS("URI \x0e04");
648#else
649  const XML_Char *expected = XCS("URI \xe0\xb8\x84");
650#endif
651  CharData storage;
652
653  CharData_Init(&storage);
654  XML_SetUserData(g_parser, &storage);
655  XML_SetStartElementHandler(g_parser, start_element_event_handler);
656  XML_SetUnknownEncodingHandler(g_parser, MiscEncodingHandler, NULL);
657  if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)sizeof(text) - 1, XML_TRUE)
658      == XML_STATUS_ERROR)
659    xml_failure(g_parser);
660  CharData_CheckXMLChars(&storage, expected);
661}
662END_TEST
663
664START_TEST(test_ns_invalid_doctype) {
665  const char *text = "<!DOCTYPE foo:!bad [ <!ENTITY bar 'baz' ]>\n"
666                     "<foo:!bad>&bar;</foo:!bad>";
667
668  expect_failure(text, XML_ERROR_INVALID_TOKEN,
669                 "Invalid character in document local name not faulted");
670}
671END_TEST
672
673START_TEST(test_ns_double_colon_doctype) {
674  const char *text = "<!DOCTYPE foo:a:doc [ <!ENTITY bar 'baz' ]>\n"
675                     "<foo:a:doc>&bar;</foo:a:doc>";
676
677  expect_failure(text, XML_ERROR_SYNTAX,
678                 "Double colon in document name not faulted");
679}
680END_TEST
681
682START_TEST(test_ns_separator_in_uri) {
683  struct test_case {
684    enum XML_Status expectedStatus;
685    const char *doc;
686    XML_Char namesep;
687  };
688  struct test_case cases[] = {
689      {XML_STATUS_OK, "<doc xmlns='one_two' />", XCS('\n')},
690      {XML_STATUS_ERROR, "<doc xmlns='one&#x0A;two' />", XCS('\n')},
691      {XML_STATUS_OK, "<doc xmlns='one:two' />", XCS(':')},
692  };
693
694  size_t i = 0;
695  size_t failCount = 0;
696  for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
697    set_subtest("%s", cases[i].doc);
698    XML_Parser parser = XML_ParserCreateNS(NULL, cases[i].namesep);
699    XML_SetElementHandler(parser, dummy_start_element, dummy_end_element);
700    if (_XML_Parse_SINGLE_BYTES(parser, cases[i].doc, (int)strlen(cases[i].doc),
701                                /*isFinal*/ XML_TRUE)
702        != cases[i].expectedStatus) {
703      failCount++;
704    }
705    XML_ParserFree(parser);
706  }
707
708  if (failCount) {
709    fail("Namespace separator handling is broken");
710  }
711}
712END_TEST
713
714void
715make_namespace_test_case(Suite *s) {
716  TCase *tc_namespace = tcase_create("XML namespaces");
717
718  suite_add_tcase(s, tc_namespace);
719  tcase_add_checked_fixture(tc_namespace, namespace_setup, namespace_teardown);
720  tcase_add_test(tc_namespace, test_return_ns_triplet);
721  tcase_add_test(tc_namespace, test_ns_parser_reset);
722  tcase_add_test(tc_namespace, test_ns_tagname_overwrite);
723  tcase_add_test(tc_namespace, test_ns_tagname_overwrite_triplet);
724  tcase_add_test(tc_namespace, test_start_ns_clears_start_element);
725  tcase_add_test__ifdef_xml_dtd(tc_namespace,
726                                test_default_ns_from_ext_subset_and_ext_ge);
727  tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_1);
728  tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_2);
729  tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_3);
730  tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_4);
731  tcase_add_test(tc_namespace, test_ns_unbound_prefix);
732  tcase_add_test(tc_namespace, test_ns_default_with_empty_uri);
733  tcase_add_test(tc_namespace, test_ns_duplicate_attrs_diff_prefixes);
734  tcase_add_test(tc_namespace, test_ns_duplicate_hashes);
735  tcase_add_test(tc_namespace, test_ns_unbound_prefix_on_attribute);
736  tcase_add_test(tc_namespace, test_ns_unbound_prefix_on_element);
737  tcase_add_test(tc_namespace, test_ns_long_element);
738  tcase_add_test(tc_namespace, test_ns_mixed_prefix_atts);
739  tcase_add_test(tc_namespace, test_ns_extend_uri_buffer);
740  tcase_add_test(tc_namespace, test_ns_reserved_attributes);
741  tcase_add_test(tc_namespace, test_ns_reserved_attributes_2);
742  tcase_add_test(tc_namespace, test_ns_extremely_long_prefix);
743  tcase_add_test(tc_namespace, test_ns_unknown_encoding_success);
744  tcase_add_test(tc_namespace, test_ns_double_colon);
745  tcase_add_test(tc_namespace, test_ns_double_colon_element);
746  tcase_add_test(tc_namespace, test_ns_bad_attr_leafname);
747  tcase_add_test(tc_namespace, test_ns_bad_element_leafname);
748  tcase_add_test(tc_namespace, test_ns_utf16_leafname);
749  tcase_add_test(tc_namespace, test_ns_utf16_element_leafname);
750  tcase_add_test__if_xml_ge(tc_namespace, test_ns_utf16_doctype);
751  tcase_add_test(tc_namespace, test_ns_invalid_doctype);
752  tcase_add_test(tc_namespace, test_ns_double_colon_doctype);
753  tcase_add_test(tc_namespace, test_ns_separator_in_uri);
754}
755