1// Written in the D programming language.
2
3/**
4$(RED Warning: This module is considered out-dated and not up to Phobos'
5      current standards. It will remain until we have a suitable replacement,
6      but be aware that it will not remain long term.)
7
8Classes and functions for creating and parsing XML
9
10The basic architecture of this module is that there are standalone functions,
11classes for constructing an XML document from scratch (Tag, Element and
12Document), and also classes for parsing a pre-existing XML file (ElementParser
13and DocumentParser). The parsing classes <i>may</i> be used to build a
14Document, but that is not their primary purpose. The handling capabilities of
15DocumentParser and ElementParser are sufficiently customizable that you can
16make them do pretty much whatever you want.
17
18Example: This example creates a DOM (Document Object Model) tree
19    from an XML file.
20------------------------------------------------------------------------------
21import std.xml;
22import std.stdio;
23import std.string;
24import std.file;
25
26// books.xml is used in various samples throughout the Microsoft XML Core
27// Services (MSXML) SDK.
28//
29// See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx
30
31void main()
32{
33    string s = cast(string) std.file.read("books.xml");
34
35    // Check for well-formedness
36    check(s);
37
38    // Make a DOM tree
39    auto doc = new Document(s);
40
41    // Plain-print it
42    writeln(doc);
43}
44------------------------------------------------------------------------------
45
46Example: This example does much the same thing, except that the file is
47    deconstructed and reconstructed by hand. This is more work, but the
48    techniques involved offer vastly more power.
49------------------------------------------------------------------------------
import std.xml;
import std.stdio;
import std.string;
import std.file;
53
54struct Book
55{
56    string id;
57    string author;
58    string title;
59    string genre;
60    string price;
61    string pubDate;
62    string description;
63}
64
65void main()
66{
67    string s = cast(string) std.file.read("books.xml");
68
69    // Check for well-formedness
70    check(s);
71
72    // Take it apart
73    Book[] books;
74
75    auto xml = new DocumentParser(s);
76    xml.onStartTag["book"] = (ElementParser xml)
77    {
78        Book book;
79        book.id = xml.tag.attr["id"];
80
81        xml.onEndTag["author"]       = (in Element e) { book.author      = e.text(); };
82        xml.onEndTag["title"]        = (in Element e) { book.title       = e.text(); };
83        xml.onEndTag["genre"]        = (in Element e) { book.genre       = e.text(); };
84        xml.onEndTag["price"]        = (in Element e) { book.price       = e.text(); };
85        xml.onEndTag["publish-date"] = (in Element e) { book.pubDate     = e.text(); };
86        xml.onEndTag["description"]  = (in Element e) { book.description = e.text(); };
87
88        xml.parse();
89
90        books ~= book;
91    };
92    xml.parse();
93
94    // Put it back together again;
95    auto doc = new Document(new Tag("catalog"));
96    foreach (book;books)
97    {
98        auto element = new Element("book");
99        element.tag.attr["id"] = book.id;
100
101        element ~= new Element("author",      book.author);
102        element ~= new Element("title",       book.title);
103        element ~= new Element("genre",       book.genre);
104        element ~= new Element("price",       book.price);
105        element ~= new Element("publish-date",book.pubDate);
106        element ~= new Element("description", book.description);
107
108        doc ~= element;
109    }
110
111    // Pretty-print it
    writeln(join(doc.pretty(3),"\n"));
113}
114-------------------------------------------------------------------------------
115Copyright: Copyright Janice Caron 2008 - 2009.
116License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
117Authors:   Janice Caron
118Source:    $(PHOBOSSRC std/_xml.d)
119*/
120/*
121         Copyright Janice Caron 2008 - 2009.
122Distributed under the Boost Software License, Version 1.0.
123   (See accompanying file LICENSE_1_0.txt or copy at
124         http://www.boost.org/LICENSE_1_0.txt)
125*/
126module std.xml;
127
// Literal text that opens an XML CDATA section.
enum cdata = "<![CDATA[";
129
/**
 * Tests whether a code point is a legal XML character.
 *
 * Accepted are tab, line feed, carriage return, everything from U+0020 up to
 * U+D7FF, and everything from U+E000 up to U+10FFFF except U+FFFE and U+FFFF.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is a legal XML character, false otherwise
 */
bool isChar(dchar c) @safe @nogc pure nothrow // rule 2
{
    // Below U+0020 only three control characters are permitted.
    if (c < 0x20)
        return c == 0x9 || c == 0xA || c == 0xD;

    // U+0020 .. U+D7FF is accepted wholesale.
    if (c <= 0xD7FF)
        return true;

    // The surrogate block and anything past U+10FFFF are rejected.
    if (c < 0xE000 || c > 0x10FFFF)
        return false;

    // Inside the upper range only U+FFFE and U+FFFF are excluded.
    return c != 0xFFFE && c != 0xFFFF;
}
161
@safe @nogc nothrow pure unittest
{
    // Boundary code points on either side of every range isChar distinguishes.
    static immutable uint[] accepted =
        [0x9, 0xA, 0xD, 0x20, 'J', 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF];
    static immutable uint[] rejected =
        [0x8, 0xB, 0xC, 0xE, 0x1F, 0xD800, 0xDFFF, 0xFFFE, 0xFFFF, 0x110000];

    foreach (codePoint; accepted)
        assert( isChar(cast(dchar) codePoint));
    foreach (codePoint; rejected)
        assert(!isChar(cast(dchar) codePoint));

    // Optional exhaustive cross-check against the table-driven lookup.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isChar(c) == lookup(CharTable, c));
    }
}
191
/**
 * Tests whether a code point is XML whitespace.
 *
 * XML recognises exactly four whitespace characters: space (U+0020),
 * tab (U+0009), linefeed (U+000A) and carriage return (U+000D).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is one of the four XML whitespace characters
 */
bool isSpace(dchar c) @safe @nogc pure nothrow
{
    switch (c)
    {
    case '\u0020':
    case '\u0009':
    case '\u000A':
    case '\u000D':
        return true;
    default:
        return false;
    }
}
207
/**
 * Tests whether a code point is a digit according to the XML standard.
 *
 * ASCII '0' .. '9' is answered directly; every other code point is looked up
 * in DigitTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an XML digit
 */
bool isDigit(dchar c) @safe @nogc pure nothrow
{
    // Fast path for the ASCII digits; the table handles everything else.
    return (0x0030 <= c && c <= 0x0039) || lookup(DigitTable, c);
}
223
@safe @nogc nothrow pure unittest
{
    // Exhaustive cross-check, compiled only when the debug identifier
    // stdxml_TestHardcodedChecks is set: isDigit must agree with a plain
    // DigitTable lookup for every value in the tested range.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isDigit(c) == lookup(DigitTable, c));
    }
}
232
/**
 * Tests whether a code point is a letter according to the XML standard.
 *
 * A letter is either an ideographic character or a base character.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if isIdeographic(c) or isBaseChar(c) holds
 */
bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84
{
    // The ideographic test is tried first; the base-character test is only
    // evaluated when it fails.
    if (isIdeographic(c))
        return true;
    return isBaseChar(c);
}
245
/**
 * Tests whether a code point is an ideographic character according to the
 * XML standard.
 *
 * Three ranges qualify: U+3007 alone, U+3021 .. U+3029, and
 * U+4E00 .. U+9FA5 (all bounds inclusive).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c lies in one of the ideographic ranges
 */
bool isIdeographic(dchar c) @safe @nogc nothrow pure
{
    return c == 0x3007
        || (0x3021 <= c && c <= 0x3029)
        || (0x4E00 <= c && c <= 0x9FA5);
}
265
@safe @nogc nothrow pure unittest
{
    // The end points of every ideographic range must be accepted.
    foreach (codePoint; "\u4E00\u9FA5\u3007\u3021\u3029"d)
        assert(isIdeographic(codePoint));

    // Optional exhaustive cross-check against the table-driven lookup.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isIdeographic(c) == lookup(IdeographicTable, c));
    }
}
280
/**
 * Tests whether a code point is a base character according to the XML
 * standard.
 *
 * The answer comes from a lookup in BaseCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in BaseCharTable
 */
bool isBaseChar(dchar c) @safe @nogc nothrow pure
{
    const bool inTable = lookup(BaseCharTable, c);
    return inTable;
}
294
/**
 * Tests whether a code point is a combining character according to the XML
 * standard.
 *
 * The answer comes from a lookup in CombiningCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in CombiningCharTable
 */
bool isCombiningChar(dchar c) @safe @nogc nothrow pure
{
    const bool inTable = lookup(CombiningCharTable, c);
    return inTable;
}
308
/**
 * Tests whether a code point is an extender according to the XML standard.
 *
 * The answer comes from a lookup in ExtenderTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in ExtenderTable
 */
bool isExtender(dchar c) @safe @nogc nothrow pure
{
    const bool inTable = lookup(ExtenderTable, c);
    return inTable;
}
321
/**
 * Escapes the five XML special characters in a string using the predefined
 * XML entities.
 *
 * The characters replaced are ampersand (&amp;amp;), double quote
 * (&amp;quot;), apostrophe (&amp;apos;), less-than (&amp;lt;) and
 * greater-than (&amp;gt;). decode() performs the inverse transformation.
 * Both functions are conveniences: the std.xml classes already encode and
 * decode for you.
 *
 * When the input contains none of those characters, the original string is
 * returned unchanged (no copy is made).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be encoded
 *
 * Returns: The encoded string
 *
 * Example:
 * --------------
 * writefln(encode("a > b")); // writes "a &gt; b"
 * --------------
 */
S encode(S)(S s)
{
    import std.array : appender;

    auto output = appender!S();
    // Index of the first input position not yet copied to `output`. It stays
    // at zero until the first replacement, after which it is always >= 1.
    size_t pending;

    foreach (pos, ch; s)
    {
        string entity;
        if (ch == '&')       entity = "&amp;";
        else if (ch == '"')  entity = "&quot;";
        else if (ch == '\'') entity = "&apos;";
        else if (ch == '<')  entity = "&lt;";
        else if (ch == '>')  entity = "&gt;";
        else continue;

        // Flush the untouched run before the special character, then the
        // entity that stands in for it.
        output.put(s[pending .. pos]);
        output.put(entity);
        pending = pos + 1;
    }

    // Nothing was escaped: hand the caller's string straight back.
    if (pending == 0) return s;

    output.put(s[pending .. $]);
    return output.data;
}
375
@safe pure unittest
{
    // A string with nothing to escape must come back as the very same slice.
    auto untouched = "hello";
    assert(encode(untouched) is untouched);

    // Each special character paired with the output expected for it.
    static immutable string[2][] cases = [
        ["a > b",     "a &gt; b"],
        ["a < b",     "a &lt; b"],
        ["don't",     "don&apos;t"],
        ["\"hi\"",    "&quot;hi&quot;"],
        ["cat & dog", "cat &amp; dog"],
    ];

    foreach (pair; cases)
    {
        string input = pair[0];
        string expected = pair[1];
        string actual = encode(input);
        assert(actual == expected, actual);
    }
}
386
/**
 * Mode to use for decoding.
 *
 * Selects how decode() reacts to an ampersand that does not start a
 * recognised entity or character reference.
 */
enum DecodeMode
{
    /// Do not decode
    NONE,
    /// Decode, but ignore errors
    LOOSE,
    /// Decode, and throw exception on error
    STRICT
}
398
/**
 * Decodes a string by unescaping all predefined XML entities.
 *
 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
 * and greater-than), and decode() reverses that. These functions are provided
 * for convenience only; the std.xml classes perform the encoding and decoding
 * for you automatically.
 *
 * The entities recognised are &amp;amp;, &amp;quot;, &amp;apos;, &amp;lt; and
 * &amp;gt;, together with decimal and hexadecimal character references such as
 * &amp;#x20AC;
 *
 * If the string does not contain an ampersand, the original will be returned.
 *
 * The "mode" parameter selects the error policy: DecodeMode.NONE returns the
 * input untouched, DecodeMode.LOOSE copies an unrecognised ampersand through
 * literally, and DecodeMode.STRICT throws a DecodeException instead.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be decoded
 *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
 *
 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
 *
 * Returns: The decoded string
 *
 * Example:
 * --------------
 * writefln(decode("a &gt; b")); // writes "a > b"
 * --------------
 */
string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure
{
    import std.algorithm.searching : startsWith;

    if (mode == DecodeMode.NONE) return s;

    // Named entities and the characters they stand for, index-aligned.
    static immutable string[5] entityNames = ["&amp;", "&quot;", "&apos;", "&lt;", "&gt;"];
    static immutable char[5] entityChars = ['&', '"', '\'', '<', '>'];

    // Stays empty until the first '&' is met; from then on it accumulates the
    // whole output, so a non-zero length doubles as the "copying" flag.
    string buffer;

    for (size_t i = 0; i < s.length; ++i)
    {
        immutable char c = s[i];
        if (c != '&')
        {
            if (buffer.length != 0) buffer ~= c;
            continue;
        }

        // First ampersand: seed the buffer with everything seen so far.
        if (buffer.length == 0)
            buffer = s[0 .. i].dup;

        string rest = s[i .. $];

        if (startsWith(rest, "&#"))
        {
            // Numeric character reference.
            try
            {
                dchar d;
                string t = rest;
                checkCharRef(t, d);
                char[4] temp;
                import std.utf : encode;
                buffer ~= temp[0 .. encode(temp, d)];
                // t has been advanced past the reference; park i on its last
                // character so the loop increment moves to the one after it.
                i = s.length - t.length - 1;
            }
            catch (Err e)
            {
                if (mode == DecodeMode.STRICT)
                    throw new DecodeException("Unescaped &");
                buffer ~= '&';
            }
            continue;
        }

        // Named entity, if any.
        bool recognised = false;
        foreach (k, entity; entityNames)
        {
            if (startsWith(rest, entity))
            {
                buffer ~= entityChars[k];
                // Skip the remainder of the entity; the loop adds the last 1.
                i += entity.length - 1;
                recognised = true;
                break;
            }
        }

        if (!recognised)
        {
            if (mode == DecodeMode.STRICT)
                throw new DecodeException("Unescaped &");
            buffer ~= '&';
        }
    }

    return (buffer.length == 0) ? s : buffer;
}
487
@safe pure unittest
{
    // Passes only if strict decoding of s raises a DecodeException.
    void assertNot(string s) pure
    {
        bool threw = false;
        try { decode(s,DecodeMode.STRICT); }
        catch (DecodeException e) { threw = true; }
        assert(threw,s);
    }

    // Input without any ampersand is handed back as the same slice.
    auto plain = "hello";
    assert(decode(plain, DecodeMode.STRICT) is plain);

    // Well-formed entities and character references decode in strict mode.
    static immutable string[2][] wellFormed = [
        ["a &gt; b",       "a > b"],
        ["a &lt; b",       "a < b"],
        ["don&apos;t",     "don't"],
        ["&quot;hi&quot;", "\"hi\""],
        ["cat &amp; dog",  "cat & dog"],
        ["&#42;",          "*"],
        ["&#x2A;",         "*"],
    ];
    foreach (pair; wellFormed)
    {
        string input = pair[0];
        string expected = pair[1];
        assert(decode(input, DecodeMode.STRICT) == expected);
    }

    // Malformed input passes through unchanged in loose mode ...
    static immutable string[] malformed =
        ["cat & dog", "a &gt b", "&#;", "&#x;", "&#2G;", "&#x2G;"];
    foreach (string input; malformed)
        assert(decode(input, DecodeMode.LOOSE) == input);

    // ... and is rejected in strict mode.
    foreach (string input; malformed)
        assertNot(input);
}
523
/**
 * Class representing an XML document.
 *
 * A Document is an Element (the root) plus the text that surrounds the root
 * element in the serialized form.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 */
class Document : Element
{
    /**
     * Contains all text which occurs before the root element.
     * Defaults to &lt;?xml version="1.0"?&gt;
     */
    string prolog = "<?xml version=\"1.0\"?>";

    /**
     * Contains all text which occurs after the root element.
     * Defaults to the empty string
     */
    string epilog;

    /**
     * Constructs a Document by parsing XML text.
     *
     * This function creates a complete DOM (Document Object Model) tree.
     *
     * The input to this function MUST be valid XML. This constructor's own
     * in contract only asserts that the text is non-empty.
     *
     * Params:
     *      s = the complete XML text.
     */
    this(string s)
    in
    {
        assert(s.length != 0);
    }
    body
    {
        auto parser = new DocumentParser(s);
        string rootTagText = parser.tag.tagString;

        this(parser.tag);
        // Pointer difference gives the offset of the root tag within s; this
        // relies on the tag's text being a slice of s.
        prolog = s[0 .. rootTagText.ptr - s.ptr];
        parse(parser);
        // Whatever the parser has left unread follows the root element.
        epilog = *parser.s;
    }

    /**
     * Constructs a Document from a Tag.
     *
     * Params:
     *      tag = the start tag of the document.
     */
    this(const(Tag) tag)
    {
        super(tag);
    }

    /**
     * Compares two Documents for equality
     *
     * Prolog, element content and epilog must all match.
     *
     * Example:
     * --------------
     * Document d1,d2;
     * if (d1 == d2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const other = toType!(const Document)(o);
        if (prolog != other.prolog) return false;
        if (!super.opEquals(other)) return false;
        return epilog == other.epilog;
    }

    /**
     * Compares two Documents
     *
     * Ordering is decided by the prolog first, then by the Element
     * comparison, then by the epilog.
     *
     * You should rarely need to call this function. It exists so that
     * Documents can be used as associative array keys.
     *
     * Example:
     * --------------
     * Document d1,d2;
     * if (d1 < d2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const other = toType!(const Document)(o);

        if (prolog != other.prolog)
            return prolog < other.prolog ? -1 : 1;

        immutable int byContent = super.opCmp(other);
        if (byContent != 0)
            return byContent;

        if (epilog != other.epilog)
            return epilog < other.epilog ? -1 : 1;

        return 0;
    }

    /**
     * Returns the hash of a Document
     *
     * You should rarely need to call this function. It exists so that
     * Documents can be used as associative array keys.
     */
    override size_t toHash() scope const @trusted
    {
        // Chain the element hash through the epilog and then the prolog.
        return hash(prolog, hash(epilog, super.toHash()));
    }

    /**
     * Returns the string representation of a Document. (That is, the
     * complete XML of a document).
     */
    override string toString() scope const @safe
    {
        return prolog ~ super.toString() ~ epilog;
    }
}
645
@system unittest
{
    // https://issues.dlang.org/show_bug.cgi?id=14966
    immutable string source = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`;

    // Two documents parsed from the same text are equal, unordered with
    // respect to each other, and interchangeable as associative array keys.
    auto first = new Document(source);
    auto second = new Document(source);
    assert(first == second);
    assert(!(first < second));

    int[Document] byDocument;
    byDocument[first] = 1;
    assert(byDocument[second] == 1);

    // Appending a child to one of them makes it compare greater.
    second ~= new Element("b");
    assert(first < second);
    assert(second > first);
}
663
/**
 * Class representing an XML element.
 *
 * An Element owns a start tag and an ordered list of child items. Every child
 * is recorded twice: once in $(B items), which preserves document order, and
 * once in the array matching its concrete kind (texts, cdatas, comments, pis
 * or elements).
 *
 * Note that equality, ordering and hashing consider the child items only; the
 * tag does not take part in any of them.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 */
class Element : Item
{
    Tag tag; /// The start tag of the element
    Item[] items; /// The element's items
    Text[] texts; /// The element's text items
    CData[] cdatas; /// The element's CData items
    Comment[] comments; /// The element's comments
    ProcessingInstruction[] pis; /// The element's processing instructions
    Element[] elements; /// The element's child elements

    /**
     * Constructs an Element given a name and a string to be used as a Text
     * interior.
     *
     * No Text child is added when the interior is empty.
     *
     * Params:
     *      name = the name of the element.
     *      interior = (optional) the string interior.
     *
     * Example:
     * -------------------------------------------------------
     * auto element = new Element("title","Serenity");
     *     // constructs the element <title>Serenity</title>
     * -------------------------------------------------------
     */
    this(string name, string interior=null) @safe pure
    {
        this(new Tag(name));
        if (interior.length != 0) opCatAssign(new Text(interior));
    }

    /**
     * Constructs an Element from a Tag.
     *
     * The tag is copied (name, attributes and source text); the copy starts
     * out as TagType.EMPTY and is switched to START once a non-empty item is
     * appended.
     *
     * Params:
     *      tag_ = the start or empty tag of the element.
     */
    this(const(Tag) tag_) @safe pure
    {
        this.tag = new Tag(tag_.name);
        tag.type = TagType.EMPTY;
        foreach (k,v;tag_.attr) tag.attr[k] = v;
        tag.tagString = tag_.tagString;
    }

    /**
     * Append a text item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Text("hello");
     * --------------
     */
    void opCatAssign(Text item) @safe pure
    {
        texts ~= item;
        appendItem(item);
    }

    /**
     * Append a CData item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new CData("hello");
     * --------------
     */
    void opCatAssign(CData item) @safe pure
    {
        cdatas ~= item;
        appendItem(item);
    }

    /**
     * Append a comment to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Comment("hello");
     * --------------
     */
    void opCatAssign(Comment item) @safe pure
    {
        comments ~= item;
        appendItem(item);
    }

    /**
     * Append a processing instruction to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new ProcessingInstruction("hello");
     * --------------
     */
    void opCatAssign(ProcessingInstruction item) @safe pure
    {
        pis ~= item;
        appendItem(item);
    }

    /**
     * Append a complete element to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * Element other = new Element("br");
     * element ~= other;
     *    // appends element representing <br />
     * --------------
     */
    void opCatAssign(Element item) @safe pure
    {
        elements ~= item;
        appendItem(item);
    }

    // Records the item in document order and promotes the tag from EMPTY to
    // START as soon as the element holds something that is not empty XML.
    private void appendItem(Item item) @safe pure
    {
        items ~= item;
        if (tag.type == TagType.EMPTY && !item.isEmptyXML)
            tag.type = TagType.START;
    }

    // Fills this element from the parser: each callback appends the matching
    // kind of child, and nested start tags are parsed recursively into new
    // child Elements.
    private void parse(ElementParser xml)
    {
        xml.onText = (string s) { opCatAssign(new Text(s)); };
        xml.onCData = (string s) { opCatAssign(new CData(s)); };
        xml.onComment = (string s) { opCatAssign(new Comment(s)); };
        xml.onPI = (string s) { opCatAssign(new ProcessingInstruction(s)); };

        xml.onStartTag[null] = (ElementParser xml)
        {
            auto e = new Element(xml.tag);
            e.parse(xml);
            opCatAssign(e);
        };

        xml.parse();
    }

    /**
     * Compares two Elements for equality
     *
     * Two elements are equal when they hold the same number of items and the
     * items are pairwise equal. The tag is not compared.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 == e2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const element = toType!(const Element)(o);
        immutable len = items.length;
        if (len != element.items.length) return false;
        foreach (i; 0 .. len)
        {
            if (!items[i].opEquals(element.items[i])) return false;
        }
        return true;
    }

    /**
     * Compares two Elements
     *
     * Items are compared pairwise in order; the first unequal pair decides.
     * If one item list is a prefix of the other, the shorter one sorts first.
     * The tag is not compared.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 < e2) { }
     * --------------
     */
    override int opCmp(scope const Object o) @safe const
    {
        const element = toType!(const Element)(o);
        // size_t rather than uint so the index has the same width as .length.
        for (size_t i=0; ; ++i)
        {
            if (i == items.length && i == element.items.length) return 0;
            if (i == items.length) return -1;
            if (i == element.items.length) return 1;
            if (!items[i].opEquals(element.items[i]))
                return items[i].opCmp(element.items[i]);
        }
    }

    /**
     * Returns the hash of an Element
     *
     * The hash is built from the child items only, mirroring opEquals, which
     * ignores the tag. Mixing the tag in would let two elements that compare
     * equal produce different hashes, which breaks associative array lookup.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     */
    override size_t toHash() scope const @safe
    {
        size_t hash = 0;
        foreach (item;items) hash += item.toHash();
        return hash;
    }

    const
    {
        /**
         * Returns the decoded interior of an element.
         *
         * The element is assumed to contain text <i>only</i>. So, for
         * example, given XML such as "&lt;title&gt;Good &amp;amp;
         * Bad&lt;/title&gt;", will return "Good &amp; Bad".
         *
         * Params:
         *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
         *
         * Throws: DecodeException if the element contains an item that is not
         *      a Text, or if decode fails
         */
        string text(DecodeMode mode=DecodeMode.LOOSE)
        {
            string buffer;
            foreach (item;items)
            {
                Text t = cast(Text) item;
                if (t is null) throw new DecodeException(item.toString());
                buffer ~= decode(t.toString(),mode);
            }
            return buffer;
        }

        /**
         * Returns an indented string representation of this item
         *
         * An empty element yields its empty tag, and an element whose only
         * child is a Text is rendered on a single line. Otherwise the start
         * tag, every child line shifted right by indent, and the end tag are
         * returned as separate lines.
         *
         * Params:
         *      indent = (optional) number of spaces by which to indent this
         *          element. Defaults to 2.
         */
        override string[] pretty(uint indent=2) scope
        {
            import std.algorithm.searching : count;
            import std.string : rightJustify;

            if (isEmptyXML) return [ tag.toEmptyString() ];

            if (items.length == 1)
            {
                auto t = cast(const(Text))(items[0]);
                if (t !is null)
                {
                    return [tag.toStartString() ~ t.toString() ~ tag.toEndString()];
                }
            }

            string[] a = [ tag.toStartString() ];
            foreach (item;items)
            {
                string[] b = item.pretty(indent);
                foreach (s;b)
                {
                    // Padding to (character count + indent) prepends exactly
                    // indent spaces to the line.
                    a ~= rightJustify(s,count(s) + indent);
                }
            }
            a ~= tag.toEndString();
            return a;
        }

        /**
         * Returns the string representation of an Element
         *
         * Example:
         * --------------
         * auto element = new Element("br");
         * writefln(element.toString()); // writes "<br />"
         * --------------
         */
        override string toString() scope @safe
        {
            if (isEmptyXML) return tag.toEmptyString();

            string buffer = tag.toStartString();
            foreach (item;items) { buffer ~= item.toString(); }
            buffer ~= tag.toEndString();
            return buffer;
        }

        /// True when the element has no child items at all.
        override @property @safe pure @nogc nothrow bool isEmptyXML() const scope { return items.length == 0; }
    }
}
972
/**
 * Tag types.
 *
 * Distinguishes the three syntactic forms a tag can take.
 */
enum TagType
{
    /// Used for start tags
    START,
    /// Used for end tags
    END,
    /// Used for empty tags
    EMPTY
}
982
/**
 * Class representing an XML tag.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * The class invariant guarantees
 * <ul>
 * <li> that $(B type) is a valid enum TagType value</li>
 * <li> that $(B name) consists of valid characters</li>
 * <li> that each attribute name consists of valid characters</li>
 * </ul>
 */
class Tag
{
    TagType type = TagType.START;   /// Type of tag
    string name;                    /// Tag name
    string[string] attr;            /// Associative array of attributes
    private string tagString;       // source text of the tag; only set by the parsing constructor

    invariant()
    {
        string s;
        string t;

        assert(type == TagType.START
            || type == TagType.END
            || type == TagType.EMPTY);

        s = name;
        try { checkName(s,t); }
        catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); }

        foreach (k,v;attr)
        {
            s = k;
            try { checkName(s,t); }
            catch (Err e)
                { assert(false,"Invalid attribute name:" ~ e.toString()); }
        }
    }

    /**
     * Constructs an instance of Tag with a specified name and type
     *
     * The constructor does not initialize the attributes. To initialize the
     * attributes, you access the $(B attr) member variable.
     *
     * Params:
     *      name = the Tag's name
     *      type = (optional) the Tag's type. If omitted, defaults to
     *          TagType.START.
     *
     * Example:
     * --------------
     * auto tag = new Tag("img",TagType.EMPTY);
     * tag.attr["src"] = "http://example.com/example.jpg";
     * --------------
     */
    this(string name, TagType type=TagType.START) @safe pure
    {
        this.name = name;
        this.type = type;
    }

    /* Private constructor (so don't ddoc this!)
     *
     * Constructs a Tag by parsing the string representation, e.g. "<html>".
     *
     * The string is passed by reference, and is advanced over all characters
     * consumed.
     *
     * The second parameter is a dummy parameter only, required solely to
     * distinguish this constructor from the public one.
     *
     * Throws: TagException (carrying the text consumed so far) if the input
     * is not a well-formed tag, including input that is truncated before the
     * closing '>'.
     */
    private this(ref string s, bool dummy) @safe pure
    {
        import std.algorithm.searching : countUntil;
        import std.ascii : isWhite;
        import std.utf : byCodeUnit;

        // countUntil signals "no match" with -1. Slicing with that value
        // would raise a RangeError (an Error, not an XMLException), so turn
        // it into the TagException that callers of this constructor expect.
        static size_t requireFound(ptrdiff_t pos) @safe pure
        {
            if (pos < 0) throw new TagException("");
            return cast(size_t) pos;
        }

        tagString = s;
        try
        {
            reqc(s,'<');
            if (optc(s,'/')) type = TagType.END;

            // The name runs up to the first terminator or whitespace.
            size_t i = requireFound(
                s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f"));
            name = s[0 .. i];
            s = s[i .. $];

            i = requireFound(s.byCodeUnit.countUntil!(a => !isWhite(a)));
            s = s[i .. $];

            // Attributes: key [ws] '=' [ws] quote value quote [ws]
            while (s.length > 0 && s[0] != '>' && s[0] != '/')
            {
                i = requireFound(
                    s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f"));
                string key = s[0 .. i];
                s = s[i .. $];

                i = requireFound(s.byCodeUnit.countUntil!(a => !isWhite(a)));
                s = s[i .. $];
                reqc(s,'=');
                i = requireFound(s.byCodeUnit.countUntil!(a => !isWhite(a)));
                s = s[i .. $];

                // The value ends at the same quote character that opened it.
                immutable char quote = requireOneOf(s,"'\"");
                i = requireFound(s.byCodeUnit.countUntil(quote));
                string val = decode(s[0 .. i], DecodeMode.LOOSE);
                s = s[i .. $];
                reqc(s,quote);

                i = requireFound(s.byCodeUnit.countUntil!(a => !isWhite(a)));
                s = s[i .. $];
                attr[key] = val;
            }
            if (optc(s,'/'))
            {
                // "</name/>" is neither an end tag nor an empty tag.
                if (type == TagType.END) throw new TagException("");
                type = TagType.EMPTY;
            }
            reqc(s,'>');
            // Keep only the characters that belong to this tag.
            tagString.length = tagString.length - s.length;
        }
        catch (XMLException e)
        {
            // Report the portion of the input consumed before the failure.
            tagString.length = tagString.length - s.length;
            throw new TagException(tagString);
        }
    }

    const
    {
        /**
         * Compares two Tags for equality
         *
         * Two Tags are equal when their names, attributes and types are all
         * equal.
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         *
         * Example:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 == tag2) { }
         * --------------
         */
        override bool opEquals(scope Object o)
        {
            const tag = toType!(const Tag)(o);
            return name == tag.name
                && attr == tag.attr
                && type == tag.type;
        }

        /**
         * Compares two Tags
         *
         * Tags are ordered by name, then by their attributes (compared as
         * key/value pairs taken in ascending key order), then by type. The
         * result is 0 exactly when opEquals reports the Tags as equal.
         *
         * Example:
         * --------------
         * Tag tag1,tag2;
         * if (tag1 < tag2) { }
         * --------------
         */
        override int opCmp(Object o)
        {
            const tag = toType!(const Tag)(o);

            if (name != tag.name) return name < tag.name ? -1 : 1;

            if (attr != tag.attr)
            {
                // Compare contents, not AA addresses: an address-based order
                // disagrees with opEquals and is not transitive (bug 10381).
                const mine = sortedAttrPairs();
                const theirs = tag.sortedAttrPairs();
                immutable common = mine.length < theirs.length
                    ? mine.length : theirs.length;
                foreach (i; 0 .. common)
                {
                    if (mine[i] != theirs[i])
                        return mine[i] < theirs[i] ? -1 : 1;
                }
                if (mine.length != theirs.length)
                    return mine.length < theirs.length ? -1 : 1;
            }

            if (type != tag.type) return type < tag.type ? -1 : 1;
            return 0;
        }

        /**
         * Returns the hash of a Tag
         *
         * Only the name contributes to the hash, so equal Tags always hash
         * alike.
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         */
        override size_t toHash()
        {
            return typeid(name).getHash(&name);
        }

        /**
         * Returns the string representation of a Tag
         *
         * Example:
         * --------------
         * auto tag = new Tag("book",TagType.START);
         * writeln(tag.toString()); // writes "<book>"
         * --------------
         */
        override string toString() @safe
        {
            if (isEmpty) return toEmptyString();
            return (isEnd) ? toEndString() : toStartString();
        }

        private
        {
            // Renders "<name key="value" ..." without the closing bracket;
            // attribute values are passed through encode().
            string toNonEndString() @safe
            {
                import std.format : format;

                string s = "<" ~ name;
                foreach (key,val;attr)
                    s ~= format(" %s=\"%s\"",key,encode(val));
                return s;
            }

            string toStartString() @safe { return toNonEndString() ~ ">"; }

            string toEndString() @safe { return "</" ~ name ~ ">"; }

            string toEmptyString() @safe { return toNonEndString() ~ " />"; }

            // Flattens attr into [key0, value0, key1, value1, ...] with the
            // keys in ascending order, giving opCmp a canonical sequence that
            // does not depend on how the associative array is laid out.
            string[] sortedAttrPairs()
            {
                import std.algorithm.sorting : sort;

                string[] keys;
                foreach (key,val;attr)
                    keys ~= key;
                sort(keys);

                string[] pairs;
                pairs.reserve(keys.length * 2);
                foreach (key; keys)
                {
                    pairs ~= key;
                    pairs ~= attr[key];
                }
                return pairs;
            }
        }

        /**
         * Returns true if the Tag is a start tag
         *
         * Example:
         * --------------
         * if (tag.isStart) { }
         * --------------
         */
        @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; }

        /**
         * Returns true if the Tag is an end tag
         *
         * Example:
         * --------------
         * if (tag.isEnd) { }
         * --------------
         */
        @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END;   }

        /**
         * Returns true if the Tag is an empty tag
         *
         * Example:
         * --------------
         * if (tag.isEmpty) { }
         * --------------
         */
        @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; }
    }
}
1232
/**
 * Class representing a comment
 */
class Comment : Item
{
    private string content;

    /**
     * Construct a comment
     *
     * Params:
     *      content = the body of the comment
     *
     * Throws: CommentException if the comment body is illegal (contains "--"
     * or ends with "-", either of which would make the serialized comment
     * malformed)
     *
     * Example:
     * --------------
     * auto item = new Comment("This is a comment");
     *    // constructs <!--This is a comment-->
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        // A trailing '-' would fuse with the closing "-->" into "--->",
        // which XML does not allow; this also covers the body "-".
        immutable endsWithDash = content.length != 0 && content[$ - 1] == '-';
        if (endsWithDash || content.indexOf("--") != -1)
            throw new CommentException(content);
        this.content = content;
    }

    /**
     * Compares two comments for equality
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two comments
     *
     * Returns: a negative value, zero or a positive value when this comment's
     * body sorts before, equal to or after the other's. If o is an Item that
     * is not a Comment the result is 0.
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        if (t is null) return 0;
        // Computed as an int on its own: folding it into an `&&` expression
        // would convert -1 to `true` and lose the sign.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a Comment
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this comment
     */
    override string toString() scope const @safe pure nothrow { return "<!--" ~ content ~ "-->"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
1315
@safe unittest // issue 16241
{
    import std.exception : assertThrown;

    // A body of "==" contains no hyphens and must be stored unchanged.
    auto accepted = new Comment("==");
    assert(accepted.content == "==");

    // A double hyphen inside the body must be refused.
    assertThrown!CommentException(new Comment("--"));
}
1323
/**
 * Class representing a Character Data section
 */
class CData : Item
{
    private string content;

    /**
     * Construct a character data section
     *
     * Params:
     *      content = the body of the character data segment
     *
     * Throws: CDataException if the segment body is illegal (contains "]]>")
     *
     * Example:
     * --------------
     * auto item = new CData("<b>hello</b>");
     *    // constructs <![CDATA[<b>hello</b>]]>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("]]>") != -1) throw new CDataException(content);
        this.content = content;
    }

    /**
     * Compares two CDatas for equality
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two CDatas
     *
     * Returns: a negative value, zero or a positive value when this section's
     * body sorts before, equal to or after the other's. If o is an Item that
     * is not a CData the result is 0.
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        if (t is null) return 0;
        // Computed as an int on its own: folding it into an `&&` expression
        // would convert -1 to `true` and lose the sign.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a CData
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this CData section
     */
    override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
1403
/**
 * Class representing a text (aka Parsed Character Data) section
 */
class Text : Item
{
    private string content;     // stored already encoded

    /**
     * Construct a text (aka PCData) section
     *
     * Params:
     *      content = the text. This function encodes the text before
     *      insertion, so it is safe to insert any text
     *
     * Example:
     * --------------
     * auto item = new Text("a < b");
     *    // constructs a &lt; b
     * --------------
     */
    this(string content) @safe pure
    {
        this.content = encode(content);
    }

    /**
     * Compares two text sections for equality
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two text sections
     *
     * Returns: a negative value, zero or a positive value when this section's
     * (encoded) text sorts before, equal to or after the other's. If o is an
     * Item that is not a Text the result is 0.
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        if (t is null) return 0;
        // Computed as an int on its own: folding it into an `&&` expression
        // would convert -1 to `true` and lose the sign.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a text section
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this Text section
     */
    override string toString() scope const @safe @nogc pure nothrow { return content; }

    /**
     * Returns true if the content is the empty string
     */
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return content.length == 0; }
}
1483
/**
 * Class representing an XML Instruction section
 */
class XMLInstruction : Item
{
    private string content;

    /**
     * Construct an XML Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: XIException if the segment body is illegal (contains ">")
     *
     * Example:
     * --------------
     * auto item = new XMLInstruction("ATTLIST");
     *    // constructs <!ATTLIST>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf(">") != -1) throw new XIException(content);
        this.content = content;
    }

    /**
     * Compares two XML instructions for equality
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two XML instructions
     *
     * Returns: a negative value, zero or a positive value when this
     * instruction's body sorts before, equal to or after the other's. If o is
     * an Item that is not an XMLInstruction the result is 0.
     *
     * You should rarely need to call this function. It exists so that
     * XMLInstructions can be used as associative array keys.
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        if (t is null) return 0;
        // Computed as an int on its own: folding it into an `&&` expression
        // would convert -1 to `true` and lose the sign.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of an XMLInstruction
     *
     * You should rarely need to call this function. It exists so that
     * XMLInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this XMLInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
1563
/**
 * Class representing a Processing Instruction section
 */
class ProcessingInstruction : Item
{
    private string content;

    /**
     * Construct a Processing Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: PIException if the segment body is illegal (contains "?>")
     *
     * Example:
     * --------------
     * auto item = new ProcessingInstruction("php");
     *    // constructs <?php?>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("?>") != -1) throw new PIException(content);
        this.content = content;
    }

    /**
     * Compares two processing instructions for equality
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two processing instructions
     *
     * Returns: a negative value, zero or a positive value when this
     * instruction's body sorts before, equal to or after the other's. If o is
     * an Item that is not a ProcessingInstruction the result is 0.
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        if (t is null) return 0;
        // Computed as an int on its own: folding it into an `&&` expression
        // would convert -1 to `true` and lose the sign.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a ProcessingInstruction
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this ProcessingInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<?" ~ content ~ "?>"; }

    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } /// Returns false always
}
1643
/**
 * Abstract base class for XML items
 */
abstract class Item
{
    /// Compares with another Item of same type for equality
    abstract override bool opEquals(scope const Object o) @safe const;

    /// Compares with another Item of same type
    abstract override int opCmp(scope const Object o) @safe const;

    /// Returns the hash of this item
    abstract override size_t toHash() @safe scope const;

    /// Returns a string representation of this item
    abstract override string toString() @safe scope const;

    /**
     * Returns an indented string representation of this item
     *
     * This base implementation yields the stripped result of toString() as a
     * single line, or no lines at all when that result is empty; it does not
     * use $(B indent) itself.
     *
     * Params:
     *      indent = number of spaces by which to indent child elements
     */
    string[] pretty(uint indent) @safe scope const
    {
        import std.string : strip;

        string trimmed = toString().strip();
        if (trimmed.length == 0)
            return [];
        return [ trimmed ];
    }

    /// Returns true if the item represents empty XML text
    abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const;
}
1677
/**
 * Class for parsing an XML Document.
 *
 * This is a subclass of ElementParser. Most of the useful functions are
 * documented there.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Bugs:
 *      Currently only supports UTF documents.
 *
 *      If there is an encoding attribute in the prolog, it is ignored.
 *
 */
class DocumentParser : ElementParser
{
    /// The document text handed to the constructor. The inherited input
    /// pointer is aimed at this field.
    string xmlText;

    /**
     * Constructs a DocumentParser.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by the function's in contract.
     *
     * Params:
     *      xmlText_ = the entire XML document as text
     *
     */
    this(string xmlText_)
    in
    {
        assert(xmlText_.length > 0);

        // Run the well-formedness check; a failure becomes an assertion
        // whose message carries the checker's explanation.
        try
        {
            check(xmlText_);
        }
        catch (CheckException e)
        {
            assert(false, "\n" ~ e.toString());
        }
    }
    body
    {
        this.xmlText = xmlText_;
        // Must happen before super(): the base constructor dereferences s.
        this.s = &this.xmlText;
        super();    // Initialize everything
        parse();    // Parse through the root tag (but not beyond)
    }
}
1729
@system unittest
{
    // The root element becomes the Document itself, so only <child> is
    // listed among its elements.
    auto document = new Document("<root><child><grandchild/></child></root>");

    assert(document.elements.length == 1);
    assert(document.elements[0].tag.name == "child");

    // With no text, comments, etc. the item list and element list coincide.
    assert(document.items == document.elements);
}
1737
1738/**
1739 * Class for parsing an XML element.
1740 *
1741 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1742 *
1743 * Note that you cannot construct instances of this class directly. You can
1744 * construct a DocumentParser (which is a subclass of ElementParser), but
 * otherwise, instances of ElementParser will be created for you by the
1746 * library, and passed your way via onStartTag handlers.
1747 *
1748 */
1749class ElementParser
1750{
1751    alias Handler = void delegate(string);
1752    alias ElementHandler = void delegate(in Element element);
1753    alias ParserHandler = void delegate(ElementParser parser);
1754
1755    private
1756    {
1757        Tag tag_;
1758        string elementStart;
1759        string* s;
1760
1761        Handler commentHandler = null;
1762        Handler cdataHandler = null;
1763        Handler xiHandler = null;
1764        Handler piHandler = null;
1765        Handler rawTextHandler = null;
1766        Handler textHandler = null;
1767
1768        // Private constructor for start tags
1769        this(ElementParser parent) @safe @nogc pure nothrow
1770        {
1771            s = parent.s;
1772            this();
1773            tag_ = parent.tag_;
1774        }
1775
1776        // Private constructor for empty tags
1777        this(Tag tag, string* t) @safe @nogc pure nothrow
1778        {
1779            s = t;
1780            this();
1781            tag_ = tag;
1782        }
1783    }
1784
1785    /**
1786     * The Tag at the start of the element being parsed. You can read this to
1787     * determine the tag's name and attributes.
1788     */
1789    @property @safe @nogc pure nothrow const(Tag) tag() const { return tag_; }
1790
1791    /**
1792     * Register a handler which will be called whenever a start tag is
1793     * encountered which matches the specified name. You can also pass null as
1794     * the name, in which case the handler will be called for any unmatched
1795     * start tag.
1796     *
1797     * Example:
1798     * --------------
1799     * // Call this function whenever a <podcast> start tag is encountered
1800     * onStartTag["podcast"] = (ElementParser xml)
1801     * {
1802     *     // Your code here
1803     *     //
     *     // This is a closure, so code here may reference
1805     *     // variables which are outside of this scope
1806     * };
1807     *
1808     * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
1809     * // start tag is encountered
1810     * onStartTag["episode"] = &myEpisodeStartHandler;
1811     *
1812     * // call delegate dg for all other start tags
1813     * onStartTag[null] = dg;
1814     * --------------
1815     *
     * This library will supply your function with a new instance of
     * ElementParser, which may be used to parse inside the element whose
     * start tag was just found, or to identify the tag attributes of the
     * element, etc.
1820     *
1821     * Note that your function will be called for both start tags and empty
1822     * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
1823     * and &lt;br/&gt;.
1824     */
1825    ParserHandler[string] onStartTag;
1826
1827    /**
1828     * Register a handler which will be called whenever an end tag is
1829     * encountered which matches the specified name. You can also pass null as
1830     * the name, in which case the handler will be called for any unmatched
1831     * end tag.
1832     *
1833     * Example:
1834     * --------------
1835     * // Call this function whenever a </podcast> end tag is encountered
1836     * onEndTag["podcast"] = (in Element e)
1837     * {
1838     *     // Your code here
1839     *     //
     *     // This is a closure, so code here may reference
1841     *     // variables which are outside of this scope
1842     * };
1843     *
1844     * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
1845     * // end tag is encountered
1846     * onEndTag["episode"] = &myEpisodeEndHandler;
1847     *
1848     * // call delegate dg for all other end tags
1849     * onEndTag[null] = dg;
1850     * --------------
1851     *
1852     * Note that your function will be called for both start tags and empty
1853     * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
1854     * and &lt;br/&gt;.
1855     */
1856    ElementHandler[string] onEndTag;
1857
1858    protected this() @safe @nogc pure nothrow
1859    {
1860        elementStart = *s;
1861    }
1862
1863    /**
1864     * Register a handler which will be called whenever text is encountered.
1865     *
1866     * Example:
1867     * --------------
1868     * // Call this function whenever text is encountered
1869     * onText = (string s)
1870     * {
1871     *     // Your code here
1872     *
1873     *     // The passed parameter s will have been decoded by the time you see
1874     *     // it, and so may contain any character.
1875     *     //
1876     *     // This is a a closure, so code here may reference
1877     *     // variables which are outside of this scope
1878     * };
1879     * --------------
1880     */
1881    @property @safe @nogc pure nothrow void onText(Handler handler) { textHandler = handler; }
1882
1883    /**
1884     * Register an alternative handler which will be called whenever text
1885     * is encountered. This differs from onText in that onText will decode
1886     * the text, whereas onTextRaw will not. This allows you to make design
1887     * choices, since onText will be more accurate, but slower, while
1888     * onTextRaw will be faster, but less accurate. Of course, you can
1889     * still call decode() within your handler, if you want, but you'd
1890     * probably want to use onTextRaw only in circumstances where you
1891     * know that decoding is unnecessary.
1892     *
1893     * Example:
1894     * --------------
1895     * // Call this function whenever text is encountered
1896     * onText = (string s)
1897     * {
1898     *     // Your code here
1899     *
1900     *     // The passed parameter s will NOT have been decoded.
1901     *     //
1902     *     // This is a a closure, so code here may reference
1903     *     // variables which are outside of this scope
1904     * };
1905     * --------------
1906     */
1907    @safe @nogc pure nothrow void onTextRaw(Handler handler) { rawTextHandler = handler; }
1908
1909    /**
1910     * Register a handler which will be called whenever a character data
1911     * segment is encountered.
1912     *
1913     * Example:
1914     * --------------
1915     * // Call this function whenever a CData section is encountered
1916     * onCData = (string s)
1917     * {
1918     *     // Your code here
1919     *
1920     *     // The passed parameter s does not include the opening <![CDATA[
1921     *     // nor closing ]]>
1922     *     //
1923     *     // This is a a closure, so code here may reference
1924     *     // variables which are outside of this scope
1925     * };
1926     * --------------
1927     */
1928    @property @safe @nogc pure nothrow void onCData(Handler handler) { cdataHandler = handler; }
1929
1930    /**
1931     * Register a handler which will be called whenever a comment is
1932     * encountered.
1933     *
1934     * Example:
1935     * --------------
1936     * // Call this function whenever a comment is encountered
1937     * onComment = (string s)
1938     * {
1939     *     // Your code here
1940     *
1941     *     // The passed parameter s does not include the opening <!-- nor
1942     *     // closing -->
1943     *     //
1944     *     // This is a a closure, so code here may reference
1945     *     // variables which are outside of this scope
1946     * };
1947     * --------------
1948     */
1949    @property @safe @nogc pure nothrow void onComment(Handler handler) { commentHandler = handler; }
1950
1951    /**
1952     * Register a handler which will be called whenever a processing
1953     * instruction is encountered.
1954     *
1955     * Example:
1956     * --------------
1957     * // Call this function whenever a processing instruction is encountered
1958     * onPI = (string s)
1959     * {
1960     *     // Your code here
1961     *
1962     *     // The passed parameter s does not include the opening <? nor
1963     *     // closing ?>
1964     *     //
1965     *     // This is a a closure, so code here may reference
1966     *     // variables which are outside of this scope
1967     * };
1968     * --------------
1969     */
1970    @property @safe @nogc pure nothrow void onPI(Handler handler) { piHandler = handler; }
1971
1972    /**
1973     * Register a handler which will be called whenever an XML instruction is
1974     * encountered.
1975     *
1976     * Example:
1977     * --------------
1978     * // Call this function whenever an XML instruction is encountered
1979     * // (Note: XML instructions may only occur preceding the root tag of a
1980     * // document).
1981     * onPI = (string s)
1982     * {
1983     *     // Your code here
1984     *
1985     *     // The passed parameter s does not include the opening <! nor
1986     *     // closing >
1987     *     //
1988     *     // This is a a closure, so code here may reference
1989     *     // variables which are outside of this scope
1990     * };
1991     * --------------
1992     */
1993    @property @safe @nogc pure nothrow void onXI(Handler handler) { xiHandler = handler; }
1994
1995    /**
1996     * Parse an XML element.
1997     *
1998     * Parsing will continue until the end of the current element. Any items
1999     * encountered for which a handler has been registered will invoke that
2000     * handler.
2001     *
2002     * Throws: various kinds of XMLException
2003     */
2004    void parse()
2005    {
2006        import std.algorithm.searching : startsWith;
2007        import std.string : indexOf;
2008
2009        string t;
2010        const Tag root = tag_;
2011        Tag[string] startTags;
2012        if (tag_ !is null) startTags[tag_.name] = tag_;
2013
2014        while (s.length != 0)
2015        {
2016            if (startsWith(*s,"<!--"))
2017            {
2018                chop(*s,4);
2019                t = chop(*s,indexOf(*s,"-->"));
2020                if (commentHandler.funcptr !is null) commentHandler(t);
2021                chop(*s,3);
2022            }
2023            else if (startsWith(*s,"<![CDATA["))
2024            {
2025                chop(*s,9);
2026                t = chop(*s,indexOf(*s,"]]>"));
2027                if (cdataHandler.funcptr !is null) cdataHandler(t);
2028                chop(*s,3);
2029            }
2030            else if (startsWith(*s,"<!"))
2031            {
2032                chop(*s,2);
2033                t = chop(*s,indexOf(*s,">"));
2034                if (xiHandler.funcptr !is null) xiHandler(t);
2035                chop(*s,1);
2036            }
2037            else if (startsWith(*s,"<?"))
2038            {
2039                chop(*s,2);
2040                t = chop(*s,indexOf(*s,"?>"));
2041                if (piHandler.funcptr !is null) piHandler(t);
2042                chop(*s,2);
2043            }
2044            else if (startsWith(*s,"<"))
2045            {
2046                tag_ = new Tag(*s,true);
2047                if (root is null)
2048                    return; // Return to constructor of derived class
2049
2050                if (tag_.isStart)
2051                {
2052                    startTags[tag_.name] = tag_;
2053
2054                    auto parser = new ElementParser(this);
2055
2056                    auto handler = tag_.name in onStartTag;
2057                    if (handler !is null) (*handler)(parser);
2058                    else
2059                    {
2060                        handler = null in onStartTag;
2061                        if (handler !is null) (*handler)(parser);
2062                    }
2063                }
2064                else if (tag_.isEnd)
2065                {
2066                    const startTag = startTags[tag_.name];
2067                    string text;
2068
2069                    if (startTag.tagString.length == 0)
2070                        assert(0);
2071
2072                    immutable(char)* p = startTag.tagString.ptr
2073                        + startTag.tagString.length;
2074                    immutable(char)* q = &tag_.tagString[0];
2075                    text = decode(p[0..(q-p)], DecodeMode.LOOSE);
2076
2077                    auto element = new Element(startTag);
2078                    if (text.length != 0) element ~= new Text(text);
2079
2080                    auto handler = tag_.name in onEndTag;
2081                    if (handler !is null) (*handler)(element);
2082                    else
2083                    {
2084                        handler = null in onEndTag;
2085                        if (handler !is null) (*handler)(element);
2086                    }
2087
2088                    if (tag_.name == root.name) return;
2089                }
2090                else if (tag_.isEmpty)
2091                {
2092                    Tag startTag = new Tag(tag_.name);
2093
2094                    // FIX by hed010gy, for bug 2979
2095                    // http://d.puremagic.com/issues/show_bug.cgi?id=2979
2096                    if (tag_.attr.length > 0)
2097                          foreach (tn,tv; tag_.attr) startTag.attr[tn]=tv;
2098                    // END FIX
2099
2100                    // Handle the pretend start tag
2101                    string s2;
2102                    auto parser = new ElementParser(startTag,&s2);
2103                    auto handler1 = startTag.name in onStartTag;
2104                    if (handler1 !is null) (*handler1)(parser);
2105                    else
2106                    {
2107                        handler1 = null in onStartTag;
2108                        if (handler1 !is null) (*handler1)(parser);
2109                    }
2110
2111                    // Handle the pretend end tag
2112                    auto element = new Element(startTag);
2113                    auto handler2 = tag_.name in onEndTag;
2114                    if (handler2 !is null) (*handler2)(element);
2115                    else
2116                    {
2117                        handler2 = null in onEndTag;
2118                        if (handler2 !is null) (*handler2)(element);
2119                    }
2120                }
2121            }
2122            else
2123            {
2124                t = chop(*s,indexOf(*s,"<"));
2125                if (rawTextHandler.funcptr !is null)
2126                    rawTextHandler(t);
2127                else if (textHandler.funcptr !is null)
2128                    textHandler(decode(t,DecodeMode.LOOSE));
2129            }
2130        }
2131    }
2132
2133    /**
2134     * Returns that part of the element which has already been parsed
2135     */
2136    override string toString() const @nogc @safe pure nothrow
2137    {
2138        assert(elementStart.length >= s.length);
2139        return elementStart[0 .. elementStart.length - s.length];
2140    }
2141
2142}
2143
2144private
2145{
    // Error-reporting boilerplate shared by the check* functions, which pull
    // it in with `mixin Check!("RuleName")`. It expects a string variable
    // named `s` to be in scope at the point of the mixin.
    //
    // It declares:
    //   old  - the value of s at the point of the mixin
    //   fail - overloads that restore s to old and throw an Err labelled
    //          with msg
    template Check(string msg)
    {
        string old = s;

        // Rewind s and throw an Err carrying only the rule name.
        void fail() @safe pure
        {
            s = old;
            throw new Err(s,msg);
        }

        // Rewind s and throw an Err for this rule with e attached as its
        // nested error.
        void fail(Err e) @safe pure
        {
            s = old;
            throw new Err(s,msg,e);
        }

        // Report a specific message: the inner Err is created first, so it
        // records s as it is at the point of failure; the outer Err for this
        // rule is then thrown by fail(Err) after s has been rewound.
        void fail(string msg2) @safe pure
        {
            fail(new Err(s,msg2));
        }
    }
2167
2168    void checkMisc(ref string s) @safe pure // rule 27
2169    {
2170        import std.algorithm.searching : startsWith;
2171
2172        mixin Check!("Misc");
2173
2174        try
2175        {
2176                 if (s.startsWith("<!--")) { checkComment(s); }
2177            else if (s.startsWith("<?"))   { checkPI(s); }
2178            else                           { checkSpace(s); }
2179        }
2180        catch (Err e) { fail(e); }
2181    }
2182
2183    void checkDocument(ref string s) @safe pure // rule 1
2184    {
2185        mixin Check!("Document");
2186        try
2187        {
2188            checkProlog(s);
2189            checkElement(s);
2190            star!(checkMisc)(s);
2191        }
2192        catch (Err e) { fail(e); }
2193    }
2194
2195    void checkChars(ref string s) @safe pure // rule 2
2196    {
2197        // TO DO - Fix std.utf stride and decode functions, then use those
2198        // instead
2199        import std.format : format;
2200
2201        mixin Check!("Chars");
2202
2203        dchar c;
2204        ptrdiff_t n = -1;
2205        // 'i' must not be smaller than size_t because size_t is used internally in
2206        // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
2207        foreach (size_t i, dchar d; s)
2208        {
2209            if (!isChar(d))
2210            {
2211                c = d;
2212                n = i;
2213                break;
2214            }
2215        }
2216        if (n != -1)
2217        {
2218            s = s[n..$];
2219            fail(format("invalid character: U+%04X",c));
2220        }
2221    }
2222
2223    void checkSpace(ref string s) @safe pure // rule 3
2224    {
2225        import std.algorithm.searching : countUntil;
2226        import std.ascii : isWhite;
2227        import std.utf : byCodeUnit;
2228
2229        mixin Check!("Whitespace");
2230        ptrdiff_t i = s.byCodeUnit.countUntil!(a => !isWhite(a));
2231        if (i == -1 && s.length > 0 && isWhite(s[0]))
2232            s = s[$ .. $];
2233        else if (i > -1)
2234            s = s[i .. $];
2235        if (s is old) fail();
2236    }
2237
2238    void checkName(ref string s, out string name) @safe pure // rule 5
2239    {
2240        mixin Check!("Name");
2241
2242        if (s.length == 0) fail();
2243        ptrdiff_t n;
2244        // 'i' must not be smaller than size_t because size_t is used internally in
2245        // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
2246        foreach (size_t i, dchar c; s)
2247        {
2248            if (c == '_' || c == ':' || isLetter(c)) continue;
2249            if (i == 0) fail();
2250            if (c == '-' || c == '.' || isDigit(c)
2251                || isCombiningChar(c) || isExtender(c)) continue;
2252            n = i;
2253            break;
2254        }
2255        name = s[0 .. n];
2256        s = s[n..$];
2257    }
2258
2259    void checkAttValue(ref string s) @safe pure // rule 10
2260    {
2261        import std.algorithm.searching : countUntil;
2262        import std.utf : byCodeUnit;
2263
2264        mixin Check!("AttValue");
2265
2266        if (s.length == 0) fail();
2267        char c = s[0];
2268        if (c != '\u0022' && c != '\u0027')
2269            fail("attribute value requires quotes");
2270        s = s[1..$];
2271        for (;;)
2272        {
2273            s = s[s.byCodeUnit.countUntil(c) .. $];
2274            if (s.length == 0) fail("unterminated attribute value");
2275            if (s[0] == '<') fail("< found in attribute value");
2276            if (s[0] == c) break;
2277            try { checkReference(s); } catch (Err e) { fail(e); }
2278        }
2279        s = s[1..$];
2280    }
2281
2282    void checkCharData(ref string s) @safe pure // rule 14
2283    {
2284        import std.algorithm.searching : startsWith;
2285
2286        mixin Check!("CharData");
2287
2288        while (s.length != 0)
2289        {
2290            if (s.startsWith("&")) break;
2291            if (s.startsWith("<")) break;
2292            if (s.startsWith("]]>")) fail("]]> found within char data");
2293            s = s[1..$];
2294        }
2295    }
2296
2297    void checkComment(ref string s) @safe pure // rule 15
2298    {
2299        import std.string : indexOf;
2300
2301        mixin Check!("Comment");
2302
2303        try { checkLiteral("<!--",s); } catch (Err e) { fail(e); }
2304        ptrdiff_t n = s.indexOf("--");
2305        if (n == -1) fail("unterminated comment");
2306        s = s[n..$];
2307        try { checkLiteral("-->",s); } catch (Err e) { fail(e); }
2308    }
2309
2310    void checkPI(ref string s) @safe pure // rule 16
2311    {
2312        mixin Check!("PI");
2313
2314        try
2315        {
2316            checkLiteral("<?",s);
2317            checkEnd("?>",s);
2318        }
2319        catch (Err e) { fail(e); }
2320    }
2321
2322    void checkCDSect(ref string s) @safe pure // rule 18
2323    {
2324        mixin Check!("CDSect");
2325
2326        try
2327        {
2328            checkLiteral(cdata,s);
2329            checkEnd("]]>",s);
2330        }
2331        catch (Err e) { fail(e); }
2332    }
2333
2334    void checkProlog(ref string s) @safe pure // rule 22
2335    {
2336        mixin Check!("Prolog");
2337
2338        try
2339        {
2340            /* The XML declaration is optional
2341             * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
2342             */
2343            opt!(checkXMLDecl)(s);
2344
2345            star!(checkMisc)(s);
2346            opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s);
2347        }
2348        catch (Err e) { fail(e); }
2349    }
2350
2351    void checkXMLDecl(ref string s) @safe pure // rule 23
2352    {
2353        mixin Check!("XMLDecl");
2354
2355        try
2356        {
2357            checkLiteral("<?xml",s);
2358            checkVersionInfo(s);
2359            opt!(checkEncodingDecl)(s);
2360            opt!(checkSDDecl)(s);
2361            opt!(checkSpace)(s);
2362            checkLiteral("?>",s);
2363        }
2364        catch (Err e) { fail(e); }
2365    }
2366
2367    void checkVersionInfo(ref string s) @safe pure // rule 24
2368    {
2369        mixin Check!("VersionInfo");
2370
2371        try
2372        {
2373            checkSpace(s);
2374            checkLiteral("version",s);
2375            checkEq(s);
2376            quoted!(checkVersionNum)(s);
2377        }
2378        catch (Err e) { fail(e); }
2379    }
2380
2381    void checkEq(ref string s) @safe pure // rule 25
2382    {
2383        mixin Check!("Eq");
2384
2385        try
2386        {
2387            opt!(checkSpace)(s);
2388            checkLiteral("=",s);
2389            opt!(checkSpace)(s);
2390        }
2391        catch (Err e) { fail(e); }
2392    }
2393
2394    void checkVersionNum(ref string s) @safe pure // rule 26
2395    {
2396        import std.algorithm.searching : countUntil;
2397        import std.utf : byCodeUnit;
2398
2399        mixin Check!("VersionNum");
2400
2401        s = s[s.byCodeUnit.countUntil('\"') .. $];
2402        if (s is old) fail();
2403    }
2404
2405    void checkDocTypeDecl(ref string s) @safe pure // rule 28
2406    {
2407        mixin Check!("DocTypeDecl");
2408
2409        try
2410        {
2411            checkLiteral("<!DOCTYPE",s);
2412            //
2413            // TO DO -- ensure DOCTYPE is well formed
2414            // (But not yet. That's one of our "future directions")
2415            //
2416            checkEnd(">",s);
2417        }
2418        catch (Err e) { fail(e); }
2419    }
2420
2421    void checkSDDecl(ref string s) @safe pure // rule 32
2422    {
2423        import std.algorithm.searching : startsWith;
2424
2425        mixin Check!("SDDecl");
2426
2427        try
2428        {
2429            checkSpace(s);
2430            checkLiteral("standalone",s);
2431            checkEq(s);
2432        }
2433        catch (Err e) { fail(e); }
2434
2435        int n = 0;
2436             if (s.startsWith("'yes'") || s.startsWith("\"yes\"")) n = 5;
2437        else if (s.startsWith("'no'" ) || s.startsWith("\"no\"" )) n = 4;
2438        else fail("standalone attribute value must be 'yes', \"yes\","~
2439            " 'no' or \"no\"");
2440        s = s[n..$];
2441    }
2442
2443    void checkElement(ref string s) @safe pure // rule 39
2444    {
2445        mixin Check!("Element");
2446
2447        string sname,ename,t;
2448        try { checkTag(s,t,sname); } catch (Err e) { fail(e); }
2449
2450        if (t == "STag")
2451        {
2452            try
2453            {
2454                checkContent(s);
2455                t = s;
2456                checkETag(s,ename);
2457            }
2458            catch (Err e) { fail(e); }
2459
2460            if (sname != ename)
2461            {
2462                s = t;
2463                fail("end tag name \"" ~ ename
2464                    ~ "\" differs from start tag name \""~sname~"\"");
2465            }
2466        }
2467    }
2468
2469    // rules 40 and 44
2470    void checkTag(ref string s, out string type, out string name) @safe pure
2471    {
2472        mixin Check!("Tag");
2473
2474        try
2475        {
2476            type = "STag";
2477            checkLiteral("<",s);
2478            checkName(s,name);
2479            star!(seq!(checkSpace,checkAttribute))(s);
2480            opt!(checkSpace)(s);
2481            if (s.length != 0 && s[0] == '/')
2482            {
2483                s = s[1..$];
2484                type = "ETag";
2485            }
2486            checkLiteral(">",s);
2487        }
2488        catch (Err e) { fail(e); }
2489    }
2490
2491    void checkAttribute(ref string s) @safe pure // rule 41
2492    {
2493        mixin Check!("Attribute");
2494
2495        try
2496        {
2497            string name;
2498            checkName(s,name);
2499            checkEq(s);
2500            checkAttValue(s);
2501        }
2502        catch (Err e) { fail(e); }
2503    }
2504
2505    void checkETag(ref string s, out string name) @safe pure // rule 42
2506    {
2507        mixin Check!("ETag");
2508
2509        try
2510        {
2511            checkLiteral("</",s);
2512            checkName(s,name);
2513            opt!(checkSpace)(s);
2514            checkLiteral(">",s);
2515        }
2516        catch (Err e) { fail(e); }
2517    }
2518
2519    void checkContent(ref string s) @safe pure // rule 43
2520    {
2521        import std.algorithm.searching : startsWith;
2522
2523        mixin Check!("Content");
2524
2525        try
2526        {
2527            while (s.length != 0)
2528            {
2529                old = s;
2530                     if (s.startsWith("&"))        { checkReference(s); }
2531                else if (s.startsWith("<!--"))     { checkComment(s); }
2532                else if (s.startsWith("<?"))       { checkPI(s); }
2533                else if (s.startsWith(cdata)) { checkCDSect(s); }
2534                else if (s.startsWith("</"))       { break; }
2535                else if (s.startsWith("<"))        { checkElement(s); }
2536                else                               { checkCharData(s); }
2537            }
2538        }
2539        catch (Err e) { fail(e); }
2540    }
2541
2542    void checkCharRef(ref string s, out dchar c) @safe pure // rule 66
2543    {
2544        import std.format : format;
2545
2546        mixin Check!("CharRef");
2547
2548        c = 0;
2549        try { checkLiteral("&#",s); } catch (Err e) { fail(e); }
2550        int radix = 10;
2551        if (s.length != 0 && s[0] == 'x')
2552        {
2553            s = s[1..$];
2554            radix = 16;
2555        }
2556        if (s.length == 0) fail("unterminated character reference");
2557        if (s[0] == ';')
2558            fail("character reference must have at least one digit");
2559        while (s.length != 0)
2560        {
2561            immutable char d = s[0];
2562            int n = 0;
2563            switch (d)
2564            {
2565                case 'F','f': ++n;      goto case;
2566                case 'E','e': ++n;      goto case;
2567                case 'D','d': ++n;      goto case;
2568                case 'C','c': ++n;      goto case;
2569                case 'B','b': ++n;      goto case;
2570                case 'A','a': ++n;      goto case;
2571                case '9':     ++n;      goto case;
2572                case '8':     ++n;      goto case;
2573                case '7':     ++n;      goto case;
2574                case '6':     ++n;      goto case;
2575                case '5':     ++n;      goto case;
2576                case '4':     ++n;      goto case;
2577                case '3':     ++n;      goto case;
2578                case '2':     ++n;      goto case;
2579                case '1':     ++n;      goto case;
2580                case '0':     break;
2581                default: n = 100; break;
2582            }
2583            if (n >= radix) break;
2584            c *= radix;
2585            c += n;
2586            s = s[1..$];
2587        }
2588        if (!isChar(c)) fail(format("U+%04X is not a legal character",c));
2589        if (s.length == 0 || s[0] != ';') fail("expected ;");
2590        else s = s[1..$];
2591    }
2592
2593    void checkReference(ref string s) @safe pure // rule 67
2594    {
2595        import std.algorithm.searching : startsWith;
2596
2597        mixin Check!("Reference");
2598
2599        try
2600        {
2601            dchar c;
2602            if (s.startsWith("&#")) checkCharRef(s,c);
2603            else checkEntityRef(s);
2604        }
2605        catch (Err e) { fail(e); }
2606    }
2607
2608    void checkEntityRef(ref string s) @safe pure // rule 68
2609    {
2610        mixin Check!("EntityRef");
2611
2612        try
2613        {
2614            string name;
2615            checkLiteral("&",s);
2616            checkName(s,name);
2617            checkLiteral(";",s);
2618        }
2619        catch (Err e) { fail(e); }
2620    }
2621
2622    void checkEncName(ref string s) @safe pure // rule 81
2623    {
2624        import std.algorithm.searching : countUntil;
2625        import std.ascii : isAlpha;
2626        import std.utf : byCodeUnit;
2627
2628        mixin Check!("EncName");
2629
2630        s = s[s.byCodeUnit.countUntil!(a => !isAlpha(a)) .. $];
2631        if (s is old) fail();
2632        s = s[s.byCodeUnit.countUntil('\"', '\'') .. $];
2633    }
2634
2635    void checkEncodingDecl(ref string s) @safe pure // rule 80
2636    {
2637        mixin Check!("EncodingDecl");
2638
2639        try
2640        {
2641            checkSpace(s);
2642            checkLiteral("encoding",s);
2643            checkEq(s);
2644            quoted!(checkEncName)(s);
2645        }
2646        catch (Err e) { fail(e); }
2647    }
2648
2649    // Helper functions
2650
2651    void checkLiteral(string literal,ref string s) @safe pure
2652    {
2653        import std.string : startsWith;
2654
2655        mixin Check!("Literal");
2656
2657        if (!s.startsWith(literal)) fail("Expected literal \""~literal~"\"");
2658        s = s[literal.length..$];
2659    }
2660
2661    void checkEnd(string end,ref string s) @safe pure
2662    {
2663        import std.string : indexOf;
2664        // Deliberately no mixin Check here.
2665
2666        auto n = s.indexOf(end);
2667        if (n == -1) throw new Err(s,"Unable to find terminating \""~end~"\"");
2668        s = s[n..$];
2669        checkLiteral(end,s);
2670    }
2671
2672    // Metafunctions -- none of these use mixin Check
2673
2674    void opt(alias f)(ref string s)
2675    {
2676        try { f(s); } catch (Err e) {}
2677    }
2678
2679    void plus(alias f)(ref string s)
2680    {
2681        f(s);
2682        star!(f)(s);
2683    }
2684
2685    void star(alias f)(ref string s)
2686    {
2687        while (s.length != 0)
2688        {
2689            try { f(s); }
2690            catch (Err e) { return; }
2691        }
2692    }
2693
2694    void quoted(alias f)(ref string s)
2695    {
2696        import std.string : startsWith;
2697
2698        if (s.startsWith("'"))
2699        {
2700            checkLiteral("'",s);
2701            f(s);
2702            checkLiteral("'",s);
2703        }
2704        else
2705        {
2706            checkLiteral("\"",s);
2707            f(s);
2708            checkLiteral("\"",s);
2709        }
2710    }
2711
    // Sequence: applies f and then g to s. If f throws, g is not called.
    // No rewinding is done here: when g throws, whatever f consumed from s
    // stays consumed.
    void seq(alias f,alias g)(ref string s)
    {
        f(s);
        g(s);
    }
2717}
2718
2719/**
2720 * Check an entire XML document for well-formedness
2721 *
2722 * Params:
2723 *      s = the document to be checked, passed as a string
2724 *
2725 * Throws: CheckException if the document is not well formed
2726 *
2727 * CheckException's toString() method will yield the complete hierarchy of
2728 * parse failure (the XML equivalent of a stack trace), giving the line and
2729 * column number of every failure at every level.
2730 */
2731void check(string s) @safe pure
2732{
2733    try
2734    {
2735        checkChars(s);
2736        checkDocument(s);
2737        if (s.length != 0) throw new Err(s,"Junk found after document");
2738    }
2739    catch (Err e)
2740    {
2741        e.complete(s);
2742        throw e;
2743    }
2744}
2745
// check() must reject a document whose end tag name differs from its start
// tag name (<genre>...</genres> in the second book) and must report that
// mismatch in the exception text.
@system pure unittest
{
    import std.string : indexOf;

    try
    {
        check(q"[<?xml version="1.0"?>
        <catalog>
           <book id="bk101">
              <author>Gambardella, Matthew</author>
              <title>XML Developer's Guide</title>
              <genre>Computer</genre>
              <price>44.95</price>
              <publish_date>2000-10-01</publish_date>
              <description>An in-depth look at creating applications
              with XML.</description>
           </book>
           <book id="bk102">
              <author>Ralls, Kim</author>
              <title>Midnight Rain</title>
              <genre>Fantasy</genres>
              <price>5.95</price>
              <publish_date>2000-12-16</publish_date>
              <description>A former architect battles corporate zombies,
              an evil sorceress, and her own childhood to become queen
              of the world.</description>
           </book>
           <book id="bk103">
              <author>Corets, Eva</author>
              <title>Maeve Ascendant</title>
              <genre>Fantasy</genre>
              <price>5.95</price>
              <publish_date>2000-11-17</publish_date>
              <description>After the collapse of a nanotechnology
              society in England, the young survivors lay the
              foundation for a new society.</description>
           </book>
        </catalog>
        ]");
        // Reaching this line means check() accepted the malformed document.
        assert(false);
    }
    catch (CheckException e)
    {
        auto n = e.toString().indexOf("end tag name \"genres\" differs"~
                                      " from start tag name \"genre\"");
        assert(n != -1);
    }
}
2794
// check() must accept a document that has a comment between child elements.
@system unittest
{
    string s = q"EOS
<?xml version="1.0"?>
<set>
    <one>A</one>
    <!-- comment -->
    <two>B</two>
</set>
EOS";
    try
    {
        check(s);
    }
    catch (CheckException e)
    {
        // Fail the test with the full parse-failure hierarchy as the message.
        assert(0, e.toString());
    }
}
2814
// Attribute values may contain the other kind of quote character; the start
// tag handler must see them unchanged.
@system unittest
{
    string test_xml = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream
                        xmlns:stream="http://etherx.'jabber'.org/streams"
                        xmlns="jabber:'client'" from='jid.pl' id="587a5767"
                        xml:lang="en" version="1.0" attr='a"b"c'>
                        </stream:stream></r>`;

    DocumentParser parser = new DocumentParser(test_xml);
    // Set by the handler, so the test also proves the handler actually ran.
    bool tested = false;
    parser.onStartTag["stream:stream"] = (ElementParser p) {
        assert(p.tag.attr["xmlns"] == "jabber:'client'");
        assert(p.tag.attr["from"] == "jid.pl");
        assert(p.tag.attr["attr"] == "a\"b\"c");
        tested = true;
    };
    parser.parse();
    assert(tested);
}
2834
// The &amp; entity must arrive decoded as '&' both in attribute values and
// in element text.
@system unittest
{
    string s = q"EOS
<?xml version="1.0" encoding="utf-8"?> <Tests>
    <Test thing="What &amp; Up">What &amp; Up Second</Test>
</Tests>
EOS";
    auto xml = new DocumentParser(s);

    xml.onStartTag["Test"] = (ElementParser xml) {
        assert(xml.tag.attr["thing"] == "What & Up");
    };

    xml.onEndTag["Test"] = (in Element e) {
        assert(e.text() == "What & Up Second");
    };
    xml.parse();
}
2853
// Building a Document from an empty-element tag whose attribute contains
// entities and converting it back to text must reproduce the input exactly.
@system unittest
{
    string s = `<tag attr="&quot;value&gt;" />`;
    auto doc = new Document(s);
    assert(doc.toString() == s);
}
2860
/** The base class for exceptions thrown by this module */
class XMLException : Exception
{
    /// Creates the exception with the given message.
    this(string msg) @safe pure
    {
        super(msg);
    }
}
2863
2864// Other exceptions
2865
/// Thrown during Comment constructor
class CommentException : XMLException
{
    // Only code inside this module may create instances.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2869
/// Thrown during CData constructor
class CDataException : XMLException
{
    // Only code inside this module may create instances.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2873
/// Thrown during XMLInstruction constructor
class XIException : XMLException
{
    // Only code inside this module may create instances.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2877
/// Thrown during ProcessingInstruction constructor
class PIException : XMLException
{
    // Only code inside this module may create instances.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2881
/// Thrown during Text constructor
class TextException : XMLException
{
    // Only code inside this module may create instances.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2885
/// Thrown during decode()
class DecodeException : XMLException
{
    // Only code inside this module may create instances.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2889
/// Thrown if comparing with wrong type
class InvalidTypeException : XMLException
{
    // Only code inside this module may create instances.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2893
/// Thrown when parsing for Tags
class TagException : XMLException
{
    // Only code inside this module may create instances.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2897
2898/**
2899 * Thrown during check()
2900 */
2901class CheckException : XMLException
2902{
2903    CheckException err; /// Parent in hierarchy
2904    private string tail;
2905    /**
2906     * Name of production rule which failed to parse,
2907     * or specific error message
2908     */
2909    string msg;
2910    size_t line = 0; /// Line number at which parse failure occurred
2911    size_t column = 0; /// Column number at which parse failure occurred
2912
2913    private this(string tail,string msg,Err err=null) @safe pure
2914    {
2915        super(null);
2916        this.tail = tail;
2917        this.msg = msg;
2918        this.err = err;
2919    }
2920
2921    private void complete(string entire) @safe pure
2922    {
2923        import std.string : count, lastIndexOf;
2924        import std.utf : toUTF32;
2925
2926        string head = entire[0..$-tail.length];
2927        ptrdiff_t n = head.lastIndexOf('\n') + 1;
2928        line = head.count("\n") + 1;
2929        dstring t = toUTF32(head[n..$]);
2930        column = t.length + 1;
2931        if (err !is null) err.complete(entire);
2932    }
2933
2934    override string toString() const @safe pure
2935    {
2936        import std.format : format;
2937
2938        string s;
2939        if (line != 0) s = format("Line %d, column %d: ",line,column);
2940        s ~= msg;
2941        s ~= '\n';
2942        if (err !is null) s = err.toString() ~ s;
2943        return s;
2944    }
2945}
2946
2947private alias Err = CheckException;
2948
2949// Private helper functions
2950
2951private
2952{
2953    inout(T) toType(T)(inout Object o)
2954    {
2955        T t = cast(T)(o);
2956        if (t is null)
2957        {
2958            throw new InvalidTypeException("Attempt to compare a "
2959                ~ T.stringof ~ " with an instance of another type");
2960        }
2961        return t;
2962    }
2963
2964    string chop(ref string s, size_t n) @safe pure nothrow
2965    {
2966        if (n == -1) n = s.length;
2967        string t = s[0 .. n];
2968        s = s[n..$];
2969        return t;
2970    }
2971
2972    bool optc(ref string s, char c) @safe pure nothrow
2973    {
2974        immutable bool b = s.length != 0 && s[0] == c;
2975        if (b) s = s[1..$];
2976        return b;
2977    }
2978
2979    void reqc(ref string s, char c) @safe pure
2980    {
2981        if (s.length == 0 || s[0] != c) throw new TagException("");
2982        s = s[1..$];
2983    }
2984
2985    char requireOneOf(ref string s, string chars) @safe pure
2986    {
2987        import std.string : indexOf;
2988
2989        if (s.length == 0 || indexOf(chars,s[0]) == -1)
2990            throw new TagException("");
2991        immutable char ch = s[0];
2992        s = s[1..$];
2993        return ch;
2994    }
2995
2996    size_t hash(string s,size_t h=0) @trusted nothrow
2997    {
2998        return typeid(s).getHash(&s) + h;
2999    }
3000
    // Definitions from the XML specification
    // NOTE(review): the entries look like consecutive (low, high) pairs of
    // inclusive code point ranges; confirm against the lookup code that
    // reads this table.
    immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
        0x10000,0x10FFFF];
3004    immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
3005        0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
3006        0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
3007        0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
3008        0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
3009        0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
3010        0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
3011        0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
3012        0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
3013        0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
3014        0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
3015        0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
3016        0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
3017        0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
3018        0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
3019        0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
3020        0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
3021        0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
3022        0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
3023        0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
3024        0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
3025        0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
3026        0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
3027        0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
3028        0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
3029        0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
3030        0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
3031        0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
3032        0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
3033        0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
3034        0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
3035        0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
3036        0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
3037        0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
3038        0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
3039        0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
3040        0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
3041        0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
3042        0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
3043        0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
3044        0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
3045    immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
3046    immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
3047        0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
3048        0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
3049        0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
3050        0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
3051        0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
3052        0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
3053        0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
3054        0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
3055        0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
3056        0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
3057        0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
3058        0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
3059        0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
3060        0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
3061        0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
3062        0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
3063        0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
3064        0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
3065        0x3099,0x3099,0x309A,0x309A];
3066    immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
3067        0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
3068        0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
3069        0x0ED9,0x0F20,0x0F29];
3070    immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
3071        0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
3072        0x3035,0x309D,0x309E,0x30FC,0x30FE];
3073
3074    bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure
3075    {
3076        while (table.length != 0)
3077        {
3078            auto m = (table.length >> 1) & ~1;
3079            if (c < table[m])
3080            {
3081                table = table[0 .. m];
3082            }
3083            else if (c > table[m+1])
3084            {
3085                table = table[m+2..$];
3086            }
3087            else return true;
3088        }
3089        return false;
3090    }
3091
3092    string startOf(string s) @safe nothrow pure
3093    {
3094        string r;
3095        foreach (char c;s)
3096        {
3097            r ~= (c < 0x20 || c > 0x7F) ? '.' : c;
3098            if (r.length >= 40) { r ~= "___"; break; }
3099        }
3100        return r;
3101    }
3102
3103    void exit(string s=null)
3104    {
3105        throw new XMLException(s);
3106    }
3107}
3108