1// Written in the D programming language. 2 3/** 4$(RED Warning: This module is considered out-dated and not up to Phobos' 5 current standards. It will remain until we have a suitable replacement, 6 but be aware that it will not remain long term.) 7 8Classes and functions for creating and parsing XML 9 10The basic architecture of this module is that there are standalone functions, 11classes for constructing an XML document from scratch (Tag, Element and 12Document), and also classes for parsing a pre-existing XML file (ElementParser 13and DocumentParser). The parsing classes <i>may</i> be used to build a 14Document, but that is not their primary purpose. The handling capabilities of 15DocumentParser and ElementParser are sufficiently customizable that you can 16make them do pretty much whatever you want. 17 18Example: This example creates a DOM (Document Object Model) tree 19 from an XML file. 20------------------------------------------------------------------------------ 21import std.xml; 22import std.stdio; 23import std.string; 24import std.file; 25 26// books.xml is used in various samples throughout the Microsoft XML Core 27// Services (MSXML) SDK. 28// 29// See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx 30 31void main() 32{ 33 string s = cast(string) std.file.read("books.xml"); 34 35 // Check for well-formedness 36 check(s); 37 38 // Make a DOM tree 39 auto doc = new Document(s); 40 41 // Plain-print it 42 writeln(doc); 43} 44------------------------------------------------------------------------------ 45 46Example: This example does much the same thing, except that the file is 47 deconstructed and reconstructed by hand. This is more work, but the 48 techniques involved offer vastly more power. 
49------------------------------------------------------------------------------ 50import std.xml; 51import std.stdio; 52import std.string; 53 54struct Book 55{ 56 string id; 57 string author; 58 string title; 59 string genre; 60 string price; 61 string pubDate; 62 string description; 63} 64 65void main() 66{ 67 string s = cast(string) std.file.read("books.xml"); 68 69 // Check for well-formedness 70 check(s); 71 72 // Take it apart 73 Book[] books; 74 75 auto xml = new DocumentParser(s); 76 xml.onStartTag["book"] = (ElementParser xml) 77 { 78 Book book; 79 book.id = xml.tag.attr["id"]; 80 81 xml.onEndTag["author"] = (in Element e) { book.author = e.text(); }; 82 xml.onEndTag["title"] = (in Element e) { book.title = e.text(); }; 83 xml.onEndTag["genre"] = (in Element e) { book.genre = e.text(); }; 84 xml.onEndTag["price"] = (in Element e) { book.price = e.text(); }; 85 xml.onEndTag["publish-date"] = (in Element e) { book.pubDate = e.text(); }; 86 xml.onEndTag["description"] = (in Element e) { book.description = e.text(); }; 87 88 xml.parse(); 89 90 books ~= book; 91 }; 92 xml.parse(); 93 94 // Put it back together again; 95 auto doc = new Document(new Tag("catalog")); 96 foreach (book;books) 97 { 98 auto element = new Element("book"); 99 element.tag.attr["id"] = book.id; 100 101 element ~= new Element("author", book.author); 102 element ~= new Element("title", book.title); 103 element ~= new Element("genre", book.genre); 104 element ~= new Element("price", book.price); 105 element ~= new Element("publish-date",book.pubDate); 106 element ~= new Element("description", book.description); 107 108 doc ~= element; 109 } 110 111 // Pretty-print it 112 writefln(join(doc.pretty(3),"\n")); 113} 114------------------------------------------------------------------------------- 115Copyright: Copyright Janice Caron 2008 - 2009. 116License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0). 
Authors:   Janice Caron
Source:    $(PHOBOSSRC std/_xml.d)
*/
/*
         Copyright Janice Caron 2008 - 2009.
Distributed under the Boost Software License, Version 1.0.
   (See accompanying file LICENSE_1_0.txt or copy at
         http://www.boost.org/LICENSE_1_0.txt)
*/
module std.xml;

enum cdata = "<![CDATA[";

/**
 * Tests whether a code point is a legal character under the XML standard.
 *
 * Accepted are tab, line feed, carriage return, everything from U+0020 up to
 * U+D7FF, and everything from U+E000 up to U+10FFFF except code points whose
 * value with bit 0 cleared is 0xFFFE (that is, U+FFFE and U+FFFF).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an XML character, false otherwise
 */
bool isChar(dchar c) @safe @nogc pure nothrow // rule 2
{
    // At or below U+D7FF: printable range, or one of the three permitted
    // control characters.
    if (c <= 0xD7FF)
        return c >= 0x20 || c == 0xA || c == 0x9 || c == 0xD;

    // Surrogate range and values beyond the last code point are rejected.
    if (c < 0xE000 || c > 0x10FFFF)
        return false;

    // Clearing bit 0 folds U+FFFE and U+FFFF onto the same value.
    return (c & 0x1FFFFE) != 0xFFFE;
}

@safe @nogc nothrow pure unittest
{
    // Code points that must be accepted ('J' is 0x4A).
    static immutable uint[] accepted =
        [0x9, 0xA, 0xD, 0x20, 0x4A, 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF];
    // Code points that must be rejected.
    static immutable uint[] rejected =
        [0x8, 0xB, 0xC, 0xE, 0x1F, 0xD800, 0xDFFF, 0xFFFE, 0xFFFF, 0x110000];

    foreach (v; accepted)
        assert( isChar(cast(dchar) v));
    foreach (v; rejected)
        assert(!isChar(cast(dchar) v));

    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isChar(c) == lookup(CharTable, c));
    }
}

/**
 * Tests whether a character counts as whitespace under the XML standard.
 *
 * Exactly four characters qualify: space, tab, line feed and carriage
 * return.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is one of the four XML whitespace characters
 */
bool isSpace(dchar c) @safe @nogc pure nothrow
{
    switch (c)
    {
        case '\u0020', '\u0009', '\u000A', '\u000D':
            return true;
        default:
            return false;
    }
}

/**
 * Tests whether a character is a digit under the XML standard.
 *
 * The ASCII digits are answered directly; every other code point is looked
 * up in DigitTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an XML digit
 */
bool isDigit(dchar c) @safe @nogc pure nothrow
{
    // Fast path for '0' .. '9' before falling back to the table.
    return (c <= 0x0039 && c >= 0x0030) || lookup(DigitTable, c);
}

@safe @nogc nothrow pure unittest
{
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isDigit(c) == lookup(DigitTable, c));
    }
}

/**
 * Tests whether a character is a letter under the XML standard, i.e. it is
 * either an ideographic character or a base character.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an XML letter
 */
bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84
{
    if (isIdeographic(c))
        return true;
    return isBaseChar(c);
}

/**
 * Tests whether a character is ideographic under the XML standard.
 *
 * That is U+3007, the range U+3021 .. U+3029, or the range
 * U+4E00 .. U+9FA5 (all bounds inclusive).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an XML ideographic character
 */
bool isIdeographic(dchar c) @safe @nogc nothrow pure
{
    return c == 0x3007
        || (c >= 0x3021 && c <= 0x3029)
        || (c >= 0x4E00 && c <= 0x9FA5);
}

@safe @nogc nothrow pure unittest
{
    // The single code point plus both ends of each inclusive range.
    static immutable dchar[] boundaries =
        ['\u4E00', '\u9FA5', '\u3007', '\u3021', '\u3029'];
    foreach (dchar ch; boundaries)
        assert(isIdeographic(ch));

    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isIdeographic(c) == lookup(IdeographicTable, c));
    }
}

/**
 * Tests whether a character is a base character under the XML standard.
 * The answer comes from a lookup in BaseCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of the BaseCharTable lookup for c
 */
bool isBaseChar(dchar c) @safe @nogc nothrow pure
{
    return lookup(BaseCharTable, c);
}

/**
 * Tests whether a character is a combining character under the XML
 * standard. The answer comes from a lookup in CombiningCharTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of the CombiningCharTable lookup for c
 */
bool isCombiningChar(dchar c) @safe @nogc nothrow pure
{
    return lookup(CombiningCharTable, c);
}

/**
 * Tests whether a character is an extender under the XML standard.
 * The answer comes from a lookup in ExtenderTable.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of the ExtenderTable lookup for c
 */
bool isExtender(dchar c) @safe @nogc nothrow pure
{
    return lookup(ExtenderTable, c);
}

/**
 * Encodes a string by replacing all characters which need to be escaped with
 * appropriate predefined XML entities.
 *
 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
 * and greater-than), and similarly, decode() unescapes them. These functions
 * are provided for convenience only. You do not need to use them when using
 * the std.xml classes, because then all the encoding and decoding will be done
 * for you automatically.
 *
 * If the string is not modified, the original will be returned.
333 * 334 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 335 * 336 * Params: 337 * s = The string to be encoded 338 * 339 * Returns: The encoded string 340 * 341 * Example: 342 * -------------- 343 * writefln(encode("a > b")); // writes "a > b" 344 * -------------- 345 */ 346S encode(S)(S s) 347{ 348 import std.array : appender; 349 350 string r; 351 size_t lastI; 352 auto result = appender!S(); 353 354 foreach (i, c; s) 355 { 356 switch (c) 357 { 358 case '&': r = "&"; break; 359 case '"': r = """; break; 360 case '\'': r = "'"; break; 361 case '<': r = "<"; break; 362 case '>': r = ">"; break; 363 default: continue; 364 } 365 // Replace with r 366 result.put(s[lastI .. i]); 367 result.put(r); 368 lastI = i + 1; 369 } 370 371 if (!result.data.ptr) return s; 372 result.put(s[lastI .. $]); 373 return result.data; 374} 375 376@safe pure unittest 377{ 378 auto s = "hello"; 379 assert(encode(s) is s); 380 assert(encode("a > b") == "a > b", encode("a > b")); 381 assert(encode("a < b") == "a < b"); 382 assert(encode("don't") == "don't"); 383 assert(encode("\"hi\"") == ""hi"", encode("\"hi\"")); 384 assert(encode("cat & dog") == "cat & dog"); 385} 386 387/** 388 * Mode to use for decoding. 389 * 390 * $(DDOC_ENUM_MEMBERS NONE) Do not decode 391 * $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors 392 * $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error 393 */ 394enum DecodeMode 395{ 396 NONE, LOOSE, STRICT 397} 398 399/** 400 * Decodes a string by unescaping all predefined XML entities. 401 * 402 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than 403 * and greater-than), and similarly, decode() unescapes them. These functions 404 * are provided for convenience only. You do not need to use them when using 405 * the std.xml classes, because then all the encoding and decoding will be done 406 * for you automatically. 
407 * 408 * This function decodes the entities &amp;, &quot;, &apos;, 409 * &lt; and &gt, 410 * as well as decimal and hexadecimal entities such as &#x20AC; 411 * 412 * If the string does not contain an ampersand, the original will be returned. 413 * 414 * Note that the "mode" parameter can be one of DecodeMode.NONE (do not 415 * decode), DecodeMode.LOOSE (decode, but ignore errors), or DecodeMode.STRICT 416 * (decode, and throw a DecodeException in the event of an error). 417 * 418 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 419 * 420 * Params: 421 * s = The string to be decoded 422 * mode = (optional) Mode to use for decoding. (Defaults to LOOSE). 423 * 424 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails 425 * 426 * Returns: The decoded string 427 * 428 * Example: 429 * -------------- 430 * writefln(decode("a > b")); // writes "a > b" 431 * -------------- 432 */ 433string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure 434{ 435 import std.algorithm.searching : startsWith; 436 437 if (mode == DecodeMode.NONE) return s; 438 439 string buffer; 440 foreach (ref i; 0 .. s.length) 441 { 442 char c = s[i]; 443 if (c != '&') 444 { 445 if (buffer.length != 0) buffer ~= c; 446 } 447 else 448 { 449 if (buffer.length == 0) 450 { 451 buffer = s[0 .. i].dup; 452 } 453 if (startsWith(s[i..$],"&#")) 454 { 455 try 456 { 457 dchar d; 458 string t = s[i..$]; 459 checkCharRef(t, d); 460 char[4] temp; 461 import std.utf : encode; 462 buffer ~= temp[0 .. 
encode(temp, d)]; 463 i = s.length - t.length - 1; 464 } 465 catch (Err e) 466 { 467 if (mode == DecodeMode.STRICT) 468 throw new DecodeException("Unescaped &"); 469 buffer ~= '&'; 470 } 471 } 472 else if (startsWith(s[i..$],"&" )) { buffer ~= '&'; i += 4; } 473 else if (startsWith(s[i..$],""")) { buffer ~= '"'; i += 5; } 474 else if (startsWith(s[i..$],"'")) { buffer ~= '\''; i += 5; } 475 else if (startsWith(s[i..$],"<" )) { buffer ~= '<'; i += 3; } 476 else if (startsWith(s[i..$],">" )) { buffer ~= '>'; i += 3; } 477 else 478 { 479 if (mode == DecodeMode.STRICT) 480 throw new DecodeException("Unescaped &"); 481 buffer ~= '&'; 482 } 483 } 484 } 485 return (buffer.length == 0) ? s : buffer; 486} 487 488@safe pure unittest 489{ 490 void assertNot(string s) pure 491 { 492 bool b = false; 493 try { decode(s,DecodeMode.STRICT); } 494 catch (DecodeException e) { b = true; } 495 assert(b,s); 496 } 497 498 // Assert that things that should work, do 499 auto s = "hello"; 500 assert(decode(s, DecodeMode.STRICT) is s); 501 assert(decode("a > b", DecodeMode.STRICT) == "a > b"); 502 assert(decode("a < b", DecodeMode.STRICT) == "a < b"); 503 assert(decode("don't", DecodeMode.STRICT) == "don't"); 504 assert(decode(""hi"", DecodeMode.STRICT) == "\"hi\""); 505 assert(decode("cat & dog", DecodeMode.STRICT) == "cat & dog"); 506 assert(decode("*", DecodeMode.STRICT) == "*"); 507 assert(decode("*", DecodeMode.STRICT) == "*"); 508 assert(decode("cat & dog", DecodeMode.LOOSE) == "cat & dog"); 509 assert(decode("a > b", DecodeMode.LOOSE) == "a > b"); 510 assert(decode("&#;", DecodeMode.LOOSE) == "&#;"); 511 assert(decode("&#x;", DecodeMode.LOOSE) == "&#x;"); 512 assert(decode("G;", DecodeMode.LOOSE) == "G;"); 513 assert(decode("G;", DecodeMode.LOOSE) == "G;"); 514 515 // Assert that things that shouldn't work, don't 516 assertNot("cat & dog"); 517 assertNot("a > b"); 518 assertNot("&#;"); 519 assertNot("&#x;"); 520 assertNot("G;"); 521 assertNot("G;"); 522} 523 524/** 525 * Class 
representing an XML document. 526 * 527 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 528 * 529 */ 530class Document : Element 531{ 532 /** 533 * Contains all text which occurs before the root element. 534 * Defaults to <?xml version="1.0"?> 535 */ 536 string prolog = "<?xml version=\"1.0\"?>"; 537 /** 538 * Contains all text which occurs after the root element. 539 * Defaults to the empty string 540 */ 541 string epilog; 542 543 /** 544 * Constructs a Document by parsing XML text. 545 * 546 * This function creates a complete DOM (Document Object Model) tree. 547 * 548 * The input to this function MUST be valid XML. 549 * This is enforced by DocumentParser's in contract. 550 * 551 * Params: 552 * s = the complete XML text. 553 */ 554 this(string s) 555 in 556 { 557 assert(s.length != 0); 558 } 559 body 560 { 561 auto xml = new DocumentParser(s); 562 string tagString = xml.tag.tagString; 563 564 this(xml.tag); 565 prolog = s[0 .. tagString.ptr - s.ptr]; 566 parse(xml); 567 epilog = *xml.s; 568 } 569 570 /** 571 * Constructs a Document from a Tag. 572 * 573 * Params: 574 * tag = the start tag of the document. 575 */ 576 this(const(Tag) tag) 577 { 578 super(tag); 579 } 580 581 const 582 { 583 /** 584 * Compares two Documents for equality 585 * 586 * Example: 587 * -------------- 588 * Document d1,d2; 589 * if (d1 == d2) { } 590 * -------------- 591 */ 592 override bool opEquals(scope const Object o) const 593 { 594 const doc = toType!(const Document)(o); 595 return prolog == doc.prolog 596 && (cast(const) this).Element.opEquals(cast(const) doc) 597 && epilog == doc.epilog; 598 } 599 600 /** 601 * Compares two Documents 602 * 603 * You should rarely need to call this function. It exists so that 604 * Documents can be used as associative array keys. 
605 * 606 * Example: 607 * -------------- 608 * Document d1,d2; 609 * if (d1 < d2) { } 610 * -------------- 611 */ 612 override int opCmp(scope const Object o) scope const 613 { 614 const doc = toType!(const Document)(o); 615 if (prolog != doc.prolog) 616 return prolog < doc.prolog ? -1 : 1; 617 if (int cmp = this.Element.opCmp(doc)) 618 return cmp; 619 if (epilog != doc.epilog) 620 return epilog < doc.epilog ? -1 : 1; 621 return 0; 622 } 623 624 /** 625 * Returns the hash of a Document 626 * 627 * You should rarely need to call this function. It exists so that 628 * Documents can be used as associative array keys. 629 */ 630 override size_t toHash() scope const @trusted 631 { 632 return hash(prolog, hash(epilog, (cast() this).Element.toHash())); 633 } 634 635 /** 636 * Returns the string representation of a Document. (That is, the 637 * complete XML of a document). 638 */ 639 override string toString() scope const @safe 640 { 641 return prolog ~ super.toString() ~ epilog; 642 } 643 } 644} 645 646@system unittest 647{ 648 // https://issues.dlang.org/show_bug.cgi?id=14966 649 auto xml = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`; 650 651 auto a = new Document(xml); 652 auto b = new Document(xml); 653 assert(a == b); 654 assert(!(a < b)); 655 int[Document] aa; 656 aa[a] = 1; 657 assert(aa[b] == 1); 658 659 b ~= new Element("b"); 660 assert(a < b); 661 assert(b > a); 662} 663 664/** 665 * Class representing an XML element. 
666 * 667 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 668 */ 669class Element : Item 670{ 671 Tag tag; /// The start tag of the element 672 Item[] items; /// The element's items 673 Text[] texts; /// The element's text items 674 CData[] cdatas; /// The element's CData items 675 Comment[] comments; /// The element's comments 676 ProcessingInstruction[] pis; /// The element's processing instructions 677 Element[] elements; /// The element's child elements 678 679 /** 680 * Constructs an Element given a name and a string to be used as a Text 681 * interior. 682 * 683 * Params: 684 * name = the name of the element. 685 * interior = (optional) the string interior. 686 * 687 * Example: 688 * ------------------------------------------------------- 689 * auto element = new Element("title","Serenity") 690 * // constructs the element <title>Serenity</title> 691 * ------------------------------------------------------- 692 */ 693 this(string name, string interior=null) @safe pure 694 { 695 this(new Tag(name)); 696 if (interior.length != 0) opCatAssign(new Text(interior)); 697 } 698 699 /** 700 * Constructs an Element from a Tag. 701 * 702 * Params: 703 * tag_ = the start or empty tag of the element. 704 */ 705 this(const(Tag) tag_) @safe pure 706 { 707 this.tag = new Tag(tag_.name); 708 tag.type = TagType.EMPTY; 709 foreach (k,v;tag_.attr) tag.attr[k] = v; 710 tag.tagString = tag_.tagString; 711 } 712 713 /** 714 * Append a text item to the interior of this element 715 * 716 * Params: 717 * item = the item you wish to append. 718 * 719 * Example: 720 * -------------- 721 * Element element; 722 * element ~= new Text("hello"); 723 * -------------- 724 */ 725 void opCatAssign(Text item) @safe pure 726 { 727 texts ~= item; 728 appendItem(item); 729 } 730 731 /** 732 * Append a CData item to the interior of this element 733 * 734 * Params: 735 * item = the item you wish to append. 
736 * 737 * Example: 738 * -------------- 739 * Element element; 740 * element ~= new CData("hello"); 741 * -------------- 742 */ 743 void opCatAssign(CData item) @safe pure 744 { 745 cdatas ~= item; 746 appendItem(item); 747 } 748 749 /** 750 * Append a comment to the interior of this element 751 * 752 * Params: 753 * item = the item you wish to append. 754 * 755 * Example: 756 * -------------- 757 * Element element; 758 * element ~= new Comment("hello"); 759 * -------------- 760 */ 761 void opCatAssign(Comment item) @safe pure 762 { 763 comments ~= item; 764 appendItem(item); 765 } 766 767 /** 768 * Append a processing instruction to the interior of this element 769 * 770 * Params: 771 * item = the item you wish to append. 772 * 773 * Example: 774 * -------------- 775 * Element element; 776 * element ~= new ProcessingInstruction("hello"); 777 * -------------- 778 */ 779 void opCatAssign(ProcessingInstruction item) @safe pure 780 { 781 pis ~= item; 782 appendItem(item); 783 } 784 785 /** 786 * Append a complete element to the interior of this element 787 * 788 * Params: 789 * item = the item you wish to append. 
790 * 791 * Example: 792 * -------------- 793 * Element element; 794 * Element other = new Element("br"); 795 * element ~= other; 796 * // appends element representing <br /> 797 * -------------- 798 */ 799 void opCatAssign(Element item) @safe pure 800 { 801 elements ~= item; 802 appendItem(item); 803 } 804 805 private void appendItem(Item item) @safe pure 806 { 807 items ~= item; 808 if (tag.type == TagType.EMPTY && !item.isEmptyXML) 809 tag.type = TagType.START; 810 } 811 812 private void parse(ElementParser xml) 813 { 814 xml.onText = (string s) { opCatAssign(new Text(s)); }; 815 xml.onCData = (string s) { opCatAssign(new CData(s)); }; 816 xml.onComment = (string s) { opCatAssign(new Comment(s)); }; 817 xml.onPI = (string s) { opCatAssign(new ProcessingInstruction(s)); }; 818 819 xml.onStartTag[null] = (ElementParser xml) 820 { 821 auto e = new Element(xml.tag); 822 e.parse(xml); 823 opCatAssign(e); 824 }; 825 826 xml.parse(); 827 } 828 829 /** 830 * Compares two Elements for equality 831 * 832 * Example: 833 * -------------- 834 * Element e1,e2; 835 * if (e1 == e2) { } 836 * -------------- 837 */ 838 override bool opEquals(scope const Object o) const 839 { 840 const element = toType!(const Element)(o); 841 immutable len = items.length; 842 if (len != element.items.length) return false; 843 foreach (i; 0 .. len) 844 { 845 if (!items[i].opEquals(element.items[i])) return false; 846 } 847 return true; 848 } 849 850 /** 851 * Compares two Elements 852 * 853 * You should rarely need to call this function. It exists so that Elements 854 * can be used as associative array keys. 
855 * 856 * Example: 857 * -------------- 858 * Element e1,e2; 859 * if (e1 < e2) { } 860 * -------------- 861 */ 862 override int opCmp(scope const Object o) @safe const 863 { 864 const element = toType!(const Element)(o); 865 for (uint i=0; ; ++i) 866 { 867 if (i == items.length && i == element.items.length) return 0; 868 if (i == items.length) return -1; 869 if (i == element.items.length) return 1; 870 if (!items[i].opEquals(element.items[i])) 871 return items[i].opCmp(element.items[i]); 872 } 873 } 874 875 /** 876 * Returns the hash of an Element 877 * 878 * You should rarely need to call this function. It exists so that Elements 879 * can be used as associative array keys. 880 */ 881 override size_t toHash() scope const @safe 882 { 883 size_t hash = tag.toHash(); 884 foreach (item;items) hash += item.toHash(); 885 return hash; 886 } 887 888 const 889 { 890 /** 891 * Returns the decoded interior of an element. 892 * 893 * The element is assumed to contain text <i>only</i>. So, for 894 * example, given XML such as "<title>Good &amp; 895 * Bad</title>", will return "Good & Bad". 896 * 897 * Params: 898 * mode = (optional) Mode to use for decoding. (Defaults to LOOSE). 899 * 900 * Throws: DecodeException if decode fails 901 */ 902 string text(DecodeMode mode=DecodeMode.LOOSE) 903 { 904 string buffer; 905 foreach (item;items) 906 { 907 Text t = cast(Text) item; 908 if (t is null) throw new DecodeException(item.toString()); 909 buffer ~= decode(t.toString(),mode); 910 } 911 return buffer; 912 } 913 914 /** 915 * Returns an indented string representation of this item 916 * 917 * Params: 918 * indent = (optional) number of spaces by which to indent this 919 * element. Defaults to 2. 
920 */ 921 override string[] pretty(uint indent=2) scope 922 { 923 import std.algorithm.searching : count; 924 import std.string : rightJustify; 925 926 if (isEmptyXML) return [ tag.toEmptyString() ]; 927 928 if (items.length == 1) 929 { 930 auto t = cast(const(Text))(items[0]); 931 if (t !is null) 932 { 933 return [tag.toStartString() ~ t.toString() ~ tag.toEndString()]; 934 } 935 } 936 937 string[] a = [ tag.toStartString() ]; 938 foreach (item;items) 939 { 940 string[] b = item.pretty(indent); 941 foreach (s;b) 942 { 943 a ~= rightJustify(s,count(s) + indent); 944 } 945 } 946 a ~= tag.toEndString(); 947 return a; 948 } 949 950 /** 951 * Returns the string representation of an Element 952 * 953 * Example: 954 * -------------- 955 * auto element = new Element("br"); 956 * writefln(element.toString()); // writes "<br />" 957 * -------------- 958 */ 959 override string toString() scope @safe 960 { 961 if (isEmptyXML) return tag.toEmptyString(); 962 963 string buffer = tag.toStartString(); 964 foreach (item;items) { buffer ~= item.toString(); } 965 buffer ~= tag.toEndString(); 966 return buffer; 967 } 968 969 override @property @safe pure @nogc nothrow bool isEmptyXML() const scope { return items.length == 0; } 970 } 971} 972 973/** 974 * Tag types. 975 * 976 * $(DDOC_ENUM_MEMBERS START) Used for start tags 977 * $(DDOC_ENUM_MEMBERS END) Used for end tags 978 * $(DDOC_ENUM_MEMBERS EMPTY) Used for empty tags 979 * 980 */ 981enum TagType { START, END, EMPTY } 982 983/** 984 * Class representing an XML tag. 
985 * 986 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0) 987 * 988 * The class invariant guarantees 989 * <ul> 990 * <li> that $(B type) is a valid enum TagType value</li> 991 * <li> that $(B name) consists of valid characters</li> 992 * <li> that each attribute name consists of valid characters</li> 993 * </ul> 994 */ 995class Tag 996{ 997 TagType type = TagType.START; /// Type of tag 998 string name; /// Tag name 999 string[string] attr; /// Associative array of attributes 1000 private string tagString; 1001 1002 invariant() 1003 { 1004 string s; 1005 string t; 1006 1007 assert(type == TagType.START 1008 || type == TagType.END 1009 || type == TagType.EMPTY); 1010 1011 s = name; 1012 try { checkName(s,t); } 1013 catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); } 1014 1015 foreach (k,v;attr) 1016 { 1017 s = k; 1018 try { checkName(s,t); } 1019 catch (Err e) 1020 { assert(false,"Invalid atrribute name:" ~ e.toString()); } 1021 } 1022 } 1023 1024 /** 1025 * Constructs an instance of Tag with a specified name and type 1026 * 1027 * The constructor does not initialize the attributes. To initialize the 1028 * attributes, you access the $(B attr) member variable. 1029 * 1030 * Params: 1031 * name = the Tag's name 1032 * type = (optional) the Tag's type. If omitted, defaults to 1033 * TagType.START. 1034 * 1035 * Example: 1036 * -------------- 1037 * auto tag = new Tag("img",Tag.EMPTY); 1038 * tag.attr["src"] = "http://example.com/example.jpg"; 1039 * -------------- 1040 */ 1041 this(string name, TagType type=TagType.START) @safe pure 1042 { 1043 this.name = name; 1044 this.type = type; 1045 } 1046 1047 /* Private constructor (so don't ddoc this!) 1048 * 1049 * Constructs a Tag by parsing the string representation, e.g. "<html>". 1050 * 1051 * The string is passed by reference, and is advanced over all characters 1052 * consumed. 
1053 * 1054 * The second parameter is a dummy parameter only, required solely to 1055 * distinguish this constructor from the public one. 1056 */ 1057 private this(ref string s, bool dummy) @safe pure 1058 { 1059 import std.algorithm.searching : countUntil; 1060 import std.ascii : isWhite; 1061 import std.utf : byCodeUnit; 1062 1063 tagString = s; 1064 try 1065 { 1066 reqc(s,'<'); 1067 if (optc(s,'/')) type = TagType.END; 1068 ptrdiff_t i = s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f"); 1069 name = s[0 .. i]; 1070 s = s[i .. $]; 1071 1072 i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 1073 s = s[i .. $]; 1074 1075 while (s.length > 0 && s[0] != '>' && s[0] != '/') 1076 { 1077 i = s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f"); 1078 string key = s[0 .. i]; 1079 s = s[i .. $]; 1080 1081 i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 1082 s = s[i .. $]; 1083 reqc(s,'='); 1084 i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 1085 s = s[i .. $]; 1086 1087 immutable char quote = requireOneOf(s,"'\""); 1088 i = s.byCodeUnit.countUntil(quote); 1089 string val = decode(s[0 .. i], DecodeMode.LOOSE); 1090 s = s[i .. $]; 1091 reqc(s,quote); 1092 1093 i = s.byCodeUnit.countUntil!(a => !isWhite(a)); 1094 s = s[i .. $]; 1095 attr[key] = val; 1096 } 1097 if (optc(s,'/')) 1098 { 1099 if (type == TagType.END) throw new TagException(""); 1100 type = TagType.EMPTY; 1101 } 1102 reqc(s,'>'); 1103 tagString.length = tagString.length - s.length; 1104 } 1105 catch (XMLException e) 1106 { 1107 tagString.length = tagString.length - s.length; 1108 throw new TagException(tagString); 1109 } 1110 } 1111 1112 const 1113 { 1114 /** 1115 * Compares two Tags for equality 1116 * 1117 * You should rarely need to call this function. It exists so that Tags 1118 * can be used as associative array keys. 
1119 * 1120 * Example: 1121 * -------------- 1122 * Tag tag1,tag2 1123 * if (tag1 == tag2) { } 1124 * -------------- 1125 */ 1126 override bool opEquals(scope Object o) 1127 { 1128 const tag = toType!(const Tag)(o); 1129 return 1130 (name != tag.name) ? false : ( 1131 (attr != tag.attr) ? false : ( 1132 (type != tag.type) ? false : ( 1133 true ))); 1134 } 1135 1136 /** 1137 * Compares two Tags 1138 * 1139 * Example: 1140 * -------------- 1141 * Tag tag1,tag2 1142 * if (tag1 < tag2) { } 1143 * -------------- 1144 */ 1145 override int opCmp(Object o) 1146 { 1147 const tag = toType!(const Tag)(o); 1148 // Note that attr is an AA, so the comparison is nonsensical (bug 10381) 1149 return 1150 ((name != tag.name) ? ( name < tag.name ? -1 : 1 ) : 1151 ((attr != tag.attr) ? ( cast(void *) attr < cast(void*) tag.attr ? -1 : 1 ) : 1152 ((type != tag.type) ? ( type < tag.type ? -1 : 1 ) : 1153 0 ))); 1154 } 1155 1156 /** 1157 * Returns the hash of a Tag 1158 * 1159 * You should rarely need to call this function. It exists so that Tags 1160 * can be used as associative array keys. 1161 */ 1162 override size_t toHash() 1163 { 1164 return typeid(name).getHash(&name); 1165 } 1166 1167 /** 1168 * Returns the string representation of a Tag 1169 * 1170 * Example: 1171 * -------------- 1172 * auto tag = new Tag("book",TagType.START); 1173 * writefln(tag.toString()); // writes "<book>" 1174 * -------------- 1175 */ 1176 override string toString() @safe 1177 { 1178 if (isEmpty) return toEmptyString(); 1179 return (isEnd) ? 
toEndString() : toStartString(); 1180 } 1181 1182 private 1183 { 1184 string toNonEndString() @safe 1185 { 1186 import std.format : format; 1187 1188 string s = "<" ~ name; 1189 foreach (key,val;attr) 1190 s ~= format(" %s=\"%s\"",key,encode(val)); 1191 return s; 1192 } 1193 1194 string toStartString() @safe { return toNonEndString() ~ ">"; } 1195 1196 string toEndString() @safe { return "</" ~ name ~ ">"; } 1197 1198 string toEmptyString() @safe { return toNonEndString() ~ " />"; } 1199 } 1200 1201 /** 1202 * Returns true if the Tag is a start tag 1203 * 1204 * Example: 1205 * -------------- 1206 * if (tag.isStart) { } 1207 * -------------- 1208 */ 1209 @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; } 1210 1211 /** 1212 * Returns true if the Tag is an end tag 1213 * 1214 * Example: 1215 * -------------- 1216 * if (tag.isEnd) { } 1217 * -------------- 1218 */ 1219 @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END; } 1220 1221 /** 1222 * Returns true if the Tag is an empty tag 1223 * 1224 * Example: 1225 * -------------- 1226 * if (tag.isEmpty) { } 1227 * -------------- 1228 */ 1229 @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; } 1230 } 1231} 1232 1233/** 1234 * Class representing a comment 1235 */ 1236class Comment : Item 1237{ 1238 private string content; 1239 1240 /** 1241 * Construct a comment 1242 * 1243 * Params: 1244 * content = the body of the comment 1245 * 1246 * Throws: CommentException if the comment body is illegal (contains "--" 1247 * or exactly equals "-") 1248 * 1249 * Example: 1250 * -------------- 1251 * auto item = new Comment("This is a comment"); 1252 * // constructs <!--This is a comment--> 1253 * -------------- 1254 */ 1255 this(string content) @safe pure 1256 { 1257 import std.string : indexOf; 1258 1259 if (content == "-" || content.indexOf("--") != -1) 1260 throw new CommentException(content); 1261 this.content = content; 1262 } 1263 1264 /** 
* Compares two comments for equality
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two comments
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     *
     * Returns: a negative value if this comment's body sorts before the
     * other's, a positive value if it sorts after, and 0 if the bodies are
     * equal. An Item that is not a Comment also compares as 0.
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;

        // A non-Comment keeps the result this function has always produced.
        if (t is null) return 0;

        // The previous expression `t !is null && (...)` collapsed the
        // three-way result to a bool, so "less than" was reported as 1.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a Comment
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
*/
    override size_t toHash() scope const nothrow
    {
        return hash(content);
    }

    /**
     * Returns the comment rendered as XML markup
     */
    override string toString() scope const @safe pure nothrow
    {
        return "<!--" ~ content ~ "-->";
    }

    /// Returns false always
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const
    {
        return false;
    }
}

@safe unittest // issue 16241
{
    import std.exception : assertThrown;

    // A body without "--" is accepted and stored unchanged...
    auto accepted = new Comment("==");
    assert(accepted.content == "==");

    // ...while a body containing "--" is rejected.
    assertThrown!CommentException(new Comment("--"));
}

/**
 * Class representing a Character Data section
 */
class CData : Item
{
    private string content;

    /**
     * Construct a character data section
     *
     * Params:
     *      content = the body of the character data segment
     *
     * Throws: CDataException if the segment body is illegal (contains "]]>")
     *
     * Example:
     * --------------
     * auto item = new CData("<b>hello</b>");
     *    // constructs <![CDATA[<b>hello</b>]]>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        const terminatorAt = content.indexOf("]]>");
        if (terminatorAt != -1)
            throw new CDataException(content);
        this.content = content;
    }

    /**
     * Compares two CDatas for equality
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const other = cast(const CData) item;
        if (other is null)
            return false;
        return content == other.content;
    }

    /**
     * Compares two CDatas
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
*
     * Returns: a negative value if this section's body sorts before the
     * other's, a positive value if it sorts after, and 0 if the bodies are
     * equal. An Item that is not a CData also compares as 0.
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;

        // A non-CData keeps the result this function has always produced.
        if (t is null) return 0;

        // The previous expression `t !is null && (...)` collapsed the
        // three-way result to a bool, so "less than" was reported as 1.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a CData
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this CData section
     */
    override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}

/**
 * Class representing a text (aka Parsed Character Data) section
 */
class Text : Item
{
    private string content;

    /**
     * Construct a text (aka PCData) section
     *
     * Params:
     *      content = the text.
This function encodes the text before
     *                insertion, so it is safe to insert any text
     *
     * Example:
     * --------------
     * auto item = new Text("a < b");
     *    // constructs a &lt; b
     * --------------
     */
    this(string content) @safe pure
    {
        // Stored in encoded form; toString() returns it as-is.
        this.content = encode(content);
    }

    /**
     * Compares two text sections for equality
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two text sections
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     *
     * Returns: a negative value if this section's (encoded) text sorts before
     * the other's, a positive value if it sorts after, and 0 if they are
     * equal. An Item that is not a Text also compares as 0.
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;

        // A non-Text keeps the result this function has always produced.
        if (t is null) return 0;

        // The previous expression `t !is null && (...)` collapsed the
        // three-way result to a bool, so "less than" was reported as 1.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a text section
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
*/
    override size_t toHash() scope const nothrow
    {
        return hash(content);
    }

    /**
     * Returns the stored text of this Text section
     */
    override string toString() scope const @safe @nogc pure nothrow
    {
        return content;
    }

    /**
     * Returns true if the content is the empty string
     */
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const
    {
        return content.length == 0;
    }
}

/**
 * Class representing an XML Instruction section
 */
class XMLInstruction : Item
{
    private string content;

    /**
     * Construct an XML Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: XIException if the segment body is illegal (contains ">")
     *
     * Example:
     * --------------
     * auto item = new XMLInstruction("ATTLIST");
     *    // constructs <!ATTLIST>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        const closerAt = content.indexOf(">");
        if (closerAt != -1)
            throw new XIException(content);
        this.content = content;
    }

    /**
     * Compares two XML instructions for equality
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const other = cast(const XMLInstruction) item;
        if (other is null)
            return false;
        return content == other.content;
    }

    /**
     * Compares two XML instructions
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
*
     * Returns: a negative value if this instruction's body sorts before the
     * other's, a positive value if it sorts after, and 0 if the bodies are
     * equal. An Item that is not an XMLInstruction also compares as 0.
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;

        // A non-XMLInstruction keeps the result this function has always
        // produced.
        if (t is null) return 0;

        // The previous expression `t !is null && (...)` collapsed the
        // three-way result to a bool, so "less than" was reported as 1.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of an XMLInstruction
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this XmlInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}

/**
 * Class representing a Processing Instruction section
 */
class ProcessingInstruction : Item
{
    private string content;

    /**
     * Construct a Processing Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: PIException if the segment body is illegal (contains "?>")
     *
     * Example:
     * --------------
     * auto item = new ProcessingInstruction("php");
     *    // constructs <?php?>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("?>") != -1) throw new PIException(content);
        this.content = content;
    }

    /**
     * Compares two processing instructions for equality
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two processing instructions
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     *
     * Returns: a negative value if this instruction's body sorts before the
     * other's, a positive value if it sorts after, and 0 if the bodies are
     * equal. An Item that is not a ProcessingInstruction also compares as 0.
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;

        // A non-ProcessingInstruction keeps the result this function has
        // always produced.
        if (t is null) return 0;

        // The previous expression `t !is null && (...)` collapsed the
        // three-way result to a bool, so "less than" was reported as 1.
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a ProcessingInstruction
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this ProcessingInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<?" ~ content ~ "?>"; }

    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } /// Returns false always
}

/**
 * Abstract base class for XML items
 */
abstract class Item
{
    /// Compares with another Item of same type for equality
    abstract override bool opEquals(scope const Object o) @safe const;

    /// Compares with another Item of same type
    abstract override int opCmp(scope const Object o) @safe const;

    /// Returns the hash of this item
    abstract override size_t toHash() @safe scope const;

    /// Returns a string representation of this item
    abstract override string toString() @safe scope const;

    /**
     * Returns an indented string representation of this item
     *
     * This base implementation strips toString() and returns it as a single
     * line, or returns an empty array when nothing is left after stripping.
     *
     * Params:
     *      indent = number of spaces by which to indent child elements
     *               (not used by this base implementation)
     */
    string[] pretty(uint indent) @safe scope const
    {
        import std.string : strip;
        string s = strip(toString());
        return s.length == 0 ? [] : [ s ];
    }

    /// Returns true if the item represents empty XML text
    abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const;
}

/**
 * Class for parsing an XML Document.
 *
 * This is a subclass of ElementParser. Most of the useful functions are
 * documented there.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Bugs:
 *      Currently only supports UTF documents.
 *
 *      If there is an encoding attribute in the prolog, it is ignored.
 *
 */
class DocumentParser : ElementParser
{
    string xmlText;

    /**
     * Constructs a DocumentParser.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by the function's in contract.
*
     * Params:
     *      xmlText_ = the entire XML document as text
     *
     */
    this(string xmlText_)
    in
    {
        assert(xmlText_.length != 0);
        try
        {
            // Run the well-formedness checker over the whole input
            check(xmlText_);
        }
        catch (CheckException e)
        {
            // Surface the checker's explanation in the contract failure
            assert(false, "\n" ~ e.toString());
        }
    }
    body
    {
        // The inherited cursor `s` must point at our text before the base
        // constructor runs, because that constructor reads through it.
        xmlText = xmlText_;
        s = &xmlText;
        super();    // Initialize everything
        parse();    // Parse through the root tag (but not beyond)
    }
}

@system unittest
{
    auto document = new Document("<root><child><grandchild/></child></root>");
    assert(document.elements.length == 1);
    assert(document.elements[0].tag.name == "child");
    assert(document.items == document.elements);
}

/**
 * Class for parsing an XML element.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Note that you cannot construct instances of this class directly. You can
 * construct a DocumentParser (which is a subclass of ElementParser), but
 * otherwise, Instances of ElementParser will be created for you by the
 * library, and passed your way via onStartTag handlers.
*
 */
class ElementParser
{
    alias Handler = void delegate(string);
    alias ElementHandler = void delegate(in Element element);
    alias ParserHandler = void delegate(ElementParser parser);

    private
    {
        Tag tag_;
        string elementStart;
        string* s;

        Handler commentHandler = null;
        Handler cdataHandler = null;
        Handler xiHandler = null;
        Handler piHandler = null;
        Handler rawTextHandler = null;
        Handler textHandler = null;

        // Private constructor for start tags: shares the parent's cursor
        // and takes over the parent's current tag.
        this(ElementParser parent) @safe @nogc pure nothrow
        {
            // The cursor has to be in place before this() reads through it.
            s = parent.s;
            this();
            tag_ = parent.tag_;
        }

        // Private constructor for empty tags: uses the given tag and the
        // caller-supplied cursor.
        this(Tag tag, string* t) @safe @nogc pure nothrow
        {
            // The cursor has to be in place before this() reads through it.
            s = t;
            this();
            tag_ = tag;
        }
    }

    /**
     * The Tag at the start of the element being parsed. You can read this to
     * determine the tag's name and attributes.
     */
    @property @safe @nogc pure nothrow const(Tag) tag() const
    {
        return tag_;
    }

    /**
     * Register a handler which will be called whenever a start tag is
     * encountered which matches the specified name. You can also pass null as
     * the name, in which case the handler will be called for any unmatched
     * start tag.
*
     * Example:
     * --------------
     * // Call this function whenever a <podcast> start tag is encountered
     * onStartTag["podcast"] = (ElementParser xml)
     * {
     *     // Your code here
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     *
     * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
     * // start tag is encountered
     * onStartTag["episode"] = &myEpisodeStartHandler;
     *
     * // call delegate dg for all other start tags
     * onStartTag[null] = dg;
     * --------------
     *
     * This library will supply your function with a new instance of
     * ElementParser, which may be used to parse inside the element whose
     * start tag was just found, or to identify the tag attributes of the
     * element, etc.
     *
     * Note that your function will be called for both start tags and empty
     * tags. That is, we make no distinction between <br></br>
     * and <br/>.
     */
    ParserHandler[string] onStartTag;

    /**
     * Register a handler which will be called whenever an end tag is
     * encountered which matches the specified name. You can also pass null as
     * the name, in which case the handler will be called for any unmatched
     * end tag.
*
     * Example:
     * --------------
     * // Call this function whenever a </podcast> end tag is encountered
     * onEndTag["podcast"] = (in Element e)
     * {
     *     // Your code here
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     *
     * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
     * // end tag is encountered
     * onEndTag["episode"] = &myEpisodeEndHandler;
     *
     * // call delegate dg for all other end tags
     * onEndTag[null] = dg;
     * --------------
     *
     * Note that your function will be called for both end tags and empty
     * tags. That is, we make no distinction between <br></br>
     * and <br/>.
     */
    ElementHandler[string] onEndTag;

    // Records where this parser started, by copying the text the cursor `s`
    // currently points at. `s` must already be set when this runs.
    protected this() @safe @nogc pure nothrow
    {
        elementStart = *s;
    }

    /**
     * Register a handler which will be called whenever text is encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever text is encountered
     * onText = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s will have been decoded by the time you see
     *     // it, and so may contain any character.
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onText(Handler handler) { textHandler = handler; }

    /**
     * Register an alternative handler which will be called whenever text
     * is encountered. This differs from onText in that onText will decode
     * the text, whereas onTextRaw will not. This allows you to make design
     * choices, since onText will be more accurate, but slower, while
     * onTextRaw will be faster, but less accurate.
Of course, you can
     * still call decode() within your handler, if you want, but you'd
     * probably want to use onTextRaw only in circumstances where you
     * know that decoding is unnecessary.
     *
     * If both a raw handler and an onText handler are registered, parse()
     * calls only the raw handler.
     *
     * Example:
     * --------------
     * // Call this function whenever text is encountered
     * onTextRaw = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s will NOT have been decoded.
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @safe @nogc pure nothrow void onTextRaw(Handler handler) { rawTextHandler = handler; }

    /**
     * Register a handler which will be called whenever a character data
     * segment is encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever a CData section is encountered
     * onCData = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <![CDATA[
     *     // nor closing ]]>
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onCData(Handler handler) { cdataHandler = handler; }

    /**
     * Register a handler which will be called whenever a comment is
     * encountered.
*
     * Example:
     * --------------
     * // Call this function whenever a comment is encountered
     * onComment = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <!-- nor
     *     // closing -->
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onComment(Handler handler) { commentHandler = handler; }

    /**
     * Register a handler which will be called whenever a processing
     * instruction is encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever a processing instruction is encountered
     * onPI = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <? nor
     *     // closing ?>
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onPI(Handler handler) { piHandler = handler; }

    /**
     * Register a handler which will be called whenever an XML instruction is
     * encountered.
     *
     * Example:
     * --------------
     * // Call this function whenever an XML instruction is encountered
     * // (Note: XML instructions may only occur preceding the root tag of a
     * // document).
     * onXI = (string s)
     * {
     *     // Your code here
     *
     *     // The passed parameter s does not include the opening <! nor
     *     // closing >
     *     //
     *     // This is a closure, so code here may reference
     *     // variables which are outside of this scope
     * };
     * --------------
     */
    @property @safe @nogc pure nothrow void onXI(Handler handler) { xiHandler = handler; }

    /**
     * Parse an XML element.
*
     * Parsing will continue until the end of the current element. Any items
     * encountered for which a handler has been registered will invoke that
     * handler.
     *
     * Throws: various kinds of XMLException
     */
    void parse()
    {
        import std.algorithm.searching : startsWith;
        import std.string : indexOf;

        string t;
        const Tag root = tag_;
        Tag[string] startTags;
        if (tag_ !is null) startTags[tag_.name] = tag_;

        while (s.length != 0)
        {
            if (startsWith(*s,"<!--"))
            {
                // Comment: hand over the text between <!-- and -->
                chop(*s,4);
                t = chop(*s,indexOf(*s,"-->"));
                if (commentHandler.funcptr !is null) commentHandler(t);
                chop(*s,3);
            }
            else if (startsWith(*s,"<![CDATA["))
            {
                // CDATA section: hand over the text between the delimiters
                chop(*s,9);
                t = chop(*s,indexOf(*s,"]]>"));
                if (cdataHandler.funcptr !is null) cdataHandler(t);
                chop(*s,3);
            }
            else if (startsWith(*s,"<!"))
            {
                // XML instruction: hand over the text between <! and >
                chop(*s,2);
                t = chop(*s,indexOf(*s,">"));
                if (xiHandler.funcptr !is null) xiHandler(t);
                chop(*s,1);
            }
            else if (startsWith(*s,"<?"))
            {
                // Processing instruction: hand over the text between <? and ?>
                chop(*s,2);
                t = chop(*s,indexOf(*s,"?>"));
                if (piHandler.funcptr !is null) piHandler(t);
                chop(*s,2);
            }
            else if (startsWith(*s,"<"))
            {
                tag_ = new Tag(*s,true);
                if (root is null)
                    return; // Return to constructor of derived class

                if (tag_.isStart)
                {
                    startTags[tag_.name] = tag_;

                    auto parser = new ElementParser(this);

                    // Prefer the handler registered for this name, else the
                    // catch-all registered under null.
                    auto handler = tag_.name in onStartTag;
                    if (handler is null) handler = null in onStartTag;
                    if (handler !is null) (*handler)(parser);
                }
                else if (tag_.isEnd)
                {
                    const startTag = startTags[tag_.name];
                    string text;

                    if (startTag.tagString.length == 0)
                        assert(0);

                    // The element's text is whatever lies between the end of
                    // the start tag's source text and the start of this end
                    // tag's source text.
                    immutable(char)* p = startTag.tagString.ptr
                        + startTag.tagString.length;
                    immutable(char)* q = &tag_.tagString[0];
                    text = decode(p[0..(q-p)], DecodeMode.LOOSE);

                    auto element = new Element(startTag);
                    if (text.length != 0) element ~= new Text(text);

                    auto handler = tag_.name in onEndTag;
                    if (handler is null) handler = null in onEndTag;
                    if (handler !is null) (*handler)(element);

                    if (tag_.name == root.name) return;
                }
                else if (tag_.isEmpty)
                {
                    Tag startTag = new Tag(tag_.name);

                    // FIX by hed010gy, for bug 2979
                    // http://d.puremagic.com/issues/show_bug.cgi?id=2979
                    if (tag_.attr.length > 0)
                          foreach (tn,tv; tag_.attr) startTag.attr[tn]=tv;
                    // END FIX

                    // Handle the pretend start tag
                    string s2;
                    auto parser = new ElementParser(startTag,&s2);
                    auto handler1 = startTag.name in onStartTag;
                    if (handler1 is null) handler1 = null in onStartTag;
                    if (handler1 !is null) (*handler1)(parser);

                    // Handle the pretend end tag
                    auto element = new Element(startTag);
                    auto handler2 = tag_.name in onEndTag;
                    if (handler2 is null) handler2 = null in onEndTag;
                    if (handler2 !is null) (*handler2)(element);
                }
            }
            else
            {
                // Plain text up to the next '<'. The raw handler, when set,
                // takes precedence over the decoding handler.
                t = chop(*s,indexOf(*s,"<"));
                if (rawTextHandler.funcptr !is null)
                    rawTextHandler(t);
                else if (textHandler.funcptr !is null)
                    textHandler(decode(t,DecodeMode.LOOSE));
            }
        }
    }

    /**
     * Returns that part of the element which has already been parsed
     */
    override string toString() const @nogc @safe pure nothrow
    {
        assert(elementStart.length >= s.length);
        const consumed = elementStart.length - s.length;
        return elementStart[0 .. consumed];
    }

}

private
{
    // Mixed into every checkXxx function. Remembers the input position on
    // entry in `old`, and provides fail() overloads that rewind `s` to that
    // position before throwing an Err labelled with `msg`.
    template Check(string msg)
    {
        string old = s;

        void fail() @safe pure
        {
            s = old;
            throw new Err(s,msg);
        }

        void fail(Err e) @safe pure
        {
            s = old;
            throw new Err(s,msg,e);
        }

        void fail(string msg2) @safe pure
        {
            // The inner Err is built before the rewind, so it carries the
            // position at which the problem was detected.
            fail(new Err(s,msg2));
        }
    }

    void checkMisc(ref string s) @safe pure // rule 27
    {
        import std.algorithm.searching : startsWith;

        mixin Check!("Misc");

        try
        {
            if (s.startsWith("<!--"))
                checkComment(s);
            else if (s.startsWith("<?"))
                checkPI(s);
            else
                checkSpace(s);
        }
        catch (Err e) { fail(e); }
    }

    void checkDocument(ref string s) @safe pure // rule 1
    {
        mixin Check!("Document");
        try
        {
            checkProlog(s);
            checkElement(s);
            star!(checkMisc)(s);
        }
        catch (Err e) { fail(e); }
    }

    void checkChars(ref string s) @safe pure // rule 2
    {
        // TO DO - Fix std.utf stride and decode functions, then use those
        // instead
        import std.format : format;

        mixin Check!("Chars");

        dchar c;
        ptrdiff_t n = -1;
        // 'i' must not be smaller than size_t because size_t is used internally in
        // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
foreach (size_t i, dchar d; s)
        {
            if (!isChar(d))
            {
                c = d;
                n = i;
                break;
            }
        }
        if (n != -1)
        {
            // Advance to the offending character so the error reports it.
            s = s[n..$];
            fail(format("invalid character: U+%04X",c));
        }
    }

    // Consumes one or more leading whitespace characters; fails when the
    // input does not start with whitespace.
    void checkSpace(ref string s) @safe pure // rule 3
    {
        import std.algorithm.searching : countUntil;
        import std.ascii : isWhite;
        import std.utf : byCodeUnit;

        mixin Check!("Whitespace");
        ptrdiff_t i = s.byCodeUnit.countUntil!(a => !isWhite(a));
        if (i == -1 && s.length > 0 && isWhite(s[0]))
            s = s[$ .. $];
        else if (i > -1)
            s = s[i .. $];
        if (s is old) fail();
    }

    // Consumes a Name from the front of s and returns it in `name`.
    void checkName(ref string s, out string name) @safe pure // rule 5
    {
        mixin Check!("Name");

        if (s.length == 0) fail();
        // Default to "the whole input is the name". Previously this started
        // at 0, so a name that ran to the end of the input was returned as
        // an empty string and nothing was consumed.
        ptrdiff_t n = s.length;
        // 'i' must not be smaller than size_t because size_t is used internally in
        // aApply.d and it will be cast e.g to (int *) which fails on BigEndian targets.
        foreach (size_t i, dchar c; s)
        {
            if (c == '_' || c == ':' || isLetter(c)) continue;
            if (i == 0) fail();
            if (c == '-' || c == '.' || isDigit(c)
                || isCombiningChar(c) || isExtender(c)) continue;
            n = i;
            break;
        }
        name = s[0 .. n];
        s = s[n..$];
    }

    // Consumes a quoted attribute value. Between the quotes, '<' is rejected
    // and every '&' must begin something checkReference accepts.
    void checkAttValue(ref string s) @safe pure // rule 10
    {
        mixin Check!("AttValue");

        if (s.length == 0) fail();
        char c = s[0];
        if (c != '\u0022' && c != '\u0027')
            fail("attribute value requires quotes");
        s = s[1..$];
        for (;;)
        {
            // Skip ordinary characters, stopping at the closing quote, at
            // '<' or at '&'. The previous code jumped straight to the next
            // quote: the '<' test and the checkReference call below could
            // never trigger, and a missing quote produced an out-of-range
            // slice instead of the "unterminated" error.
            size_t i = 0;
            while (i < s.length && s[i] != c && s[i] != '<' && s[i] != '&')
                ++i;
            s = s[i .. $];
            if (s.length == 0) fail("unterminated attribute value");
            if (s[0] == '<') fail("< found in attribute value");
            if (s[0] == c) break;
            try { checkReference(s); } catch (Err e) { fail(e); }
        }
        s = s[1..$];
    }

    // Consumes character data up to the next '&' or '<' (or end of input);
    // fails if "]]>" is met first.
    void checkCharData(ref string s) @safe pure // rule 14
    {
        import std.algorithm.searching : startsWith;

        mixin Check!("CharData");

        while (s.length != 0)
        {
            if (s.startsWith("&")) break;
            if (s.startsWith("<")) break;
            if (s.startsWith("]]>")) fail("]]> found within char data");
            s = s[1..$];
        }
    }

    // Consumes a comment: "<!--", then everything up to the first "--",
    // which must be the start of "-->".
    void checkComment(ref string s) @safe pure // rule 15
    {
        import std.string : indexOf;

        mixin Check!("Comment");

        try { checkLiteral("<!--",s); } catch (Err e) { fail(e); }
        ptrdiff_t n = s.indexOf("--");
        if (n == -1) fail("unterminated comment");
        s = s[n..$];
        try { checkLiteral("-->",s); } catch (Err e) { fail(e); }
    }

    void checkPI(ref string s) @safe pure // rule 16
    {
        mixin Check!("PI");

        try
        {
            checkLiteral("<?",s);
            checkEnd("?>",s);
        }
        catch (Err e) { fail(e); }
    }

    void checkCDSect(ref string s) @safe pure // rule 18
    {
        mixin Check!("CDSect");

        try
        {
            checkLiteral(cdata,s);
            checkEnd("]]>",s);
        }
        catch (Err e) { fail(e); }
    }

    void checkProlog(ref string s) @safe pure // rule 22
    {
        mixin Check!("Prolog");

        try
        {
            /* The XML declaration is optional
             * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
             */
            opt!(checkXMLDecl)(s);

            star!(checkMisc)(s);
            opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s);
        }
        catch (Err e) { fail(e); }
    }

    void checkXMLDecl(ref string s) @safe pure // rule 23
    {
        mixin Check!("XMLDecl");

        try
        {
            checkLiteral("<?xml",s);
            checkVersionInfo(s);
opt!(checkEncodingDecl)(s);
            opt!(checkSDDecl)(s);
            opt!(checkSpace)(s);
            checkLiteral("?>",s);
        }
        catch (Err e) { fail(e); }
    }

    void checkVersionInfo(ref string s) @safe pure // rule 24
    {
        mixin Check!("VersionInfo");

        try
        {
            checkSpace(s);
            checkLiteral("version",s);
            checkEq(s);
            quoted!(checkVersionNum)(s);
        }
        catch (Err e) { fail(e); }
    }

    void checkEq(ref string s) @safe pure // rule 25
    {
        mixin Check!("Eq");

        try
        {
            opt!(checkSpace)(s);
            checkLiteral("=",s);
            opt!(checkSpace)(s);
        }
        catch (Err e) { fail(e); }
    }

    // Consumes one or more version-number characters ([a-zA-Z0-9_.:] or '-')
    // and fails if there are none.
    void checkVersionNum(ref string s) @safe pure // rule 26
    {
        import std.ascii : isAlphaNum;

        mixin Check!("VersionNum");

        // Stop at the first character that cannot be part of a version
        // number. The previous code searched for a literal '"', which
        // produced an out-of-range slice when none was present and ran past
        // the closing quote of a single-quoted version.
        // NOTE(review): assumes quoted!() verifies the closing quote itself
        // after this returns - confirm against its definition.
        size_t n = 0;
        while (n < s.length)
        {
            immutable char ch = s[n];
            if (!(isAlphaNum(ch) || ch == '_' || ch == '.'
                || ch == ':' || ch == '-')) break;
            ++n;
        }
        s = s[n .. $];
        if (s is old) fail();
    }

    void checkDocTypeDecl(ref string s) @safe pure // rule 28
    {
        mixin Check!("DocTypeDecl");

        try
        {
            checkLiteral("<!DOCTYPE",s);
            //
            // TO DO -- ensure DOCTYPE is well formed
            // (But not yet. That's one of our "future directions")
            //
            checkEnd(">",s);
        }
        catch (Err e) { fail(e); }
    }

    void checkSDDecl(ref string s) @safe pure // rule 32
    {
        import std.algorithm.searching : startsWith;

        mixin Check!("SDDecl");

        try
        {
            checkSpace(s);
            checkLiteral("standalone",s);
            checkEq(s);
        }
        catch (Err e) { fail(e); }

        // n is the length of the quoted value, including both quotes.
        int n = 0;
             if (s.startsWith("'yes'") || s.startsWith("\"yes\"")) n = 5;
        else if (s.startsWith("'no'" ) || s.startsWith("\"no\"" )) n = 4;
        else fail("standalone attribute value must be 'yes', \"yes\","~
            " 'no' or \"no\"");
        s = s[n..$];
    }

    void checkElement(ref string s) @safe pure // rule 39
    {
        mixin Check!("Element");

        string sname,ename,t;
        try { checkTag(s,t,sname); } catch (Err e) { fail(e); }

        // Only a start tag is followed by content and a matching end tag.
        if (t == "STag")
        {
            try
            {
                checkContent(s);
                t = s;
                checkETag(s,ename);
            }
            catch (Err e) { fail(e); }

            if (sname != ename)
            {
                // Point the error at the end tag.
                s = t;
                fail("end tag name \"" ~ ename
                    ~ "\" differs from start tag name \""~sname~"\"");
            }
        }
    }

    // rules 40 and 44
    // Consumes a start tag or an empty-element tag. `type` is "STag" for a
    // start tag and "ETag" when the tag closes itself with "/>".
    void checkTag(ref string s, out string type, out string name) @safe pure
    {
        mixin Check!("Tag");

        try
        {
            type = "STag";
            checkLiteral("<",s);
            checkName(s,name);
            star!(seq!(checkSpace,checkAttribute))(s);
            opt!(checkSpace)(s);
            if (s.length != 0 && s[0] == '/')
            {
                s = s[1..$];
                type = "ETag";
            }
            checkLiteral(">",s);
        }
        catch (Err e) { fail(e); }
    }

    void checkAttribute(ref string s) @safe pure // rule 41
    {
        mixin Check!("Attribute");

        try
        {
            string name;
            checkName(s,name);
            checkEq(s);
            checkAttValue(s);
        }
        catch (Err e) { fail(e); }
    }

    void checkETag(ref string s, out string name) @safe pure // rule 42
    {
        mixin Check!("ETag");

        try
        {
            checkLiteral("</",s);
            checkName(s,name);
            opt!(checkSpace)(s);
            checkLiteral(">",s);
        }
        catch (Err e) { fail(e); }
    }

    void checkContent(ref string s) @safe pure // rule 43
    {
        import std.algorithm.searching : startsWith;

        mixin Check!("Content");

        try
        {
            while (s.length != 0)
            {
                // Move the rewind point forward so a failure is reported at
                // the item that caused it.
                old = s;
                     if (s.startsWith("&"))        { checkReference(s); }
                else if (s.startsWith("<!--"))     { checkComment(s); }
                else if (s.startsWith("<?"))       { checkPI(s); }
                else if (s.startsWith(cdata))      { checkCDSect(s); }
                else if (s.startsWith("</"))       { break; }
                else if (s.startsWith("<"))        { checkElement(s); }
                else                               { checkCharData(s); }
            }
        }
        catch (Err e) { fail(e); }
    }

    void checkCharRef(ref string s, out dchar c) @safe pure // rule 66
    {
        import std.format : format;

        mixin Check!("CharRef");

        c = 0;
        try { checkLiteral("&#",s); } catch (Err e) { fail(e); }
        int radix = 10;
        if (s.length != 0 && s[0] == 'x')
        {
            s = s[1..$];
            radix = 16;
        }
        if (s.length == 0) fail("unterminated character reference");
        if (s[0] == ';')
            fail("character reference must have at least one digit");
        while (s.length != 0)
        {
            immutable char d = s[0];
            int n = 0;
            switch (d)
            {
                case 'F','f': ++n;      goto case;
                case 'E','e': ++n;      goto case;
                case 'D','d': ++n;      goto case;
                case 'C','c': ++n;      goto case;
                case 'B','b': ++n;      goto case;
                case 'A','a': ++n;      goto case;
                case '9':     ++n;      goto case;
                case '8':     ++n;      goto case;
                case '7':     ++n;      goto case;
                case '6':     ++n;      goto case;
                case '5':     ++n;      goto case;
                case '4':     ++n;      goto case;
                case '3':     ++n;      goto case;
                case '2':     ++n;      goto case;
                case '1':     ++n;      goto case;
                case '0':     break;
                default: n = 100; break;
            }
            if (n >= radix) break;
            c *= radix;
c += n; 2586 s = s[1..$]; 2587 } 2588 if (!isChar(c)) fail(format("U+%04X is not a legal character",c)); 2589 if (s.length == 0 || s[0] != ';') fail("expected ;"); 2590 else s = s[1..$]; 2591 } 2592 2593 void checkReference(ref string s) @safe pure // rule 67 2594 { 2595 import std.algorithm.searching : startsWith; 2596 2597 mixin Check!("Reference"); 2598 2599 try 2600 { 2601 dchar c; 2602 if (s.startsWith("&#")) checkCharRef(s,c); 2603 else checkEntityRef(s); 2604 } 2605 catch (Err e) { fail(e); } 2606 } 2607 2608 void checkEntityRef(ref string s) @safe pure // rule 68 2609 { 2610 mixin Check!("EntityRef"); 2611 2612 try 2613 { 2614 string name; 2615 checkLiteral("&",s); 2616 checkName(s,name); 2617 checkLiteral(";",s); 2618 } 2619 catch (Err e) { fail(e); } 2620 } 2621 2622 void checkEncName(ref string s) @safe pure // rule 81 2623 { 2624 import std.algorithm.searching : countUntil; 2625 import std.ascii : isAlpha; 2626 import std.utf : byCodeUnit; 2627 2628 mixin Check!("EncName"); 2629 2630 s = s[s.byCodeUnit.countUntil!(a => !isAlpha(a)) .. $]; 2631 if (s is old) fail(); 2632 s = s[s.byCodeUnit.countUntil('\"', '\'') .. $]; 2633 } 2634 2635 void checkEncodingDecl(ref string s) @safe pure // rule 80 2636 { 2637 mixin Check!("EncodingDecl"); 2638 2639 try 2640 { 2641 checkSpace(s); 2642 checkLiteral("encoding",s); 2643 checkEq(s); 2644 quoted!(checkEncName)(s); 2645 } 2646 catch (Err e) { fail(e); } 2647 } 2648 2649 // Helper functions 2650 2651 void checkLiteral(string literal,ref string s) @safe pure 2652 { 2653 import std.string : startsWith; 2654 2655 mixin Check!("Literal"); 2656 2657 if (!s.startsWith(literal)) fail("Expected literal \""~literal~"\""); 2658 s = s[literal.length..$]; 2659 } 2660 2661 void checkEnd(string end,ref string s) @safe pure 2662 { 2663 import std.string : indexOf; 2664 // Deliberately no mixin Check here. 
2665 2666 auto n = s.indexOf(end); 2667 if (n == -1) throw new Err(s,"Unable to find terminating \""~end~"\""); 2668 s = s[n..$]; 2669 checkLiteral(end,s); 2670 } 2671 2672 // Metafunctions -- none of these use mixin Check 2673 2674 void opt(alias f)(ref string s) 2675 { 2676 try { f(s); } catch (Err e) {} 2677 } 2678 2679 void plus(alias f)(ref string s) 2680 { 2681 f(s); 2682 star!(f)(s); 2683 } 2684 2685 void star(alias f)(ref string s) 2686 { 2687 while (s.length != 0) 2688 { 2689 try { f(s); } 2690 catch (Err e) { return; } 2691 } 2692 } 2693 2694 void quoted(alias f)(ref string s) 2695 { 2696 import std.string : startsWith; 2697 2698 if (s.startsWith("'")) 2699 { 2700 checkLiteral("'",s); 2701 f(s); 2702 checkLiteral("'",s); 2703 } 2704 else 2705 { 2706 checkLiteral("\"",s); 2707 f(s); 2708 checkLiteral("\"",s); 2709 } 2710 } 2711 2712 void seq(alias f,alias g)(ref string s) 2713 { 2714 f(s); 2715 g(s); 2716 } 2717} 2718 2719/** 2720 * Check an entire XML document for well-formedness 2721 * 2722 * Params: 2723 * s = the document to be checked, passed as a string 2724 * 2725 * Throws: CheckException if the document is not well formed 2726 * 2727 * CheckException's toString() method will yield the complete hierarchy of 2728 * parse failure (the XML equivalent of a stack trace), giving the line and 2729 * column number of every failure at every level. 
2730 */ 2731void check(string s) @safe pure 2732{ 2733 try 2734 { 2735 checkChars(s); 2736 checkDocument(s); 2737 if (s.length != 0) throw new Err(s,"Junk found after document"); 2738 } 2739 catch (Err e) 2740 { 2741 e.complete(s); 2742 throw e; 2743 } 2744} 2745 2746@system pure unittest 2747{ 2748 import std.string : indexOf; 2749 2750 try 2751 { 2752 check(q"[<?xml version="1.0"?> 2753 <catalog> 2754 <book id="bk101"> 2755 <author>Gambardella, Matthew</author> 2756 <title>XML Developer's Guide</title> 2757 <genre>Computer</genre> 2758 <price>44.95</price> 2759 <publish_date>2000-10-01</publish_date> 2760 <description>An in-depth look at creating applications 2761 with XML.</description> 2762 </book> 2763 <book id="bk102"> 2764 <author>Ralls, Kim</author> 2765 <title>Midnight Rain</title> 2766 <genre>Fantasy</genres> 2767 <price>5.95</price> 2768 <publish_date>2000-12-16</publish_date> 2769 <description>A former architect battles corporate zombies, 2770 an evil sorceress, and her own childhood to become queen 2771 of the world.</description> 2772 </book> 2773 <book id="bk103"> 2774 <author>Corets, Eva</author> 2775 <title>Maeve Ascendant</title> 2776 <genre>Fantasy</genre> 2777 <price>5.95</price> 2778 <publish_date>2000-11-17</publish_date> 2779 <description>After the collapse of a nanotechnology 2780 society in England, the young survivors lay the 2781 foundation for a new society.</description> 2782 </book> 2783 </catalog> 2784 ]"); 2785 assert(false); 2786 } 2787 catch (CheckException e) 2788 { 2789 auto n = e.toString().indexOf("end tag name \"genres\" differs"~ 2790 " from start tag name \"genre\""); 2791 assert(n != -1); 2792 } 2793} 2794 2795@system unittest 2796{ 2797 string s = q"EOS 2798<?xml version="1.0"?> 2799<set> 2800 <one>A</one> 2801 <!-- comment --> 2802 <two>B</two> 2803</set> 2804EOS"; 2805 try 2806 { 2807 check(s); 2808 } 2809 catch (CheckException e) 2810 { 2811 assert(0, e.toString()); 2812 } 2813} 2814 2815@system unittest 2816{ 2817 
string test_xml = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream 2818 xmlns:stream="http://etherx.'jabber'.org/streams" 2819 xmlns="jabber:'client'" from='jid.pl' id="587a5767" 2820 xml:lang="en" version="1.0" attr='a"b"c'> 2821 </stream:stream></r>`; 2822 2823 DocumentParser parser = new DocumentParser(test_xml); 2824 bool tested = false; 2825 parser.onStartTag["stream:stream"] = (ElementParser p) { 2826 assert(p.tag.attr["xmlns"] == "jabber:'client'"); 2827 assert(p.tag.attr["from"] == "jid.pl"); 2828 assert(p.tag.attr["attr"] == "a\"b\"c"); 2829 tested = true; 2830 }; 2831 parser.parse(); 2832 assert(tested); 2833} 2834 2835@system unittest 2836{ 2837 string s = q"EOS 2838<?xml version="1.0" encoding="utf-8"?> <Tests> 2839 <Test thing="What & Up">What & Up Second</Test> 2840</Tests> 2841EOS"; 2842 auto xml = new DocumentParser(s); 2843 2844 xml.onStartTag["Test"] = (ElementParser xml) { 2845 assert(xml.tag.attr["thing"] == "What & Up"); 2846 }; 2847 2848 xml.onEndTag["Test"] = (in Element e) { 2849 assert(e.text() == "What & Up Second"); 2850 }; 2851 xml.parse(); 2852} 2853 2854@system unittest 2855{ 2856 string s = `<tag attr=""value>" />`; 2857 auto doc = new Document(s); 2858 assert(doc.toString() == s); 2859} 2860 2861/** The base class for exceptions thrown by this module */ 2862class XMLException : Exception { this(string msg) @safe pure { super(msg); } } 2863 2864// Other exceptions 2865 2866/// Thrown during Comment constructor 2867class CommentException : XMLException 2868{ private this(string msg) @safe pure { super(msg); } } 2869 2870/// Thrown during CData constructor 2871class CDataException : XMLException 2872{ private this(string msg) @safe pure { super(msg); } } 2873 2874/// Thrown during XMLInstruction constructor 2875class XIException : XMLException 2876{ private this(string msg) @safe pure { super(msg); } } 2877 2878/// Thrown during ProcessingInstruction constructor 2879class PIException : XMLException 2880{ private this(string msg) 
@safe pure { super(msg); } } 2881 2882/// Thrown during Text constructor 2883class TextException : XMLException 2884{ private this(string msg) @safe pure { super(msg); } } 2885 2886/// Thrown during decode() 2887class DecodeException : XMLException 2888{ private this(string msg) @safe pure { super(msg); } } 2889 2890/// Thrown if comparing with wrong type 2891class InvalidTypeException : XMLException 2892{ private this(string msg) @safe pure { super(msg); } } 2893 2894/// Thrown when parsing for Tags 2895class TagException : XMLException 2896{ private this(string msg) @safe pure { super(msg); } } 2897 2898/** 2899 * Thrown during check() 2900 */ 2901class CheckException : XMLException 2902{ 2903 CheckException err; /// Parent in hierarchy 2904 private string tail; 2905 /** 2906 * Name of production rule which failed to parse, 2907 * or specific error message 2908 */ 2909 string msg; 2910 size_t line = 0; /// Line number at which parse failure occurred 2911 size_t column = 0; /// Column number at which parse failure occurred 2912 2913 private this(string tail,string msg,Err err=null) @safe pure 2914 { 2915 super(null); 2916 this.tail = tail; 2917 this.msg = msg; 2918 this.err = err; 2919 } 2920 2921 private void complete(string entire) @safe pure 2922 { 2923 import std.string : count, lastIndexOf; 2924 import std.utf : toUTF32; 2925 2926 string head = entire[0..$-tail.length]; 2927 ptrdiff_t n = head.lastIndexOf('\n') + 1; 2928 line = head.count("\n") + 1; 2929 dstring t = toUTF32(head[n..$]); 2930 column = t.length + 1; 2931 if (err !is null) err.complete(entire); 2932 } 2933 2934 override string toString() const @safe pure 2935 { 2936 import std.format : format; 2937 2938 string s; 2939 if (line != 0) s = format("Line %d, column %d: ",line,column); 2940 s ~= msg; 2941 s ~= '\n'; 2942 if (err !is null) s = err.toString() ~ s; 2943 return s; 2944 } 2945} 2946 2947private alias Err = CheckException; 2948 2949// Private helper functions 2950 2951private 2952{ 2953 
// Casts o to T; throws InvalidTypeException if the cast yields null.
inout(T) toType(T)(inout Object o)
{
    if (auto converted = cast(T)(o))
        return converted;

    throw new InvalidTypeException("Attempt to compare a "
        ~ T.stringof ~ " with an instance of another type");
}

// Removes the first n code units from s and returns them.
// An n of -1 (i.e. size_t.max) takes the whole of s.
string chop(ref string s, size_t n) @safe pure nothrow
{
    immutable size_t cut = (n == size_t.max) ? s.length : n;
    string front = s[0 .. cut];
    s = s[cut .. $];
    return front;
}

// If s starts with c, consumes it and returns true; otherwise returns
// false and leaves s alone.
bool optc(ref string s, char c) @safe pure nothrow
{
    if (s.length == 0 || s[0] != c)
        return false;

    s = s[1..$];
    return true;
}

// Consumes a leading c from s; throws TagException if s does not start
// with c.
void reqc(ref string s, char c) @safe pure
{
    immutable bool matches = s.length != 0 && s[0] == c;
    if (!matches) throw new TagException("");
    s = s[1..$];
}

// Consumes and returns the first code unit of s, which must be one of
// chars; throws TagException otherwise.
char requireOneOf(ref string s, string chars) @safe pure
{
    import std.string : indexOf;

    if (s.length != 0)
    {
        immutable char ch = s[0];
        if (indexOf(chars,ch) != -1)
        {
            s = s[1..$];
            return ch;
        }
    }
    throw new TagException("");
}

// Hash of s as computed by its TypeInfo, plus h.
size_t hash(string s,size_t h=0) @trusted nothrow
{
    immutable size_t own = typeid(s).getHash(&s);
    return own + h;
}

// Definitions from the XML specification
immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
    0x10000,0x10FFFF];
immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
    0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
    0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
    0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
    0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
    0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
    0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
    0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
    0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
    0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
    0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
    0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
    0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
    0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
    0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
    0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
    0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
    0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
    0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
    0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
    0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
    0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
    0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
    0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
    0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
    0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
    0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
    0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
    0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
    0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
    0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
    0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
    0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
    0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
    0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
    0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
    0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
    0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
    0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
    0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
    0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
    0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
    0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
    0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
    0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
    0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
    0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
    0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
    0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
    0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
    0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
    0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
    0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
    0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
    0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
    0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
    0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
    0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
    0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
    0x3099,0x3099,0x309A,0x309A];

immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
    0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
    0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
    0x0ED9,0x0F20,0x0F29];
immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
    0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
    0x3035,0x309D,0x309E,0x30FC,0x30FE];

// Binary search over a table laid out as consecutive (low, high) pairs.
// Returns true if c lies within one of the pairs, bounds included.
bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure
{
    for (auto rest = table; rest.length != 0; )
    {
        // Halve the length and clear the lowest bit so that the index
        // always lands on the low element of a pair
        immutable lowIdx = (rest.length >> 1) & ~1;
        immutable highIdx = lowIdx + 1;

        if (c < rest[lowIdx])
            rest = rest[0 .. lowIdx];
        else if (c > rest[highIdx])
            rest = rest[highIdx + 1 .. $];
        else
            return true;
    }
    return false;
}

// Builds a short preview of s: code units below 0x20 or above 0x7F are
// replaced with '.', and once the preview reaches 40 code units "___" is
// appended and the rest of s is dropped.
string startOf(string s) @safe nothrow pure
{
    string preview;
    foreach (char ch;s)
    {
        immutable bool keep = ch >= 0x20 && ch <= 0x7F;
        preview ~= keep ? ch : '.';
        if (preview.length >= 40)
        {
            preview ~= "___";
            break;
        }
    }
    return preview;
}

// Unconditionally throws an XMLException carrying the message s.
void exit(string s=null)
{
    throw new XMLException(s);
}
}