/* xml_acquisition.pl : XML -> Document translation.
 *
 * Copyright (C) 2001-2005 Binding Time Limited
 * Copyright (C) 2005, 2006 John Fletcher
 *
 * Current Release: $Revision: 1.2 $
 *
 * TERMS AND CONDITIONS:
 *
 * This program is offered free of charge, as unsupported source code. You may
 * use it, copy it, distribute it, modify it or sell it without restriction,
 * but entirely at your own risk.
 */

:- ensure_loaded( xml_utilities ).

/* xml_to_document( +Controls, +XML, ?Document ) translates the list of
 * character codes XML into the Prolog term Document. Controls is a list
 * of terms controlling the treatment of layout characters and character
 * entities.
 *
 * An optional leading <?xml ... ?> declaration supplies the document
 * attributes; without one the attribute list is empty. Document is
 * xml(Attributes, Terms) when the input was well formed and
 * malformed(Attributes, Terms) otherwise.
 */
xml_to_document( Controls, XML, Document ) :-
    initial_context( Controls, Context ),
    (   xml_declaration( Declared, XML, Body )
    ->  Attributes = Declared
    ;   otherwise
    ->  Body = XML,
        Attributes = []
    ),
    xml_to_document( Body, Context, Terms, [], WellFormed ),
    xml_to_document1( WellFormed, Attributes, Terms, Document ).

% xml_to_document1( +WellFormed, +Attributes, +Terms, ?Document ) wraps the
% parse result according to the well-formedness flag.
xml_to_document1( true, Attributes, Terms, xml(Attributes, Terms) ).
xml_to_document1( false, Attributes, Terms, malformed(Attributes, Terms) ).

% unparsed( +Unparsed, +Context, ?Terms, ?Residue, ?WellFormed ) gives up on
% the remaining input: it is returned as a single unparsed/1 term, nothing
% is left over and the document is flagged as not well formed.
unparsed( Unparsed, _Context, [unparsed(Unparsed)], [], false ).

% xml_declaration( ?Attributes ) recognises "<?xml" Attributes "?>", with
% optional layout before the opening and before the closing delimiter.
xml_declaration( Attributes ) -->
    spaces,
    "<?",
    nmtoken( xml ),
    xml_declaration_attributes( Attributes ),
    spaces,
    "?>".

% xml_to_document( +Chars, +Context, ?Terms, ?Residue, ?WellFormed )
% End of input: the context decides what is still open.
xml_to_document( [], Context, Terms, [], WellFormed ) :-
    close_context( Context, Terms, WellFormed ).
% xml_to_document/5, non-empty input: dispatch on the leading character code.
%   "<"            starts markup;
%   "&"            starts an entity reference;
%   layout (=< " ") is collected separately unless space_preserve(Context);
%   anything else is character data, or unparsed when void_context(Context).
xml_to_document( [Code|Codes], Context, Terms, Residue, WellFormed ) :-
    (   Code =:= "<"
    ->  xml_markup_structure( Codes, Context, Terms, Residue, WellFormed )
    ;   Code =:= "&"
    ->  entity_reference( Codes, Context, Terms, Residue, WellFormed )
    ;   Code =< " ",
        \+ space_preserve( Context )
    ->  % Open the difference list of pending layout with this character.
        layouts( Codes, Context, [Code|Tail], Tail, Terms, Residue, WellFormed )
    ;   void_context( Context )
    ->  unparsed( [Code|Codes], Context, Terms, Residue, WellFormed )
    ;   otherwise
    ->  Terms = [pcdata([Code|Text])|Terms1],
        acquire_pcdata( Codes, Context, Text, Terms1, Residue, WellFormed )
    ).

% layouts( +Chars, +Context, +Plus, ?Minus, ?Terms, ?Residue, ?WellFormed )
% Plus-Minus is a difference list holding the layout characters read so far.
% The layout is dropped if markup or the end of input follows, and becomes
% the start of a pcdata/1 term if character data follows.
layouts( [], Context, _Plus, _Minus, Terms, [], WellFormed ) :-
    close_context( Context, Terms, WellFormed ).
layouts( [Code|Codes], Context, Plus, Minus, Terms, Residue, WellFormed ) :-
    (   Code =:= "<"
    ->  xml_markup_structure( Codes, Context, Terms, Residue, WellFormed )
    ;   Code =:= "&"
    ->  reference_in_layout( Codes, Context, Plus, Minus, Terms, Residue, WellFormed )
    ;   Code =< " "
    ->  Minus = [Code|Minus1],
        layouts( Codes, Context, Plus, Minus1, Terms, Residue, WellFormed )
    ;   void_context( Context )
    ->  unparsed( [Code|Codes], Context, Terms, Residue, WellFormed )
    ;   otherwise
    ->  % Real text: the pending layout is its prefix, and the rest of this
        % run is read with space_preserve set to true in the context.
        Terms = [pcdata(Plus)|Terms1],
        Minus = [Code|Text],
        context_update( space_preserve, Context, true, Context1 ),
        acquire_pcdata( Codes, Context1, Text, Terms1, Residue, WellFormed )
    ).

% acquire_pcdata( +Chars, +Context, ?Text, ?Terms, ?Residue, ?WellFormed )
% copies character data into Text until markup, a reference or the end of
% the input is met.
acquire_pcdata( [], Context, [], Terms, [], WellFormed ) :-
    close_context( Context, Terms, WellFormed ).
acquire_pcdata( [Code|Codes], Context, Text, Terms, Residue, WellFormed ) :-
    (   Code =:= "<"
    ->  Text = [],
        xml_markup_structure( Codes, Context, Terms, Residue, WellFormed )
    ;   Code =:= "&"
    ->  reference_in_pcdata( Codes, Context, Text, Terms, Residue, WellFormed )
    ;   otherwise
    ->  Text = [Code|Text1],
        acquire_pcdata( Codes, Context, Text1, Terms, Residue, WellFormed )
    ).

% xml_markup_structure( +Chars, +Context, ?Terms, ?Residue, ?WellFormed )
% Chars is the input following a "<". A "<" at the very end of the input
% cannot be parsed.
xml_markup_structure( [], Context, Terms, Residue, WellFormed ) :-
    unparsed( "<", Context, Terms, Residue, WellFormed ).
% xml_markup_structure/5, non-empty input after "<": the next character
% selects a closing tag ("/"), a processing instruction ("?") or a
% declaration ("!"); otherwise an opening tag is attempted, and if that
% fails the "<" and everything after it is returned unparsed.
xml_markup_structure( Input, Context, Terms, Residue, WellFormed ) :-
    Input = [Code|Codes],
    (   Code =:= "/"
    ->  closing_tag( Context, Codes, Terms, Residue, WellFormed )
    ;   Code =:= "?"
    ->  pi_acquisition( Codes, Context, Terms, Residue, WellFormed )
    ;   Code =:= "!"
    ->  declaration_acquisition( Codes, Context, Terms, Residue, WellFormed )
    ;   open_tag( Tag, Context, Attributes, Type, Input, AfterTag )
    ->  push_tag( Tag, AfterTag, Context, Attributes, Type, Terms, Residue, WellFormed )
    ;   otherwise
    ->  unparsed( [0'<|Input], Context, Terms, Residue, WellFormed ) %'
    ).

% push_tag( +Tag, +Chars, +Context, +Attributes, +Type, ?Terms, ?Residue, ?WF )
% builds the element for an opening tag and then carries on with its
% siblings, provided the element itself was well formed.
push_tag( Tag, Chars, Context, Attributes, Type, Terms, Residue, WellFormed ) :-
    new_element( Tag, Chars, Context, Attributes, Type, Term, Rest, ElementOK ),
    push_tag1( ElementOK, Context, Term, Rest, Terms, Residue, WellFormed ).

% push_tag1( +ElementOK, +Context, +Term, +Chars, ?Terms, ?Residue, ?WF )
% A malformed element stops the parse: it is the last term, the unread
% characters are the residue and the result is flagged false.
push_tag1( true, Context, Term, Chars, [Term|Terms], Residue, WellFormed ) :-
    xml_to_document( Chars, Context, Terms, Residue, WellFormed ).
push_tag1( false, _Context, Term, Chars, [Term], Chars, false ).

% new_element( +TagChars, +Chars, +Context, +Attributes0, +Type,
%              ?Term, ?Residue, ?WF )
% Term is element(Tag, Attributes, Contents), wrapped in namespace/3 when
% the element's namespace differs from the current one. TagChars may carry
% a "prefix:" which is stripped when specific_namespace/3 resolves it;
% otherwise the default namespace of the context applies.
new_element( TagChars, Chars, Context, Attributes0, Type, Term, Residue, WellFormed ) :-
    namespace_attributes( Attributes0, Context, Context1, Attributes1 ),
    (   append( NSChars, [0':|LocalChars], TagChars ), %'
        specific_namespace( NSChars, Context1, Namespace )
    ->  true
    ;   otherwise
    ->  NSChars = "",
        LocalChars = TagChars,
        default_namespace( Context1, Namespace )
    ),
    current_namespace( Context1, CurrentNamespace ),
    Element = element(Tag, Attributes, Contents),
    (   Namespace == CurrentNamespace
    ->  Term = Element,
        Context2 = Context1
    ;   otherwise
    ->  Term = namespace( Namespace, NSChars, Element ),
        context_update( current_namespace, Context1, Namespace, Context2 )
    ),
    input_attributes( Attributes1, Context2, Attributes ),
    atom_codes( Tag, LocalChars ),
    close_tag( Type, Chars, Context2, Contents, Residue, WellFormed ).

% close_tag( +Type, +Chars, +Context, ?Contents, ?Residue, ?WF )
% An empty element (<tag/>) has no contents and consumes no further input.
close_tag( empty, Residue, _Context, [], Residue, true ).
% close_tag/6 for an ordinary opening tag: Tag is recorded in the context
% as the current element and its contents are read recursively.
close_tag( push(Tag), Chars, Context0, Contents, Residue, WellFormed ) :-
    context_update( element, Context0, Tag, Context1 ),
    xml_to_document( Chars, Context1, Contents, Residue, WellFormed ).

% pi_acquisition( +Chars, +Context, ?Terms, ?Residue, ?WellFormed )
% Chars follows "<?". A processing instruction whose target is not xml
% becomes instructions(Target, Processing); anything else is unparsed.
pi_acquisition( Chars, Context, Terms, Residue, WellFormed ) :-
    (   inline_instruction( Target, Processing, Chars, Rest ),
        Target \== xml
    ->  Terms = [instructions(Target, Processing)|Terms1],
        xml_to_document( Rest, Context, Terms1, Residue, WellFormed )
    ;   otherwise
    ->  unparsed( [0'<,0'?|Chars], Context, Terms, Residue, WellFormed )
    ).

% declaration_acquisition( +Chars, +Context, ?Terms, ?Residue, ?WF )
% Chars follows "<!". The declaration is classified and parsed; parsing
% may yield an updated context (Context1). Failure at either step returns
% the whole declaration unparsed.
declaration_acquisition( Chars, Context, Terms, Residue, WellFormed ) :-
    (   declaration_type( Chars, Type, Body ),
        declaration_parse( Type, Context, Term, Context1, Body, Rest )
    ->  Terms = [Term|Terms1],
        xml_to_document( Rest, Context1, Terms1, Residue, WellFormed )
    ;   otherwise
    ->  unparsed( [0'<,0'!|Chars], Context, Terms, Residue, WellFormed )
    ).

% open_tag( ?Tag, +Namespaces, ?Attributes, ?Termination ) recognises the
% name, attributes and terminator of an opening tag. Tag is a code list.
open_tag( Tag, Namespaces, Attributes, Termination ) -->
    nmtoken_chars( Tag ),
    attributes( Attributes, [], Namespaces ),
    spaces,
    open_tag_terminator( Tag, Termination ).

% ">" leaves the element open (push(Tag)); "/>" makes it empty.
open_tag_terminator( Tag, push(Tag) ) -->
    ">".
open_tag_terminator( _Tag, empty ) -->
    "/>".

% declaration_parse( +Type, +Namespaces0, ?Term, ?Namespaces ) parses the
% body of a comment, CDATA section or DOCTYPE declaration. Only doctype
% hands back a different context.
declaration_parse( comment, Namespaces, comment(Comment), Namespaces ) -->
    comment( Comment ).
declaration_parse( cdata, Namespaces, cdata(CData), Namespaces ) -->
    cdata( CData ).
declaration_parse( doctype, Namespaces0, doctype(Name, Names), Namespaces ) -->
    doctype( Name, Names, Namespaces0, Namespaces ),
    spaces,
    ">".

% inline_instruction( ?Target, ?Processing, +Plus, ?Minus )
% Target is the instruction name as an atom; Processing is the text (after
% any layout) up to the first "?>". Committed to that first terminator.
inline_instruction( Target, Processing, Plus, Minus ) :-
    nmtoken( Target, Plus, AfterTarget ),
    spaces( AfterTarget, Body ),
    append( Processing, [0'?,0'>|Minus], Body ),
    !.

% entity_reference_name( ?Reference ) recognises "name;" (the text after
% an ampersand) and returns the name as a code list.
entity_reference_name( Reference ) -->
    nmtoken_chars( Reference ),
    ";".

% declaration_type( +Chars, ?Class, ?Rest ) classifies the text following
% "<!" from its opening characters. At least two characters are required.
% Class is comment, cdata or doctype with Rest the text after the keyword,
% or generic with Rest the whole of Chars.
declaration_type( Chars, Class, Rest ) :-
    Chars = [Char1,Char2|Chars1],
    (   declaration_type1( Char1, Char2, Chars1, Class0, Residue )
    ->  Class = Class0,
        Rest = Residue
    ;   otherwise
    ->  Class = generic,
        Rest = Chars
    ).

% declaration_type1( +Char1, +Char2, +Chars, ?Class, ?Residue )
% Keyed on the first two characters: "--", "[CDATA[" or "DOCTYPE".
declaration_type1( 0'-, 0'-, Chars, comment, Chars ).
declaration_type1( 0'[, 0'C, Chars, cdata, Residue ) :-
    append( "DATA[", Residue, Chars ).
declaration_type1( 0'D, 0'O, Chars, doctype, Residue ) :-
    append( "CTYPE", Residue, Chars ).

% closing_tag( +Context, +Chars, ?Terms, ?Residue, ?WellFormed )
% Chars follows "</". A closing tag that matches the current tag of the
% context ends the element's contents; anything else is unparsed.
closing_tag( Context, Chars, Terms, Residue, WellFormed ) :-
    (   closing_tag_name( Tag, Chars, Rest ),
        current_tag( Context, Tag )
    ->  Terms = [],
        Residue = Rest,
        WellFormed = true
    ;   otherwise
    ->  unparsed( [0'<,0'/|Chars], Context, Terms, Residue, WellFormed )
    ).

% closing_tag_name( ?Tag ) recognises "name", optional layout and ">".
closing_tag_name( Tag ) -->
    nmtoken_chars( Tag ),
    spaces,
    ">".

% entity_reference( +Chars, +Context, ?Terms, ?Residue, ?WF )
% A reference met at the start of content is handled as a reference in
% layout with no pending layout (an empty difference list).
entity_reference( Chars, Context, Terms, Residue, WellFormed ) :-
    reference_in_layout( Chars, Context, Empty, Empty, Terms, Residue, WellFormed ).

% reference_in_layout( +Chars, +Context, +Plus, ?Minus, ?Terms, ?Residue, ?WF )
% Chars follows "&"; Plus-Minus is the pending layout.
%  - a character entity becomes the next character of a pcdata/1 term;
%  - a defined entity has its text spliced in front of the remaining input,
%    which is then parsed afresh (the pending layout is not used);
%  - otherwise a bare "&" is kept as text if allow_ampersand(Context),
%    else the input is unparsed.
reference_in_layout( Chars, Context, Plus, Minus, Terms, Residue, WellFormed ) :-
    (   standard_character_entity( Char, Chars, Rest )
    ->  Minus = [Char|Text],
        Terms = [pcdata(Plus)|Terms1],
        acquire_pcdata( Rest, Context, Text, Terms1, Residue, WellFormed )
    ;   entity_reference_name( Reference, Chars, Rest ),
        defined_entity( Reference, Context, String )
    ->  append( String, Rest, Expanded ),
        xml_to_document( Expanded, Context, Terms, Residue, WellFormed )
    ;   allow_ampersand( Context )
    ->  Minus = [0'&|Text], %'
        Terms = [pcdata(Plus)|Terms1],
        acquire_pcdata( Chars, Context, Text, Terms1, Residue, WellFormed )
    ;   otherwise
    ->  unparsed( [0'&|Chars], Context, Terms, Residue, WellFormed ) %'
    ).

% reference_in_pcdata( +Chars, +Context, ?Text, ?Terms, ?Residue, ?WF )
% Chars follows an "&" met inside character data; Text is the open tail
% of the pcdata being collected.
%  - a character entity contributes one character;
%  - a defined entity has its text spliced in front of the remaining input;
%  - otherwise a bare "&" is kept if allow_ampersand(Context), else the
%    text is closed and the remainder is unparsed.
reference_in_pcdata( Chars, Context, Text, Terms, Residue, WellFormed ) :-
    (   standard_character_entity( Char, Chars, Rest )
    ->  Text = [Char|Text1],
        acquire_pcdata( Rest, Context, Text1, Terms, Residue, WellFormed )
    ;   entity_reference_name( Reference, Chars, Rest ),
        defined_entity( Reference, Context, String )
    ->  append( String, Rest, Expanded ),
        acquire_pcdata( Expanded, Context, Text, Terms, Residue, WellFormed )
    ;   allow_ampersand( Context )
    ->  Text = [0'&|Text1],
        acquire_pcdata( Chars, Context, Text1, Terms, Residue, WellFormed )
    ;   otherwise
    ->  Text = [],
        unparsed( [0'&|Chars], Context, Terms, Residue, WellFormed )
    ).

% namespace_attributes( +Attributes0, +Context0, ?Context, ?Attributes )
% updates the context from the namespace-related attributes of a tag.
%  - xmlns="URI" sets the default namespace and is removed from the list;
%  - xmlns:Prefix="URI" records the prefix and stays in the list;
%  - xml:space="preserve" sets space_preserve to true in the context.
% Each of the first two cases recurses on the remaining attributes.
namespace_attributes( [], Context, Context, [] ).
namespace_attributes( Attributes0, Context0, Context, Attributes ) :-
    Attributes0 = [_|_],
    % A partial list: "xmlns:" followed by the as yet unknown prefix.
    append( "xmlns:", Unqualified, QualifiedNameChars ),
    (   select( "xmlns"=Value, Attributes0, Remaining )
    ->  atom_codes( URI, Value ),
        context_update( default_namespace, Context0, URI, Context1 ),
        namespace_attributes( Remaining, Context1, Context, Attributes )
    ;   select( QualifiedNameChars=Value, Attributes0, Remaining )
    ->  Attributes = [QualifiedNameChars=Value|Kept],
        atom_codes( URI, Value ),
        context_update( ns_prefix(Unqualified), Context0, URI, Context1 ),
        namespace_attributes( Remaining, Context1, Context, Kept )
    ;   member( "xml:space"="preserve", Attributes0 )
    ->  Attributes = Attributes0,
        context_update( space_preserve, Context0, true, Context )
    ;   otherwise
    ->  Context = Context0,
        Attributes = Attributes0
    ).

% input_attributes( +Attributes0, +Context, ?Attributes ) converts
% attribute names from code lists to atoms.
input_attributes( [], _Context, [] ).
% input_attributes/3, recursive clause. A "prefix:" is dropped from the
% attribute name only when remove_attribute_prefixes(Context) holds, the
% prefix is not xmlns, and it resolves to the current namespace.
input_attributes( [NameChars=Value|Attributes0], Context,
        [Name=Value|Attributes] ) :-
    (   remove_attribute_prefixes( Context ),
        append( NSChars, [0':|LocalChars], NameChars ), %'
        NSChars \== "xmlns",
        specific_namespace( NSChars, Context, Namespace ),
        current_namespace( Context, Namespace )
    ->  atom_codes( Name, LocalChars )
    ;   otherwise
    ->  atom_codes( Name, NameChars )
    ),
    input_attributes( Attributes0, Context, Attributes ).

% attributes( ?Attributes, +Seen, +Namespaces ) recognises zero or more
% name = "value" pairs. Seen holds the names already read for this tag; a
% repeated name makes the first clause fail at that point.
attributes( [Name=Value|Attributes], Seen, Namespaces ) -->
    spaces,
    nmtoken_chars( Name ),
    {\+ member( Name, Seen )},
    spaces,
    "=",
    spaces,
    attribute_value( Value, Namespaces ),
    attributes( Attributes, [Name|Seen], Namespaces ).
attributes( [], _Seen, _Namespaces ) --> "".

% xml_declaration_attributes( ?Attributes ) recognises the attributes of
% the <?xml ... ?> declaration; each must pass
% xml_declaration_attribute_valid/2. The empty list is tried first.
xml_declaration_attributes( [] ) --> "".
xml_declaration_attributes( [Name=Value|Attributes] ) -->
    spaces,
    nmtoken( Name ),
    spaces,
    "=",
    spaces,
    xml_string( Value ),
    {xml_declaration_attribute_valid( Name, Value )},
    xml_declaration_attributes( Attributes ),
    spaces.

% doctype( ?Name, ?External, +Namespaces0, ?Namespaces1 ) recognises the
% body of a DOCTYPE declaration: the document name, an optional external
% identifier and an optional internal subset in square brackets.
doctype( Name, External, Namespaces0, Namespaces1 ) -->
    spaces,
    nmtoken( Name ),
    spaces,
    doctype_id( ExternalId ),
    spaces,
    doctype1( Namespaces0, Literals, Namespaces1 ),
    {doctype_extension( Literals, ExternalId, External )}.

% doctype_extension( +Literals, +External0, ?External ) attaches the DTD
% literals, if there are any, to the external identifier term.
doctype_extension( [], External, External ).
doctype_extension( [Literal|Literals], External0, External ) :-
    extended_doctype( External0, [Literal|Literals], External ).

% extended_doctype( +External0, +Literals, ?External )
extended_doctype( system(URL), Literals, system(URL,Literals) ).
extended_doctype( public(URN,URL), Literals, public(URN,URL,Literals) ).
extended_doctype( local, Literals, local(Literals) ).

% doctype1( +Namespaces0, ?Literals, ?Namespaces1 ) recognises the optional
% internal subset "[ ... ]"; once "[" is seen there is no way back.
doctype1( Namespaces0, Literals, Namespaces1 ) -->
    "[",
    !,
    dtd( Namespaces0, Literals, Namespaces1 ),
    "]".
doctype1( Namespaces, [], Namespaces ) --> "".

% doctype_id( ?External ) recognises the external identifier of a DOCTYPE:
% SYSTEM "url", PUBLIC "urn" "url", or nothing at all (local).
doctype_id( system(URL) ) -->
    "SYSTEM",
    spaces,
    uri( URL ).
doctype_id( public(URN,URL) ) -->
    "PUBLIC",
    spaces,
    uri( URN ),
    spaces,
    uri( URL ).
doctype_id( local ) --> "".

% dtd( +Namespaces0, ?Literals, ?Namespaces1 ) reads the internal subset.
%  - <!ENTITY name "value"> defines an entity in the context;
%  - <!-- ... --> comments are skipped;
%  - any other <! ... > declaration is kept as dtd_literal(Chars).
dtd( Namespaces0, Literals, Namespaces1 ) -->
    spaces,
    "<!ENTITY",
    !,
    spaces,
    nmtoken_chars( Name ),
    spaces,
    quote( Quote ),
    entity_value( Quote, Namespaces0, String ),
    spaces,
    ">",
    {\+ character_entity( Name, _StandardChar ),
     % Don't allow &lt; &quot; etc. to be updated
     context_update( entity(Name), Namespaces0, String, Namespaces2 )
    },
    dtd( Namespaces2, Literals, Namespaces1 ).
dtd( Namespaces0, Literals, Namespaces1 ) -->
    spaces,
    "<!--",
    !,
    dtd_comment,
    ">",
    dtd( Namespaces0, Literals, Namespaces1 ).
dtd( Namespaces0, [dtd_literal(Literal)|Literals], Namespaces1 ) -->
    spaces,
    "<!",
    !,
    dtd_literal( Literal ),
    dtd( Namespaces0, Literals, Namespaces1 ).
dtd( Namespaces, [], Namespaces ) --> spaces.

% dtd_literal( ?Chars ) collects the text of a declaration up to its
% closing ">", leaving out any "-- ... --" comments embedded in it.
dtd_literal( [] ) --> ">", !.
dtd_literal( Chars ) -->
    "--",
    !,
    dtd_comment,
    dtd_literal( Chars ).
dtd_literal( [Char|Chars] ) -->
    [Char],
    dtd_literal( Chars ).

% dtd_comment( +Plus, ?Minus ) skips up to and including the first "--".
dtd_comment( Plus, Minus ) :-
    append( _Skipped, [0'-,0'-|Minus], Plus ),
    !.

% nmtokens( ?Names ) recognises a layout-separated sequence of names.
nmtokens( [Name|Names] ) -->
    spaces,
    nmtoken( Name ),
    nmtokens( Names ).
nmtokens( [] ) --> [].

% entity_value( +Quote, +Namespaces, ?String, +Plus, ?Minus )
% String is the entity's replacement text up to the closing Quote, with
% any references in it processed by reference_in_entity/5.
entity_value( Quote, Namespaces, String, [Char|Plus], Minus ) :-
    (   Char == Quote
    ->  String = [],
        Minus = Plus
    ;   Char =:= "&"
    ->  reference_in_entity( Namespaces, Quote, String, Plus, Minus )
    ;   otherwise
    ->  String = [Char|String1],
        entity_value( Quote, Namespaces, String1, Plus, Minus )
    ).

% attribute_value( ?String, +Namespaces ) recognises a quoted attribute
% value; the layout handling is done by the predicates below.
attribute_value( String, Namespaces ) -->
    quote( Quote ),
    attribute_leading_layouts( Quote, Namespaces, String ).

% attribute_leading_layouts( +Quote, +Namespaces, ?String, +Plus, ?Minus )
% End of input before the closing quote: the value read so far is empty.
attribute_leading_layouts( _Quote, _Namespace, [], [], [] ).
% attribute_leading_layouts/5, non-empty input: layout characters (codes
% up to 32, and 160) at the start of an attribute value are skipped.
attribute_leading_layouts( Quote, Namespaces, String, [Char|Plus], Minus ) :-
    (   Char == Quote
    ->  String = [],
        Minus = Plus
    ;   Char =:= "&"
    ->  ref_in_attribute_layout( Namespaces, Quote, String, Plus, Minus )
    ;   Char > 32, Char \== 160
    ->  String = [Char|String1],
        attribute_layouts( Quote, Namespaces, false, String1, Plus, Minus )
    ;   otherwise
    ->  attribute_leading_layouts( Quote, Namespaces, String, Plus, Minus )
    ).

% attribute_layouts( +Quote, +Namespaces, +Layout, ?String, +Plus, ?Minus )
% reads the rest of an attribute value. Layout is true while a run of
% layout characters is pending: a run followed by text is emitted as a
% single space, a run followed by the closing quote is dropped.
attribute_layouts( _Quote, _Namespaces, _Layout, [], [], [] ).
attribute_layouts( Quote, Namespaces, Layout, String, [Char|Plus], Minus ) :-
    (   Char == Quote
    ->  String = [],
        Minus = Plus
    ;   Char =:= "&"
    ->  reference_in_value( Namespaces, Quote, Layout, String, Plus, Minus )
    ;   Char > 32, Char \== 160
    ->  (   Layout == true
        ->  String = [0' ,Char|String1] %'
        ;   otherwise
        ->  String = [Char|String1]
        ),
        attribute_layouts( Quote, Namespaces, false, String1, Plus, Minus )
    ;   otherwise
    ->  attribute_layouts( Quote, Namespaces, true, String, Plus, Minus )
    ).

% ref_in_attribute_layout( +NS, +Quote, ?String, +Plus, ?Minus )
% Plus follows an "&" met while still skipping leading layout.
%  - a character entity is the first real character of the value;
%  - a defined entity's text is spliced in and leading layout is skipped
%    again from there;
%  - otherwise the "&" itself is kept.
ref_in_attribute_layout( NS, Quote, String, Plus, Minus ) :-
    (   standard_character_entity( Char, Plus, Mid )
    ->  String = [Char|String1],
        attribute_layouts( Quote, NS, false, String1, Mid, Minus )
    ;   entity_reference_name( Name, Plus, Suffix ),
        defined_entity( Name, NS, Text )
    ->  append( Text, Suffix, Mid ),
        attribute_leading_layouts( Quote, NS, String, Mid, Minus )
    ;   otherwise
    ->  % Just & is okay in a value
        String = [0'&|String1], %'
        attribute_layouts( Quote, NS, false, String1, Plus, Minus )
    ).

% reference_in_value( +Namespaces, +Quote, +Layout, ?String, +Plus, ?Minus )
% Plus follows an "&" met inside an attribute value.
%  - a character entity contributes its character, preceded by a single
%    space if a layout run was pending;
%  - a defined entity's text is spliced in front of the remaining input
%    and the pending-layout state is carried over;
%  - otherwise the "&" itself is kept.
reference_in_value( Namespaces, Quote, Layout, String, Plus, Minus ) :-
    (   standard_character_entity( Char, Plus, Mid )
    ->  (   Layout == true
        ->  String = [0' ,Char|String1] %'
        ;   otherwise
        ->  String = [Char|String1]
        ),
        Layout1 = false
    ;   entity_reference_name( Name, Plus, Suffix ),
        defined_entity( Name, Namespaces, Text )
    ->  String = String1,
        append( Text, Suffix, Mid ),
        Layout1 = Layout
    ;   otherwise
    ->  % Just & is okay in a value
        Mid = Plus,
        String = [0'&|String1], %'
        Layout1 = false
    ),
    attribute_layouts( Quote, Namespaces, Layout1, String1, Mid, Minus ).

/* References are resolved backwards in Entity definitions so that
 * circularity is avoided.
 *
 * reference_in_entity( +Namespaces, +Quote, ?String, +Plus, ?Minus )
 * Plus follows an "&" inside an entity value. A character entity is left
 * in the value as written (the "&" is copied and the rest re-read as
 * ordinary text); an already defined entity is expanded in place. A
 * reference that is neither makes the predicate fail.
 */
reference_in_entity( Namespaces, Quote, String, Plus, Minus ) :-
    (   standard_character_entity( _SomeChar, Plus, _Rest )
    ->  String = [0'&|String1], % ' Character entities are unparsed
        Mid = Plus
    ;   entity_reference_name( Name, Plus, Suffix ),
        defined_entity( Name, Namespaces, Text )
    ->  String = String1,
        append( Text, Suffix, Mid )
    ),
    entity_value( Quote, Namespaces, String1, Mid, Minus ).

% standard_character_entity( ?Char ) recognises the text after "&" of a
% numeric character reference ("#xHEX;" or "#DECIMAL;") or of one of the
% entities listed in character_entity/2, and returns its character code.
standard_character_entity( Char ) -->
    "#x", hex_character_reference( Char ), ";".
standard_character_entity( Char ) -->
    "#", digit( Digit ), digits( Digits ), ";",
    % The digits are character codes, so number_codes/2 is the right
    % conversion; number_chars/2 expects one-character atoms in ISO Prolog.
    {number_codes( Char, [Digit|Digits] )}.
standard_character_entity( C ) -->
    chars( String ),
    ";",
    !,
    {character_entity( String, C )}.

% uri( ?URI ) recognises a quoted string; URI is the text between the
% quotes, which must be of the same kind.
uri( URI ) -->
    quote( Quote ),
    uri1( Quote, URI ).

uri1( Quote, [] ) -->
    quote( Quote ),
    !.
uri1( Quote, [Char|Chars] ) -->
    [Char],
    uri1( Quote, Chars ).

% comment( ?Chars, +Plus, ?Minus ): Chars is the text before the first
% "-->" in Plus and Minus is what follows it.
comment( Chars, Plus, Minus ) :-
    append( Chars, [0'-,0'-,0'>|Minus], Plus ), %'
    !.

% cdata( ?Chars, +Plus, ?Minus ): Chars is the text before the first
% "]]>" in Plus and Minus is what follows it.
cdata( Chars, Plus, Minus ) :-
    append( Chars, [0'],0'],0'>|Minus], Plus ), %'
    !.
% Syntax Components

% hex_character_reference( ?Code ) recognises one or more hexadecimal
% digits and returns their value. At least one digit is required, so that
% the malformed reference "&#x;" is rejected instead of being read as
% character code 0.
hex_character_reference( Code ) -->
    hex_digit_char( First ),
    !,
    hex_character_reference1( First, Code ).

% hex_character_reference1( +Current, ?Code ) accumulates any further
% digits, most significant first; it commits to each digit it can read.
hex_character_reference1( Current, Code ) -->
    hex_digit_char( Value ),
    !,
    {New is (Current << 4) + Value},
    hex_character_reference1( New, Code ).
hex_character_reference1( Code, Code ) --> "".

% hex_digit_char( ?Value ) recognises one hexadecimal digit, either case.
hex_digit_char( 0 ) --> "0".
hex_digit_char( 1 ) --> "1".
hex_digit_char( 2 ) --> "2".
hex_digit_char( 3 ) --> "3".
hex_digit_char( 4 ) --> "4".
hex_digit_char( 5 ) --> "5".
hex_digit_char( 6 ) --> "6".
hex_digit_char( 7 ) --> "7".
hex_digit_char( 8 ) --> "8".
hex_digit_char( 9 ) --> "9".
hex_digit_char( 10 ) --> "A".
hex_digit_char( 11 ) --> "B".
hex_digit_char( 12 ) --> "C".
hex_digit_char( 13 ) --> "D".
hex_digit_char( 14 ) --> "E".
hex_digit_char( 15 ) --> "F".
hex_digit_char( 10 ) --> "a".
hex_digit_char( 11 ) --> "b".
hex_digit_char( 12 ) --> "c".
hex_digit_char( 13 ) --> "d".
hex_digit_char( 14 ) --> "e".
hex_digit_char( 15 ) --> "f".

% quote( ?Quote ) recognises a double or single quote character and
% returns its code.
quote( 0'" ) --> %'
    """".
quote( 0'' ) -->
    "'".

% spaces( +Chars0, ?Chars1 ) skips leading characters with codes up to 32.
% It always succeeds, possibly consuming nothing.
spaces( [], [] ).
spaces( [Char|Chars0], Chars1 ) :-
    (   Char =< 32
    ->  spaces( Chars0, Chars1 )
    ;   otherwise
    ->  Chars1 = [Char|Chars0]
    ).

% nmtoken( ?Name ) recognises a name and returns it as an atom.
nmtoken( Name ) -->
    nmtoken_chars( Chars ),
    {atom_codes( Name, Chars )}.

% nmtoken_chars( ?Chars ) recognises a name as a code list: one
% nmtoken_first/1 character, then the longest run of nmtoken_char/1
% characters.
nmtoken_chars( [Char|Chars] ) -->
    [Char],
    {nmtoken_first( Char )},
    nmtoken_chars_tail( Chars ).

nmtoken_chars_tail( [Char|Chars] ) -->
    [Char],
    {nmtoken_char( Char )},
    !,
    nmtoken_chars_tail( Chars ).
nmtoken_chars_tail( [] ) --> "".

% nmtoken_first( ?Char ): a name starts with ":", "_" or a letter of
% alphabet/1.
nmtoken_first( 0': ).
nmtoken_first( 0'_ ).
nmtoken_first( Char ) :-
    alphabet( Char ).

% nmtoken_char( ?Char ): the characters allowed after the first character
% of a name (the table continues below).
nmtoken_char( 0'a ).
nmtoken_char( 0'b ).
nmtoken_char( 0'c ).
nmtoken_char( 0'd ).
nmtoken_char( 0'e ).
nmtoken_char( 0'f ).
nmtoken_char( 0'g ).
nmtoken_char( 0'h ).
nmtoken_char( 0'i ).
nmtoken_char( 0'j ).
% nmtoken_char/1, continued: remaining lower-case letters, ...
nmtoken_char( 0'k ).  nmtoken_char( 0'l ).  nmtoken_char( 0'm ).
nmtoken_char( 0'n ).  nmtoken_char( 0'o ).  nmtoken_char( 0'p ).
nmtoken_char( 0'q ).  nmtoken_char( 0'r ).  nmtoken_char( 0's ).
nmtoken_char( 0't ).  nmtoken_char( 0'u ).  nmtoken_char( 0'v ).
nmtoken_char( 0'w ).  nmtoken_char( 0'x ).  nmtoken_char( 0'y ).
nmtoken_char( 0'z ).
% ... upper-case letters, ...
nmtoken_char( 0'A ).  nmtoken_char( 0'B ).  nmtoken_char( 0'C ).
nmtoken_char( 0'D ).  nmtoken_char( 0'E ).  nmtoken_char( 0'F ).
nmtoken_char( 0'G ).  nmtoken_char( 0'H ).  nmtoken_char( 0'I ).
nmtoken_char( 0'J ).  nmtoken_char( 0'K ).  nmtoken_char( 0'L ).
nmtoken_char( 0'M ).  nmtoken_char( 0'N ).  nmtoken_char( 0'O ).
nmtoken_char( 0'P ).  nmtoken_char( 0'Q ).  nmtoken_char( 0'R ).
nmtoken_char( 0'S ).  nmtoken_char( 0'T ).  nmtoken_char( 0'U ).
nmtoken_char( 0'V ).  nmtoken_char( 0'W ).  nmtoken_char( 0'X ).
nmtoken_char( 0'Y ).  nmtoken_char( 0'Z ).
% ... decimal digits, ...
nmtoken_char( 0'0 ).  nmtoken_char( 0'1 ).  nmtoken_char( 0'2 ).
nmtoken_char( 0'3 ).  nmtoken_char( 0'4 ).  nmtoken_char( 0'5 ).
nmtoken_char( 0'6 ).  nmtoken_char( 0'7 ).  nmtoken_char( 0'8 ).
nmtoken_char( 0'9 ).
% ... and the punctuation allowed inside a name.
nmtoken_char( 0'. ).
nmtoken_char( 0'- ).
nmtoken_char( 0'_ ).
nmtoken_char( 0': ).

% xml_string( ?String ) recognises a quoted string; String is the text up
% to the first matching quote.
xml_string( String ) -->
    quote( Quote ),
    xml_string1( Quote, String ).

xml_string1( Quote, [] ) -->
    quote( Quote ),
    !.
xml_string1( Quote, [Char|Chars] ) -->
    [Char],
    xml_string1( Quote, Chars ).

% alphabet( ?Char ): the unaccented Latin letters (the table continues
% below).
alphabet( 0'a ).  alphabet( 0'b ).  alphabet( 0'c ).  alphabet( 0'd ).
alphabet( 0'e ).  alphabet( 0'f ).  alphabet( 0'g ).  alphabet( 0'h ).
alphabet( 0'i ).  alphabet( 0'j ).  alphabet( 0'k ).  alphabet( 0'l ).
alphabet( 0'm ).  alphabet( 0'n ).  alphabet( 0'o ).  alphabet( 0'p ).
alphabet( 0'q ).  alphabet( 0'r ).  alphabet( 0's ).  alphabet( 0't ).
% alphabet/1, continued.
alphabet( 0'u ).  alphabet( 0'v ).  alphabet( 0'w ).  alphabet( 0'x ).
alphabet( 0'y ).  alphabet( 0'z ).
alphabet( 0'A ).  alphabet( 0'B ).  alphabet( 0'C ).  alphabet( 0'D ).
alphabet( 0'E ).  alphabet( 0'F ).  alphabet( 0'G ).  alphabet( 0'H ).
alphabet( 0'I ).  alphabet( 0'J ).  alphabet( 0'K ).  alphabet( 0'L ).
alphabet( 0'M ).  alphabet( 0'N ).  alphabet( 0'O ).  alphabet( 0'P ).
alphabet( 0'Q ).  alphabet( 0'R ).  alphabet( 0'S ).  alphabet( 0'T ).
alphabet( 0'U ).  alphabet( 0'V ).  alphabet( 0'W ).  alphabet( 0'X ).
alphabet( 0'Y ).  alphabet( 0'Z ).

% digit( ?C ) recognises a single decimal digit and returns its code.
digit( C ) --> [C], {digit_table( C )}.

digit_table( 0'0 ).  digit_table( 0'1 ).  digit_table( 0'2 ).
digit_table( 0'3 ).  digit_table( 0'4 ).  digit_table( 0'5 ).
digit_table( 0'6 ).  digit_table( 0'7 ).  digit_table( 0'8 ).
digit_table( 0'9 ).

% digits( ?Digits ) recognises zero or more decimal digits; the longest
% run is the first solution.
digits( [Digit|Digits] ) -->
    digit( Digit ),
    digits( Digits ).
digits( [] ) --> [].

% character_entity( ?Name, ?Char ): the predefined XML entities.
character_entity( "quot", 0'" ). %'
character_entity( "amp", 0'& ). %'
character_entity( "lt", 0'< ). %'
character_entity( "gt", 0'> ). %'
character_entity( "apos", 0'' ).

end_of_file.

/* For reference, this is a comprehensive recognizer for namechar, based on
 * the definition in http://www.w3.org/TR/2000/REC-xml-20001006 .
 * It comes after end_of_file, so it is not part of the loaded program.
 */
namechar -->
    ( letter
    | unicode_digit
    | "."
    | "-"
    | "_"
    | ":"
    | combiningchar
    | extender
    ).

letter --> (basechar | ideographic).

% basechar recognises one character code from the table below, using
% range//2 for the contiguous runs. The alternatives are kept in their
% original order.
basechar -->
    ( range( 16'0041, 16'005A ) | range( 16'0061, 16'007A )
    | range( 16'00C0, 16'00D6 ) | range( 16'00D8, 16'00F6 )
    | range( 16'00F8, 16'00FF ) | range( 16'0100, 16'0131 )
    | range( 16'0134, 16'013E ) | range( 16'0141, 16'0148 )
    | range( 16'014A, 16'017E ) | range( 16'0180, 16'01C3 )
    | range( 16'01CD, 16'01F0 ) | range( 16'01F4, 16'01F5 )
    | range( 16'01FA, 16'0217 ) | range( 16'0250, 16'02A8 )
    | range( 16'02BB, 16'02C1 )
    | [16'0386]
    | range( 16'0388, 16'038A )
    | [16'038C]
    | range( 16'038E, 16'03A1 ) | range( 16'03A3, 16'03CE )
    | range( 16'03D0, 16'03D6 )
    | [16'03DA] | [16'03DC] | [16'03DE] | [16'03E0]
    | range( 16'03E2, 16'03F3 )
    | range( 16'0401, 16'040C ) | range( 16'040E, 16'044F )
    | range( 16'0451, 16'045C ) | range( 16'045E, 16'0481 )
    | range( 16'0490, 16'04C4 ) | range( 16'04C7, 16'04C8 )
    | range( 16'04CB, 16'04CC ) | range( 16'04D0, 16'04EB )
    | range( 16'04EE, 16'04F5 ) | range( 16'04F8, 16'04F9 )
    | range( 16'0531, 16'0556 )
    | [16'0559]
    | range( 16'0561, 16'0586 )
    | range( 16'05D0, 16'05EA ) | range( 16'05F0, 16'05F2 )
    | range( 16'0621, 16'063A ) | range( 16'0641, 16'064A )
    | range( 16'0671, 16'06B7 ) | range( 16'06BA, 16'06BE )
    | range( 16'06C0, 16'06CE ) | range( 16'06D0, 16'06D3 )
    | [16'06D5]
    | range( 16'06E5, 16'06E6 )
    | range( 16'0905, 16'0939 )
    | [16'093D]
    | range( 16'0958, 16'0961 )
    | range( 16'0985, 16'098C ) | range( 16'098F, 16'0990 )
    | range( 16'0993, 16'09A8 ) | range( 16'09AA, 16'09B0 )
    | [16'09B2]
    | range( 16'09B6, 16'09B9 ) | range( 16'09DC, 16'09DD )
    | range( 16'09DF, 16'09E1 ) | range( 16'09F0, 16'09F1 )
    | range( 16'0A05, 16'0A0A ) | range( 16'0A0F, 16'0A10 )
    | range( 16'0A13, 16'0A28 ) | range( 16'0A2A, 16'0A30 )
    | range( 16'0A32, 16'0A33 ) | range( 16'0A35, 16'0A36 )
    | range( 16'0A38, 16'0A39 ) | range( 16'0A59, 16'0A5C )
    | [16'0A5E]
    | range( 16'0A72, 16'0A74 )
    | range( 16'0A85, 16'0A8B )
    | [16'0A8D]
    | range( 16'0A8F, 16'0A91 ) | range( 16'0A93, 16'0AA8 )
    | range( 16'0AAA, 16'0AB0 ) | range( 16'0AB2, 16'0AB3 )
    | range( 16'0AB5, 16'0AB9 )
    | [16'0ABD] | [16'0AE0]
    | range( 16'0B05, 16'0B0C ) | range( 16'0B0F, 16'0B10 )
    | range( 16'0B13, 16'0B28 ) | range( 16'0B2A, 16'0B30 )
    | range( 16'0B32, 16'0B33 ) | range( 16'0B36, 16'0B39 )
    | [16'0B3D]
    | range( 16'0B5C, 16'0B5D ) | range( 16'0B5F, 16'0B61 )
    | range( 16'0B85, 16'0B8A ) | range( 16'0B8E, 16'0B90 )
    | range( 16'0B92, 16'0B95 ) | range( 16'0B99, 16'0B9A )
    | [16'0B9C]
    | range( 16'0B9E, 16'0B9F ) | range( 16'0BA3, 16'0BA4 )
    | range( 16'0BA8, 16'0BAA ) | range( 16'0BAE, 16'0BB5 )
    | range( 16'0BB7, 16'0BB9 )
    | range( 16'0C05, 16'0C0C ) | range( 16'0C0E, 16'0C10 )
    | range( 16'0C12, 16'0C28 ) | range( 16'0C2A, 16'0C33 )
    | range( 16'0C35, 16'0C39 ) | range( 16'0C60, 16'0C61 )
    | range( 16'0C85, 16'0C8C ) | range( 16'0C8E, 16'0C90 )
    | range( 16'0C92, 16'0CA8 ) | range( 16'0CAA, 16'0CB3 )
    | range( 16'0CB5, 16'0CB9 )
    | [16'0CDE]
    | range( 16'0CE0, 16'0CE1 )
    | range( 16'0D05, 16'0D0C ) | range( 16'0D0E, 16'0D10 )
    | range( 16'0D12, 16'0D28 ) | range( 16'0D2A, 16'0D39 )
    | range( 16'0D60, 16'0D61 )
    | range( 16'0E01, 16'0E2E )
    | [16'0E30]
    | range( 16'0E32, 16'0E33 ) | range( 16'0E40, 16'0E45 )
    | range( 16'0E81, 16'0E82 )
    | [16'0E84]
    | range( 16'0E87, 16'0E88 )
    | [16'0E8A] | [16'0E8D]
    | range( 16'0E94, 16'0E97 ) | range( 16'0E99, 16'0E9F )
    | range( 16'0EA1, 16'0EA3 )
    | [16'0EA5] | [16'0EA7]
    | range( 16'0EAA, 16'0EAB ) | range( 16'0EAD, 16'0EAE )
    | [16'0EB0]
    | range( 16'0EB2, 16'0EB3 )
    | [16'0EBD]
    | range( 16'0EC0, 16'0EC4 )
    | range( 16'0F40, 16'0F47 ) | range( 16'0F49, 16'0F69 )
    | range( 16'10A0, 16'10C5 ) | range( 16'10D0, 16'10F6 )
    | [16'1100]
    | range( 16'1102, 16'1103 ) | range( 16'1105, 16'1107 )
    | [16'1109]
    | range( 16'110B, 16'110C ) | range( 16'110E, 16'1112 )
    | [16'113C] | [16'113E] | [16'1140]
    | [16'114C] | [16'114E] | [16'1150]
    | range( 16'1154, 16'1155 )
    | [16'1159]
    | range( 16'115F, 16'1161 )
    | [16'1163] | [16'1165] | [16'1167] | [16'1169]
    | range( 16'116D, 16'116E ) | range( 16'1172, 16'1173 )
    | [16'1175] | [16'119E] | [16'11A8] | [16'11AB]
    | range( 16'11AE, 16'11AF ) | range( 16'11B7, 16'11B8 )
    | [16'11BA]
    | range( 16'11BC, 16'11C2 )
    | [16'11EB] | [16'11F0] | [16'11F9]
    | range( 16'1E00, 16'1E9B ) | range( 16'1EA0, 16'1EF9 )
    | range( 16'1F00, 16'1F15 ) | range( 16'1F18, 16'1F1D )
    | range( 16'1F20, 16'1F45 ) | range( 16'1F48, 16'1F4D )
    | range( 16'1F50, 16'1F57 )
    | [16'1F59] | [16'1F5B] | [16'1F5D]
    | range( 16'1F5F, 16'1F7D ) | range( 16'1F80, 16'1FB4 )
    | range( 16'1FB6, 16'1FBC )
    | [16'1FBE]
    | range( 16'1FC2, 16'1FC4 ) | range( 16'1FC6, 16'1FCC )
    | range( 16'1FD0, 16'1FD3 ) | range( 16'1FD6, 16'1FDB )
    | range( 16'1FE0, 16'1FEC ) | range( 16'1FF2, 16'1FF4 )
    | range( 16'1FF6, 16'1FFC )
    | [16'2126]
    | range( 16'212A, 16'212B )
    | [16'212E]
    | range( 16'2180, 16'2182 )
    | range( 16'3041, 16'3094 ) | range( 16'30A1, 16'30FA )
    | range( 16'3105, 16'312C )
    | range( 16'AC00, 16'D7A3 )
    ).

% ideographic recognises one character code from the three entries below.
ideographic -->
    ( range( 16'4E00, 16'9FA5 )
    | [16'3007]
    | range( 16'3021, 16'3029 )
    ).

% combiningchar recognises one character code from the table below. The
% alternatives are kept in their original order.
combiningchar -->
    ( range( 16'0300, 16'0345 ) | range( 16'0360, 16'0361 )
    | range( 16'0483, 16'0486 )
    | range( 16'0591, 16'05A1 ) | range( 16'05A3, 16'05B9 )
    | range( 16'05BB, 16'05BD )
    | [16'05BF]
    | range( 16'05C1, 16'05C2 )
    | [16'05C4]
    | range( 16'064B, 16'0652 )
    | [16'0670]
    | range( 16'06D6, 16'06DC ) | range( 16'06DD, 16'06DF )
    | range( 16'06E0, 16'06E4 ) | range( 16'06E7, 16'06E8 )
    | range( 16'06EA, 16'06ED )
    | range( 16'0901, 16'0903 )
    | [16'093C]
    | range( 16'093E, 16'094C )
    | [16'094D]
    | range( 16'0951, 16'0954 ) | range( 16'0962, 16'0963 )
    | range( 16'0981, 16'0983 )
    | [16'09BC] | [16'09BE] | [16'09BF]
    | range( 16'09C0, 16'09C4 ) | range( 16'09C7, 16'09C8 )
    | range( 16'09CB, 16'09CD )
    | [16'09D7]
    | range( 16'09E2, 16'09E3 )
    | [16'0A02] | [16'0A3C] | [16'0A3E] | [16'0A3F]
    | range( 16'0A40, 16'0A42 ) | range( 16'0A47, 16'0A48 )
    | range( 16'0A4B, 16'0A4D ) | range( 16'0A70, 16'0A71 )
    | range( 16'0A81, 16'0A83 )
    | [16'0ABC]
    | range( 16'0ABE, 16'0AC5 ) | range( 16'0AC7, 16'0AC9 )
    | range( 16'0ACB, 16'0ACD )
    | range( 16'0B01, 16'0B03 )
    | [16'0B3C]
    | range( 16'0B3E, 16'0B43 ) | range( 16'0B47, 16'0B48 )
    | range( 16'0B4B, 16'0B4D ) | range( 16'0B56, 16'0B57 )
    | range( 16'0B82, 16'0B83 ) | range( 16'0BBE, 16'0BC2 )
    | range( 16'0BC6, 16'0BC8 ) | range( 16'0BCA, 16'0BCD )
    | [16'0BD7]
    | range( 16'0C01, 16'0C03 ) | range( 16'0C3E, 16'0C44 )
    | range( 16'0C46, 16'0C48 ) | range( 16'0C4A, 16'0C4D )
    | range( 16'0C55, 16'0C56 )
    | range( 16'0C82, 16'0C83 ) | range( 16'0CBE, 16'0CC4 )
    | range( 16'0CC6, 16'0CC8 ) | range( 16'0CCA, 16'0CCD )
    | range( 16'0CD5, 16'0CD6 )
    | range( 16'0D02, 16'0D03 ) | range( 16'0D3E, 16'0D43 )
    | range( 16'0D46, 16'0D48 ) | range( 16'0D4A, 16'0D4D )
    | [16'0D57]
    | [16'0E31]
    | range( 16'0E34, 16'0E3A ) | range( 16'0E47, 16'0E4E )
    | [16'0EB1]
    | range( 16'0EB4, 16'0EB9 ) | range( 16'0EBB, 16'0EBC )
    | range( 16'0EC8, 16'0ECD )
    | range( 16'0F18, 16'0F19 )
    | [16'0F35] | [16'0F37] | [16'0F39] | [16'0F3E] | [16'0F3F]
    | range( 16'0F71, 16'0F84 ) | range( 16'0F86, 16'0F8B )
    | range( 16'0F90, 16'0F95 )
    | [16'0F97]
    | range( 16'0F99, 16'0FAD ) | range( 16'0FB1, 16'0FB7 )
    | [16'0FB9]
    | range( 16'20D0, 16'20DC )
    | [16'20E1]
    | range( 16'302A, 16'302F )
    | [16'3099] | [16'309A]
    ).

% unicode_digit recognises one character code from the ten-digit (or, for
% 0BE7-0BEF, nine-digit) runs below.
unicode_digit -->
    ( range( 16'0030, 16'0039 )
    | range( 16'0660, 16'0669 )
    | range( 16'06F0, 16'06F9 )
    | range( 16'0966, 16'096F )
    | range( 16'09E6, 16'09EF )
    | range( 16'0A66, 16'0A6F )
    | range( 16'0AE6, 16'0AEF )
    | range( 16'0B66, 16'0B6F )
    | range( 16'0BE7, 16'0BEF )
    | range( 16'0C66, 16'0C6F )
    | range( 16'0CE6, 16'0CEF )
    | range( 16'0D66, 16'0D6F )
    | range( 16'0E50, 16'0E59 )
    | range( 16'0ED0, 16'0ED9 )
    | range( 16'0F20, 16'0F29 )
    ).

% extender recognises one character code from the entries below.
extender -->
    ( [16'00B7] | [16'02D0] | [16'02D1] | [16'0387]
    | [16'0640] | [16'0E46] | [16'0EC6] | [16'3005]
    | range( 16'3031, 16'3035 )
    | range( 16'309D, 16'309E )
    | range( 16'30FC, 16'30FE )
    ).

% range( +Low, +High ) recognises one character code Char with
% Low =< Char =< High.
range( Low, High ) -->
    [Char],
    {Char >= Low, Char =< High}.