1/* xml_acquisition.pl : XML -> Document translation.
2 *
3 * Copyright (C) 2001-2005 Binding Time Limited
4 * Copyright (C) 2005, 2006 John Fletcher
5 *
6 * Current Release: $Revision: 1.2 $
7 *
9 *
10 * This program is offered free of charge, as unsupported source code. You may
11 * use it, copy it, distribute it, modify it or sell it without restriction,
12 * but entirely at your own risk.
13 */
15:- ensure_loaded( xml_utilities ).
/* xml_to_document( +Controls, +XML, ?Document ) parses XML, a list of
 * character codes, into the Prolog term Document. Controls is a list of
 * terms controlling the treatment of layout characters and character
 * entities; the initial parsing context is built from it.
 *
 * Document is xml(Attributes, Terms) when the input was well formed and
 * malformed(Attributes, Terms) otherwise. Attributes are those of the
 * leading XML declaration, or [] when the input has no declaration.
 */
xml_to_document( Controls, XML, Document ) :-
	initial_context( Controls, Context ),
	( xml_declaration( Attributes, XML, Body ) ->
		true
	; Body = XML,
	  Attributes = []
	),
	xml_to_document( Body, Context, Terms, [], WellFormed ),
	xml_to_document1( WellFormed, Attributes, Terms, Document ).
/* xml_to_document1( +WellFormed, +Attributes, +Terms, ?Document ) wraps the
 * parsed content as xml/2 when WellFormed is true and as malformed/2 when
 * it is false.
 */
xml_to_document1( true, Attributes, Terms, Document ) :-
	Document = xml(Attributes, Terms).
xml_to_document1( false, Attributes, Terms, Document ) :-
	Document = malformed(Attributes, Terms).
/* unparsed( +Unparsed, +Context, ?Terms, ?Residue, ?WellFormed ) abandons
 * parsing: Unparsed becomes the single term unparsed(Unparsed), no input
 * is left as residue and the result is flagged as not well formed.
 * Context is ignored.
 */
unparsed( Unparsed, _Context, Terms, Residue, WellFormed ) :-
	Terms = [unparsed(Unparsed)],
	Residue = [],
	WellFormed = false.
/* xml_declaration( ?Attributes ) recognizes an "<?xml ... ?>" declaration,
 * optionally preceded by layout, and returns its Name=Value attributes.
 */
xml_declaration( Attributes ) -->
	spaces, "<?", nmtoken( xml ),
	xml_declaration_attributes( Attributes ),
	spaces, "?>".
/* xml_to_document( +Chars, +Context, ?Terms, ?Residue, ?WF ) is the main
 * content loop. It dispatches on the first character of Chars: markup
 * ("<"), an entity reference ("&"), layout that need not be preserved,
 * text in a void context (returned unparsed), or the start of a pcdata/1
 * term. At end of input the context is closed by close_context/3.
 */
xml_to_document( [], Context, Terms, [], WF ) :-
	close_context( Context, Terms, WF ).
xml_to_document( [First|Rest], Context, Terms, Residue, WF ) :-
	( First =:= "<" ->
		xml_markup_structure( Rest, Context, Terms, Residue, WF )
	; First =:= "&" ->
		entity_reference( Rest, Context, Terms, Residue, WF )
	; First =< " ", \+ space_preserve( Context ) ->
		% The layout seen so far is held as a difference list so that
		% it can still become the prefix of a pcdata term.
		layouts( Rest, Context, [First|Hole], Hole, Terms, Residue, WF )
	; void_context( Context ) ->
		unparsed( [First|Rest], Context, Terms, Residue, WF )
	; Terms = [pcdata([First|Text])|Terms1],
	  acquire_pcdata( Rest, Context, Text, Terms1, Residue, WF )
	).
/* layouts( +Chars, +Context, ?Plus, ?Minus, ?Terms, ?Residue, ?WF ) skips a
 * run of layout characters. Plus-Minus is a difference list of the layout
 * read so far: it is dropped if markup or end of input follows, and
 * becomes the start of a pcdata/1 term if ordinary text follows.
 */
layouts( [], Context, _Plus, _Minus, Terms, [], WF ) :-
	close_context( Context, Terms, WF ).
layouts( [Next|Rest], Context, Plus, Minus, Terms, Residue, WF ) :-
	( Next =:= "<" ->
		xml_markup_structure( Rest, Context, Terms, Residue, WF )
	; Next =:= "&" ->
		reference_in_layout( Rest, Context, Plus, Minus, Terms, Residue, WF )
	; Next =< " " ->
		Minus = [Next|Hole],
		layouts( Rest, Context, Plus, Hole, Terms, Residue, WF )
	; void_context( Context ) ->
		unparsed( [Next|Rest], Context, Terms, Residue, WF )
	; % Ordinary text: the collected layout is kept as its prefix and
	  % space_preserve is switched on for what follows.
	  Terms = [pcdata(Plus)|Terms1],
	  Minus = [Next|Text],
	  context_update( space_preserve, Context, true, Preserving ),
	  acquire_pcdata( Rest, Preserving, Text, Terms1, Residue, WF )
	).
/* acquire_pcdata( +Chars, +Context, ?Text, ?Terms, ?Residue, ?WF ) copies
 * character data from Chars into Text until markup ("<") or the end of
 * input is reached; "&" is handed to reference_in_pcdata/6.
 */
acquire_pcdata( [], Context, [], Terms, [], WF ) :-
	close_context( Context, Terms, WF ).
acquire_pcdata( [Next|Rest], Context, Text, Terms, Residue, WF ) :-
	( Next =:= "<" ->
		Text = [],
		xml_markup_structure( Rest, Context, Terms, Residue, WF )
	; Next =:= "&" ->
		reference_in_pcdata( Rest, Context, Text, Terms, Residue, WF )
	; Text = [Next|Text1],
	  acquire_pcdata( Rest, Context, Text1, Terms, Residue, WF )
	).
/* xml_markup_structure( +Chars, +Context, ?Terms, ?Residue, ?WF ) handles
 * the input that follows a "<": a closing tag ("/"), a processing
 * instruction ("?"), a "<!" declaration or an opening tag. Input that
 * fits none of these is returned unparsed, with the "<" restored.
 */
xml_markup_structure( [], Context, Terms, Residue, WF ) :-
	unparsed( "<", Context, Terms, Residue, WF ).
xml_markup_structure( [Char|Tail], Context, Terms, Residue, WF ) :-
	Markup = [Char|Tail],
	( Char =:= "/" ->
		closing_tag( Context, Tail, Terms, Residue, WF )
	; Char =:= "?" ->
		pi_acquisition( Tail, Context, Terms, Residue, WF )
	; Char =:= "!" ->
		declaration_acquisition( Tail, Context, Terms, Residue, WF )
	; open_tag( Tag, Context, Attributes, Type, Markup, AfterTag ) ->
		push_tag( Tag, AfterTag, Context, Attributes, Type, Terms, Residue, WF )
	; unparsed( [0'<|Markup], Context, Terms, Residue, WF ) %'
	).
/* push_tag( +Tag, +Chars, +Context, +Attributes, +Type, ?Terms, ?Residue, ?WF )
 * builds the element opened by Tag, then lets push_tag1/7 decide, from the
 * element's own well-formedness, whether parsing continues after it.
 */
push_tag( Tag, Chars, Context, Attributes, Type, Terms, Residue, WF ) :-
	new_element( Tag, Chars, Context, Attributes, Type,
			Element, AfterElement, ElementWF ),
	push_tag1( ElementWF, Context, Element, AfterElement, Terms, Residue, WF ).
/* push_tag1( +ElementWF, +Context, +Term, +Chars, ?Terms, ?Residue, ?WF )
 * continues with the siblings of Term when the element was well formed;
 * otherwise Term is the last term and Chars is returned as the residue.
 */
push_tag1( true, Context, Term, Chars, Terms, Residue, WF ) :-
	Terms = [Term|Siblings],
	xml_to_document( Chars, Context, Siblings, Residue, WF ).
push_tag1( false, _Context, Term, Chars, Terms, Residue, WF ) :-
	Terms = [Term],
	Residue = Chars,
	WF = false.
/* new_element( +TagChars, +Chars, +Context, +Attributes0, +Type, ?Term,
 *		?Residue, ?WF )
 * builds the term for an element whose opening tag has been read.
 * Namespace declarations among Attributes0 update the context first. A
 * "prefix:" on the tag name selects a specific namespace if the prefix is
 * known; otherwise the whole name is kept and the default namespace
 * applies. The element is wrapped in namespace/3 only when its namespace
 * differs from the current one. Type (empty or push(Tag)) decides whether
 * contents are read, see close_tag/6.
 */
new_element( TagChars, Chars, Context, Attributes0, Type, Term, Residue, WF ) :-
	namespace_attributes( Attributes0, Context, Context1, Attributes1 ),
	( append( Prefix, [0':|LocalChars], TagChars ), %'
	  specific_namespace( Prefix, Context1, Namespace ) ->
		true
	; Prefix = "",
	  LocalChars = TagChars,
	  default_namespace( Context1, Namespace )
	),
	current_namespace( Context1, Current ),
	Element = element(Tag, Attributes, Contents),
	( Namespace == Current ->
		Term = Element,
		Context2 = Context1
	; Term = namespace( Namespace, Prefix, Element ),
	  context_update( current_namespace, Context1, Namespace, Context2 )
	),
	input_attributes( Attributes1, Context2, Attributes ),
	atom_codes( Tag, LocalChars ),
	close_tag( Type, Chars, Context2, Contents, Residue, WF ).
/* close_tag( +Type, +Chars, +Context, ?Contents, ?Residue, ?WF ) finishes an
 * element. An empty element has no contents and consumes no input; for
 * push(Tag) the contents are parsed with Tag recorded as the open element.
 */
close_tag( empty, Chars, _Context, Contents, Residue, WF ) :-
	Contents = [],
	Residue = Chars,
	WF = true.
close_tag( push(Tag), Chars, Context, Contents, Residue, WF ) :-
	context_update( element, Context, Tag, Inside ),
	xml_to_document( Chars, Inside, Contents, Residue, WF ).
/* pi_acquisition( +Chars, +Context, ?Terms, ?Residue, ?WellFormed ) reads a
 * processing instruction following "<?" into instructions(Target, Text)
 * and carries on with the rest of the input. A target of xml, or an
 * instruction that cannot be read, leaves the input unparsed.
 */
pi_acquisition( Chars, Context, Terms, Residue, WellFormed ) :-
	( inline_instruction( Target, Processing, Chars, Rest ),
	  Target \== xml ->
		Terms = [instructions(Target, Processing)|Terms1],
		xml_to_document( Rest, Context, Terms1, Residue, WellFormed )
	; unparsed( [0'<,0'?|Chars], Context, Terms, Residue, WellFormed )
	).
/* declaration_acquisition( +Chars, +Context, ?Terms, ?Residue, ?WF ) reads
 * the construct following "<!" (classified by declaration_type/3 and
 * parsed by declaration_parse//4) and continues with the possibly updated
 * context. If either step fails the input is returned unparsed.
 */
declaration_acquisition( Chars, Context, Terms, Residue, WF ) :-
	( declaration_type( Chars, Type, Body ),
	  declaration_parse( Type, Context, Term, Context1, Body, Rest ) ->
		Terms = [Term|Terms1],
		xml_to_document( Rest, Context1, Terms1, Residue, WF )
	; unparsed( [0'<,0'!|Chars], Context, Terms, Residue, WF )
	).
/* open_tag( ?Tag, +Namespaces, ?Attributes, ?Termination ) recognizes the
 * name, attributes and terminator of an opening tag; the leading "<" has
 * already been consumed. Namespaces is passed on to attributes//3.
 */
open_tag( Tag, Namespaces, Attributes, Termination ) -->
	nmtoken_chars( Tag ),
	attributes( Attributes, [], Namespaces ),
	spaces, open_tag_terminator( Tag, Termination ).
/* open_tag_terminator( +Tag, ?Termination ) recognizes the end of an opening
 * tag: ">" gives push(Tag), "/>" gives empty.
 */
open_tag_terminator( Tag, Termination ) -->
	">",
	{Termination = push(Tag)}.
open_tag_terminator( _Tag, Termination ) -->
	"/>",
	{Termination = empty}.
/* declaration_parse( +Type, +Context0, ?Term, ?Context ) parses the body of
 * a "<!" construct of the given Type into Term. Only a doctype can change
 * the context; comments and CDATA sections return it unchanged.
 */
declaration_parse( comment, Context, comment(Text), Context ) -->
	comment( Text ).
declaration_parse( cdata, Context, cdata(Text), Context ) -->
	cdata( Text ).
declaration_parse( doctype, Context0, doctype(Name, Names), Context ) -->
	doctype( Name, Names, Context0, Context ),
	spaces, ">".
/* inline_instruction( ?Target, ?Processing, +Plus, ?Minus ) reads a
 * processing instruction body from Plus: a name token Target, skipped
 * layout, then Processing, the text up to the first "?>". Minus is the
 * input after the "?>".
 */
inline_instruction( Target, Processing, Plus, Minus ) :-
	nmtoken( Target, Plus, AfterTarget ),
	spaces( AfterTarget, Text ),
	Terminated = [0'?,0'>|Minus],
	append( Processing, Terminated, Text ),
	!.
/* entity_reference_name( ?Reference ) recognizes the name of an entity
 * reference and its closing ";" (the "&" has already been consumed).
 */
entity_reference_name( Reference ) -->
	nmtoken_chars( Reference ), ";".
/* declaration_type( +Chars, ?Class, ?Rest ) classifies the text following
 * "<!". Recognized openers give comment, cdata or doctype and Rest is the
 * input after the opener; anything else is generic and Rest is all of
 * Chars. Fails if Chars has fewer than two characters.
 */
declaration_type( [Char1,Char2|Tail], Class, Rest ) :-
	( declaration_type1( Char1, Char2, Tail, Known, AfterOpener ) ->
		Class = Known,
		Rest = AfterOpener
	; Class = generic,
	  Rest = [Char1,Char2|Tail]
	).
/* declaration_type1( +Char1, +Char2, +Chars, ?Class, ?Residue ) recognizes
 * the opener of a "<!" construct from its first two characters and the
 * following input: "--" (comment), "[CDATA[" (cdata) or "DOCTYPE"
 * (doctype). Residue is the input after the opener.
 */
declaration_type1( 0'-, 0'-, Residue, comment, Residue ).
declaration_type1( 0'[, 0'C, Following, cdata, Residue ) :-
	append( "DATA[", Residue, Following ).
declaration_type1( 0'D, 0'O, Following, doctype, Residue ) :-
	append( "CTYPE", Residue, Following ).
/* closing_tag( +Context, +Chars, ?Terms, ?Residue, ?WellFormed ) reads the
 * closing tag following "</". If it names the tag recorded in Context the
 * element's contents end here and the input after the tag is returned as
 * Residue; any other input is returned unparsed.
 */
closing_tag( Context, Chars, Terms, Residue, WellFormed ) :-
	( closing_tag_name( Tag, Chars, AfterTag ),
	  current_tag( Context, Tag ) ->
		Terms = [],
		Residue = AfterTag,
		WellFormed = true
	; unparsed( [0'<,0'/|Chars], Context, Terms, Residue, WellFormed )
	).
/* closing_tag_name( ?Tag ) recognizes the name of a closing tag, optional
 * layout and the final ">".
 */
closing_tag_name( Tag ) -->
	nmtoken_chars( Tag ),
	spaces, ">".
/* entity_reference( +Chars, +Context, ?Terms, ?Residue, ?WF ) handles a "&"
 * met at the start of content: it is a reference in layout with no layout
 * collected yet, i.e. with an empty difference list.
 */
entity_reference( Chars, Context, Terms, Residue, WF ) :-
	reference_in_layout( Chars, Context, Empty, Empty, Terms, Residue, WF ).
/* reference_in_layout( +Chars, +Context, ?Plus, ?Minus, ?Terms, ?Residue, ?WF )
 * resolves the reference whose "&" was met after the layout held in the
 * difference list Plus-Minus:
 *   - a character entity becomes the first text character after that
 *     layout;
 *   - a defined entity has its replacement text pushed back onto the input
 *     and parsing restarts there (the collected layout is dropped);
 *   - otherwise the "&" is kept as text if the context allows it, and the
 *     input is returned unparsed if not.
 */
reference_in_layout( Chars, Context, Plus, Minus, Terms, Residue, WF ) :-
	( standard_character_entity( Char, Chars, Rest ) ->
		Minus = [Char|Text],
		Terms = [pcdata(Plus)|Terms1],
		acquire_pcdata( Rest, Context, Text, Terms1, Residue, WF )
	; entity_reference_name( Reference, Chars, Rest ),
	  defined_entity( Reference, Context, Replacement ) ->
		append( Replacement, Rest, Expanded ),
		xml_to_document( Expanded, Context, Terms, Residue, WF )
	; allow_ampersand( Context ) ->
		Minus = [0'&|Text], %'
		Terms = [pcdata(Plus)|Terms1],
		acquire_pcdata( Chars, Context, Text, Terms1, Residue, WF )
	; unparsed( [0'&|Chars], Context, Terms, Residue, WF ) %'
	).
/* reference_in_pcdata( +Chars0, +Context, ?Chars1, ?Terms, ?Residue, ?WF )
 * resolves the reference whose "&" was met inside character data. Chars1
 * is the open tail of the text being collected:
 *   - a character entity contributes one character to the text;
 *   - a defined entity has its replacement text pushed back onto the input;
 *   - otherwise the "&" is kept as text if the context allows it; if not,
 *     the text is closed and the input is returned unparsed.
 */
reference_in_pcdata( Chars0, Context, Chars1, Terms, Residue, WF ) :-
	( standard_character_entity( Char, Chars0, Rest ) ->
		Chars1 = [Char|Text],
		acquire_pcdata( Rest, Context, Text, Terms, Residue, WF )
	; entity_reference_name( Reference, Chars0, Rest ),
	  defined_entity( Reference, Context, Replacement ) ->
		append( Replacement, Rest, Expanded ),
		acquire_pcdata( Expanded, Context, Chars1, Terms, Residue, WF )
	; allow_ampersand( Context ) ->
		Chars1 = [0'&|Text],
		acquire_pcdata( Chars0, Context, Text, Terms, Residue, WF )
	; Chars1 = [],
	  unparsed( [0'&|Chars0], Context, Terms, Residue, WF )
	).
/* namespace_attributes( +Attributes0, +Context0, ?Context, ?Attributes )
 * applies the namespace-related attributes of an element to the context,
 * one at a time:
 *   - xmlns="URI" sets the default namespace and is removed;
 *   - xmlns:Prefix="URI" binds Prefix and is kept, moved to the front;
 *   - xml:space="preserve" switches space preservation on.
 * Attributes is what remains for the element itself.
 */
namespace_attributes( [], Context, Context, [] ).
namespace_attributes( Attributes0, Context0, Context, Attributes ) :-
	Attributes0 = [_|_],
	% Pattern for any "xmlns:Prefix" name; Prefix is bound by select/3.
	append( "xmlns:", Prefix, PrefixedName ),
	( select( "xmlns"=Value, Attributes0, Others ) ->
		atom_codes( URI, Value ),
		context_update( default_namespace, Context0, URI, Context1 ),
		namespace_attributes( Others, Context1, Context, Attributes )
	; select( PrefixedName=Value, Attributes0, Others ) ->
		Attributes = [PrefixedName=Value|Attributes1],
		atom_codes( URI, Value ),
		context_update( ns_prefix(Prefix), Context0, URI, Context1 ),
		namespace_attributes( Others, Context1, Context, Attributes1 )
	; member( "xml:space"="preserve", Attributes0 ) ->
		Attributes = Attributes0,
		context_update( space_preserve, Context0, true, Context )
	; Context = Context0,
	  Attributes = Attributes0
	).
/* input_attributes( +Attributes0, +Context, ?Attributes ) converts attribute
 * names from code lists to atoms. When the context asks for prefixes to be
 * removed, a "Prefix:" other than "xmlns" is stripped from a name if it
 * denotes the current namespace.
 */
input_attributes( [], _Context, [] ).
input_attributes( [NameChars=Value|Rest0], Context, [Name=Value|Rest] ) :-
	( remove_attribute_prefixes( Context ),
	  append( Prefix, [0':|Unprefixed], NameChars ), %'
	  Prefix \== "xmlns",
	  specific_namespace( Prefix, Context, Namespace ),
	  current_namespace( Context, Namespace ) ->
		LocalChars = Unprefixed
	; LocalChars = NameChars
	),
	atom_codes( Name, LocalChars ),
	input_attributes( Rest0, Context, Rest ).
/* attributes( ?Attributes, +Seen, +Namespaces ) recognizes a possibly empty
 * sequence of Name="Value" pairs. Seen holds the names already read: a
 * repeated name is not accepted, so the sequence ends before it.
 */
attributes( [Name=Value|Attributes], Seen, Namespaces ) -->
	spaces, nmtoken_chars( Name ),
	{\+ member( Name, Seen )},
	spaces, "=", spaces,
	attribute_value( Value, Namespaces ),
	attributes( Attributes, [Name|Seen], Namespaces ).
attributes( [], _Seen, _Namespaces ) -->
	"".
/* xml_declaration_attributes( ?Attributes ) recognizes the Name="Value"
 * pairs of an XML declaration. Each pair must be accepted by
 * xml_declaration_attribute_valid/2. The empty sequence is tried first.
 */
xml_declaration_attributes( [] ) -->
	"".
xml_declaration_attributes( [Name=Value|Attributes] ) -->
	spaces, nmtoken( Name ),
	spaces, "=", spaces,
	xml_string( Value ),
	{xml_declaration_attribute_valid( Name, Value )},
	xml_declaration_attributes( Attributes ),
	spaces.
/* doctype( ?Name, ?External, +Namespaces0, ?Namespaces1 ) recognizes the
 * body of a DOCTYPE declaration: the document type name, an optional
 * external identifier and an optional internal subset. Literals from the
 * internal subset are attached to the identifier by doctype_extension/3;
 * entity definitions update the context Namespaces0 into Namespaces1.
 */
doctype( Name, External, Namespaces0, Namespaces1 ) -->
	spaces, nmtoken( Name ),
	spaces, doctype_id( Identifier ),
	spaces, doctype1( Namespaces0, Literals, Namespaces1 ),
	{doctype_extension( Literals, Identifier, External )}.
/* doctype_extension( +Literals, +External0, ?External ) leaves the doctype
 * identifier unchanged when there are no DTD literals and otherwise adds
 * the literals to it, see extended_doctype/3.
 */
doctype_extension( [], External, External ).
doctype_extension( Literals, External0, External ) :-
	Literals = [_|_],
	extended_doctype( External0, Literals, External ).
/* extended_doctype( +External0, +Literals, ?External ) adds Literals as an
 * extra, last argument to a system/1, public/2 or local/0 identifier.
 */
extended_doctype( system(URL), Literals, External ) :-
	External = system(URL, Literals).
extended_doctype( public(URN, URL), Literals, External ) :-
	External = public(URN, URL, Literals).
extended_doctype( local, Literals, External ) :-
	External = local(Literals).
/* doctype1( +Namespaces0, ?Literals, ?Namespaces1 ) recognizes the optional
 * internal subset "[ ... ]" of a DOCTYPE. Once "[" has been read the
 * subset must parse; without it there are no literals and the context is
 * unchanged.
 */
doctype1( Namespaces0, Literals, Namespaces1 ) -->
	"[", !,
	dtd( Namespaces0, Literals, Namespaces1 ),
	"]".
doctype1( Namespaces, [], Namespaces ) -->
	"".
/* doctype_id( ?Identifier ) recognizes the external identifier of a
 * DOCTYPE: SYSTEM with one quoted literal, PUBLIC with two, or nothing at
 * all, in which case Identifier is local.
 */
doctype_id( system(URL) ) -->
	"SYSTEM", spaces, uri( URL ).
doctype_id( public(URN,URL) ) -->
	"PUBLIC", spaces, uri( URN ),
	spaces, uri( URL ).
doctype_id( local ) -->
	"".
/* dtd( +Namespaces0, ?Literals, ?Namespaces1 ) recognizes the declarations
 * of an internal DTD subset:
 *   - "<!ENTITY Name Quoted >" defines an entity in the context;
 *   - "<!-- ... -->" comments are skipped;
 *   - any other "<! ... >" declaration is returned as dtd_literal(Chars).
 * Each opener commits to its clause, so a malformed declaration makes the
 * whole subset fail rather than being read as something else.
 */
dtd( Namespaces0, Literals, Namespaces1 ) -->
	spaces, "<!ENTITY", !,
	spaces, nmtoken_chars( Name ),
	spaces, quote( Quote ),
	entity_value( Quote, Namespaces0, String ),
	spaces, ">",
	{ % The standard entities (&lt; &quot; etc.) must not be redefined.
	  \+ character_entity( Name, _StandardChar ),
	  context_update( entity(Name), Namespaces0, String, Namespaces2 )
	},
	dtd( Namespaces2, Literals, Namespaces1 ).
dtd( Namespaces0, Literals, Namespaces1 ) -->
	spaces, "<!--", !,
	dtd_comment, ">",
	dtd( Namespaces0, Literals, Namespaces1 ).
dtd( Namespaces0, [dtd_literal(Literal)|Literals], Namespaces1 ) -->
	spaces, "<!", !,
	dtd_literal( Literal ),
	dtd( Namespaces0, Literals, Namespaces1 ).
dtd( Namespaces, [], Namespaces ) -->
	spaces.
/* dtd_literal( ?Chars ) collects the text of a "<!" declaration up to, but
 * not including, its closing ">". Embedded "-- ... --" comments are left
 * out of Chars.
 */
dtd_literal( [] ) -->
	">", !.
dtd_literal( Chars ) -->
	"--", !,
	dtd_comment,
	dtd_literal( Chars ).
dtd_literal( [Char|Chars] ) -->
	[Char],
	dtd_literal( Chars ).
/* dtd_comment( +Plus, ?Minus ) skips the body of a comment: Minus is the
 * input following the first "--" found in Plus.
 */
dtd_comment( Plus, Minus ) :-
	CommentEnd = [0'-,0'-|Minus],
	append( _Skipped, CommentEnd, Plus ),
	!.
/* nmtokens( ?Names ) recognizes a possibly empty sequence of name tokens,
 * each optionally preceded by layout; the longest sequence is tried first.
 */
nmtokens( [Name|Names] ) -->
	spaces, nmtoken( Name ),
	nmtokens( Names ).
nmtokens( [] ) -->
	[].
/* entity_value( +Quote, +Namespaces, ?String, +Plus, ?Minus ) reads the
 * quoted replacement text of an entity definition up to the closing
 * Quote. References inside the text are handled by reference_in_entity/5.
 * Fails if the input ends before the closing Quote.
 */
entity_value( Quote, Namespaces, String, [Next|Rest], Minus ) :-
	( Next == Quote ->
		String = [],
		Minus = Rest
	; Next =:= "&" ->
		reference_in_entity( Namespaces, Quote, String, Rest, Minus )
	; String = [Next|String1],
	  entity_value( Quote, Namespaces, String1, Rest, Minus )
	).
/* attribute_value( ?String, +Namespaces ) recognizes a quoted attribute
 * value. Its layout is normalized: leading layout is dropped here and the
 * rest is handled by attribute_layouts/6.
 */
attribute_value( String, Namespaces ) -->
	quote( Quote ),
	attribute_leading_layouts( Quote, Namespaces, String ).
/* attribute_leading_layouts( +Quote, +Namespaces, ?String, +Plus, ?Minus )
 * skips layout (codes up to 32, and 160) at the start of an attribute
 * value and hands over to attribute_layouts/6 at the first other
 * character. The value ends at Quote or at the end of the input.
 */
attribute_leading_layouts( _Quote, _Namespace, [], [], [] ).
attribute_leading_layouts( Quote, Namespaces, String, [Next|Rest], Minus ) :-
	( Next == Quote ->
		String = [],
		Minus = Rest
	; Next =:= "&" ->
		ref_in_attribute_layout( Namespaces, Quote, String, Rest, Minus )
	; Next > 32, Next \== 160 ->
		String = [Next|String1],
		attribute_layouts( Quote, Namespaces, false, String1, Rest, Minus )
	; attribute_leading_layouts( Quote, Namespaces, String, Rest, Minus )
	).
/* attribute_layouts( +Quote, +Namespaces, +Layout, ?String, +Plus, ?Minus )
 * reads the rest of an attribute value. Layout is true when layout has
 * been skipped since the last text character: a run of layout between
 * two text characters becomes a single space, and trailing layout is
 * dropped. The value ends at Quote or at the end of the input.
 */
attribute_layouts( _Quote, _Namespaces, _Layout, [], [], [] ).
attribute_layouts( Quote, Namespaces, Layout, String, [Next|Rest], Minus ) :-
	( Next == Quote ->
		String = [],
		Minus = Rest
	; Next =:= "&" ->
		reference_in_value( Namespaces, Quote, Layout, String, Rest, Minus )
	; Next > 32, Next \== 160 ->
		( Layout == true ->
			String = [0' ,Next|String1] %'
		; String = [Next|String1]
		),
		attribute_layouts( Quote, Namespaces, false, String1, Rest, Minus )
	; attribute_layouts( Quote, Namespaces, true, String, Rest, Minus )
	).
/* ref_in_attribute_layout( +NS, +Quote, ?String, +Plus, ?Minus ) resolves a
 * reference whose "&" was met while skipping the leading layout of an
 * attribute value. A character entity or a bare "&" starts the text of
 * the value; a defined entity is expanded in place and its replacement is
 * still subject to leading layout removal.
 */
ref_in_attribute_layout( NS, Quote, String, Plus, Minus ) :-
	( standard_character_entity( Char, Plus, Rest ) ->
		String = [Char|String1],
		attribute_layouts( Quote, NS, false, String1, Rest, Minus )
	; entity_reference_name( Name, Plus, Suffix ),
	  defined_entity( Name, NS, Replacement ) ->
		append( Replacement, Suffix, Expanded ),
		attribute_leading_layouts( Quote, NS, String, Expanded, Minus )
	; % A lone "&" is accepted in a value.
	  String = [0'&|String1], %'
	  attribute_layouts( Quote, NS, false, String1, Plus, Minus )
	).
/* reference_in_value( +Namespaces, +Quote, +Layout, ?String, +Plus, ?Minus )
 * resolves a reference whose "&" was met inside the text of an attribute
 * value. A character entity or a bare "&" is text, preceded by one space
 * if layout was pending for the entity; a defined entity is expanded in
 * place with the pending layout state unchanged.
 */
reference_in_value( Namespaces, Quote, Layout, String, Plus, Minus ) :-
	( standard_character_entity( Char, Plus, Rest ) ->
		( Layout == true ->
			String = [0' ,Char|String1] %'
		; String = [Char|String1]
		),
		Pending = false
	; entity_reference_name( Name, Plus, Suffix ),
	  defined_entity( Name, Namespaces, Replacement ) ->
		String = String1,
		append( Replacement, Suffix, Rest ),
		Pending = Layout
	; % A lone "&" is accepted in a value.
	  Rest = Plus,
	  String = [0'&|String1], %'
	  Pending = false
	),
	attribute_layouts( Quote, Namespaces, Pending, String1, Rest, Minus ).
/* reference_in_entity( +Namespaces, +Quote, ?String, +Plus, ?Minus ) handles
 * a "&" inside an entity definition. References are resolved backwards
 * in entity definitions, i.e. against the entities defined so far, so
 * that circularity is avoided. A character entity is left unparsed: only
 * its "&" is emitted here and the rest is copied as ordinary text. A
 * reference to anything else makes the definition fail.
 */
reference_in_entity( Namespaces, Quote, String, Plus, Minus ) :-
	( standard_character_entity( _SomeChar, Plus, _Rest ) ->
		String = [0'&|String1], % ' Character entities are unparsed
		Following = Plus
	; entity_reference_name( Name, Plus, Suffix ),
	  defined_entity( Name, Namespaces, Replacement ) ->
		String = String1,
		append( Replacement, Suffix, Following )
	),
	entity_value( Quote, Namespaces, String1, Following, Minus ).
/* standard_character_entity( ?Char ) recognizes the part of a character
 * reference that follows "&", including the closing ";", and returns the
 * character code it denotes:
 *   - "#x" Hex ";"	a hexadecimal character reference;
 *   - "#" Digits ";"	a decimal character reference;
 *   - Name ";"		one of the entities of character_entity/2.
 *
 * The decimal digits are a list of character codes, so they are converted
 * with number_codes/2, in line with the use of atom_codes/2 elsewhere in
 * this file. ISO number_chars/2 expects one-char atoms instead.
 */
standard_character_entity( Char ) -->
	"#x", hex_character_reference( Char ), ";".
standard_character_entity( Char ) -->
	"#", digit( Digit ), digits( Digits ), ";",
	{number_codes( Char, [Digit|Digits] )}.
standard_character_entity( Char ) -->
	chars( Name ),
	";",
	!,	% Commit to the text up to the first ";" found by chars//1.
	{character_entity( Name, Char )}.
/* uri( ?URI ) recognizes a quoted literal and returns the characters
 * between the quotes.
 */
uri( URI ) -->
	quote( Delimiter ),
	uri1( Delimiter, URI ).
/* uri1( +Quote, ?Chars ) collects characters up to the first occurrence of
 * the quote character Quote, which is consumed.
 */
uri1( Quote, [] ) -->
	quote( Quote ), !.
uri1( Quote, [Next|Rest] ) -->
	[Next],
	uri1( Quote, Rest ).
/* comment( ?Chars, +Plus, ?Minus ) splits Plus at the first "-->": Chars is
 * the comment text before it and Minus is the input after it.
 */
comment( Chars, Plus, Minus ) :-
	CommentEnd = [0'-,0'-,0'>|Minus], %'
	append( Chars, CommentEnd, Plus ),
	!.
/* cdata( ?Chars, +Plus, ?Minus ) splits Plus at the first "]]>": Chars is
 * the CDATA text before it and Minus is the input after it.
 */
cdata( Chars, Plus, Minus ) :-
	SectionEnd = [0'],0'],0'>|Minus], %'
	append( Chars, SectionEnd, Plus ),
	!.
532% Syntax Components
/* hex_character_reference( ?Code ) recognizes one or more hexadecimal
 * digits and returns the number they denote.
 *
 * At least one digit is required, as for decimal references: an empty
 * digit sequence (as in "&#x;") is rejected instead of being read as
 * character code 0.
 */
hex_character_reference( Code ) -->
	hex_digit_char( First ),
	!,
	hex_character_reference1( First, Code ).

/* hex_character_reference1( +Current, ?Code ) consumes any further
 * hexadecimal digits, shifting each one into the value accumulated so far.
 */
hex_character_reference1( Current, Code ) -->
	hex_digit_char( Value ),
	!,
	{New is (Current << 4) + Value},
	hex_character_reference1( New, Code ).
hex_character_reference1( Code, Code ) --> "".

/* hex_digit_char( ?Value ) recognizes a single hexadecimal digit, in upper
 * or lower case, and returns its value (0-15).
 */
hex_digit_char( 0 ) --> "0".
hex_digit_char( 1 ) --> "1".
hex_digit_char( 2 ) --> "2".
hex_digit_char( 3 ) --> "3".
hex_digit_char( 4 ) --> "4".
hex_digit_char( 5 ) --> "5".
hex_digit_char( 6 ) --> "6".
hex_digit_char( 7 ) --> "7".
hex_digit_char( 8 ) --> "8".
hex_digit_char( 9 ) --> "9".
hex_digit_char( 10 ) --> "A".
hex_digit_char( 11 ) --> "B".
hex_digit_char( 12 ) --> "C".
hex_digit_char( 13 ) --> "D".
hex_digit_char( 14 ) --> "E".
hex_digit_char( 15 ) --> "F".
hex_digit_char( 10 ) --> "a".
hex_digit_char( 11 ) --> "b".
hex_digit_char( 12 ) --> "c".
hex_digit_char( 13 ) --> "d".
hex_digit_char( 14 ) --> "e".
hex_digit_char( 15 ) --> "f".
/* quote( ?Quote ) recognizes a single or a double quote character and
 * returns its code; the double quote is tried first.
 */
quote( Quote ) -->
	[Quote],
	{quote_code( Quote )}.

% quote_code( ?Code ) holds for the codes of the two quote characters.
quote_code( 0'" ). %'
quote_code( 0'' ).
/* spaces( +Plus, ?Minus ) skips leading layout: Minus is Plus without its
 * leading characters with codes up to 32. It never fails on a list of
 * codes, so it also stands for "optional layout" in grammar rules.
 */
spaces( [], [] ).
spaces( [Char|Chars], Rest ) :-
	Char =< 32,
	!,
	spaces( Chars, Rest ).
spaces( Input, Input ) :-
	Input = [_|_].
/* nmtoken( ?Name ) recognizes a name token and returns it as an atom. */
nmtoken( Name ) -->
	nmtoken_chars( Codes ),
	{atom_codes( Name, Codes )}.
/* nmtoken_chars( ?Chars ) recognizes a name token as a list of codes: one
 * character accepted by nmtoken_first/1 followed by as many characters
 * accepted by nmtoken_char/1 as possible.
 */
nmtoken_chars( [First|Tail] ) -->
	[First],
	{nmtoken_first( First )},
	nmtoken_chars_tail( Tail ).
/* nmtoken_chars_tail( ?Chars ) recognizes the longest possible, perhaps
 * empty, run of characters accepted by nmtoken_char/1.
 */
nmtoken_chars_tail( [Next|Rest] ) -->
	[Next],
	{nmtoken_char( Next )},
	!,
	nmtoken_chars_tail( Rest ).
nmtoken_chars_tail( [] ) -->
	"".
/* nmtoken_first( ?Char ) holds if Char may start a name token: a colon, an
 * underscore or a letter accepted by alphabet/1.
 */
nmtoken_first( Char ) :-
	( Char = 0':
	; Char = 0'_
	; alphabet( Char )
	).
/* nmtoken_char( ?Char ) holds if Char may occur inside a name token: an
 * unaccented Latin letter, a decimal digit, or one of ".", "-", "_" and ":".
 * With Char unbound the codes are enumerated: a-z, A-Z, 0-9, then the
 * four punctuation characters.
 */
nmtoken_char( Char ) :-
	var( Char ),
	!,
	atom_codes( 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_:',
			Codes ),
	member( Char, Codes ).
nmtoken_char( Char ) :-
	integer( Char ),
	( Char >= 0'a, Char =< 0'z -> true
	; Char >= 0'A, Char =< 0'Z -> true
	; Char >= 0'0, Char =< 0'9 -> true
	; Char =:= 46 -> true	% "."
	; Char =:= 45 -> true	% "-"
	; Char =:= 95 -> true	% "_"
	; Char =:= 58		% ":"
	).
/* xml_string( ?String ) recognizes a quoted string and returns the
 * characters between the quotes, without any further processing.
 */
xml_string( String ) -->
	quote( Delimiter ),
	xml_string1( Delimiter, String ).
/* xml_string1( +Quote, ?Chars ) collects characters up to the first
 * occurrence of the quote character Quote, which is consumed.
 */
xml_string1( Quote, [] ) -->
	quote( Quote ), !.
xml_string1( Quote, [Next|Rest] ) -->
	[Next],
	xml_string1( Quote, Rest ).
/* alphabet( ?Char ) holds if Char is the code of an unaccented Latin
 * letter. With Char unbound the codes are enumerated, a-z before A-Z.
 */
alphabet( Char ) :-
	var( Char ),
	!,
	atom_codes( 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', Codes ),
	member( Char, Codes ).
alphabet( Char ) :-
	integer( Char ),
	( Char >= 0'a, Char =< 0'z -> true
	; Char >= 0'A, Char =< 0'Z
	).
/* digit( ?Code ) recognizes one decimal digit and returns its character
 * code (not its numeric value).
 */
digit( Code, [Code|Rest], Rest ) :-
	digit_table( Code ).
/* digit_table( ?Code ) holds if Code is the character code of a decimal
 * digit. With Code unbound the codes of 0 to 9 are enumerated in order.
 */
digit_table( Code ) :-
	var( Code ),
	!,
	atom_codes( '0123456789', Codes ),
	member( Code, Codes ).
digit_table( Code ) :-
	integer( Code ),
	Code >= 0'0,
	Code =< 0'9.
/* digits( ?Codes ) recognizes a possibly empty run of decimal digits and
 * returns their character codes; the longest run is tried first.
 */
digits( [Code|Codes] ) -->
	digit( Code ),
	digits( Codes ).
digits( [] ) -->
	[].
/* character_entity( ?Name, ?Code ) relates the name of each predefined
 * character entity to the code of the character it stands for.
 */
character_entity( Name, Code ) :-
	member( Name-Code,
		[ "quot"-0'", %'
		  "amp"-0'&, %'
		  "lt"-0'<, %'
		  "gt"-0'>, %'
		  "apos"-0''
		] ).
/* For reference, this is a comprehensive recognizer for namechar, based on
 * the definition in http://www.w3.org/TR/2000/REC-xml-20001006 .
 * It consumes a single character code.
 */
namechar -->
	( letter
	; unicode_digit
	; "."
	; "-"
	; "_"
	; ":"
	; combiningchar
	; extender
	).
% letter recognizes a single base character or ideographic character.
letter -->
	( basechar
	; ideographic
	).
/* basechar recognizes a single character code from the table of code
 * ranges and individual codes below (see range//2).
 */
basechar -->
	( range( 16'0041, 16'005A )
	; range( 16'0061, 16'007A )
	; range( 16'00C0, 16'00D6 )
	; range( 16'00D8, 16'00F6 )
	; range( 16'00F8, 16'00FF )
	; range( 16'0100, 16'0131 )
	; range( 16'0134, 16'013E )
	; range( 16'0141, 16'0148 )
	; range( 16'014A, 16'017E )
	; range( 16'0180, 16'01C3 )
	; range( 16'01CD, 16'01F0 )
	; range( 16'01F4, 16'01F5 )
	; range( 16'01FA, 16'0217 )
	; range( 16'0250, 16'02A8 )
	; range( 16'02BB, 16'02C1 )
	; [16'0386]
	; range( 16'0388, 16'038A )
	; [16'038C]
	; range( 16'038E, 16'03A1 )
	; range( 16'03A3, 16'03CE )
	; range( 16'03D0, 16'03D6 )
	; [16'03DA]
	; [16'03DC]
	; [16'03DE]
	; [16'03E0]
	; range( 16'03E2, 16'03F3 )
	; range( 16'0401, 16'040C )
	; range( 16'040E, 16'044F )
	; range( 16'0451, 16'045C )
	; range( 16'045E, 16'0481 )
	; range( 16'0490, 16'04C4 )
	; range( 16'04C7, 16'04C8 )
	; range( 16'04CB, 16'04CC )
	; range( 16'04D0, 16'04EB )
	; range( 16'04EE, 16'04F5 )
	; range( 16'04F8, 16'04F9 )
	; range( 16'0531, 16'0556 )
	; [16'0559]
	; range( 16'0561, 16'0586 )
	; range( 16'05D0, 16'05EA )
	; range( 16'05F0, 16'05F2 )
	; range( 16'0621, 16'063A )
	; range( 16'0641, 16'064A )
	; range( 16'0671, 16'06B7 )
	; range( 16'06BA, 16'06BE )
	; range( 16'06C0, 16'06CE )
	; range( 16'06D0, 16'06D3 )
	; [16'06D5]
	; range( 16'06E5, 16'06E6 )
	; range( 16'0905, 16'0939 )
	; [16'093D]
	; range( 16'0958, 16'0961 )
	; range( 16'0985, 16'098C )
	; range( 16'098F, 16'0990 )
	; range( 16'0993, 16'09A8 )
	; range( 16'09AA, 16'09B0 )
	; [16'09B2]
	; range( 16'09B6, 16'09B9 )
	; range( 16'09DC, 16'09DD )
	; range( 16'09DF, 16'09E1 )
	; range( 16'09F0, 16'09F1 )
	; range( 16'0A05, 16'0A0A )
	; range( 16'0A0F, 16'0A10 )
	; range( 16'0A13, 16'0A28 )
	; range( 16'0A2A, 16'0A30 )
	; range( 16'0A32, 16'0A33 )
	; range( 16'0A35, 16'0A36 )
	; range( 16'0A38, 16'0A39 )
	; range( 16'0A59, 16'0A5C )
	; [16'0A5E]
	; range( 16'0A72, 16'0A74 )
	; range( 16'0A85, 16'0A8B )
	; [16'0A8D]
	; range( 16'0A8F, 16'0A91 )
	; range( 16'0A93, 16'0AA8 )
	; range( 16'0AAA, 16'0AB0 )
	; range( 16'0AB2, 16'0AB3 )
	; range( 16'0AB5, 16'0AB9 )
	; [16'0ABD]
	; [16'0AE0]
	; range( 16'0B05, 16'0B0C )
	; range( 16'0B0F, 16'0B10 )
	; range( 16'0B13, 16'0B28 )
	; range( 16'0B2A, 16'0B30 )
	; range( 16'0B32, 16'0B33 )
	; range( 16'0B36, 16'0B39 )
	; [16'0B3D]
	; range( 16'0B5C, 16'0B5D )
	; range( 16'0B5F, 16'0B61 )
	; range( 16'0B85, 16'0B8A )
	; range( 16'0B8E, 16'0B90 )
	; range( 16'0B92, 16'0B95 )
	; range( 16'0B99, 16'0B9A )
	; [16'0B9C]
	; range( 16'0B9E, 16'0B9F )
	; range( 16'0BA3, 16'0BA4 )
	; range( 16'0BA8, 16'0BAA )
	; range( 16'0BAE, 16'0BB5 )
	; range( 16'0BB7, 16'0BB9 )
	; range( 16'0C05, 16'0C0C )
	; range( 16'0C0E, 16'0C10 )
	; range( 16'0C12, 16'0C28 )
	; range( 16'0C2A, 16'0C33 )
	; range( 16'0C35, 16'0C39 )
	; range( 16'0C60, 16'0C61 )
	; range( 16'0C85, 16'0C8C )
	; range( 16'0C8E, 16'0C90 )
	; range( 16'0C92, 16'0CA8 )
	; range( 16'0CAA, 16'0CB3 )
	; range( 16'0CB5, 16'0CB9 )
	; [16'0CDE]
	; range( 16'0CE0, 16'0CE1 )
	; range( 16'0D05, 16'0D0C )
	; range( 16'0D0E, 16'0D10 )
	; range( 16'0D12, 16'0D28 )
	; range( 16'0D2A, 16'0D39 )
	; range( 16'0D60, 16'0D61 )
	; range( 16'0E01, 16'0E2E )
	; [16'0E30]
	; range( 16'0E32, 16'0E33 )
	; range( 16'0E40, 16'0E45 )
	; range( 16'0E81, 16'0E82 )
	; [16'0E84]
	; range( 16'0E87, 16'0E88 )
	; [16'0E8A]
	; [16'0E8D]
	; range( 16'0E94, 16'0E97 )
	; range( 16'0E99, 16'0E9F )
	; range( 16'0EA1, 16'0EA3 )
	; [16'0EA5]
	; [16'0EA7]
	; range( 16'0EAA, 16'0EAB )
	; range( 16'0EAD, 16'0EAE )
	; [16'0EB0]
	; range( 16'0EB2, 16'0EB3 )
	; [16'0EBD]
	; range( 16'0EC0, 16'0EC4 )
	; range( 16'0F40, 16'0F47 )
	; range( 16'0F49, 16'0F69 )
	; range( 16'10A0, 16'10C5 )
	; range( 16'10D0, 16'10F6 )
	; [16'1100]
	; range( 16'1102, 16'1103 )
	; range( 16'1105, 16'1107 )
	; [16'1109]
	; range( 16'110B, 16'110C )
	; range( 16'110E, 16'1112 )
	; [16'113C]
	; [16'113E]
	; [16'1140]
	; [16'114C]
	; [16'114E]
	; [16'1150]
	; range( 16'1154, 16'1155 )
	; [16'1159]
	; range( 16'115F, 16'1161 )
	; [16'1163]
	; [16'1165]
	; [16'1167]
	; [16'1169]
	; range( 16'116D, 16'116E )
	; range( 16'1172, 16'1173 )
	; [16'1175]
	; [16'119E]
	; [16'11A8]
	; [16'11AB]
	; range( 16'11AE, 16'11AF )
	; range( 16'11B7, 16'11B8 )
	; [16'11BA]
	; range( 16'11BC, 16'11C2 )
	; [16'11EB]
	; [16'11F0]
	; [16'11F9]
	; range( 16'1E00, 16'1E9B )
	; range( 16'1EA0, 16'1EF9 )
	; range( 16'1F00, 16'1F15 )
	; range( 16'1F18, 16'1F1D )
	; range( 16'1F20, 16'1F45 )
	; range( 16'1F48, 16'1F4D )
	; range( 16'1F50, 16'1F57 )
	; [16'1F59]
	; [16'1F5B]
	; [16'1F5D]
	; range( 16'1F5F, 16'1F7D )
	; range( 16'1F80, 16'1FB4 )
	; range( 16'1FB6, 16'1FBC )
	; [16'1FBE]
	; range( 16'1FC2, 16'1FC4 )
	; range( 16'1FC6, 16'1FCC )
	; range( 16'1FD0, 16'1FD3 )
	; range( 16'1FD6, 16'1FDB )
	; range( 16'1FE0, 16'1FEC )
	; range( 16'1FF2, 16'1FF4 )
	; range( 16'1FF6, 16'1FFC )
	; [16'2126]
	; range( 16'212A, 16'212B )
	; [16'212E]
	; range( 16'2180, 16'2182 )
	; range( 16'3041, 16'3094 )
	; range( 16'30A1, 16'30FA )
	; range( 16'3105, 16'312C )
	; range( 16'AC00, 16'D7A3 )
	).
/* ideographic recognizes a single character code from the ranges and the
 * individual code listed below.
 */
ideographic -->
	( range( 16'4E00, 16'9FA5 )
	; [16'3007]
	; range( 16'3021, 16'3029 )
	).
/* combiningchar recognizes a single character code from the table of code
 * ranges and individual codes below (see range//2).
 */
combiningchar -->
	( range( 16'0300, 16'0345 )
	; range( 16'0360, 16'0361 )
	; range( 16'0483, 16'0486 )
	; range( 16'0591, 16'05A1 )
	; range( 16'05A3, 16'05B9 )
	; range( 16'05BB, 16'05BD )
	; [16'05BF]
	; range( 16'05C1, 16'05C2 )
	; [16'05C4]
	; range( 16'064B, 16'0652 )
	; [16'0670]
	; range( 16'06D6, 16'06DC )
	; range( 16'06DD, 16'06DF )
	; range( 16'06E0, 16'06E4 )
	; range( 16'06E7, 16'06E8 )
	; range( 16'06EA, 16'06ED )
	; range( 16'0901, 16'0903 )
	; [16'093C]
	; range( 16'093E, 16'094C )
	; [16'094D]
	; range( 16'0951, 16'0954 )
	; range( 16'0962, 16'0963 )
	; range( 16'0981, 16'0983 )
	; [16'09BC]
	; [16'09BE]
	; [16'09BF]
	; range( 16'09C0, 16'09C4 )
	; range( 16'09C7, 16'09C8 )
	; range( 16'09CB, 16'09CD )
	; [16'09D7]
	; range( 16'09E2, 16'09E3 )
	; [16'0A02]
	; [16'0A3C]
	; [16'0A3E]
	; [16'0A3F]
	; range( 16'0A40, 16'0A42 )
	; range( 16'0A47, 16'0A48 )
	; range( 16'0A4B, 16'0A4D )
	; range( 16'0A70, 16'0A71 )
	; range( 16'0A81, 16'0A83 )
	; [16'0ABC]
	; range( 16'0ABE, 16'0AC5 )
	; range( 16'0AC7, 16'0AC9 )
	; range( 16'0ACB, 16'0ACD )
	; range( 16'0B01, 16'0B03 )
	; [16'0B3C]
	; range( 16'0B3E, 16'0B43 )
	; range( 16'0B47, 16'0B48 )
	; range( 16'0B4B, 16'0B4D )
	; range( 16'0B56, 16'0B57 )
	; range( 16'0B82, 16'0B83 )
	; range( 16'0BBE, 16'0BC2 )
	; range( 16'0BC6, 16'0BC8 )
	; range( 16'0BCA, 16'0BCD )
	; [16'0BD7]
	; range( 16'0C01, 16'0C03 )
	; range( 16'0C3E, 16'0C44 )
	; range( 16'0C46, 16'0C48 )
	; range( 16'0C4A, 16'0C4D )
	; range( 16'0C55, 16'0C56 )
	; range( 16'0C82, 16'0C83 )
	; range( 16'0CBE, 16'0CC4 )
	; range( 16'0CC6, 16'0CC8 )
	; range( 16'0CCA, 16'0CCD )
	; range( 16'0CD5, 16'0CD6 )
	; range( 16'0D02, 16'0D03 )
	; range( 16'0D3E, 16'0D43 )
	; range( 16'0D46, 16'0D48 )
	; range( 16'0D4A, 16'0D4D )
	; [16'0D57]
	; [16'0E31]
	; range( 16'0E34, 16'0E3A )
	; range( 16'0E47, 16'0E4E )
	; [16'0EB1]
	; range( 16'0EB4, 16'0EB9 )
	; range( 16'0EBB, 16'0EBC )
	; range( 16'0EC8, 16'0ECD )
	; range( 16'0F18, 16'0F19 )
	; [16'0F35]
	; [16'0F37]
	; [16'0F39]
	; [16'0F3E]
	; [16'0F3F]
	; range( 16'0F71, 16'0F84 )
	; range( 16'0F86, 16'0F8B )
	; range( 16'0F90, 16'0F95 )
	; [16'0F97]
	; range( 16'0F99, 16'0FAD )
	; range( 16'0FB1, 16'0FB7 )
	; [16'0FB9]
	; range( 16'20D0, 16'20DC )
	; [16'20E1]
	; range( 16'302A, 16'302F )
	; [16'3099]
	; [16'309A]
	).
/* unicode_digit recognizes a single character code from the code ranges
 * listed below (see range//2).
 */
unicode_digit -->
	( range( 16'0030, 16'0039 )
	; range( 16'0660, 16'0669 )
	; range( 16'06F0, 16'06F9 )
	; range( 16'0966, 16'096F )
	; range( 16'09E6, 16'09EF )
	; range( 16'0A66, 16'0A6F )
	; range( 16'0AE6, 16'0AEF )
	; range( 16'0B66, 16'0B6F )
	; range( 16'0BE7, 16'0BEF )
	; range( 16'0C66, 16'0C6F )
	; range( 16'0CE6, 16'0CEF )
	; range( 16'0D66, 16'0D6F )
	; range( 16'0E50, 16'0E59 )
	; range( 16'0ED0, 16'0ED9 )
	; range( 16'0F20, 16'0F29 )
	).
/* extender recognizes a single character code from the individual codes
 * and code ranges listed below.
 */
extender -->
	( [16'00B7]
	; [16'02D0]
	; [16'02D1]
	; [16'0387]
	; [16'0640]
	; [16'0E46]
	; [16'0EC6]
	; [16'3005]
	; range( 16'3031, 16'3035 )
	; range( 16'309D, 16'309E )
	; range( 16'30FC, 16'30FE )
	).
/* range( +Low, +High ) recognizes a single character code Char with
 * Low =< Char =< High. It is written out as the range/4 clause that the
 * grammar rule translates to.
 */
range( Low, High, [Char|Rest], Rest ) :-
	Char >= Low,
	Char =< High.