1/* XML Utilities
2 *
3 * Copyright (C) 2001-2005 Binding Time Limited
4 * Copyright (C) 2005, 2006 John Fletcher
5 *
6 * Current Release: $Revision: 1.2 $
7 *
8 * TERMS AND CONDITIONS:
9 *
10 * This program is offered free of charge, as unsupported source code. You may
11 * use it, copy it, distribute it, modify it or sell it without restriction,
12 * but entirely at your own risk.
13 */
14
15% Entity and Namespace map operations: these maps are usually quite small, so
16% a linear list lookup is okay. They could be substituted by a logarithmic
17% data structure - in extremis.
18
/* empty_map( ?Map ) holds when Map is the map that records no Key-Data
 * pairs. Maps are represented as lists, so the empty map is [].
 */
empty_map( Map ) :-
	Map = [].
22
/* map_member( +Key, +Map, ?Data ) is true if Map is an ordered map structure
 * which records the pair Key-Data. Key must be ground.
 *
 * Map is a list of Key-Data pairs in ascending standard order of Key. The
 * search gives up as soon as it meets a key that sorts after Key, so a pair
 * is only found if every pair in front of it has a smaller key.
 */
map_member( Key, [Key1-Data1|Rest], Data ) :-
	compare( Order, Key, Key1 ),
	( Order = (=) ->
		Data = Data1
	; Order = (>) ->
		map_member( Key, Rest, Data )
	).	% Order = (<): Key cannot occur further on, so fail.
32
/* map_store( +Map0, +Key, +Data, ?Map1 ) is true if Map0 is an ordered map
 * structure, Key must be ground, and Map1 is identical to Map0 except that
 * the pair Key-Data is recorded by Map1.
 *
 * An existing pair for Key is replaced; otherwise the new pair is inserted
 * in front of the first pair whose key sorts after Key, or at the end.
 */
map_store( [], Key, Data, [Key-Data] ).
map_store( [Key0-Data0|Map0], Key, Data, Map ) :-
	compare( Order, Key, Key0 ),
	( Order = (=) ->			% Replace the recorded data
		Map = [Key-Data|Map0]
	; Order = (<) ->			% Insert before the larger key
		Map = [Key-Data,Key0-Data0|Map0]
	;					% Keep looking further along
		Map = [Key0-Data0|Map1],
		map_store( Map0, Key, Data, Map1 )
	).
47
/* context( ?Element, ?PreserveSpace, ?CurrentNS, ?DefaultNS, ?Entities,
 *	?Namespaces, ?RemoveAttributePrefixes, ?AllowAmpersand )
 * is an ADT hiding the "state" arguments for XML Acquisition.
 *
 * initial_context( +Controls, ?Context ) is true if Context is the starting
 * state selected by the option list Controls:
 *	- Element is void, and CurrentNS and DefaultNS are both '';
 *	- Namespaces is the empty map;
 *	- Entities is the empty map if Controls has extended_characters(false),
 *	  and the extended_character_entities/1 table if it has not;
 *	- PreserveSpace is true if Controls has format(false), else false;
 *	- RemoveAttributePrefixes is true if Controls has
 *	  remove_attribute_prefixes(true), else false;
 *	- AllowAmpersand is true if Controls has allow_ampersand(true),
 *	  else false.
 */
initial_context( Controls, Context ) :-
	Context = context(void,PreserveSpace,'','',Entities,Empty,
		RemoveAttributePrefixes,AllowAmpersand),
	empty_map( Empty ),
	( member( extended_characters(false), Controls ) ->
		Entities = Empty
	;
		extended_character_entities( Entities )
	),
	( member( format(false), Controls ) ->
		PreserveSpace = true	% Unformatted input: keep its white space
	;
		PreserveSpace = false
	),
	( member( remove_attribute_prefixes(true), Controls ) ->
		RemoveAttributePrefixes = true
	;
		RemoveAttributePrefixes = false
	),
	( member( allow_ampersand(true), Controls ) ->
		AllowAmpersand = true
	;
		AllowAmpersand = false
	).
77
/* context_update( +Field, +Context0, +Value, ?Context1 ) is true if Context1
 * is identical to Context0 except for the part selected by Field:
 *	current_namespace	- Value becomes the current namespace;
 *	element			- tag(Value) becomes the current element;
 *	default_namespace	- Value becomes the default namespace;
 *	space_preserve		- Value becomes the preserve-space flag;
 *	ns_prefix(Prefix)	- Prefix-Value is stored in the namespace map;
 *	entity(Name)		- Name-Value is stored in the entity map.
 */
context_update( current_namespace,
		context(Element,Preserve,_,Default,Entities,Namespaces,RPFA,Amp),
		URI,
		context(Element,Preserve,URI,Default,Entities,Namespaces,RPFA,Amp)
		).
context_update( element,
		context(_,Preserve,Current,Default,Entities,Namespaces,RPFA,Amp),
		Tag,
		context(tag(Tag),Preserve,Current,Default,Entities,Namespaces,
			RPFA,Amp)
		).
context_update( default_namespace,
		context(Element,Preserve,Current,_,Entities,Namespaces,RPFA,Amp),
		URI,
		context(Element,Preserve,Current,URI,Entities,Namespaces,RPFA,Amp)
		).
context_update( space_preserve,
		context(Element,_,Current,Default,Entities,Namespaces,RPFA,Amp),
		Boolean,
		context(Element,Boolean,Current,Default,Entities,Namespaces,
			RPFA,Amp)
		).
context_update( ns_prefix(Prefix),
		context(Element,Preserve,Current,Default,Entities,Namespaces0,
			RPFA,Amp),
		URI,
		context(Element,Preserve,Current,Default,Entities,Namespaces1,
			RPFA,Amp)
		) :-
	map_store( Namespaces0, Prefix, URI, Namespaces1 ).
context_update( entity(Name),
		context(Element,Preserve,Current,Default,Entities0,Namespaces,
			RPFA,Amp),
		String,
		context(Element,Preserve,Current,Default,Entities1,Namespaces,
			RPFA,Amp)
		) :-
	map_store( Entities0, Name, String, Entities1 ).
110
/* remove_attribute_prefixes( ?Context ) holds when the
 * RemoveAttributePrefixes field (the seventh) of Context is true.
 */
remove_attribute_prefixes(
	context(_Element,_Preserve,_Current,_Default,_Entities,_Namespaces,
		true,_Amp)
	).
114
/* current_tag( ?Context, ?Tag ) holds when the element field of Context is
 * tag(Tag). It fails for a context whose element field is void.
 */
current_tag(
	context(tag(Tag),_Preserve,_Current,_Default,_Entities,_Namespaces,
		_RPFA,_Amp),
	Tag
	).
118
/* current_namespace( ?Context, ?Current ) holds when Current is the
 * current-namespace field (the third) of Context.
 */
current_namespace(
	context(_Element,_Preserve,Current,_Default,_Entities,_Namespaces,
		_RPFA,_Amp),
	Current
	).
122
/* default_namespace( ?Context, ?Default ) holds when Default is the
 * default-namespace field (the fourth) of Context.
 */
default_namespace(
	context(_Element,_Preserve,_Current,Default,_Entities,_Namespaces,
		_RPFA,_Amp),
	Default
	).
126
/* space_preserve( ?Context ) holds when Context is inside an element (its
 * element field is tag(_), not void) and its preserve-space flag is true.
 */
space_preserve(
	context(tag(_Tag),true,_Current,_Default,_Entities,_Namespaces,
		_RPFA,_Amp)
	).
130
/* specific_namespace( +Prefix, +Context, ?URI ) holds when the namespace map
 * of Context records the pair Prefix-URI.
 */
specific_namespace(
		Prefix,
		context(_Element,_Preserve,_Current,_Default,_Entities,Namespaces,
			_RPFA,_Amp),
		URI
		) :-
	map_member( Prefix, Namespaces, URI ).
135
/* defined_entity( +Reference, +Context, ?String ) holds when the entity map
 * of Context records the pair Reference-String.
 */
defined_entity(
		Reference,
		context(_Element,_Preserve,_Current,_Default,Entities,_Namespaces,
			_RPFA,_Amp),
		String
		) :-
	map_member( Reference, Entities, String ).
140
/* close_context( +Context, ?Terms, ?WellFormed ) relates Context to the
 * diagnostic Terms and WellFormed flag implied by its element field:
 *	- if the element field is void, Terms is [] and WellFormed is true;
 *	- if it is tag(TagChars), Terms is [out_of_context(Tag)] and WellFormed
 *	  is false, where Tag is the atom spelled by the codes TagChars.
 */
close_context( Context, Terms, WellFormed ) :-
	Context = context(Element,_Preserve,_Current,_Default,_Entities,
		_Namespaces,_RPFA,_Amp),
	close_context1( Element, Terms, WellFormed ).

close_context1( void, [], true ).
close_context1( tag(TagChars), [out_of_context(Tag)], false ) :-
	% TagChars is a list of character codes, so atom_codes/2 is the right
	% conversion: atom_chars/2 only accepts codes in Quintus Prolog. This
	% file maps atom_codes/2 onto atom_chars/2 for Quintus.
	atom_codes( Tag, TagChars ).
149
/* void_context( ?Context ) holds when the element field of Context is void.
 */
void_context( Context ) :-
	Context = context(void,_Preserve,_Current,_Default,_Entities,_Names,
		_RPFA,_Amp).
153
/* allow_ampersand( ?Context ) holds when the AllowAmpersand field (the
 * eighth) of Context is true.
 */
allow_ampersand( Context ) :-
	Context = context(_Element,_Preserve,_Current,_Default,_Entities,_Names,
		_RPFA,true).
157
/* pp_string( +String ) prints String onto the current output stream.
 * If every character code in String lies in the range 9..255 it is printed
 * in shorthand quoted format, otherwise it is written as a list by write/1.
 * If your Prolog uses " to delimit a special string type, just use write/1.
 */
pp_string( Chars ) :-
	( member( Code, Chars ),
	  \+ ( Code =< 255, Code >= 9 ) ->	% a code outside 9..255
		write( Chars )
	;
		put_quote,
		pp_string1( Chars ),
		put_quote
	).
172
/* put_quote writes a double-quote character to the current output stream.
 */
put_quote :-
	put( 34 ).	% 34 is the character code of the double quote
175
/* pp_string1( +Chars ) writes the character codes in Chars to the current
 * output stream, as the inside of a quoted string:
 *	- a double quote is written twice (the meta-quote convention);
 *	- a carriage return followed by a line feed (a Windows line ending)
 *	  is written as a single line feed;
 *	- every other code is written unchanged.
 */
pp_string1( [] ).
pp_string1( [Code|Codes] ) :-
	( Code =:= 34 ->			% Double quote: meta-quote it
		put( Code ),
		put( Code ),
		Rest = Codes
	; Code =:= 13,				% CR-LF becomes LF
	  Codes = [10|Rest] ->
		put( 10 )
	;
		put( Code ),
		Rest = Codes
	),
	pp_string1( Rest ).
190
/* xml_declaration_attributes_valid( ?Attributes ) holds when Attributes is a
 * list of Name=Value pairs, each of which is accepted by
 * xml_declaration_attribute_valid/2.
 */
xml_declaration_attributes_valid( [] ).
xml_declaration_attributes_valid( [Attribute|Others] ) :-
	Attribute = (Name=Value),
	xml_declaration_attribute_valid( Name, Value ),
	xml_declaration_attributes_valid( Others ).
195
/* xml_declaration_attribute_valid( ?Name, +Value ) holds when Value, a list
 * of character codes, is a permitted value for the XML declaration
 * attribute Name once its uppercase letters are folded to lowercase.
 */
xml_declaration_attribute_valid( Name, Value ) :-
	lowercase( Value, Canonical ),
	canonical_xml_declaration_attribute( Name, Canonical ).
199
/* canonical_xml_declaration_attribute( ?Name, ?Value ) holds when Value is
 * a permitted, lowercase value of the XML declaration attribute Name.
 */
canonical_xml_declaration_attribute( version, "1.0" ).
canonical_xml_declaration_attribute( standalone, Value ) :-
	member( Value, ["yes", "no"] ).
% The encodings here are all valid for the output produced.
% "utf-16" is deliberately left out: it is erroneous for the output of this
% library.
canonical_xml_declaration_attribute( encoding, Value ) :-
	member( Value, [
		"utf-8",
		"us-ascii",
		"ascii",
		"iso-8859-1",
		"iso-8859-2",
		"iso-8859-15",
		"windows-1252"
		] ).
% In general, it's better not to specify an encoding.
214
/* lowercase( +MixedCase, ?Lowercase ) holds when Lowercase and MixedCase are
 * lists of character codes, and Lowercase is identical to MixedCase with
 * every uppercase character replaced by its lowercase equivalent.
 *
 * Only the codes of the letters A to Z are changed.
 */
lowercase( [], [] ).
lowercase( [Code|Codes], [Lower|Lowers] ) :-
	( Code >= 0'A, Code =< 0'Z ->
		Lower is Code + 0'a - 0'A
	;
		Lower = Code
	),
	lowercase( Codes, Lowers ).
227
/* extended_character_entities( ?Entities ) holds when Entities is the map
 * of extended character entity names to their replacement text, each a
 * one-element list holding the character code.
 *
 * The pairs MUST be kept in ascending standard order of their keys (so
 * uppercase letters sort before lowercase ones, and digits before letters):
 * map_member/3 stops searching at the first key that sorts after the key it
 * is looking for, so an entity placed out of order cannot be found.
 */
extended_character_entities( [
	"AElig"-[198],		% latin capital letter AE
	"Aacute"-[193],		% latin capital letter A with acute,
	"Acirc"-[194],		% latin capital letter A with circumflex,
	"Agrave"-[192],		% latin capital letter A with grave
	"Alpha"-[913],		% greek capital letter alpha, U+0391
	"Aring"-[197],		% latin capital letter A with ring above
	"Atilde"-[195],		% latin capital letter A with tilde,
	"Auml"-[196],		% latin capital letter A with diaeresis,
	"Beta"-[914],		% greek capital letter beta, U+0392
	"Ccedil"-[199],		% latin capital letter C with cedilla,
	"Chi"-[935],		% greek capital letter chi, U+03A7
	"Dagger"-[8225],	% double dagger, U+2021 ISOpub
	"Delta"-[916],		% greek capital letter delta,
	"ETH"-[208],		% latin capital letter ETH, U+00D0 ISOlat1>
	"Eacute"-[201],		% latin capital letter E with acute,
	"Ecirc"-[202],		% latin capital letter E with circumflex,
	"Egrave"-[200],		% latin capital letter E with grave,
	"Epsilon"-[917],	% greek capital letter epsilon, U+0395
	"Eta"-[919],		% greek capital letter eta, U+0397
	"Euml"-[203],		% latin capital letter E with diaeresis,
	"Gamma"-[915],		% greek capital letter gamma,
	"Iacute"-[205],		% latin capital letter I with acute,
	"Icirc"-[206],		% latin capital letter I with circumflex,
	"Igrave"-[204],		% latin capital letter I with grave,
	"Iota"-[921],		% greek capital letter iota, U+0399
	"Iuml"-[207],		% latin capital letter I with diaeresis,
	"Kappa"-[922],		% greek capital letter kappa, U+039A
	"Lambda"-[923],		% greek capital letter lambda,
	"Mu"-[924],			% greek capital letter mu, U+039C
	"Ntilde"-[209],		% latin capital letter N with tilde,
	"Nu"-[925],			% greek capital letter nu, U+039D
	"OElig"-[338],		% latin capital ligature OE,
	"Oacute"-[211],		% latin capital letter O with acute,
	"Ocirc"-[212],		% latin capital letter O with circumflex,
	"Ograve"-[210],		% latin capital letter O with grave,
	"Omega"-[937],		% greek capital letter omega,
	"Omicron"-[927],	% greek capital letter omicron, U+039F
	"Oslash"-[216],		% latin capital letter O with stroke
	"Otilde"-[213],		% latin capital letter O with tilde,
	"Ouml"-[214],		% latin capital letter O with diaeresis,
	"Phi"-[934],		% greek capital letter phi,
	"Pi"-[928],			% greek capital letter pi, U+03A0 ISOgrk3
	"Prime"-[8243],		% double prime = seconds = inches,
	"Psi"-[936],		% greek capital letter psi,
	"Rho"-[929],		% greek capital letter rho, U+03A1
	"Scaron"-[352],		% latin capital letter S with caron,
	"Sigma"-[931],		% greek capital letter sigma,
	"THORN"-[222],		% latin capital letter THORN,
	"Tau"-[932],		% greek capital letter tau, U+03A4
	"Theta"-[920],		% greek capital letter theta,
	"Uacute"-[218],		% latin capital letter U with acute,
	"Ucirc"-[219],		% latin capital letter U with circumflex,
	"Ugrave"-[217],		% latin capital letter U with grave,
	"Upsilon"-[933],	% greek capital letter upsilon,
	"Uuml"-[220],		% latin capital letter U with diaeresis,
	"Xi"-[926],			% greek capital letter xi, U+039E ISOgrk3
	"Yacute"-[221],		% latin capital letter Y with acute,
	"Yuml"-[376],		% latin capital letter Y with diaeresis,
	"Zeta"-[918],		% greek capital letter zeta, U+0396
	"aacute"-[225],		% latin small letter a with acute,
	"acirc"-[226],		% latin small letter a with circumflex,
	"acute"-[180],		% acute accent = spacing acute,
	"aelig"-[230],		% latin small letter ae
	"agrave"-[224],		% latin small letter a with grave
	"alefsym"-[8501],	% alef symbol = first transfinite cardinal,
	"alpha"-[945],		% greek small letter alpha,
	"and"-[8743],		% logical and = wedge, U+2227 ISOtech
	"ang"-[8736],		% angle, U+2220 ISOamso
	"aring"-[229],		% latin small letter a with ring above
	"asymp"-[8776],		% almost equal to = asymptotic to,
	"atilde"-[227],		% latin small letter a with tilde,
	"auml"-[228],		% latin small letter a with diaeresis,
	"bdquo"-[8222],		% double low-9 quotation mark, U+201E NEW
	"beta"-[946],		% greek small letter beta, U+03B2 ISOgrk3
	"brvbar"-[166],		% broken bar = broken vertical bar,
	"bull"-[8226],		% bullet = black small circle,
	"cap"-[8745],		% intersection = cap, U+2229 ISOtech
	"ccedil"-[231],		% latin small letter c with cedilla,
	"cedil"-[184],		% cedilla = spacing cedilla, U+00B8 ISOdia>
	"cent"-[162],		% cent sign, U+00A2 ISOnum>
	"chi"-[967],		% greek small letter chi, U+03C7 ISOgrk3
	"circ"-[710],		% modifier letter circumflex accent,
	"clubs"-[9827],		% black club suit = shamrock,
	"cong"-[8773],		% approximately equal to, U+2245 ISOtech
	"copy"-[169],		% copyright sign, U+00A9 ISOnum>
	"crarr"-[8629],		% downwards arrow with corner leftwards
	"cup"-[8746],		% union = cup, U+222A ISOtech
	"curren"-[164],		% currency sign, U+00A4 ISOnum>
	"dArr"-[8659],		% downwards double arrow, U+21D3 ISOamsa
	"dagger"-[8224],	% dagger, U+2020 ISOpub
	"darr"-[8595],		% downwards arrow, U+2193 ISOnum
	"deg"-[176],		% degree sign, U+00B0 ISOnum>
	"delta"-[948],		% greek small letter delta,
	"diams"-[9830],		% black diamond suit, U+2666 ISOpub
	"divide"-[247],		% division sign, U+00F7 ISOnum>
	"eacute"-[233],		% latin small letter e with acute,
	"ecirc"-[234],		% latin small letter e with circumflex,
	"egrave"-[232],		% latin small letter e with grave,
	"empty"-[8709],		% empty set = null set = diameter,
	"emsp"-[8195],		% em space, U+2003 ISOpub
	"ensp"-[8194],		% en space, U+2002 ISOpub
	"epsilon"-[949],	% greek small letter epsilon,
	"equiv"-[8801],		% identical to, U+2261 ISOtech
	"eta"-[951],		% greek small letter eta, U+03B7 ISOgrk3
	"eth"-[240],		% latin small letter eth, U+00F0 ISOlat1>
	"euml"-[235],		% latin small letter e with diaeresis,
	"euro"-[8364],		% euro sign, U+20AC NEW
	"exist"-[8707],		% there exists, U+2203 ISOtech
	"fnof"-[402],		% latin small f with hook = function
	"forall"-[8704],	% for all, U+2200 ISOtech
	"frac12"-[189],		% vulgar fraction one half
	"frac14"-[188],		% vulgar fraction one quarter
	"frac34"-[190],		% vulgar fraction three quarters
	"frasl"-[8260],		% fraction slash, U+2044 NEW
	"gamma"-[947],		% greek small letter gamma,
	"ge"-[8805],		% greater-than or equal to,
	"hArr"-[8660],		% left right double arrow,
	"harr"-[8596],		% left right arrow, U+2194 ISOamsa
	"hearts"-[9829],	% black heart suit = valentine,
	"hellip"-[8230],	% horizontal ellipsis = three dot leader,
	"iacute"-[237],		% latin small letter i with acute,
	"icirc"-[238],		% latin small letter i with circumflex,
	"iexcl"-[161],		% inverted exclamation mark, U+00A1 ISOnum>
	"igrave"-[236],		% latin small letter i with grave,
	"image"-[8465],		% blackletter capital I = imaginary part,
	"infin"-[8734],		% infinity, U+221E ISOtech
	"int"-[8747],		% integral, U+222B ISOtech
	"iota"-[953],		% greek small letter iota, U+03B9 ISOgrk3
	"iquest"-[191],		% inverted question mark
	"isin"-[8712],		% element of, U+2208 ISOtech
	"iuml"-[239],		% latin small letter i with diaeresis,
	"kappa"-[954],		% greek small letter kappa,
	"lArr"-[8656],		% leftwards double arrow, U+21D0 ISOtech
	"lambda"-[955],		% greek small letter lambda,
	"lang"-[9001],		% left-pointing angle bracket = bra,
	"laquo"-[171],		% left-pointing double angle quotation mark
	"larr"-[8592],		% leftwards arrow, U+2190 ISOnum
	"lceil"-[8968],		% left ceiling = apl upstile,
	"ldquo"-[8220],		% left double quotation mark,
	"le"-[8804],		% less-than or equal to, U+2264 ISOtech
	"lfloor"-[8970],	% left floor = apl downstile,
	"lowast"-[8727],	% asterisk operator, U+2217 ISOtech
	"loz"-[9674],		% lozenge, U+25CA ISOpub
	"lrm"-[8206],		% left-to-right mark, U+200E NEW RFC 2070
	"lsaquo"-[8249],	% single left-pointing angle quotation mark,
	"lsquo"-[8216],		% left single quotation mark,
	"macr"-[175],		% macron = spacing macron = overline
	"mdash"-[8212],		% em dash, U+2014 ISOpub
	"micro"-[181],		% micro sign, U+00B5 ISOnum>
	"middot"-[183],		% middle dot = Georgian comma
	"minus"-[8722],		% minus sign, U+2212 ISOtech
	"mu"-[956],			% greek small letter mu, U+03BC ISOgrk3
	"nabla"-[8711],		% nabla = backward difference,
	"nbsp"-[160],		% no-break space = non-breaking space,
	"ndash"-[8211],		% en dash, U+2013 ISOpub
	"ne"-[8800],		% not equal to, U+2260 ISOtech
	"ni"-[8715],		% contains as member, U+220B ISOtech
	"not"-[172],		% not sign, U+00AC ISOnum>
	"notin"-[8713],		% not an element of, U+2209 ISOtech
	"nsub"-[8836],		% not a subset of, U+2284 ISOamsn
	"ntilde"-[241],		% latin small letter n with tilde,
	"nu"-[957],			% greek small letter nu, U+03BD ISOgrk3
	"oacute"-[243],		% latin small letter o with acute,
	"ocirc"-[244],		% latin small letter o with circumflex,
	"oelig"-[339],		% latin small ligature oe, U+0153 ISOlat2
	"ograve"-[242],		% latin small letter o with grave,
	"oline"-[8254],		% overline = spacing overscore,
	"omega"-[969],		% greek small letter omega,
	"omicron"-[959],	% greek small letter omicron, U+03BF NEW
	"oplus"-[8853],		% circled plus = direct sum,
	"or"-[8744],		% logical or = vee, U+2228 ISOtech
	"ordf"-[170],		% feminine ordinal indicator, U+00AA ISOnum>
	"ordm"-[186],		% masculine ordinal indicator,
	"oslash"-[248],		% latin small letter o with stroke,
	"otilde"-[245],		% latin small letter o with tilde,
	"otimes"-[8855],	% circled times = vector product,
	"ouml"-[246],		% latin small letter o with diaeresis,
	"para"-[182],		% pilcrow sign = paragraph sign,
	"part"-[8706],		% partial differential, U+2202 ISOtech
	"permil"-[8240],	% per mille sign, U+2030 ISOtech
	"perp"-[8869],		% up tack = orthogonal to = perpendicular,
	"phi"-[966],		% greek small letter phi, U+03C6 ISOgrk3
	"pi"-[960],			% greek small letter pi, U+03C0 ISOgrk3
	"piv"-[982],		% greek pi symbol, U+03D6 ISOgrk3
	"plusmn"-[177],		% plus-minus sign = plus-or-minus sign,
	"pound"-[163],		% pound sign, U+00A3 ISOnum>
	"prime"-[8242],		% prime = minutes = feet, U+2032 ISOtech
	"prod"-[8719],		% n-ary product = product sign,
	"prop"-[8733],		% proportional to, U+221D ISOtech
	"psi"-[968],		% greek small letter psi, U+03C8 ISOgrk3
	"rArr"-[8658],		% rightwards double arrow,
	"radic"-[8730],		% square root = radical sign,
	"rang"-[9002],		% right-pointing angle bracket = ket,
	"raquo"-[187],		% right-pointing double angle quotation mark
	"rarr"-[8594],		% rightwards arrow, U+2192 ISOnum
	"rceil"-[8969],		% right ceiling, U+2309 ISOamsc
	"rdquo"-[8221],		% right double quotation mark,
	"real"-[8476],		% blackletter capital R = real part symbol,
	"reg"-[174],		% registered sign = registered trade mark sign,
	"rfloor"-[8971],	% right floor, U+230B ISOamsc
	"rho"-[961],		% greek small letter rho, U+03C1 ISOgrk3
	"rlm"-[8207],		% right-to-left mark, U+200F NEW RFC 2070
	"rsaquo"-[8250],	% single right-pointing angle quotation mark,
	"rsquo"-[8217],		% right single quotation mark,
	"sbquo"-[8218],		% single low-9 quotation mark, U+201A NEW
	"scaron"-[353],		% latin small letter s with caron,
	"sdot"-[8901],		% dot operator, U+22C5 ISOamsb
	"sect"-[167],		% section sign, U+00A7 ISOnum>
	"shy"-[173],		% soft hyphen = discretionary hyphen,
	"sigma"-[963],		% greek small letter sigma,
	"sigmaf"-[962],		% greek small letter final sigma,
	"sim"-[8764],		% tilde operator = varies with = similar to,
	"spades"-[9824],	% black spade suit, U+2660 ISOpub
	"sub"-[8834],		% subset of, U+2282 ISOtech
	"sube"-[8838],		% subset of or equal to, U+2286 ISOtech
	"sum"-[8721],		% n-ary sumation, U+2211 ISOamsb
	"sup"-[8835],		% superset of, U+2283 ISOtech
	"sup1"-[185],		% superscript one = superscript digit one,
	"sup2"-[178],		% superscript two = superscript digit two
	"sup3"-[179],		% superscript three = superscript digit three
	"supe"-[8839],		% superset of or equal to,
	"szlig"-[223],		% latin small letter sharp s = ess-zed,
	"tau"-[964],		% greek small letter tau, U+03C4 ISOgrk3
	"there4"-[8756],	% therefore, U+2234 ISOtech
	"theta"-[952],		% greek small letter theta,
	"thetasym"-[977],	% greek small letter theta symbol,
	"thinsp"-[8201],	% thin space, U+2009 ISOpub
	"thorn"-[254],		% latin small letter thorn with,
	"tilde"-[732],		% small tilde, U+02DC ISOdia
	"times"-[215],		% multiplication sign, U+00D7 ISOnum>
	"trade"-[8482],		% trade mark sign, U+2122 ISOnum
	"uArr"-[8657],		% upwards double arrow, U+21D1 ISOamsa
	"uacute"-[250],		% latin small letter u with acute,
	"uarr"-[8593],		% upwards arrow, U+2191 ISOnum
	"ucirc"-[251],		% latin small letter u with circumflex,
	"ugrave"-[249],		% latin small letter u with grave,
	"uml"-[168],		% diaeresis = spacing diaeresis,
	"upsih"-[978],		% greek upsilon with hook symbol,
	"upsilon"-[965],	% greek small letter upsilon,
	"uuml"-[252],		% latin small letter u with diaeresis,
	"weierp"-[8472],	% script capital P = power set
	"xi"-[958],			% greek small letter xi, U+03BE ISOgrk3
	"yacute"-[253],		% latin small letter y with acute,
	"yen"-[165],		% yen sign = yuan sign, U+00A5 ISOnum>
	"yuml"-[255],		% latin small letter y with diaeresis,
	"zeta"-[950],		% greek small letter zeta, U+03B6 ISOgrk3
	"zwj"-[8205],		% zero width joiner, U+200D NEW RFC 2070
	"zwnj"-[8204]		% zero width non-joiner,
	] ).
478
479% The following code is for Quintus Prolog primarily. Some of these
480% predicates are built-in to SWI, LPA etc.
481
/* member( ?Element, ?List ) holds when Element is a member of List.
 * On backtracking the members are offered in list order.
 */
member( Element, [Head|Tail] ) :-
	( Element = Head
	; member( Element, Tail )
	).
487
/* select( ?Element, ?List0, ?List1 ) is true if List1 is equal to List0
 * with one occurrence of Element removed. On backtracking the occurrences
 * are removed in list order.
 */
select( Element, [Head|Tail], Remainder ) :-
	( Element = Head,
	  Remainder = Tail
	; Remainder = [Head|Remainder1],
	  select( Element, Tail, Remainder1 )
	).
494
/* is_list( +List ) holds when List is a list.
 *
 * The test is shallow: List must be bound, and either [] or a term of the
 * form [_|_]. The tail of a non-empty list is not inspected.
 */
is_list( List ) :-
	nonvar( List ),
	is_list1( List ).

/* is_list1( ?List ) holds when List is [] or has the form [_|_].
 */
is_list1( List ) :-
	( List = []
	; List = [_|_]
	).
503
/* chars( ?Chars, ?Plus, ?Minus ) used as chars( ?Chars ) in a DCG to
 * copy the list Chars inline: Plus is Chars followed by Minus.
 *
 * This is the clause form of the DCG specification:
 *
 *	chars( [] ) --> "".
 *	chars( [Char|Chars] ) -->
 *		[Char],
 *		chars( Chars ).
 *
 * Where append/3 is built-in, the same relation can be expressed as
 * append( Chars, Minus, Plus ).
 */

chars( [], Rest, Rest ).
chars( [Char|Chars], [Char|Plus], Minus ) :-
	chars( Chars, Plus, Minus ).
518
/* atom_codes/2, number_codes/2 and throw/1 are ISO predicates. Each one is
 * defined here as a call to its Quintus equivalent.
 *
 * atom_codes( ?Atom, ?Codes ) calls atom_chars( Atom, Codes ).
 */
atom_codes( Atom, CharCodes ) :-
	atom_chars( Atom, CharCodes ).
524
/* number_codes( ?Number, ?Codes ) is the ISO predicate, defined here as a
 * call to number_chars( Number, Codes ).
 */
number_codes( Number, CharCodes ) :-
	number_chars( Number, CharCodes ).
527
/* throw( +Exception ) is the ISO predicate, defined here as a call to
 * raise_exception( Exception ).
 */
throw( Ball ) :-
	raise_exception( Ball ).
530
531end_of_file. % <- Remove this line for ISO Prologs?
532
/* append( ?Front, ?Back, ?Whole ) holds when the list Whole consists of the
 * elements of Front followed by the elements of Back.
 */
append( [], Back, Back ).
append( [Item|Front], Back, [Item|Whole] ) :-
	append( Front, Back, Whole ).
536
/* otherwise always succeeds. It is used as the condition of the final
 * "; otherwise -> ..." branch of the if-then-else constructs in this file.
 */
otherwise :-
	true.
538