/* XML Utilities
 *
 * Copyright (C) 2001-2005 Binding Time Limited
 * Copyright (C) 2005, 2006 John Fletcher
 *
 * Current Release: $Revision: 1.2 $
 *
 * TERMS AND CONDITIONS:
 *
 * This program is offered free of charge, as unsupported source code. You may
 * use it, copy it, distribute it, modify it or sell it without restriction,
 * but entirely at your own risk.
 */

% Entity and Namespace map operations: these maps are usually quite small, so
% a linear list lookup is okay. They could be substituted by a logarithmic
% data structure - in extremis.

/* empty_map( ?Map ) is true if Map is a null map.
 */
empty_map( [] ).

/* map_member( +Key, +Map, ?Data ) is true if Map is an ordered map structure
 * which records the pair Key-Data. Key must be ground.
 *
 * The scan gives up as soon as it meets an entry whose key is ordered after
 * Key, so Map must be sorted by the standard order of its keys.
 */
map_member( Key, [EntryKey-EntryData|Entries], Data ) :-
	compare( Order, Key, EntryKey ),
	map_member1( Order, Key, Entries, EntryData, Data ).

% map_member1( +Order, +Key, +Entries, +EntryData, ?Data ) acts on the result
% of comparing Key with the current entry: (=) yields the entry's data, (>)
% searches the remaining entries, and (<) has no clause, so the lookup fails.
map_member1( =, _Key, _Entries, Data, Data ).
map_member1( >, Key, Entries, _EntryData, Data ) :-
	map_member( Key, Entries, Data ).

/* map_store( +Map0, +Key, +Data, ?Map1 ) is true if Map0 is an ordered map
 * structure, Key must be ground, and Map1 is identical to Map0 except that
 * the pair Key-Data is recorded by Map1.
 */
map_store( [], Key, Data, [Key-Data] ).
map_store( [EntryKey-EntryData|Entries], Key, Data, Map ) :-
	compare( Order, Key, EntryKey ),
	map_store1( Order, EntryKey-EntryData, Entries, Key, Data, Map ).

% map_store1( +Order, +Entry, +Entries, +Key, +Data, ?Map ) acts on the result
% of comparing Key with the key of Entry: (=) replaces Entry, (<) inserts the
% new pair in front of Entry, and (>) keeps Entry and stores further along.
map_store1( =, _Entry, Entries, Key, Data, [Key-Data|Entries] ).
map_store1( <, Entry, Entries, Key, Data, [Key-Data,Entry|Entries] ).
map_store1( >, Entry, Entries, Key, Data, [Entry|Map1] ) :-
	map_store( Entries, Key, Data, Map1 ).

/* context( ?Element, ?PreserveSpace, ?CurrentNS, ?DefaultNS, ?Entities,
 *		?Namespaces, ?RemoveAttributePrefixes, ?AllowAmpersand )
 * is an ADT hiding the "state" arguments for XML Acquisition.
 */

/* initial_context( +Controls, ?Context ) holds when Context is the starting
 * state selected by the option list Controls:
 *   extended_characters(false)      - start with an empty entity map instead
 *                                     of extended_character_entities/1;
 *   format(false)                   - set the preserve-space field to true;
 *   remove_attribute_prefixes(true) - set that field to true;
 *   allow_ampersand(true)           - set that field to true.
 * The element is void, both namespaces are '' and the namespace map is empty.
 */
initial_context( Controls, Context ) :-
	Context = context(void,PreserveSpace,'','',Entities,Empty,
			RemoveAttributePrefixes,AllowAmpersand),
	empty_map( Empty ),
	( member( extended_characters(false), Controls ) ->
		Entities = Empty
	; otherwise ->
		extended_character_entities(Entities)
	),
	control_flag( format(false), Controls, PreserveSpace ),
	control_flag( remove_attribute_prefixes(true), Controls,
		RemoveAttributePrefixes ),
	control_flag( allow_ampersand(true), Controls, AllowAmpersand ).

% control_flag( +Option, +Controls, ?Flag ) holds when Flag is true if Option
% is a member of Controls, and false if it is not.
control_flag( Option, Controls, Flag ) :-
	( member( Option, Controls ) ->
		Flag = true
	; otherwise ->
		Flag = false
	).

/* context_update( +Field, +Context0, +Value, ?Context1 ) holds when Context1
 * is Context0 with the field selected by Field replaced by Value. The element
 * field is stored wrapped as tag(Value).
 */
context_update( current_namespace,
		context(Element,Preserve,_Current,Default,Entities,
			Namespaces,RemoveAttributePrefixes,Amp),
		URI,
		context(Element,Preserve,URI,Default,Entities,
			Namespaces,RemoveAttributePrefixes,Amp)
		).
context_update( element,
		context(_Element,Preserve,Current,Default,Entities,
			Namespaces,RemoveAttributePrefixes,Amp),
		Tag,
		context(tag(Tag),Preserve,Current,Default,Entities,
			Namespaces,RemoveAttributePrefixes,Amp)
		).
context_update( default_namespace,
		context(Element,Preserve,Current,_Default,Entities,
			Namespaces,RemoveAttributePrefixes,Amp),
		URI,
		context(Element,Preserve,Current,URI,Entities,
			Namespaces,RemoveAttributePrefixes,Amp)
		).
context_update( space_preserve,
		context(Element,_Preserve,Current,Default,Entities,
			Namespaces,RemoveAttributePrefixes,Amp),
		Boolean,
		context(Element,Boolean,Current,Default,Entities,
			Namespaces,RemoveAttributePrefixes,Amp)
		).
% context_update/4 for the two map-valued fields: the pair is stored in the
% namespace map (ns_prefix) or the entity map (entity) using map_store/4.
context_update( ns_prefix(Prefix),
		context(Element,Preserve,Current,Default,Entities,
			Namespaces0,RemoveAttributePrefixes,Amp),
		URI,
		context(Element,Preserve,Current,Default,Entities,
			Namespaces1,RemoveAttributePrefixes,Amp)
		) :-
	map_store( Namespaces0, Prefix, URI, Namespaces1 ).
context_update( entity(Name),
		context(Element,Preserve,Current,Default,Entities0,
			Namespaces,RemoveAttributePrefixes,Amp),
		String,
		context(Element,Preserve,Current,Default,Entities1,
			Namespaces,RemoveAttributePrefixes,Amp)
		) :-
	map_store( Entities0, Name, String, Entities1 ).

% remove_attribute_prefixes( ?Context ) holds when the remove-attribute-
% prefixes field of Context is true.
remove_attribute_prefixes(
	context(_Element,_Preserve,_Current,_Default,_Entities,
		_Namespaces,true,_Amp)
	).

% current_tag( ?Context, ?Tag ) holds when the element field of Context is
% tag(Tag); it fails for a void context.
current_tag(
	context(tag(Tag),_Preserve,_Current,_Default,_Entities,
		_Namespaces,_RPFA,_Amp),
	Tag
	).

% current_namespace( ?Context, ?Current ) holds when Current is the current
% namespace field of Context.
current_namespace(
	context(_Element,_Preserve,Current,_Default,_Entities,
		_Namespaces,_RPFA,_Amp),
	Current
	).

% default_namespace( ?Context, ?Default ) holds when Default is the default
% namespace field of Context.
default_namespace(
	context(_Element,_Preserve,_Current,Default,_Entities,
		_Namespaces,_RPFA,_Amp),
	Default
	).

% space_preserve( ?Context ) holds when Context is inside an element (its
% element field is a tag/1 term) and its preserve-space field is true.
space_preserve(
	context(tag(_Tag),true,_Current,_Default,_Entities,
		_Namespaces,_RPFA,_Amp)
	).

% specific_namespace( +Prefix, +Context, ?URI ) holds when the namespace map
% of Context records Prefix-URI.
specific_namespace( Prefix,
		context(_Element,_Preserve,_Current,_Default,_Entities,
			Namespaces,_RPFA,_Amp),
		URI
		) :-
	map_member( Prefix, Namespaces, URI ).

% defined_entity( +Reference, +Context, ?String ) holds when the entity map
% of Context records Reference-String.
defined_entity( Reference,
		context(_Element,_Preserve,_Current,_Default,Entities,
			_Namespaces,_RPFA,_Amp),
		String
		) :-
	map_member( Reference, Entities, String ).

% close_context( +Context, ?Terms, ?WellFormed ) passes the element field of
% Context to close_context1/3, which determines Terms and WellFormed.
close_context(
		context(Element,_Preserve,_Current,_Default,_Entities,
			_Namespaces,_RPFA,_Amp),
		Terms,
		WellFormed
		) :-
	close_context1( Element, Terms, WellFormed ).

% close_context1( +Element, ?Terms, ?WellFormed ): a void element closes
% cleanly with no extra terms; an element that is still open is reported as
% out_of_context(Tag) and the document is marked as not well-formed.
close_context1( void, [], true ).
close_context1( tag(TagChars), [out_of_context(Tag)], false ) :-
	atom_chars( Tag, TagChars ).

% void_context( ?Context ) holds when the element field of Context is void.
void_context( Context ) :-
	Context = context(void,_Preserve,_Current,_Default,_Entities,_Names,
			_RPFA,_Amp).

% allow_ampersand( ?Context ) holds when the allow-ampersand field of Context
% is true.
allow_ampersand( Context ) :-
	Context = context(_Void,_Preserve,_Current,_Default,_Entities,_Names,
			_RPFA,true).

/* pp_string( +String ) prints String onto the current output stream.
 * If every character code in String lies in the range 9..255 it is printed
 * in shorthand quoted format, otherwise it is written as a list.
 * If your Prolog uses " to delimit a special string type, just use write/1.
 */
pp_string( Chars ) :-
	( needs_list_format( Chars ) ->
		write( Chars )
	; otherwise ->
		put_quote,
		pp_string1( Chars ),
		put_quote
	).

% needs_list_format( +Chars ) holds when Chars contains a code above 255 or
% below 9.
needs_list_format( Chars ) :-
	member( Char, Chars ),
	( Char > 255 ; Char < 9 ).

put_quote :-
	put( 0'" ). % '

% pp_string1( +Chars ) prints the codes in Chars one output unit at a time.
pp_string1( [] ).
pp_string1( [Char|Chars] ) :-
	pp_char( Char, Chars, Rest ),
	pp_string1( Rest ).

% pp_char( +Char, +Chars, ?Rest ) prints Char, where Chars are the codes that
% follow it and Rest are the codes still to be printed afterwards.
pp_char( Char, Chars, Rest ) :-
	( Char =:= """" -> % Meta-quote: a double quote is written twice
		put( Char ),
		put( Char ),
		Rest = Chars
	; Char =:= 13, % Handle Windows border-settings: CR LF is written as LF
	  Chars = [10|Chars1] ->
		put( 10 ),
		Rest = Chars1
	; otherwise ->
		put( Char ),
		Rest = Chars
	).

% xml_declaration_attributes_valid( +Attributes ) holds when every Name=Value
% pair in Attributes satisfies xml_declaration_attribute_valid/2.
xml_declaration_attributes_valid( [] ).
xml_declaration_attributes_valid( [Name=Value|Rest] ) :-
	xml_declaration_attribute_valid( Name, Value ),
	xml_declaration_attributes_valid( Rest ).

% xml_declaration_attribute_valid( +Name, +Value ) holds when Value, folded
% to lower case, is one of the canonical values recorded for Name.
xml_declaration_attribute_valid( Name, Value ) :-
	lowercase( Value, Canonical ),
	canonical_xml_declaration_attribute( Name, Canonical ).

canonical_xml_declaration_attribute( version, "1.0" ).
canonical_xml_declaration_attribute( standalone, "yes" ).
canonical_xml_declaration_attribute( standalone, "no" ).
% The encodings here are all valid for the output produced.
canonical_xml_declaration_attribute( encoding, "utf-8" ).
% canonical_xml_declaration_attribute( encoding, "utf-16" ).
% This is erroneous for the output of this library
canonical_xml_declaration_attribute( encoding, "us-ascii" ).
canonical_xml_declaration_attribute( encoding, "ascii" ).
canonical_xml_declaration_attribute( encoding, "iso-8859-1" ).
canonical_xml_declaration_attribute( encoding, "iso-8859-2" ).
canonical_xml_declaration_attribute( encoding, "iso-8859-15" ).
canonical_xml_declaration_attribute( encoding, "windows-1252" ).
% In general, it's better not to specify an encoding.

/* lowercase( +MixedCase, ?Lowercase ) holds when Lowercase and MixedCase are
 * lists of character codes, and Lowercase is identical to MixedCase with
 * every uppercase character (A-Z) replaced by its lowercase equivalent.
 *
 * Character codes are written with the 0'c notation, as in put_quote/0, so
 * the arithmetic does not depend on one-element strings being evaluable.
 */
lowercase( [], [] ).
lowercase( [Char|Chars], [Lower|LowerCase] ) :-
	( Char >= 0'A, Char =< 0'Z ->
		Lower is Char + 0'a - 0'A
	; otherwise ->
		Lower = Char
	),
	lowercase( Chars, LowerCase ).

/* extended_character_entities( ?Entities ) holds when Entities is the ordered
 * map from each extended entity name to the list holding its character code.
 *
 * map_member/3 and map_store/4 require a map sorted by the standard order of
 * its keys, in which uppercase codes precede lowercase ones. The table below
 * is listed case-insensitively for readability, so it is sorted with
 * keysort/2 here; used unsorted, lookups such as "Acirc" stop at "aacute"
 * and fail.
 */
extended_character_entities( Entities ) :-
	extended_character_entity_table( Table ),
	keysort( Table, Entities ).

% extended_character_entity_table( ?Table ) holds when Table is the list of
% Name-[Code] pairs, in case-insensitive alphabetical order of Name.
extended_character_entity_table( [
	"Aacute"-[193],	% latin capital letter A with acute
	"aacute"-[225],	% latin small letter a with acute
	"Acirc"-[194],	% latin capital letter A with circumflex
	"acirc"-[226],	% latin small letter a with circumflex
	"acute"-[180],	% acute accent = spacing acute
	"AElig"-[198],	% latin capital letter AE
	"aelig"-[230],	% latin small letter ae
	"Agrave"-[192],	% latin capital letter A with grave
	"agrave"-[224],	% latin small letter a with grave
	"alefsym"-[8501],	% alef symbol = first transfinite cardinal
	"Alpha"-[913],	% greek capital letter alpha, U+0391
	"alpha"-[945],	% greek small letter alpha
	"and"-[8743],	% logical and = wedge, U+2227 ISOtech
	"ang"-[8736],	% angle, U+2220 ISOamso
	"Aring"-[197],	% latin capital letter A with ring above
	"aring"-[229],	% latin small letter a with ring above
	"asymp"-[8776],	% almost equal to = asymptotic to
	"Atilde"-[195],	% latin capital letter A with tilde
	"atilde"-[227],	% latin small letter a with tilde
	"Auml"-[196],	% latin capital letter A with diaeresis
	"auml"-[228],	% latin small letter a with diaeresis
	"bdquo"-[8222],	% double low-9 quotation mark, U+201E NEW
	"Beta"-[914],	% greek capital letter beta, U+0392
	"beta"-[946],	% greek small letter beta, U+03B2 ISOgrk3
	"brvbar"-[166],	% broken bar = broken vertical bar
	"bull"-[8226],	% bullet = black small circle
	"cap"-[8745],	% intersection = cap, U+2229 ISOtech
	"Ccedil"-[199],	% latin capital letter C with cedilla
	"ccedil"-[231],	% latin small letter c with cedilla
	"cedil"-[184],	% cedilla = spacing cedilla, U+00B8 ISOdia
	"cent"-[162],	% cent sign, U+00A2 ISOnum
	"Chi"-[935],	% greek capital letter chi, U+03A7
	"chi"-[967],	% greek small letter chi, U+03C7 ISOgrk3
	"circ"-[710],	% modifier letter circumflex accent
	"clubs"-[9827],	% black club suit = shamrock
	"cong"-[8773],	% approximately equal to, U+2245 ISOtech
	"copy"-[169],	% copyright sign, U+00A9 ISOnum
	"crarr"-[8629],	% downwards arrow with corner leftwards
	"cup"-[8746],	% union = cup, U+222A ISOtech
	"curren"-[164],	% currency sign, U+00A4 ISOnum
	"dagger"-[8224],	% dagger, U+2020 ISOpub
	"Dagger"-[8225],	% double dagger, U+2021 ISOpub
	"darr"-[8595],	% downwards arrow, U+2193 ISOnum
	"dArr"-[8659],	% downwards double arrow, U+21D3 ISOamsa
	"deg"-[176],	% degree sign, U+00B0 ISOnum
	"Delta"-[916],	% greek capital letter delta
	"delta"-[948],	% greek small letter delta
	"diams"-[9830],	% black diamond suit, U+2666 ISOpub
	"divide"-[247],	% division sign, U+00F7 ISOnum
	"Eacute"-[201],	% latin capital letter E with acute
	"eacute"-[233],	% latin small letter e with acute
	"Ecirc"-[202],	% latin capital letter E with circumflex
	"ecirc"-[234],	% latin small letter e with circumflex
	"Egrave"-[200],	% latin capital letter E with grave
	"egrave"-[232],	% latin small letter e with grave
	"empty"-[8709],	% empty set = null set = diameter
	"emsp"-[8195],	% em space, U+2003 ISOpub
	"ensp"-[8194],	% en space, U+2002 ISOpub
	"Epsilon"-[917],	% greek capital letter epsilon, U+0395
	"epsilon"-[949],	% greek small letter epsilon
	"equiv"-[8801],	% identical to, U+2261 ISOtech
	"Eta"-[919],	% greek capital letter eta, U+0397
	"eta"-[951],	% greek small letter eta, U+03B7 ISOgrk3
	"ETH"-[208],	% latin capital letter ETH, U+00D0 ISOlat1
	"eth"-[240],	% latin small letter eth, U+00F0 ISOlat1
	"Euml"-[203],	% latin capital letter E with diaeresis
	"euml"-[235],	% latin small letter e with diaeresis
	"euro"-[8364],	% euro sign, U+20AC NEW
	"exist"-[8707],	% there exists, U+2203 ISOtech
	"fnof"-[402],	% latin small f with hook = function
	"forall"-[8704],	% for all, U+2200 ISOtech
	"frac12"-[189],	% vulgar fraction one half
	"frac14"-[188],	% vulgar fraction one quarter
	"frac34"-[190],	% vulgar fraction three quarters
	"frasl"-[8260],	% fraction slash, U+2044 NEW
	"Gamma"-[915],	% greek capital letter gamma
	"gamma"-[947],	% greek small letter gamma
	"ge"-[8805],	% greater-than or equal to
	"harr"-[8596],	% left right arrow, U+2194 ISOamsa
	"hArr"-[8660],	% left right double arrow
	"hearts"-[9829],	% black heart suit = valentine
	"hellip"-[8230],	% horizontal ellipsis = three dot leader
	"Iacute"-[205],	% latin capital letter I with acute
	"iacute"-[237],	% latin small letter i with acute
	"Icirc"-[206],	% latin capital letter I with circumflex
	"icirc"-[238],	% latin small letter i with circumflex
	"iexcl"-[161],	% inverted exclamation mark, U+00A1 ISOnum
	"Igrave"-[204],	% latin capital letter I with grave
	"igrave"-[236],	% latin small letter i with grave
	"image"-[8465],	% blackletter capital I = imaginary part
	"infin"-[8734],	% infinity, U+221E ISOtech
	"int"-[8747],	% integral, U+222B ISOtech
	"Iota"-[921],	% greek capital letter iota, U+0399
	"iota"-[953],	% greek small letter iota, U+03B9 ISOgrk3
	"iquest"-[191],	% inverted question mark
	"isin"-[8712],	% element of, U+2208 ISOtech
	"Iuml"-[207],	% latin capital letter I with diaeresis
	"iuml"-[239],	% latin small letter i with diaeresis
	"Kappa"-[922],	% greek capital letter kappa, U+039A
	"kappa"-[954],	% greek small letter kappa
	"Lambda"-[923],	% greek capital letter lambda
	"lambda"-[955],	% greek small letter lambda
	"lang"-[9001],	% left-pointing angle bracket = bra
	"laquo"-[171],	% left-pointing double angle quotation mark
	"larr"-[8592],	% leftwards arrow, U+2190 ISOnum
	"lArr"-[8656],	% leftwards double arrow, U+21D0 ISOtech
	"lceil"-[8968],	% left ceiling = apl upstile
	"ldquo"-[8220],	% left double quotation mark
	"le"-[8804],	% less-than or equal to, U+2264 ISOtech
	"lfloor"-[8970],	% left floor = apl downstile
	"lowast"-[8727],	% asterisk operator, U+2217 ISOtech
	"loz"-[9674],	% lozenge, U+25CA ISOpub
	"lrm"-[8206],	% left-to-right mark, U+200E NEW RFC 2070
	"lsaquo"-[8249],	% single left-pointing angle quotation mark
	"lsquo"-[8216],	% left single quotation mark
	"macr"-[175],	% macron = spacing macron = overline
	"mdash"-[8212],	% em dash, U+2014 ISOpub
	"micro"-[181],	% micro sign, U+00B5 ISOnum
	"middot"-[183],	% middle dot = Georgian comma
	"minus"-[8722],	% minus sign, U+2212 ISOtech
	"Mu"-[924],	% greek capital letter mu, U+039C
	"mu"-[956],	% greek small letter mu, U+03BC ISOgrk3
	"nabla"-[8711],	% nabla = backward difference
	"nbsp"-[160],	% no-break space = non-breaking space
	"ndash"-[8211],	% en dash, U+2013 ISOpub
	"ne"-[8800],	% not equal to, U+2260 ISOtech
	"ni"-[8715],	% contains as member, U+220B ISOtech
	"not"-[172],	% not sign, U+00AC ISOnum
	"notin"-[8713],	% not an element of, U+2209 ISOtech
	"nsub"-[8836],	% not a subset of, U+2284 ISOamsn
	"Ntilde"-[209],	% latin capital letter N with tilde
	"ntilde"-[241],	% latin small letter n with tilde
	"Nu"-[925],	% greek capital letter nu, U+039D
	"nu"-[957],	% greek small letter nu, U+03BD ISOgrk3
	"Oacute"-[211],	% latin capital letter O with acute
	"oacute"-[243],	% latin small letter o with acute
	"Ocirc"-[212],	% latin capital letter O with circumflex
	"ocirc"-[244],	% latin small letter o with circumflex
	"OElig"-[338],	% latin capital ligature OE
	"oelig"-[339],	% latin small ligature oe, U+0153 ISOlat2
	"Ograve"-[210],	% latin capital letter O with grave
	"ograve"-[242],	% latin small letter o with grave
	"oline"-[8254],	% overline = spacing overscore
	"Omega"-[937],	% greek capital letter omega
	"omega"-[969],	% greek small letter omega
	"Omicron"-[927],	% greek capital letter omicron, U+039F
	"omicron"-[959],	% greek small letter omicron, U+03BF NEW
	"oplus"-[8853],	% circled plus = direct sum
	"or"-[8744],	% logical or = vee, U+2228 ISOtech
	"ordf"-[170],	% feminine ordinal indicator, U+00AA ISOnum
	"ordm"-[186],	% masculine ordinal indicator
	"Oslash"-[216],	% latin capital letter O with stroke
	"oslash"-[248],	% latin small letter o with stroke
	"Otilde"-[213],	% latin capital letter O with tilde
	"otilde"-[245],	% latin small letter o with tilde
	"otimes"-[8855],	% circled times = vector product
	"Ouml"-[214],	% latin capital letter O with diaeresis
	"ouml"-[246],	% latin small letter o with diaeresis
	"para"-[182],	% pilcrow sign = paragraph sign
	"part"-[8706],	% partial differential, U+2202 ISOtech
	"permil"-[8240],	% per mille sign, U+2030 ISOtech
	"perp"-[8869],	% up tack = orthogonal to = perpendicular
	"Phi"-[934],	% greek capital letter phi
	"phi"-[966],	% greek small letter phi, U+03C6 ISOgrk3
	"Pi"-[928],	% greek capital letter pi, U+03A0 ISOgrk3
	"pi"-[960],	% greek small letter pi, U+03C0 ISOgrk3
	"piv"-[982],	% greek pi symbol, U+03D6 ISOgrk3
	"plusmn"-[177],	% plus-minus sign = plus-or-minus sign
	"pound"-[163],	% pound sign, U+00A3 ISOnum
	"prime"-[8242],	% prime = minutes = feet, U+2032 ISOtech
	"Prime"-[8243],	% double prime = seconds = inches
	"prod"-[8719],	% n-ary product = product sign
	"prop"-[8733],	% proportional to, U+221D ISOtech
	"Psi"-[936],	% greek capital letter psi
	"psi"-[968],	% greek small letter psi, U+03C8 ISOgrk3
	"radic"-[8730],	% square root = radical sign
	"rang"-[9002],	% right-pointing angle bracket = ket
	"raquo"-[187],	% right-pointing double angle quotation mark
	"rarr"-[8594],	% rightwards arrow, U+2192 ISOnum
	"rArr"-[8658],	% rightwards double arrow
	"rceil"-[8969],	% right ceiling, U+2309 ISOamsc
	"rdquo"-[8221],	% right double quotation mark
	"real"-[8476],	% blackletter capital R = real part symbol
	"reg"-[174],	% registered sign = registered trade mark sign
	"rfloor"-[8971],	% right floor, U+230B ISOamsc
	"Rho"-[929],	% greek capital letter rho, U+03A1
	"rho"-[961],	% greek small letter rho, U+03C1 ISOgrk3
	"rlm"-[8207],	% right-to-left mark, U+200F NEW RFC 2070
	"rsaquo"-[8250],	% single right-pointing angle quotation mark
	"rsquo"-[8217],	% right single quotation mark
	"sbquo"-[8218],	% single low-9 quotation mark, U+201A NEW
	"Scaron"-[352],	% latin capital letter S with caron
	"scaron"-[353],	% latin small letter s with caron
	"sdot"-[8901],	% dot operator, U+22C5 ISOamsb
	"sect"-[167],	% section sign, U+00A7 ISOnum
	"shy"-[173],	% soft hyphen = discretionary hyphen
	"Sigma"-[931],	% greek capital letter sigma
	"sigma"-[963],	% greek small letter sigma
	"sigmaf"-[962],	% greek small letter final sigma
	"sim"-[8764],	% tilde operator = varies with = similar to
	"spades"-[9824],	% black spade suit, U+2660 ISOpub
	"sub"-[8834],	% subset of, U+2282 ISOtech
	"sube"-[8838],	% subset of or equal to, U+2286 ISOtech
	"sum"-[8721],	% n-ary summation, U+2211 ISOamsb
	"sup"-[8835],	% superset of, U+2283 ISOtech
	"sup1"-[185],	% superscript one = superscript digit one
	"sup2"-[178],	% superscript two = superscript digit two
	"sup3"-[179],	% superscript three = superscript digit three
	"supe"-[8839],	% superset of or equal to
	"szlig"-[223],	% latin small letter sharp s = ess-zed
	"Tau"-[932],	% greek capital letter tau, U+03A4
	"tau"-[964],	% greek small letter tau, U+03C4 ISOgrk3
	"there4"-[8756],	% therefore, U+2234 ISOtech
	"Theta"-[920],	% greek capital letter theta
	"theta"-[952],	% greek small letter theta
	"thetasym"-[977],	% greek small letter theta symbol
	"thinsp"-[8201],	% thin space, U+2009 ISOpub
	"THORN"-[222],	% latin capital letter THORN
	"thorn"-[254],	% latin small letter thorn
	"tilde"-[732],	% small tilde, U+02DC ISOdia
	"times"-[215],	% multiplication sign, U+00D7 ISOnum
	"trade"-[8482],	% trade mark sign, U+2122 ISOnum
	"Uacute"-[218],	% latin capital letter U with acute
	"uacute"-[250],	% latin small letter u with acute
	"uarr"-[8593],	% upwards arrow, U+2191 ISOnum
	"uArr"-[8657],	% upwards double arrow, U+21D1 ISOamsa
	"Ucirc"-[219],	% latin capital letter U with circumflex
	"ucirc"-[251],	% latin small letter u with circumflex
	"Ugrave"-[217],	% latin capital letter U with grave
	"ugrave"-[249],	% latin small letter u with grave
	"uml"-[168],	% diaeresis = spacing diaeresis
	"upsih"-[978],	% greek upsilon with hook symbol
	"Upsilon"-[933],	% greek capital letter upsilon
	"upsilon"-[965],	% greek small letter upsilon
	"Uuml"-[220],	% latin capital letter U with diaeresis
	"uuml"-[252],	% latin small letter u with diaeresis
	"weierp"-[8472],	% script capital P = power set
	"Xi"-[926],	% greek capital letter xi, U+039E ISOgrk3
	"xi"-[958],	% greek small letter xi, U+03BE ISOgrk3
	"Yacute"-[221],	% latin capital letter Y with acute
	"yacute"-[253],	% latin small letter y with acute
	"yen"-[165],	% yen sign = yuan sign, U+00A5 ISOnum
	"yuml"-[255],	% latin small letter y with diaeresis
	"Yuml"-[376],	% latin capital letter Y with diaeresis
	"Zeta"-[918],	% greek capital letter zeta, U+0396
	"zeta"-[950],	% greek small letter zeta, U+03B6 ISOgrk3
	"zwj"-[8205],	% zero width joiner, U+200D NEW RFC 2070
	"zwnj"-[8204]	% zero width non-joiner
	] ).

% The following code is for Quintus Prolog primarily. Some of these
% predicates are built-in to SWI, LPA etc.

/* member( ?Element, ?List ) holds when Element is a member of List.
 */
member( H, [H|_] ).
member( H, [_|T] ):-
	member( H, T ).

/* select( ?Element, ?List0, ?List1 ) is true if List1 is equal to List0
 * with Element removed.
 */
select( H, [H|T], T ).
select( Element, [H|T0], [H|T1] ):-
	select( Element, T0, T1 ).

/* is_list( +List ) holds when List is a list.
 * Only the outermost constructor is inspected: List must be bound to [] or
 * to a cons cell; the tail of a cons cell is not examined.
 */
is_list( List ) :-
	nonvar( List ),
	is_list1( List ).

is_list1( [] ).
is_list1( [_|_] ).

/* chars( ?Chars, ?Plus, ?Minus ) used as chars( ?Chars ) in a DCG to
 * copy the list Chars inline.
 *
 * This is best expressed in terms of append/3 where append/3 is built-in.
 * For other Prologs, a straightforward specification can be used:
 *
 *	chars( [] ) --> "".
 *	chars( [Char|Chars] ) -->
 *		[Char],
 *		chars( Chars ).
 */

chars( Chars, Plus, Minus ) :-
	append( Chars, Minus, Plus ).

/* atom_codes/2, number_codes/2 and throw/1 are ISO predicates, mapped to
 * the Quintus equivalent here.
 */
atom_codes( Atom, Codes ) :-
	atom_chars( Atom, Codes ).

number_codes( Number, Codes ) :-
	number_chars( Number, Codes ).

throw( Exception ) :-
	raise_exception( Exception ).

end_of_file. % <- Remove this line for ISO Prologs?

append( [], L, L ).
% Recursive clause of append/3: the head of the first list becomes the head
% of the result, and the rest of the result is built from the first list's
% tail.
append( [Head|Tail], List, [Head|Rest] ) :-
	append( Tail, List, Rest ).

% otherwise/0 always succeeds; it is used as the final "else" condition in
% the if-then-else chains of this file.
otherwise.