1104349Sphk<!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN" [ 2104349Sphk 3104349Sphk<!-- Process this file with docbook-to-man to generate an nroff manual 4104349Sphk page: `docbook-to-man manpage.sgml > manpage.1'. You may view 5104349Sphk the manual page with: `docbook-to-man manpage.sgml | nroff -man | 6104349Sphk less'. A typical entry in a Makefile or Makefile.am is: 7104349Sphk 8104349Sphkmanpage.1: manpage.sgml 9104349Sphk docbook-to-man $< > $@ 10104349Sphk --> 11104349Sphk 12104349Sphk <!-- Fill in your name for FIRSTNAME and SURNAME. --> 13104349Sphk <!ENTITY dhfirstname "<firstname>Scott</firstname>"> 14104349Sphk <!ENTITY dhsurname "<surname>Bronson</surname>"> 15104349Sphk <!-- Please adjust the date whenever revising the manpage. --> 16104349Sphk <!ENTITY dhdate "<date>December 5, 2001</date>"> 17104349Sphk <!-- SECTION should be 1-8, maybe w/ subsection other parameters are 18104349Sphk allowed: see man(7), man(1). --> 19104349Sphk <!ENTITY dhsection "<manvolnum>1</manvolnum>"> 20104349Sphk <!ENTITY dhemail "<email>bronson@rinspin.com</email>"> 21104349Sphk <!ENTITY dhusername "Scott Bronson"> 22104349Sphk <!ENTITY dhucpackage "<refentrytitle>XMLWF</refentrytitle>"> 23104349Sphk <!ENTITY dhpackage "xmlwf"> 24104349Sphk 25104349Sphk <!ENTITY debian "<productname>Debian GNU/Linux</productname>"> 26104349Sphk <!ENTITY gnu "<acronym>GNU</acronym>"> 27104349Sphk]> 28104349Sphk 29104349Sphk<refentry> 30104349Sphk <refentryinfo> 31104349Sphk <address> 32104349Sphk &dhemail; 33104349Sphk </address> 34104349Sphk <author> 35104349Sphk &dhfirstname; 36104349Sphk &dhsurname; 37104349Sphk </author> 38104349Sphk <copyright> 39104349Sphk <year>2001</year> 40104349Sphk <holder>&dhusername;</holder> 41104349Sphk </copyright> 42104349Sphk &dhdate; 43104349Sphk </refentryinfo> 44104349Sphk <refmeta> 45104349Sphk &dhucpackage; 46104349Sphk 47104349Sphk &dhsection; 48104349Sphk </refmeta> 49104349Sphk <refnamediv> 50104349Sphk <refname>&dhpackage;</refname> 
51104349Sphk 52104349Sphk <refpurpose>Determines if an XML document is well-formed</refpurpose> 53104349Sphk </refnamediv> 54104349Sphk <refsynopsisdiv> 55104349Sphk <cmdsynopsis> 56104349Sphk <command>&dhpackage;</command> 57104349Sphk <arg><option>-s</option></arg> 58104349Sphk <arg><option>-n</option></arg> 59104349Sphk <arg><option>-p</option></arg> 60104349Sphk <arg><option>-x</option></arg> 61104349Sphk 62104349Sphk <arg><option>-e <replaceable>encoding</replaceable></option></arg> 63104349Sphk <arg><option>-w</option></arg> 64104349Sphk 65104349Sphk <arg><option>-d <replaceable>output-dir</replaceable></option></arg> 66104349Sphk <arg><option>-c</option></arg> 67104349Sphk <arg><option>-m</option></arg> 68104349Sphk 69104349Sphk <arg><option>-r</option></arg> 70104349Sphk <arg><option>-t</option></arg> 71104349Sphk 72104349Sphk <arg><option>-v</option></arg> 73104349Sphk 74104349Sphk <arg>file ...</arg> 75104349Sphk </cmdsynopsis> 76104349Sphk </refsynopsisdiv> 77104349Sphk 78104349Sphk <refsect1> 79104349Sphk <title>DESCRIPTION</title> 80104349Sphk 81104349Sphk <para> 82178848Scokane <command>&dhpackage;</command> uses the Expat library to 83178848Scokane determine if an XML document is well-formed. It is 84178848Scokane non-validating. 85104349Sphk </para> 86104349Sphk 87104349Sphk <para> 88178848Scokane If you do not specify any files on the command-line, and you 89178848Scokane have a recent version of <command>&dhpackage;</command>, the 90178848Scokane input file will be read from standard input. 91104349Sphk </para> 92104349Sphk 93104349Sphk </refsect1> 94104349Sphk 95104349Sphk <refsect1> 96104349Sphk <title>WELL-FORMED DOCUMENTS</title> 97104349Sphk 98104349Sphk <para> 99104349Sphk A well-formed document must adhere to the 100104349Sphk following rules: 101104349Sphk </para> 102104349Sphk 103104349Sphk <itemizedlist> 104104349Sphk <listitem><para> 105104349Sphk The file begins with an XML declaration. 
For instance, 106104349Sphk <literal>&lt;?xml version="1.0" standalone="yes"?&gt;</literal>. 107178848Scokane <emphasis>NOTE:</emphasis> 108178848Scokane <command>&dhpackage;</command> does not currently 109104349Sphk check for a valid XML declaration. 110104349Sphk </para></listitem> 111104349Sphk <listitem><para> 112104349Sphk Every start tag is either empty (&lt;tag/&gt;) 113104349Sphk or has a corresponding end tag. 114104349Sphk </para></listitem> 115104349Sphk <listitem><para> 116104349Sphk There is exactly one root element. This element must contain 117104349Sphk all other elements in the document. Only comments, white 118104349Sphk space, and processing instructions may come after the close 119104349Sphk of the root element. 120104349Sphk </para></listitem> 121104349Sphk <listitem><para> 122104349Sphk All elements nest properly. 123104349Sphk </para></listitem> 124104349Sphk <listitem><para> 125104349Sphk All attribute values are enclosed in quotes (either single 126104349Sphk or double). 127104349Sphk </para></listitem> 128104349Sphk </itemizedlist> 129104349Sphk 130104349Sphk <para> 131104349Sphk If the document has a DTD, and it strictly complies with that 132104349Sphk DTD, then the document is also considered <emphasis>valid</emphasis>. 133178848Scokane <command>&dhpackage;</command> is a non-validating parser -- 134178848Scokane it does not check the DTD. However, it does support 135178848Scokane external entities (see the <option>-x</option> option). 136104349Sphk </para> 137104349Sphk </refsect1> 138104349Sphk 139104349Sphk <refsect1> 140104349Sphk <title>OPTIONS</title> 141104349Sphk 142104349Sphk<para> 143104349SphkWhen an option includes an argument, you may specify the argument either 144178848Scokaneseparately ("<option>-d</option> output") or concatenated with the 145178848Scokaneoption ("<option>-d</option>output"). <command>&dhpackage;</command> 146178848Scokanesupports both. 
147104349Sphk</para> 148104349Sphk 149104349Sphk <variablelist> 150104349Sphk 151104349Sphk <varlistentry> 152104349Sphk <term><option>-c</option></term> 153104349Sphk <listitem> 154104349Sphk <para> 155178848Scokane If the input file is well-formed and <command>&dhpackage;</command> 156178848Scokane doesn't encounter any errors, the input file is simply copied to 157104349Sphk the output directory unchanged. 158178848Scokane This implies no namespaces (turns off <option>-n</option>) and 159178848Scokane requires <option>-d</option> to specify an output directory. 160104349Sphk </para> 161104349Sphk </listitem> 162104349Sphk </varlistentry> 163104349Sphk 164104349Sphk <varlistentry> 165104349Sphk <term><option>-d output-dir</option></term> 166104349Sphk <listitem> 167104349Sphk <para> 168104349Sphk Specifies a directory to contain transformed 169104349Sphk representations of the input files. 170178848Scokane By default, <option>-d</option> outputs a canonical representation 171104349Sphk (described below). 172178848Scokane You can select different output formats using <option>-c</option> 173178848Scokane and <option>-m</option>. 174104349Sphk </para> 175104349Sphk <para> 176104349Sphk The output filenames will 177104349Sphk be exactly the same as the input filenames or "STDIN" if the input is 178178848Scokane coming from standard input. Therefore, you must be careful that the 179104349Sphk output file does not go into the same directory as the input 180178848Scokane file. Otherwise, <command>&dhpackage;</command> will delete the 181178848Scokane input file before it generates the output file (just like running 182104349Sphk <literal>cat &lt; file &gt; file</literal> in most shells). 183104349Sphk </para> 184104349Sphk <para> 185104349Sphk Two structurally equivalent XML documents have a byte-for-byte 186104349Sphk identical canonical XML representation. 187104349Sphk Note that ignorable white space is considered significant and 188104349Sphk is treated equivalently to data. 
189104349Sphk More on canonical XML can be found at 190104349Sphk http://www.jclark.com/xml/canonxml.html . 191104349Sphk </para> 192104349Sphk </listitem> 193104349Sphk </varlistentry> 194104349Sphk 195104349Sphk <varlistentry> 196104349Sphk <term><option>-e encoding</option></term> 197104349Sphk <listitem> 198104349Sphk <para> 199104349Sphk Specifies the character encoding for the document, overriding 200178848Scokane any document encoding declaration. <command>&dhpackage;</command> 201178848Scokane supports four built-in encodings: 202104349Sphk <literal>US-ASCII</literal>, 203104349Sphk <literal>UTF-8</literal>, 204104349Sphk <literal>UTF-16</literal>, and 205178848Scokane <literal>ISO-8859-1</literal>. 206178848Scokane Also see the <option>-w</option> option. 207104349Sphk </para> 208104349Sphk </listitem> 209104349Sphk </varlistentry> 210104349Sphk 211104349Sphk <varlistentry> 212104349Sphk <term><option>-m</option></term> 213104349Sphk <listitem> 214104349Sphk <para> 215104349Sphk Outputs some strange sort of XML file that completely 216104349Sphk describes the input file, including character positions. 217178848Scokane Requires <option>-d</option> to specify an output directory. 218104349Sphk </para> 219104349Sphk </listitem> 220104349Sphk </varlistentry> 221104349Sphk 222104349Sphk <varlistentry> 223104349Sphk <term><option>-n</option></term> 224104349Sphk <listitem> 225104349Sphk <para> 226104349Sphk Turns on namespace processing. (describe namespaces) 227178848Scokane <option>-c</option> disables namespaces. 228104349Sphk </para> 229104349Sphk </listitem> 230104349Sphk </varlistentry> 231104349Sphk 232104349Sphk <varlistentry> 233104349Sphk <term><option>-p</option></term> 234104349Sphk <listitem> 235104349Sphk <para> 236104349Sphk Tells xmlwf to process external DTDs and parameter 237104349Sphk entities. 238104349Sphk </para> 239104349Sphk <para> 240178848Scokane Normally <command>&dhpackage;</command> never parses parameter 241178848Scokane entities. 
<option>-p</option> tells it to always parse them. 242178848Scokane <option>-p</option> implies <option>-x</option>. 243104349Sphk </para> 244104349Sphk </listitem> 245104349Sphk </varlistentry> 246104349Sphk 247104349Sphk <varlistentry> 248104349Sphk <term><option>-r</option></term> 249104349Sphk <listitem> 250104349Sphk <para> 251178848Scokane Normally <command>&dhpackage;</command> memory-maps the XML file 252178848Scokane before parsing; this can result in faster parsing on many 253178848Scokane platforms. 254178848Scokane <option>-r</option> turns off memory-mapping and uses normal file 255178848Scokane IO calls instead. 256104349Sphk Of course, memory-mapping is automatically turned off 257178848Scokane when reading from standard input. 258104349Sphk </para> 259178848Scokane <para> 260178848Scokane Use of memory-mapping can cause some platforms to report 261178848Scokane substantially higher memory usage for 262178848Scokane <command>&dhpackage;</command>, but this appears to be a matter of 263178848Scokane the operating system reporting memory in a strange way; there is 264178848Scokane not a leak in <command>&dhpackage;</command>. 265178848Scokane </para> 266104349Sphk </listitem> 267104349Sphk </varlistentry> 268104349Sphk 269104349Sphk <varlistentry> 270104349Sphk <term><option>-s</option></term> 271104349Sphk <listitem> 272104349Sphk <para> 273104349Sphk Prints an error if the document is not standalone. 274104349Sphk A document is standalone if it has no external subset and no 275104349Sphk references to parameter entities. 276104349Sphk </para> 277104349Sphk </listitem> 278104349Sphk </varlistentry> 279104349Sphk 280104349Sphk <varlistentry> 281104349Sphk <term><option>-t</option></term> 282104349Sphk <listitem> 283104349Sphk <para> 284104349Sphk Turns on timings. This tells Expat to parse the entire file, 285104349Sphk but not perform any processing. 
286104349Sphk This gives a fairly accurate idea of the raw speed of Expat itself 287104349Sphk without client overhead. 288178848Scokane <option>-t</option> turns off most of the output options 289178848Scokane (<option>-d</option>, <option>-m</option>, <option>-c</option>, 290178848Scokane ...). 291104349Sphk </para> 292104349Sphk </listitem> 293104349Sphk </varlistentry> 294104349Sphk 295104349Sphk <varlistentry> 296104349Sphk <term><option>-v</option></term> 297104349Sphk <listitem> 298104349Sphk <para> 299178848Scokane Prints the version of the Expat library being used, including some 300178848Scokane information on the compile-time configuration of the library, and 301178848Scokane then exits. 302104349Sphk </para> 303104349Sphk </listitem> 304104349Sphk </varlistentry> 305104349Sphk 306104349Sphk <varlistentry> 307104349Sphk <term><option>-w</option></term> 308104349Sphk <listitem> 309104349Sphk <para> 310178848Scokane Enables support for Windows code pages. 311178848Scokane Normally, <command>&dhpackage;</command> will throw an error if it 312178848Scokane runs across an encoding that it is not equipped to handle itself. With 313178848Scokane <option>-w</option>, &dhpackage; will try to use a Windows code 314178848Scokane page. See also <option>-e</option>. 315104349Sphk </para> 316104349Sphk </listitem> 317104349Sphk </varlistentry> 318104349Sphk 319104349Sphk <varlistentry> 320104349Sphk <term><option>-x</option></term> 321104349Sphk <listitem> 322104349Sphk <para> 323104349Sphk Turns on parsing external entities. 324104349Sphk </para> 325104349Sphk<para> 326104349Sphk Non-validating parsers are not required to resolve external 327104349Sphk entities, or even expand entities at all. 328104349Sphk Expat always expands internal entities (?), 329104349Sphk but external entity parsing must be enabled explicitly. 
330104349Sphk </para> 331104349Sphk <para> 332104349Sphk External entities are simply entities that obtain their 333104349Sphk data from outside the XML file currently being parsed. 334104349Sphk </para> 335104349Sphk <para> 336104349Sphk This is an example of an internal entity: 337104349Sphk<literallayout> 338104349Sphk&lt;!ENTITY vers '1.0.2'> 339104349Sphk</literallayout> 340104349Sphk </para> 341104349Sphk <para> 342104349Sphk And here are some examples of external entities: 343104349Sphk 344104349Sphk<literallayout> 345104349Sphk&lt;!ENTITY header SYSTEM "header-&amp;vers;.xml"> (parsed) 346104349Sphk&lt;!ENTITY logo SYSTEM "logo.png" PNG> (unparsed) 347104349Sphk</literallayout> 348104349Sphk 349104349Sphk </para> 350104349Sphk </listitem> 351104349Sphk </varlistentry> 352104349Sphk 353104349Sphk <varlistentry> 354104349Sphk <term><option>--</option></term> 355104349Sphk <listitem> 356104349Sphk <para> 357178848Scokane (Two hyphens.) 358178848Scokane Terminates the list of options. This is only needed if a filename 359178848Scokane starts with a hyphen. For example: 360104349Sphk </para> 361178848Scokane<literallayout> 362178848Scokane&dhpackage; -- -myfile.xml 363178848Scokane</literallayout> 364178848Scokane <para> 365178848Scokane will run <command>&dhpackage;</command> on the file 366178848Scokane <filename>-myfile.xml</filename>. 367178848Scokane </para> 368104349Sphk </listitem> 369104349Sphk </varlistentry> 370104349Sphk </variablelist> 371104349Sphk 372104349Sphk <para> 373178848Scokane Older versions of <command>&dhpackage;</command> do not support 374178848Scokane reading from standard input. 375104349Sphk </para> 376104349Sphk </refsect1> 377104349Sphk 378104349Sphk <refsect1> 379104349Sphk <title>OUTPUT</title> 380104349Sphk <para> 381178848Scokane If an input file is not well-formed, 382178848Scokane <command>&dhpackage;</command> prints a single line describing 383178848Scokane the problem to standard output. 
If a file is well-formed, 384178848Scokane <command>&dhpackage;</command> outputs nothing. 385104349Sphk Note that the result code is <emphasis>not</emphasis> set. 386104349Sphk </para> 387104349Sphk </refsect1> 388104349Sphk 389104349Sphk <refsect1> 390104349Sphk <title>BUGS</title> 391104349Sphk <para> 392178848Scokane <command>&dhpackage;</command> returns a 0 - noerr result, 393178848Scokane even if the file is not well-formed. There is no good way for 394178848Scokane a program to use <command>&dhpackage;</command> to quickly 395178848Scokane check a file -- it must parse <command>&dhpackage;</command>'s 396178848Scokane standard output. 397104349Sphk </para> 398104349Sphk <para> 399178848Scokane The errors should go to standard error, not standard output. 400104349Sphk </para> 401104349Sphk <para> 402178848Scokane There should be a way to get <option>-d</option> to send its 403178848Scokane output to standard output rather than forcing the user to send 404178848Scokane it to a file. 405104349Sphk </para> 406178848Scokane <para> 407178848Scokane I have no idea why anyone would want to use the 408178848Scokane <option>-d</option>, <option>-c</option>, and 409178848Scokane <option>-m</option> options. If someone could explain it to 410178848Scokane me, I'd like to add this information to this manpage. 
411178848Scokane </para> 412104349Sphk </refsect1> 413104349Sphk 414104349Sphk <refsect1> 415104349Sphk <title>ALTERNATIVES</title> 416104349Sphk <para> 417104349Sphk Here are some XML validators on the web: 418104349Sphk 419104349Sphk<literallayout> 420104349Sphkhttp://www.hcrc.ed.ac.uk/~richard/xml-check.html 421104349Sphkhttp://www.stg.brown.edu/service/xmlvalid/ 422104349Sphkhttp://www.scripting.com/frontier5/xml/code/xmlValidator.html 423104349Sphkhttp://www.xml.com/pub/a/tools/ruwf/check.html 424104349Sphk</literallayout> 425104349Sphk 426104349Sphk </para> 427104349Sphk </refsect1> 428104349Sphk 429104349Sphk <refsect1> 430104349Sphk <title>SEE ALSO</title> 431104349Sphk <para> 432104349Sphk 433104349Sphk<literallayout> 434104349SphkThe Expat home page: http://www.libexpat.org/ 435104349SphkThe W3 XML specification: http://www.w3.org/TR/REC-xml 436104349Sphk</literallayout> 437104349Sphk 438104349Sphk </para> 439104349Sphk </refsect1> 440104349Sphk 441104349Sphk <refsect1> 442104349Sphk <title>AUTHOR</title> 443104349Sphk <para> 444104349Sphk This manual page was written by &dhusername; &dhemail; for 445104349Sphk the &debian; system (but may be used by others). Permission is 446104349Sphk granted to copy, distribute and/or modify this document under 447104349Sphk the terms of the <acronym>GNU</acronym> Free Documentation 448104349Sphk License, Version 1.1. 
449104349Sphk </para> 450104349Sphk </refsect1> 451104349Sphk</refentry> 452104349Sphk 453104349Sphk<!-- Keep this comment at the end of the file 454104349SphkLocal variables: 455104349Sphkmode: sgml 456104349Sphksgml-omittag:t 457104349Sphksgml-shorttag:t 458104349Sphksgml-minimize-attributes:nil 459104349Sphksgml-always-quote-attributes:t 460104349Sphksgml-indent-step:2 461104349Sphksgml-indent-data:t 462104349Sphksgml-parent-document:nil 463104349Sphksgml-default-dtd-file:nil 464104349Sphksgml-exposed-tags:nil 465104349Sphksgml-local-catalogs:nil 466104349Sphksgml-local-ecat-files:nil 467104349SphkEnd: 468104349Sphk--> 469