1104349Sphk<!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN" [
2104349Sphk
3104349Sphk<!-- Process this file with docbook-to-man to generate an nroff manual
4104349Sphk     page: `docbook-to-man manpage.sgml > manpage.1'.  You may view
5104349Sphk     the manual page with: `docbook-to-man manpage.sgml | nroff -man |
6104349Sphk     less'.  A typical entry in a Makefile or Makefile.am is:
7104349Sphk
8104349Sphkmanpage.1: manpage.sgml
9104349Sphk	docbook-to-man $< > $@
10104349Sphk  -->
11104349Sphk
12104349Sphk  <!-- Fill in your name for FIRSTNAME and SURNAME. -->
13104349Sphk  <!ENTITY dhfirstname "<firstname>Scott</firstname>">
14104349Sphk  <!ENTITY dhsurname   "<surname>Bronson</surname>">
15104349Sphk  <!-- Please adjust the date whenever revising the manpage. -->
16104349Sphk  <!ENTITY dhdate      "<date>December  5, 2001</date>">
17104349Sphk  <!-- SECTION should be 1-8, maybe w/ subsection; other parameters are
18104349Sphk       allowed: see man(7), man(1). -->
19104349Sphk  <!ENTITY dhsection   "<manvolnum>1</manvolnum>">
20104349Sphk  <!ENTITY dhemail     "<email>bronson@rinspin.com</email>">
21104349Sphk  <!ENTITY dhusername  "Scott Bronson">
22104349Sphk  <!ENTITY dhucpackage "<refentrytitle>XMLWF</refentrytitle>">
23104349Sphk  <!ENTITY dhpackage   "xmlwf">
24104349Sphk
25104349Sphk  <!ENTITY debian      "<productname>Debian GNU/Linux</productname>">
26104349Sphk  <!ENTITY gnu         "<acronym>GNU</acronym>">
27104349Sphk]>
28104349Sphk
29104349Sphk<refentry>
30104349Sphk  <refentryinfo>
31104349Sphk    <address>
32104349Sphk      &dhemail;
33104349Sphk    </address>
34104349Sphk    <author>
35104349Sphk      &dhfirstname;
36104349Sphk      &dhsurname;
37104349Sphk    </author>
38104349Sphk    <copyright>
39104349Sphk      <year>2001</year>
40104349Sphk      <holder>&dhusername;</holder>
41104349Sphk    </copyright>
42104349Sphk    &dhdate;
43104349Sphk  </refentryinfo>
44104349Sphk  <refmeta>
45104349Sphk    &dhucpackage;
46104349Sphk
47104349Sphk    &dhsection;
48104349Sphk  </refmeta>
49104349Sphk  <refnamediv>
50104349Sphk    <refname>&dhpackage;</refname>
51104349Sphk
52104349Sphk    <refpurpose>Determines if an XML document is well-formed</refpurpose>
53104349Sphk  </refnamediv>
54104349Sphk  <refsynopsisdiv>
55104349Sphk    <cmdsynopsis>
56104349Sphk      <command>&dhpackage;</command>
57104349Sphk	  <arg><option>-s</option></arg>
58104349Sphk	  <arg><option>-n</option></arg>
59104349Sphk	  <arg><option>-p</option></arg>
60104349Sphk	  <arg><option>-x</option></arg>
61104349Sphk
62104349Sphk	  <arg><option>-e <replaceable>encoding</replaceable></option></arg>
63104349Sphk	  <arg><option>-w</option></arg>
64104349Sphk
65104349Sphk	  <arg><option>-d <replaceable>output-dir</replaceable></option></arg>
66104349Sphk	  <arg><option>-c</option></arg>
67104349Sphk	  <arg><option>-m</option></arg>
68104349Sphk
69104349Sphk	  <arg><option>-r</option></arg>
70104349Sphk	  <arg><option>-t</option></arg>
71104349Sphk
72104349Sphk	  <arg><option>-v</option></arg>
73104349Sphk
74104349Sphk	  <arg>file ...</arg>
75104349Sphk    </cmdsynopsis>
76104349Sphk  </refsynopsisdiv>
77104349Sphk 
78104349Sphk  <refsect1>
79104349Sphk    <title>DESCRIPTION</title>
80104349Sphk
81104349Sphk    <para>
82178848Scokane	<command>&dhpackage;</command> uses the Expat library to
83178848Scokane	determine if an XML document is well-formed.  It is
84178848Scokane	non-validating.
85104349Sphk	</para>
86104349Sphk
87104349Sphk	<para>
88178848Scokane	If you do not specify any files on the command-line, and you
89178848Scokane	have a recent version of <command>&dhpackage;</command>, the
90178848Scokane	input file will be read from standard input.
91104349Sphk	</para>
92104349Sphk
93104349Sphk  </refsect1>
94104349Sphk
95104349Sphk  <refsect1>
96104349Sphk    <title>WELL-FORMED DOCUMENTS</title>
97104349Sphk
98104349Sphk	<para>
99104349Sphk	  A well-formed document must adhere to the
100104349Sphk	  following rules:
101104349Sphk	</para>
102104349Sphk
103104349Sphk	<itemizedlist>
104104349Sphk      <listitem><para>
105104349Sphk	    The file begins with an XML declaration.  For instance,
106104349Sphk		<literal>&lt;?xml version="1.0" standalone="yes"?&gt;</literal>.
107178848Scokane		<emphasis>NOTE:</emphasis>
108178848Scokane		<command>&dhpackage;</command> does not currently
109104349Sphk		check for a valid XML declaration.
110104349Sphk      </para></listitem>
111104349Sphk      <listitem><para>
112104349Sphk		Every start tag is either empty (&lt;tag/&gt;)
113104349Sphk		or has a corresponding end tag.
114104349Sphk      </para></listitem>
115104349Sphk      <listitem><para>
116104349Sphk	    There is exactly one root element.  This element must contain
117104349Sphk		all other elements in the document.  Only comments, white
118104349Sphk		space, and processing instructions may come after the close
119104349Sphk		of the root element.
120104349Sphk      </para></listitem>
121104349Sphk      <listitem><para>
122104349Sphk		All elements nest properly.
123104349Sphk      </para></listitem>
124104349Sphk      <listitem><para>
125104349Sphk		All attribute values are enclosed in quotes (either single
126104349Sphk		or double).
127104349Sphk      </para></listitem>
128104349Sphk    </itemizedlist>
129104349Sphk
130104349Sphk	<para>
131104349Sphk	  If the document has a DTD, and it strictly complies with that
132104349Sphk	  DTD, then the document is also considered <emphasis>valid</emphasis>.
133178848Scokane	  <command>&dhpackage;</command> is a non-validating parser --
134178848Scokane	  it does not check the DTD.  However, it does support
135178848Scokane	  external entities (see the <option>-x</option> option).
136104349Sphk	</para>
137104349Sphk  </refsect1>
138104349Sphk
139104349Sphk  <refsect1>
140104349Sphk    <title>OPTIONS</title>
141104349Sphk
142104349Sphk<para>
143104349SphkWhen an option includes an argument, you may specify the argument either
144178848Scokaneseparately ("<option>-d</option> output") or concatenated with the
145178848Scokaneoption ("<option>-d</option>output").  <command>&dhpackage;</command>
146178848Scokanesupports both.
147104349Sphk</para>
148104349Sphk
149104349Sphk    <variablelist>
150104349Sphk
151104349Sphk      <varlistentry>
152104349Sphk        <term><option>-c</option></term>
153104349Sphk        <listitem>
154104349Sphk		<para>
155178848Scokane  If the input file is well-formed and <command>&dhpackage;</command>
156178848Scokane  doesn't encounter any errors, the input file is simply copied to
157104349Sphk  the output directory unchanged.
158178848Scokane  This implies no namespaces (turns off <option>-n</option>) and
159178848Scokane  requires <option>-d</option> to specify an output directory.
160104349Sphk  		</para>
161104349Sphk        </listitem>
162104349Sphk      </varlistentry>
163104349Sphk
164104349Sphk      <varlistentry>
165104349Sphk        <term><option>-d output-dir</option></term>
166104349Sphk        <listitem>
167104349Sphk		<para>
168104349Sphk  Specifies a directory to contain transformed
169104349Sphk  representations of the input files.
170178848Scokane  By default, <option>-d</option> outputs a canonical representation
171104349Sphk  (described below).
172178848Scokane  You can select different output formats using <option>-c</option>
173178848Scokane  and <option>-m</option>.
174104349Sphk	  </para>
175104349Sphk	  <para>
176104349Sphk  The output filenames will
177104349Sphk  be exactly the same as the input filenames or "STDIN" if the input is
178178848Scokane  coming from standard input.  Therefore, you must be careful that the
179104349Sphk  output file does not go into the same directory as the input
180178848Scokane  file.  Otherwise, <command>&dhpackage;</command> will delete the
181178848Scokane  input file before it generates the output file (just like running
182104349Sphk  <literal>cat &lt; file &gt; file</literal> in most shells).
183104349Sphk	  </para>
184104349Sphk	  <para> 
185104349Sphk  Two structurally equivalent XML documents have a byte-for-byte
186104349Sphk  identical canonical XML representation.
187104349Sphk  Note that ignorable white space is considered significant and
188104349Sphk  is treated equivalently to data.
189104349Sphk  More on canonical XML can be found at
190104349Sphk  http://www.jclark.com/xml/canonxml.html .
191104349Sphk	  </para>
192104349Sphk        </listitem>
193104349Sphk      </varlistentry>
194104349Sphk
195104349Sphk      <varlistentry>
196104349Sphk        <term><option>-e encoding</option></term>
197104349Sphk        <listitem>
198104349Sphk		<para>
199104349Sphk   Specifies the character encoding for the document, overriding
200178848Scokane   any document encoding declaration.  <command>&dhpackage;</command>
201178848Scokane   supports four built-in encodings:
202104349Sphk   	<literal>US-ASCII</literal>,
203104349Sphk	<literal>UTF-8</literal>,
204104349Sphk	<literal>UTF-16</literal>, and
205178848Scokane	<literal>ISO-8859-1</literal>.
206178848Scokane   Also see the <option>-w</option> option.
207104349Sphk	   </para>
208104349Sphk        </listitem>
209104349Sphk      </varlistentry>
210104349Sphk
211104349Sphk      <varlistentry>
212104349Sphk        <term><option>-m</option></term>
213104349Sphk        <listitem>
214104349Sphk		<para>
215104349Sphk  Outputs some strange sort of XML file that completely
216104349Sphk  describes the input file, including character positions.
217178848Scokane  Requires <option>-d</option> to specify an output directory.
218104349Sphk	   </para>
219104349Sphk        </listitem>
220104349Sphk      </varlistentry>
221104349Sphk
222104349Sphk      <varlistentry>
223104349Sphk        <term><option>-n</option></term>
224104349Sphk        <listitem>
225104349Sphk		<para>
226104349Sphk  Turns on namespace processing.  (describe namespaces)
227178848Scokane  <option>-c</option> disables namespaces.
228104349Sphk	   </para>
229104349Sphk        </listitem>
230104349Sphk      </varlistentry>
231104349Sphk
232104349Sphk      <varlistentry>
233104349Sphk        <term><option>-p</option></term>
234104349Sphk        <listitem>
235104349Sphk		<para>
236104349Sphk    Tells <command>&dhpackage;</command> to process external DTDs and parameter
237104349Sphk    entities.
238104349Sphk	 </para>
239104349Sphk	 <para>
240178848Scokane   Normally <command>&dhpackage;</command> never parses parameter
241178848Scokane   entities.  <option>-p</option> tells it to always parse them.
242178848Scokane   <option>-p</option> implies <option>-x</option>.
243104349Sphk	   </para>
244104349Sphk        </listitem>
245104349Sphk      </varlistentry>
246104349Sphk
247104349Sphk      <varlistentry>
248104349Sphk        <term><option>-r</option></term>
249104349Sphk        <listitem>
250104349Sphk		<para>
251178848Scokane   Normally <command>&dhpackage;</command> memory-maps the XML file
252178848Scokane   before parsing; this can result in faster parsing on many
253178848Scokane   platforms.
254178848Scokane   <option>-r</option> turns off memory-mapping and uses normal file
255178848Scokane   IO calls instead.
256104349Sphk   Of course, memory-mapping is automatically turned off
257178848Scokane   when reading from standard input.
258104349Sphk	   </para>
259178848Scokane		<para>
260178848Scokane   Use of memory-mapping can cause some platforms to report
261178848Scokane   substantially higher memory usage for
262178848Scokane   <command>&dhpackage;</command>, but this appears to be a matter of
263178848Scokane   the operating system reporting memory in a strange way; there is
264178848Scokane   not a leak in <command>&dhpackage;</command>.
265178848Scokane           </para>
266104349Sphk        </listitem>
267104349Sphk      </varlistentry>
268104349Sphk
269104349Sphk      <varlistentry>
270104349Sphk        <term><option>-s</option></term>
271104349Sphk        <listitem>
272104349Sphk		<para>
273104349Sphk  Prints an error if the document is not standalone. 
274104349Sphk  A document is standalone if it has no external subset and no
275104349Sphk  references to parameter entities.
276104349Sphk	   </para>
277104349Sphk        </listitem>
278104349Sphk      </varlistentry>
279104349Sphk
280104349Sphk      <varlistentry>
281104349Sphk        <term><option>-t</option></term>
282104349Sphk        <listitem>
283104349Sphk		<para>
284104349Sphk  Turns on timings.  This tells Expat to parse the entire file,
285104349Sphk  but not perform any processing.
286104349Sphk  This gives a fairly accurate idea of the raw speed of Expat itself
287104349Sphk  without client overhead.
288178848Scokane  <option>-t</option> turns off most of the output options
289178848Scokane  (<option>-d</option>, <option>-m</option>, <option>-c</option>,
290178848Scokane  ...).
291104349Sphk	   </para>
292104349Sphk        </listitem>
293104349Sphk      </varlistentry>
294104349Sphk
295104349Sphk      <varlistentry>
296104349Sphk        <term><option>-v</option></term>
297104349Sphk        <listitem>
298104349Sphk		<para>
299178848Scokane  Prints the version of the Expat library being used, including some
300178848Scokane  information on the compile-time configuration of the library, and
301178848Scokane  then exits.
302104349Sphk	   </para>
303104349Sphk        </listitem>
304104349Sphk      </varlistentry>
305104349Sphk
306104349Sphk      <varlistentry>
307104349Sphk        <term><option>-w</option></term>
308104349Sphk        <listitem>
309104349Sphk		<para>
310178848Scokane  Enables support for Windows code pages.
311178848Scokane  Normally, <command>&dhpackage;</command> will throw an error if it
312178848Scokane  runs across an encoding that it is not equipped to handle itself.  With
313178848Scokane  <option>-w</option>, <command>&dhpackage;</command> will try to use a Windows code
314178848Scokane  page.  See also <option>-e</option>.
315104349Sphk	   </para>
316104349Sphk        </listitem>
317104349Sphk      </varlistentry>
318104349Sphk
319104349Sphk      <varlistentry>
320104349Sphk        <term><option>-x</option></term>
321104349Sphk        <listitem>
322104349Sphk		<para>
323104349Sphk  Turns on parsing external entities.
324104349Sphk  </para>
325104349Sphk<para>
326104349Sphk  Non-validating parsers are not required to resolve external
327104349Sphk  entities, or even expand entities at all.
328104349Sphk  Expat always expands internal entities (?),
329104349Sphk  but external entity parsing must be enabled explicitly.
330104349Sphk  </para>
331104349Sphk  <para>
332104349Sphk  External entities are simply entities that obtain their
333104349Sphk  data from outside the XML file currently being parsed.
334104349Sphk  </para>
335104349Sphk  <para>
336104349Sphk  This is an example of an internal entity:
337104349Sphk<literallayout>
338104349Sphk&lt;!ENTITY vers '1.0.2'&gt;
339104349Sphk</literallayout>
340104349Sphk  </para>
341104349Sphk  <para>
342104349Sphk  And here are some examples of external entities:
343104349Sphk
344104349Sphk<literallayout>
345104349Sphk&lt;!ENTITY header SYSTEM "header-&amp;vers;.xml"&gt;  (parsed)
346104349Sphk&lt;!ENTITY logo SYSTEM "logo.png" PNG&gt;         (unparsed)
347104349Sphk</literallayout>
348104349Sphk
349104349Sphk	   </para>
350104349Sphk        </listitem>
351104349Sphk      </varlistentry>
352104349Sphk
353104349Sphk      <varlistentry>
354104349Sphk        <term><option>--</option></term>
355104349Sphk        <listitem>
356104349Sphk		<para>
357178848Scokane    (Two hyphens.)
358178848Scokane    Terminates the list of options.  This is only needed if a filename
359178848Scokane    starts with a hyphen.  For example:
360104349Sphk	   </para>
361178848Scokane<literallayout>
362178848Scokane&dhpackage; -- -myfile.xml
363178848Scokane</literallayout>
364178848Scokane		<para>
365178848Scokane    will run <command>&dhpackage;</command> on the file
366178848Scokane    <filename>-myfile.xml</filename>.
367178848Scokane	   </para>
368104349Sphk        </listitem>
369104349Sphk      </varlistentry>
370104349Sphk    </variablelist>
371104349Sphk
372104349Sphk	<para>
373178848Scokane    Older versions of <command>&dhpackage;</command> do not support
374178848Scokane    reading from standard input.
375104349Sphk	</para>
376104349Sphk  </refsect1>
377104349Sphk
378104349Sphk  <refsect1>
379104349Sphk  <title>OUTPUT</title>
380104349Sphk    <para>
381178848Scokane	If an input file is not well-formed,
382178848Scokane	<command>&dhpackage;</command> prints a single line describing
383178848Scokane	the problem to standard output.  If a file is well-formed,
384178848Scokane	<command>&dhpackage;</command> outputs nothing.
385104349Sphk	Note that the result code is <emphasis>not</emphasis> set.
386104349Sphk	</para>
387104349Sphk  </refsect1>
388104349Sphk  
389104349Sphk  <refsect1>
390104349Sphk    <title>BUGS</title>
391104349Sphk	<para>
392178848Scokane	<command>&dhpackage;</command> returns a 0 (no error) result,
393178848Scokane	even if the file is not well-formed.  There is no good way for
394178848Scokane	a program to use <command>&dhpackage;</command> to quickly
395178848Scokane	check a file -- it must parse <command>&dhpackage;</command>'s
396178848Scokane	standard output.
397104349Sphk	</para>
398104349Sphk	<para>
399178848Scokane	The errors should go to standard error, not standard output.
400104349Sphk	</para>
401104349Sphk	<para>
402178848Scokane	There should be a way to get <option>-d</option> to send its
403178848Scokane	output to standard output rather than forcing the user to send
404178848Scokane	it to a file.
405104349Sphk	</para>
406178848Scokane	<para>
407178848Scokane	I have no idea why anyone would want to use the
408178848Scokane	<option>-d</option>, <option>-c</option>, and
409178848Scokane	<option>-m</option> options.  If someone could explain it to
410178848Scokane	me, I'd like to add this information to this manpage.
411178848Scokane	</para>
412104349Sphk  </refsect1>
413104349Sphk
414104349Sphk  <refsect1>
415104349Sphk    <title>ALTERNATIVES</title>
416104349Sphk	<para>
417104349Sphk	  Here are some XML validators on the web:
418104349Sphk
419104349Sphk<literallayout>
420104349Sphkhttp://www.hcrc.ed.ac.uk/~richard/xml-check.html
421104349Sphkhttp://www.stg.brown.edu/service/xmlvalid/
422104349Sphkhttp://www.scripting.com/frontier5/xml/code/xmlValidator.html
423104349Sphkhttp://www.xml.com/pub/a/tools/ruwf/check.html
424104349Sphk</literallayout>
425104349Sphk
426104349Sphk		 </para>
427104349Sphk  </refsect1>
428104349Sphk
429104349Sphk  <refsect1>
430104349Sphk    <title>SEE ALSO</title>
431104349Sphk	<para>
432104349Sphk
433104349Sphk<literallayout>
434104349SphkThe Expat home page:        http://www.libexpat.org/
435104349SphkThe W3 XML specification:   http://www.w3.org/TR/REC-xml
436104349Sphk</literallayout>
437104349Sphk
438104349Sphk	</para>
439104349Sphk  </refsect1>
440104349Sphk
441104349Sphk  <refsect1>
442104349Sphk    <title>AUTHOR</title>
443104349Sphk    <para>
444104349Sphk	  This manual page was written by &dhusername; &dhemail; for
445104349Sphk      the &debian; system (but may be used by others).  Permission is
446104349Sphk      granted to copy, distribute and/or modify this document under
447104349Sphk      the terms of the <acronym>GNU</acronym> Free Documentation
448104349Sphk      License, Version 1.1.
449104349Sphk	</para>
450104349Sphk  </refsect1>
451104349Sphk</refentry>
452104349Sphk
453104349Sphk<!-- Keep this comment at the end of the file
454104349SphkLocal variables:
455104349Sphkmode: sgml
456104349Sphksgml-omittag:t
457104349Sphksgml-shorttag:t
458104349Sphksgml-minimize-attributes:nil
459104349Sphksgml-always-quote-attributes:t
460104349Sphksgml-indent-step:2
461104349Sphksgml-indent-data:t
462104349Sphksgml-parent-document:nil
463104349Sphksgml-default-dtd-file:nil
464104349Sphksgml-exposed-tags:nil
465104349Sphksgml-local-catalogs:nil
466104349Sphksgml-local-ecat-files:nil
467104349SphkEnd:
468104349Sphk-->
469