1302260Sdelphij<!DOCTYPE refentry [
2104349Sphk  <!-- Fill in your name for FIRSTNAME and SURNAME. -->
3104349Sphk  <!ENTITY dhfirstname "<firstname>Scott</firstname>">
4104349Sphk  <!ENTITY dhsurname   "<surname>Bronson</surname>">
5104349Sphk  <!-- Please adjust the date whenever revising the manpage. -->
6302260Sdelphij  <!ENTITY dhdate      "<date>March 11, 2016</date>">
7104349Sphk  <!-- SECTION should be 1-8, maybe w/ subsection; other parameters are
8104349Sphk       allowed: see man(7), man(1). -->
9104349Sphk  <!ENTITY dhsection   "<manvolnum>1</manvolnum>">
10104349Sphk  <!ENTITY dhemail     "<email>bronson@rinspin.com</email>">
11104349Sphk  <!ENTITY dhusername  "Scott Bronson">
12104349Sphk  <!ENTITY dhucpackage "<refentrytitle>XMLWF</refentrytitle>">
13104349Sphk  <!ENTITY dhpackage   "xmlwf">
14104349Sphk
15104349Sphk  <!ENTITY debian      "<productname>Debian GNU/Linux</productname>">
16104349Sphk  <!ENTITY gnu         "<acronym>GNU</acronym>">
17104349Sphk]>
18104349Sphk
19104349Sphk<refentry>
20104349Sphk  <refentryinfo>
21104349Sphk    <address>
22104349Sphk      &dhemail;
23104349Sphk    </address>
24104349Sphk    <author>
25104349Sphk      &dhfirstname;
26104349Sphk      &dhsurname;
27104349Sphk    </author>
28104349Sphk    <copyright>
29104349Sphk      <year>2001</year>
30104349Sphk      <holder>&dhusername;</holder>
31104349Sphk    </copyright>
32104349Sphk    &dhdate;
33104349Sphk  </refentryinfo>
34104349Sphk  <refmeta>
35104349Sphk    &dhucpackage;
36104349Sphk
37104349Sphk    &dhsection;
38104349Sphk  </refmeta>
39104349Sphk  <refnamediv>
40104349Sphk    <refname>&dhpackage;</refname>
41104349Sphk
42104349Sphk    <refpurpose>Determines if an XML document is well-formed</refpurpose>
43104349Sphk  </refnamediv>
44104349Sphk  <refsynopsisdiv>
45104349Sphk    <cmdsynopsis>
46104349Sphk      <command>&dhpackage;</command>
47104349Sphk	  <arg><option>-s</option></arg>
48104349Sphk	  <arg><option>-n</option></arg>
49104349Sphk	  <arg><option>-p</option></arg>
50104349Sphk	  <arg><option>-x</option></arg>
51104349Sphk
52104349Sphk	  <arg><option>-e <replaceable>encoding</replaceable></option></arg>
53104349Sphk	  <arg><option>-w</option></arg>
54104349Sphk
55104349Sphk	  <arg><option>-d <replaceable>output-dir</replaceable></option></arg>
56104349Sphk	  <arg><option>-c</option></arg>
57104349Sphk	  <arg><option>-m</option></arg>
58104349Sphk
59104349Sphk	  <arg><option>-r</option></arg>
60104349Sphk	  <arg><option>-t</option></arg>
61104349Sphk
62104349Sphk	  <arg><option>-v</option></arg>
63104349Sphk
64104349Sphk	  <arg>file ...</arg>
65104349Sphk    </cmdsynopsis>
66104349Sphk  </refsynopsisdiv>
67104349Sphk 
68104349Sphk  <refsect1>
69104349Sphk    <title>DESCRIPTION</title>
70104349Sphk
71104349Sphk    <para>
72178848Scokane	<command>&dhpackage;</command> uses the Expat library to
73178848Scokane	determine if an XML document is well-formed.  It is
74178848Scokane	non-validating.
75104349Sphk	</para>
76104349Sphk
77104349Sphk	<para>
78178848Scokane	If you do not specify any files on the command line, and you
79178848Scokane	have a recent version of <command>&dhpackage;</command>, the
80178848Scokane	input file will be read from standard input.
81104349Sphk	</para>
82104349Sphk
83104349Sphk  </refsect1>
84104349Sphk
85104349Sphk  <refsect1>
86104349Sphk    <title>WELL-FORMED DOCUMENTS</title>
87104349Sphk
88104349Sphk	<para>
89104349Sphk	  A well-formed document must adhere to the
90104349Sphk	  following rules:
91104349Sphk	</para>
92104349Sphk
93104349Sphk	<itemizedlist>
94104349Sphk      <listitem><para>
95104349Sphk	    The file begins with an XML declaration.  For instance,
96104349Sphk		<literal>&lt;?xml version="1.0" standalone="yes"?&gt;</literal>.
97178848Scokane		<emphasis>NOTE:</emphasis>
98178848Scokane		<command>&dhpackage;</command> does not currently
99104349Sphk		check for a valid XML declaration.
100104349Sphk      </para></listitem>
101104349Sphk      <listitem><para>
102104349Sphk		Every start tag is either empty (&lt;tag/&gt;)
103104349Sphk		or has a corresponding end tag.
104104349Sphk      </para></listitem>
105104349Sphk      <listitem><para>
106104349Sphk	    There is exactly one root element.  This element must contain
107104349Sphk		all other elements in the document.  Only comments, white
108104349Sphk		space, and processing instructions may come after the close
109104349Sphk		of the root element.
110104349Sphk      </para></listitem>
111104349Sphk      <listitem><para>
112104349Sphk		All elements nest properly.
113104349Sphk      </para></listitem>
114104349Sphk      <listitem><para>
115104349Sphk		All attribute values are enclosed in quotes (either single
116104349Sphk		or double).
117104349Sphk      </para></listitem>
118104349Sphk    </itemizedlist>
119104349Sphk
120104349Sphk	<para>
121104349Sphk	  If the document has a DTD, and it strictly complies with that
122104349Sphk	  DTD, then the document is also considered <emphasis>valid</emphasis>.
123178848Scokane	  <command>&dhpackage;</command> is a non-validating parser --
124178848Scokane	  it does not check the DTD.  However, it does support
125178848Scokane	  external entities (see the <option>-x</option> option).
126104349Sphk	</para>
127104349Sphk  </refsect1>
128104349Sphk
129104349Sphk  <refsect1>
130104349Sphk    <title>OPTIONS</title>
131104349Sphk
132104349Sphk<para>
133104349SphkWhen an option includes an argument, you may specify the argument either
134178848Scokaneseparately ("<option>-d</option> output") or concatenated with the
135178848Scokaneoption ("<option>-d</option>output").  <command>&dhpackage;</command>
136178848Scokanesupports both.
137104349Sphk</para>
138104349Sphk
139104349Sphk    <variablelist>
140104349Sphk
141104349Sphk      <varlistentry>
142104349Sphk        <term><option>-c</option></term>
143104349Sphk        <listitem>
144104349Sphk		<para>
145178848Scokane  If the input file is well-formed and <command>&dhpackage;</command>
146178848Scokane  doesn't encounter any errors, the input file is simply copied to
147104349Sphk  the output directory unchanged.
148178848Scokane  This implies no namespaces (turns off <option>-n</option>) and
149178848Scokane  requires <option>-d</option> to specify an output directory.
150104349Sphk  		</para>
151104349Sphk        </listitem>
152104349Sphk      </varlistentry>
153104349Sphk
154104349Sphk      <varlistentry>
155104349Sphk        <term><option>-d <replaceable>output-dir</replaceable></option></term>
156104349Sphk        <listitem>
157104349Sphk		<para>
158104349Sphk  Specifies a directory to contain transformed
159104349Sphk  representations of the input files.
160178848Scokane  By default, <option>-d</option> outputs a canonical representation
161104349Sphk  (described below).
162178848Scokane  You can select different output formats using <option>-c</option>
163178848Scokane  and <option>-m</option>.
164104349Sphk	  </para>
165104349Sphk	  <para>
166104349Sphk  The output filenames will
167104349Sphk  be exactly the same as the input filenames or "STDIN" if the input is
168178848Scokane  coming from standard input.  Therefore, you must be careful that the
169104349Sphk  output file does not go into the same directory as the input
170178848Scokane  file.  Otherwise, <command>&dhpackage;</command> will delete the
171178848Scokane  input file before it generates the output file (just like running
172104349Sphk  <literal>cat &lt; file &gt; file</literal> in most shells).
173104349Sphk	  </para>
174104349Sphk	  <para> 
175104349Sphk  Two structurally equivalent XML documents have a byte-for-byte
176104349Sphk  identical canonical XML representation.
177104349Sphk  Note that ignorable white space is considered significant and
178104349Sphk  is treated equivalently to data.
179104349Sphk  More on canonical XML can be found at
180104349Sphk  http://www.jclark.com/xml/canonxml.html .
181104349Sphk	  </para>
182104349Sphk        </listitem>
183104349Sphk      </varlistentry>
184104349Sphk
185104349Sphk      <varlistentry>
186104349Sphk        <term><option>-e <replaceable>encoding</replaceable></option></term>
187104349Sphk        <listitem>
188104349Sphk		<para>
189104349Sphk   Specifies the character encoding for the document, overriding
190178848Scokane   any document encoding declaration.  <command>&dhpackage;</command>
191178848Scokane   supports four built-in encodings:
192104349Sphk   	<literal>US-ASCII</literal>,
193104349Sphk	<literal>UTF-8</literal>,
194104349Sphk	<literal>UTF-16</literal>, and
195178848Scokane	<literal>ISO-8859-1</literal>.
196178848Scokane   Also see the <option>-w</option> option.
197104349Sphk	   </para>
198104349Sphk        </listitem>
199104349Sphk      </varlistentry>
200104349Sphk
201104349Sphk      <varlistentry>
202104349Sphk        <term><option>-m</option></term>
203104349Sphk        <listitem>
204104349Sphk		<para>
205104349Sphk  Outputs some strange sort of XML file that completely
206302260Sdelphij  describes the input file, including character positions.
207178848Scokane  Requires <option>-d</option> to specify an output directory.
208104349Sphk	   </para>
209104349Sphk        </listitem>
210104349Sphk      </varlistentry>
211104349Sphk
212104349Sphk      <varlistentry>
213104349Sphk        <term><option>-n</option></term>
214104349Sphk        <listitem>
215104349Sphk		<para>
216104349Sphk  Turns on namespace processing.  <!-- TODO: describe namespaces -->
217178848Scokane  <option>-c</option> disables namespaces.
218104349Sphk	   </para>
219104349Sphk        </listitem>
220104349Sphk      </varlistentry>
221104349Sphk
222104349Sphk      <varlistentry>
223104349Sphk        <term><option>-p</option></term>
224104349Sphk        <listitem>
225104349Sphk		<para>
226104349Sphk    Tells <command>&dhpackage;</command> to process external DTDs and parameter
227104349Sphk    entities.
228104349Sphk	 </para>
229104349Sphk	 <para>
230178848Scokane   Normally <command>&dhpackage;</command> never parses parameter
231178848Scokane   entities.  <option>-p</option> tells it to always parse them.
232178848Scokane   <option>-p</option> implies <option>-x</option>.
233104349Sphk	   </para>
234104349Sphk        </listitem>
235104349Sphk      </varlistentry>
236104349Sphk
237104349Sphk      <varlistentry>
238104349Sphk        <term><option>-r</option></term>
239104349Sphk        <listitem>
240104349Sphk		<para>
241178848Scokane   Normally <command>&dhpackage;</command> memory-maps the XML file
242178848Scokane   before parsing; this can result in faster parsing on many
243178848Scokane   platforms.
244178848Scokane   <option>-r</option> turns off memory-mapping and uses normal file
245178848Scokane   I/O calls instead.
246104349Sphk   Of course, memory-mapping is automatically turned off
247178848Scokane   when reading from standard input.
248104349Sphk	   </para>
249178848Scokane		<para>
250178848Scokane   Use of memory-mapping can cause some platforms to report
251178848Scokane   substantially higher memory usage for
252178848Scokane   <command>&dhpackage;</command>, but this appears to be a matter of
253178848Scokane   the operating system reporting memory in a strange way; there is
254178848Scokane   not a leak in <command>&dhpackage;</command>.
255178848Scokane           </para>
256104349Sphk        </listitem>
257104349Sphk      </varlistentry>
258104349Sphk
259104349Sphk      <varlistentry>
260104349Sphk        <term><option>-s</option></term>
261104349Sphk        <listitem>
262104349Sphk		<para>
263104349Sphk  Prints an error if the document is not standalone. 
264104349Sphk  A document is standalone if it has no external subset and no
265104349Sphk  references to parameter entities.
266104349Sphk	   </para>
267104349Sphk        </listitem>
268104349Sphk      </varlistentry>
269104349Sphk
270104349Sphk      <varlistentry>
271104349Sphk        <term><option>-t</option></term>
272104349Sphk        <listitem>
273104349Sphk		<para>
274104349Sphk  Turns on timings.  This tells Expat to parse the entire file,
275104349Sphk  but not perform any processing.
276104349Sphk  This gives a fairly accurate idea of the raw speed of Expat itself
277104349Sphk  without client overhead.
278178848Scokane  <option>-t</option> turns off most of the output options
279302260Sdelphij  (<option>-d</option>, <option>-m</option>, <option>-c</option>, ...).
280104349Sphk	   </para>
281104349Sphk        </listitem>
282104349Sphk      </varlistentry>
283104349Sphk
284104349Sphk      <varlistentry>
285104349Sphk        <term><option>-v</option></term>
286104349Sphk        <listitem>
287104349Sphk		<para>
288178848Scokane  Prints the version of the Expat library being used, including some
289178848Scokane  information on the compile-time configuration of the library, and
290178848Scokane  then exits.
291104349Sphk	   </para>
292104349Sphk        </listitem>
293104349Sphk      </varlistentry>
294104349Sphk
295104349Sphk      <varlistentry>
296104349Sphk        <term><option>-w</option></term>
297104349Sphk        <listitem>
298104349Sphk		<para>
299178848Scokane  Enables support for Windows code pages.
300178848Scokane  Normally, <command>&dhpackage;</command> will throw an error if it
301178848Scokane  runs across an encoding that it is not equipped to handle itself.  With
302178848Scokane  <option>-w</option>, <command>&dhpackage;</command> will try to use a Windows code
303178848Scokane  page.  See also <option>-e</option>.
304104349Sphk	   </para>
305104349Sphk        </listitem>
306104349Sphk      </varlistentry>
307104349Sphk
308104349Sphk      <varlistentry>
309104349Sphk        <term><option>-x</option></term>
310104349Sphk        <listitem>
311104349Sphk		<para>
312104349Sphk  Turns on parsing external entities.
313104349Sphk  </para>
314104349Sphk<para>
315104349Sphk  Non-validating parsers are not required to resolve external
316104349Sphk  entities, or even expand entities at all.
317104349Sphk  Expat always expands internal entities (?),
318104349Sphk  but external entity parsing must be enabled explicitly.
319104349Sphk  </para>
320104349Sphk  <para>
321104349Sphk  External entities are simply entities that obtain their
322104349Sphk  data from outside the XML file currently being parsed.
323104349Sphk  </para>
324104349Sphk  <para>
325104349Sphk  This is an example of an internal entity:
326104349Sphk<literallayout>
327104349Sphk&lt;!ENTITY vers '1.0.2'&gt;
328104349Sphk</literallayout>
329104349Sphk  </para>
330104349Sphk  <para>
331104349Sphk  And here are some examples of external entities:
332104349Sphk
333104349Sphk<literallayout>
334104349Sphk&lt;!ENTITY header SYSTEM "header-&amp;vers;.xml"&gt;  (parsed)
335104349Sphk&lt;!ENTITY logo SYSTEM "logo.png" NDATA PNG&gt;   (unparsed)
336104349Sphk</literallayout>
337104349Sphk
338104349Sphk	   </para>
339104349Sphk        </listitem>
340104349Sphk      </varlistentry>
341104349Sphk
342104349Sphk      <varlistentry>
343104349Sphk        <term><option>--</option></term>
344104349Sphk        <listitem>
345104349Sphk		<para>
346178848Scokane    (Two hyphens.)
347178848Scokane    Terminates the list of options.  This is only needed if a filename
348178848Scokane    starts with a hyphen.  For example:
349104349Sphk	   </para>
350178848Scokane<literallayout>
351178848Scokane&dhpackage; -- -myfile.xml
352178848Scokane</literallayout>
353178848Scokane		<para>
354178848Scokane    will run <command>&dhpackage;</command> on the file
355178848Scokane    <filename>-myfile.xml</filename>.
356178848Scokane	   </para>
357104349Sphk        </listitem>
358104349Sphk      </varlistentry>
359104349Sphk    </variablelist>
360104349Sphk
361104349Sphk	<para>
362178848Scokane    Older versions of <command>&dhpackage;</command> do not support
363178848Scokane    reading from standard input.
364104349Sphk	</para>
365104349Sphk  </refsect1>
366104349Sphk
367104349Sphk  <refsect1>
368104349Sphk  <title>OUTPUT</title>
369104349Sphk    <para>
370178848Scokane	If an input file is not well-formed,
371178848Scokane	<command>&dhpackage;</command> prints a single line describing
372178848Scokane	the problem to standard output.  If a file is well-formed,
373178848Scokane	<command>&dhpackage;</command> outputs nothing.
374104349Sphk	Note that the result code is <emphasis>not</emphasis> set.
375104349Sphk	</para>
376104349Sphk  </refsect1>
377104349Sphk  
378104349Sphk  <refsect1>
379104349Sphk    <title>BUGS</title>
380104349Sphk	<para>
381178848Scokane	<command>&dhpackage;</command> returns a result code of 0 (no error),
382178848Scokane	even if the file is not well-formed.  There is no good way for
383178848Scokane	a program to use <command>&dhpackage;</command> to quickly
384178848Scokane	check a file -- it must parse <command>&dhpackage;</command>'s
385178848Scokane	standard output.
386104349Sphk	</para>
387104349Sphk	<para>
388178848Scokane	The errors should go to standard error, not standard output.
389104349Sphk	</para>
390104349Sphk	<para>
391178848Scokane	There should be a way to get <option>-d</option> to send its
392178848Scokane	output to standard output rather than forcing the user to send
393178848Scokane	it to a file.
394104349Sphk	</para>
395178848Scokane	<para>
396178848Scokane	I have no idea why anyone would want to use the
397178848Scokane	<option>-d</option>, <option>-c</option>, and
398178848Scokane	<option>-m</option> options.  If someone could explain it to
399178848Scokane	me, I'd like to add this information to this manpage.
400178848Scokane	</para>
401104349Sphk  </refsect1>
402104349Sphk
403104349Sphk  <refsect1>
404104349Sphk    <title>ALTERNATIVES</title>
405104349Sphk	<para>
406104349Sphk	  Here are some XML validators on the web:
407104349Sphk
408104349Sphk<literallayout>
409104349Sphkhttp://www.hcrc.ed.ac.uk/~richard/xml-check.html
410104349Sphkhttp://www.stg.brown.edu/service/xmlvalid/
411104349Sphkhttp://www.scripting.com/frontier5/xml/code/xmlValidator.html
412104349Sphkhttp://www.xml.com/pub/a/tools/ruwf/check.html
413104349Sphk</literallayout>
414104349Sphk
415104349Sphk		 </para>
416104349Sphk  </refsect1>
417104349Sphk
418104349Sphk  <refsect1>
419104349Sphk    <title>SEE ALSO</title>
420104349Sphk	<para>
421104349Sphk
422104349Sphk<literallayout>
423104349SphkThe Expat home page:        http://www.libexpat.org/
424104349SphkThe W3 XML specification:   http://www.w3.org/TR/REC-xml
425104349Sphk</literallayout>
426104349Sphk
427104349Sphk	</para>
428104349Sphk  </refsect1>
429104349Sphk
430104349Sphk  <refsect1>
431104349Sphk    <title>AUTHOR</title>
432104349Sphk    <para>
433104349Sphk	  This manual page was written by &dhusername; &dhemail; for
434104349Sphk      the &debian; system (but may be used by others).  Permission is
435104349Sphk      granted to copy, distribute and/or modify this document under
436104349Sphk      the terms of the <acronym>GNU</acronym> Free Documentation
437104349Sphk      License, Version 1.1.
438104349Sphk	</para>
439104349Sphk  </refsect1>
440104349Sphk</refentry>
441