xmlreaderimpl/support/expat-2.0.1/doc/xmlwf.sgml
changeset 300 b6d834208d33
parent 299 f06c5eba524d
child 301 1bfe4ecc6c10
equal deleted inserted replaced
299:f06c5eba524d 300:b6d834208d33
     1 <!doctype refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN" [
       
     2 
       
     3 <!-- Process this file with docbook-to-man to generate an nroff manual
       
     4      page: `docbook-to-man manpage.sgml > manpage.1'.  You may view
       
     5      the manual page with: `docbook-to-man manpage.sgml | nroff -man |
       
     6      less'.  A typical entry in a Makefile or Makefile.am is:
       
     7 
       
     8 manpage.1: manpage.sgml
       
     9 	docbook-to-man $< > $@
       
    10   -->
       
    11 
       
    12   <!-- Fill in your name for FIRSTNAME and SURNAME. -->
       
    13   <!ENTITY dhfirstname "<firstname>Scott</firstname>">
       
    14   <!ENTITY dhsurname   "<surname>Bronson</surname>">
       
    15   <!-- Please adjust the date whenever revising the manpage. -->
       
    16   <!ENTITY dhdate      "<date>December  5, 2001</date>">
       
    17   <!-- SECTION should be 1-8, maybe w/ subsection; other parameters are
       
    18        allowed: see man(7), man(1). -->
       
    19   <!ENTITY dhsection   "<manvolnum>1</manvolnum>">
       
    20   <!ENTITY dhemail     "<email>bronson@rinspin.com</email>">
       
    21   <!ENTITY dhusername  "Scott Bronson">
       
    22   <!ENTITY dhucpackage "<refentrytitle>XMLWF</refentrytitle>">
       
    23   <!ENTITY dhpackage   "xmlwf">
       
    24 
       
    25   <!ENTITY debian      "<productname>Debian GNU/Linux</productname>">
       
    26   <!ENTITY gnu         "<acronym>GNU</acronym>">
       
    27 ]>
       
    28 
       
    29 <refentry>
       
    30   <refentryinfo>
       
    31     <address>
       
    32       &dhemail;
       
    33     </address>
       
    34     <author>
       
    35       &dhfirstname;
       
    36       &dhsurname;
       
    37     </author>
       
    38     <copyright>
       
    39       <year>2001</year>
       
    40       <holder>&dhusername;</holder>
       
    41     </copyright>
       
    42     &dhdate;
       
    43   </refentryinfo>
       
    44   <refmeta>
       
    45     &dhucpackage;
       
    46 
       
    47     &dhsection;
       
    48   </refmeta>
       
    49   <refnamediv>
       
    50     <refname>&dhpackage;</refname>
       
    51 
       
    52     <refpurpose>Determines if an XML document is well-formed</refpurpose>
       
    53   </refnamediv>
       
    54   <refsynopsisdiv>
       
    55     <cmdsynopsis>
       
    56       <command>&dhpackage;</command>
       
    57 	  <arg><option>-s</option></arg>
       
    58 	  <arg><option>-n</option></arg>
       
    59 	  <arg><option>-p</option></arg>
       
    60 	  <arg><option>-x</option></arg>
       
    61 
       
    62 	  <arg><option>-e <replaceable>encoding</replaceable></option></arg>
       
    63 	  <arg><option>-w</option></arg>
       
    64 
       
    65 	  <arg><option>-d <replaceable>output-dir</replaceable></option></arg>
       
    66 	  <arg><option>-c</option></arg>
       
    67 	  <arg><option>-m</option></arg>
       
    68 
       
    69 	  <arg><option>-r</option></arg>
       
    70 	  <arg><option>-t</option></arg>
       
    71 
       
    72 	  <arg><option>-v</option></arg>
       
    73 
       
    74 	  <arg>file ...</arg>
       
    75     </cmdsynopsis>
       
    76   </refsynopsisdiv>
       
    77  
       
    78   <refsect1>
       
    79     <title>DESCRIPTION</title>
       
    80 
       
    81     <para>
       
    82 	<command>&dhpackage;</command> uses the Expat library to
       
    83 	determine if an XML document is well-formed.  It is
       
    84 	non-validating.
       
    85 	</para>
       
    86 
       
    87 	<para>
       
    88 	If you do not specify any files on the command-line, and you
       
    89 	have a recent version of <command>&dhpackage;</command>, the
       
    90 	input file will be read from standard input.
       
    91 	</para>
       
    92 
       
    93   </refsect1>
       
    94 
       
    95   <refsect1>
       
    96     <title>WELL-FORMED DOCUMENTS</title>
       
    97 
       
    98 	<para>
       
    99 	  A well-formed document must adhere to the
       
   100 	  following rules:
       
   101 	</para>
       
   102 
       
   103 	<itemizedlist>
       
   104       <listitem><para>
       
   105 	    The file begins with an XML declaration.  For instance,
       
   106 		<literal>&lt;?xml version="1.0" standalone="yes"?&gt;</literal>.
       
   107 		<emphasis>NOTE:</emphasis>
       
   108 		<command>&dhpackage;</command> does not currently
       
   109 		check for a valid XML declaration.
       
   110       </para></listitem>
       
   111       <listitem><para>
       
   112 		Every start tag is either empty (&lt;tag/&gt;)
       
   113 		or has a corresponding end tag.
       
   114       </para></listitem>
       
   115       <listitem><para>
       
   116 	    There is exactly one root element.  This element must contain
       
   117 		all other elements in the document.  Only comments, white
       
   118 		space, and processing instructions may come after the close
       
   119 		of the root element.
       
   120       </para></listitem>
       
   121       <listitem><para>
       
   122 		All elements nest properly.
       
   123       </para></listitem>
       
   124       <listitem><para>
       
   125 		All attribute values are enclosed in quotes (either single
       
   126 		or double).
       
   127       </para></listitem>
       
   128     </itemizedlist>
       
   129 
       
   130 	<para>
       
   131 	  If the document has a DTD, and it strictly complies with that
       
   132 	  DTD, then the document is also considered <emphasis>valid</emphasis>.
       
   133 	  <command>&dhpackage;</command> is a non-validating parser --
       
   134 	  it does not check the DTD.  However, it does support
       
   135 	  external entities (see the <option>-x</option> option).
       
   136 	</para>
       
   137   </refsect1>
       
   138 
       
   139   <refsect1>
       
   140     <title>OPTIONS</title>
       
   141 
       
   142 <para>
       
   143 When an option includes an argument, you may specify the argument either
       
   144 separately ("<option>-d</option> output") or concatenated with the
       
   145 option ("<option>-d</option>output").  <command>&dhpackage;</command>
       
   146 supports both.
       
   147 </para>
       
   148 
       
   149     <variablelist>
       
   150 
       
   151       <varlistentry>
       
   152         <term><option>-c</option></term>
       
   153         <listitem>
       
   154 		<para>
       
   155   If the input file is well-formed and <command>&dhpackage;</command>
       
   156   doesn't encounter any errors, the input file is simply copied to
       
   157   the output directory unchanged.
       
   158   This implies no namespaces (turns off <option>-n</option>) and
       
   159   requires <option>-d</option> to specify an output directory.
       
   160   		</para>
       
   161         </listitem>
       
   162       </varlistentry>
       
   163 
       
   164       <varlistentry>
       
   165         <term><option>-d output-dir</option></term>
       
   166         <listitem>
       
   167 		<para>
       
   168   Specifies a directory to contain transformed
       
   169   representations of the input files.
       
   170   By default, <option>-d</option> outputs a canonical representation
       
   171   (described below).
       
   172   You can select different output formats using <option>-c</option>
       
   173   and <option>-m</option>.
       
   174 	  </para>
       
   175 	  <para>
       
   176   The output filenames will
       
   177   be exactly the same as the input filenames or "STDIN" if the input is
       
   178   coming from standard input.  Therefore, you must be careful that the
       
   179   output file does not go into the same directory as the input
       
   180   file.  Otherwise, <command>&dhpackage;</command> will delete the
       
   181   input file before it generates the output file (just like running
       
   182   <literal>cat &lt; file &gt; file</literal> in most shells).
       
   183 	  </para>
       
   184 	  <para> 
       
   185   Two structurally equivalent XML documents have a byte-for-byte
       
   186   identical canonical XML representation.
       
   187   Note that ignorable white space is considered significant and
       
   188   is treated equivalently to data.
       
   189   More on canonical XML can be found at
       
   190   http://www.jclark.com/xml/canonxml.html .
       
   191 	  </para>
       
   192         </listitem>
       
   193       </varlistentry>
       
   194 
       
   195       <varlistentry>
       
   196         <term><option>-e encoding</option></term>
       
   197         <listitem>
       
   198 		<para>
       
   199    Specifies the character encoding for the document, overriding
       
   200    any document encoding declaration.  <command>&dhpackage;</command>
       
   201    supports four built-in encodings:
       
   202    	<literal>US-ASCII</literal>,
       
   203 	<literal>UTF-8</literal>,
       
   204 	<literal>UTF-16</literal>, and
       
   205 	<literal>ISO-8859-1</literal>.
       
   206    Also see the <option>-w</option> option.
       
   207 	   </para>
       
   208         </listitem>
       
   209       </varlistentry>
       
   210 
       
   211       <varlistentry>
       
   212         <term><option>-m</option></term>
       
   213         <listitem>
       
   214 		<para>
       
   215   Outputs some strange sort of XML file that completely
       
   216   describes the input file, including character positions.
       
   217   Requires <option>-d</option> to specify an output directory.
       
   218 	   </para>
       
   219         </listitem>
       
   220       </varlistentry>
       
   221 
       
   222       <varlistentry>
       
   223         <term><option>-n</option></term>
       
   224         <listitem>
       
   225 		<para>
       
   226   Turns on namespace processing.  (describe namespaces)
       
   227   <option>-c</option> disables namespaces.
       
   228 	   </para>
       
   229         </listitem>
       
   230       </varlistentry>
       
   231 
       
   232       <varlistentry>
       
   233         <term><option>-p</option></term>
       
   234         <listitem>
       
   235 		<para>
       
   236     Tells xmlwf to process external DTDs and parameter
       
   237     entities.
       
   238 	 </para>
       
   239 	 <para>
       
   240    Normally <command>&dhpackage;</command> never parses parameter
       
   241    entities.  <option>-p</option> tells it to always parse them.
       
   242    <option>-p</option> implies <option>-x</option>.
       
   243 	   </para>
       
   244         </listitem>
       
   245       </varlistentry>
       
   246 
       
   247       <varlistentry>
       
   248         <term><option>-r</option></term>
       
   249         <listitem>
       
   250 		<para>
       
   251    Normally <command>&dhpackage;</command> memory-maps the XML file
       
   252    before parsing; this can result in faster parsing on many
       
   253    platforms.
       
   254    <option>-r</option> turns off memory-mapping and uses normal file
       
   255    IO calls instead.
       
   256    Of course, memory-mapping is automatically turned off
       
   257    when reading from standard input.
       
   258 	   </para>
       
   259 		<para>
       
   260    Use of memory-mapping can cause some platforms to report
       
   261    substantially higher memory usage for
       
   262    <command>&dhpackage;</command>, but this appears to be a matter of
       
   263    the operating system reporting memory in a strange way; there is
       
   264    not a leak in <command>&dhpackage;</command>.
       
   265            </para>
       
   266         </listitem>
       
   267       </varlistentry>
       
   268 
       
   269       <varlistentry>
       
   270         <term><option>-s</option></term>
       
   271         <listitem>
       
   272 		<para>
       
   273   Prints an error if the document is not standalone. 
       
   274   A document is standalone if it has no external subset and no
       
   275   references to parameter entities.
       
   276 	   </para>
       
   277         </listitem>
       
   278       </varlistentry>
       
   279 
       
   280       <varlistentry>
       
   281         <term><option>-t</option></term>
       
   282         <listitem>
       
   283 		<para>
       
   284   Turns on timings.  This tells Expat to parse the entire file,
       
   285   but not perform any processing.
       
   286   This gives a fairly accurate idea of the raw speed of Expat itself
       
   287   without client overhead.
       
   288   <option>-t</option> turns off most of the output options
       
   289   (<option>-d</option>, <option>-m</option>, <option>-c</option>,
       
   290   ...).
       
   291 	   </para>
       
   292         </listitem>
       
   293       </varlistentry>
       
   294 
       
   295       <varlistentry>
       
   296         <term><option>-v</option></term>
       
   297         <listitem>
       
   298 		<para>
       
   299   Prints the version of the Expat library being used, including some
       
   300   information on the compile-time configuration of the library, and
       
   301   then exits.
       
   302 	   </para>
       
   303         </listitem>
       
   304       </varlistentry>
       
   305 
       
   306       <varlistentry>
       
   307         <term><option>-w</option></term>
       
   308         <listitem>
       
   309 		<para>
       
   310   Enables support for Windows code pages.
       
   311   Normally, <command>&dhpackage;</command> will throw an error if it
       
   312   runs across an encoding that it is not equipped to handle itself.  With
       
   313   <option>-w</option>, &dhpackage; will try to use a Windows code
       
   314   page.  See also <option>-e</option>.
       
   315 	   </para>
       
   316         </listitem>
       
   317       </varlistentry>
       
   318 
       
   319       <varlistentry>
       
   320         <term><option>-x</option></term>
       
   321         <listitem>
       
   322 		<para>
       
   323   Turns on parsing external entities.
       
   324   </para>
       
   325 <para>
       
   326   Non-validating parsers are not required to resolve external
       
   327   entities, or even expand entities at all.
       
   328   Expat always expands internal entities (?),
       
   329   but external entity parsing must be enabled explicitly.
       
   330   </para>
       
   331   <para>
       
   332   External entities are simply entities that obtain their
       
   333   data from outside the XML file currently being parsed.
       
   334   </para>
       
   335   <para>
       
   336   This is an example of an internal entity:
       
   337 <literallayout>
       
   338 &lt;!ENTITY vers '1.0.2'&gt;
       
   339 </literallayout>
       
   340   </para>
       
   341   <para>
       
   342   And here are some examples of external entities:
       
   343 
       
   344 <literallayout>
       
   345 &lt;!ENTITY header SYSTEM "header-&amp;vers;.xml"&gt;  (parsed)
       
   346 &lt;!ENTITY logo SYSTEM "logo.png" PNG&gt;         (unparsed)
       
   347 </literallayout>
       
   348 
       
   349 	   </para>
       
   350         </listitem>
       
   351       </varlistentry>
       
   352 
       
   353       <varlistentry>
       
   354         <term><option>--</option></term>
       
   355         <listitem>
       
   356 		<para>
       
   357     (Two hyphens.)
       
   358     Terminates the list of options.  This is only needed if a filename
       
   359     starts with a hyphen.  For example:
       
   360 	   </para>
       
   361 <literallayout>
       
   362 &dhpackage; -- -myfile.xml
       
   363 </literallayout>
       
   364 		<para>
       
   365     will run <command>&dhpackage;</command> on the file
       
   366     <filename>-myfile.xml</filename>.
       
   367 	   </para>
       
   368         </listitem>
       
   369       </varlistentry>
       
   370     </variablelist>
       
   371 
       
   372 	<para>
       
   373     Older versions of <command>&dhpackage;</command> do not support
       
   374     reading from standard input.
       
   375 	</para>
       
   376   </refsect1>
       
   377 
       
   378   <refsect1>
       
   379   <title>OUTPUT</title>
       
   380     <para>
       
   381 	If an input file is not well-formed,
       
   382 	<command>&dhpackage;</command> prints a single line describing
       
   383 	the problem to standard output.  If a file is well-formed,
       
   384 	<command>&dhpackage;</command> outputs nothing.
       
   385 	Note that the result code is <emphasis>not</emphasis> set.
       
   386 	</para>
       
   387   </refsect1>
       
   388   
       
   389   <refsect1>
       
   390     <title>BUGS</title>
       
   391 	<para>
       
   392 	According to the W3C standard, an XML file without a
       
   393 	declaration at the beginning is not considered well-formed.
       
   394 	However, <command>&dhpackage;</command> allows this to pass.
       
   395 	</para>
       
   396 	<para>
       
   397 	<command>&dhpackage;</command> returns a 0 - noerr result,
       
   398 	even if the file is not well-formed.  There is no good way for
       
   399 	a program to use <command>&dhpackage;</command> to quickly
       
   400 	check a file -- it must parse <command>&dhpackage;</command>'s
       
   401 	standard output.
       
   402 	</para>
       
   403 	<para>
       
   404 	The errors should go to standard error, not standard output.
       
   405 	</para>
       
   406 	<para>
       
   407 	There should be a way to get <option>-d</option> to send its
       
   408 	output to standard output rather than forcing the user to send
       
   409 	it to a file.
       
   410 	</para>
       
   411 	<para>
       
   412 	I have no idea why anyone would want to use the
       
   413 	<option>-d</option>, <option>-c</option>, and
       
   414 	<option>-m</option> options.  If someone could explain it to
       
   415 	me, I'd like to add this information to this manpage.
       
   416 	</para>
       
   417   </refsect1>
       
   418 
       
   419   <refsect1>
       
   420     <title>ALTERNATIVES</title>
       
   421 	<para>
       
   422 	  Here are some XML validators on the web:
       
   423 
       
   424 <literallayout>
       
   425 http://www.hcrc.ed.ac.uk/~richard/xml-check.html
       
   426 http://www.stg.brown.edu/service/xmlvalid/
       
   427 http://www.scripting.com/frontier5/xml/code/xmlValidator.html
       
   428 http://www.xml.com/pub/a/tools/ruwf/check.html
       
   429 </literallayout>
       
   430 
       
   431 		 </para>
       
   432   </refsect1>
       
   433 
       
   434   <refsect1>
       
   435     <title>SEE ALSO</title>
       
   436 	<para>
       
   437 
       
   438 <literallayout>
       
   439 The Expat home page:        http://www.libexpat.org/
       
   440 The W3 XML specification:   http://www.w3.org/TR/REC-xml
       
   441 </literallayout>
       
   442 
       
   443 	</para>
       
   444   </refsect1>
       
   445 
       
   446   <refsect1>
       
   447     <title>AUTHOR</title>
       
   448     <para>
       
   449 	  This manual page was written by &dhusername; &dhemail; for
       
   450       the &debian; system (but may be used by others).  Permission is
       
   451       granted to copy, distribute and/or modify this document under
       
   452       the terms of the <acronym>GNU</acronym> Free Documentation
       
   453       License, Version 1.1.
       
   454 	</para>
       
   455   </refsect1>
       
   456 </refentry>
       
   457 
       
   458 <!-- Keep this comment at the end of the file
       
   459 Local variables:
       
   460 mode: sgml
       
   461 sgml-omittag:t
       
   462 sgml-shorttag:t
       
   463 sgml-minimize-attributes:nil
       
   464 sgml-always-quote-attributes:t
       
   465 sgml-indent-step:2
       
   466 sgml-indent-data:t
       
   467 sgml-parent-document:nil
       
   468 sgml-default-dtd-file:nil
       
   469 sgml-exposed-tags:nil
       
   470 sgml-local-catalogs:nil
       
   471 sgml-local-ecat-files:nil
       
   472 End:
       
   473 -->