xmlformat-1.04/ 0000755 0001750 0001750 00000000000 10470215403 012265 5 ustar paul devel xmlformat-1.04/xmlformat.pl 0000755 0001750 0001750 00000145424 10355364247 014665 0 ustar paul devel #! /usr/bin/perl -w
# vim:set ts=2 sw=2 expandtab:
# xmlformat - configurable XML file formatter/pretty-printer
# Copyright (c) 2004, 2005 Kitebird, LLC. All rights reserved.
# Some portions are based on the REX shallow XML parser, which
# is Copyright (c) 1998, Robert D. Cameron. These include the
# regular expression parsing variables and the shallow_parse()
# method.
# This software is licensed as described in the file LICENSE,
# which you should have received as part of this distribution.
# Syntax: xmlformat [config-file] xml-file
# Default config file is $ENV{XMLFORMAT_CONF} or ./xmlformat.conf, in that
# order.
# Paul DuBois
# paul@kitebird.com
# 2003-12-14
# The input document first is parsed into a list of strings. Each string
# represents one of the following:
# - text node
# - processing instruction (the XML declaration is treated as a PI)
# - comment
# - CDATA section
# - DOCTYPE declaration
# - element tag (either <abc>, </abc>, or <abc/>), *including attributes*
# Entities are left untouched. They appear in their original form as part
# of the text node in which they occur.
# The list of strings then is converted to a hierarchical structure.
# The document top level is represented by a reference to a list.
# Each list element is a reference to a node -- a hash that has "type"
# and "content" key/value pairs. The "type" key indicates the node
# type and has one of the following values:
# "text" - text node
# "pi" - processing instruction node
# "comment" - comment node
# "CDATA" - CDATA section node
# "DOCTYPE" - DOCTYPE node
# "elt" - element node
# (For purposes of this program, it's really only necessary to have "text",
# "elt", and "other". The types other than "text" and "elt" currently are
# all treated the same way.)
# For all but element nodes, the "content" value is the text of the node.
# For element nodes, the "content" hash is a reference to a list of
# nodes for the element's children. In addition, an element node has
# three additional key/value pairs:
# - The "name" value is the tag name within the opening tag, minus angle
# brackets or attributes.
# - The "open_tag" value is the full opening tag, which may also be the
# closing tag.
# - The "close_tag" value depends on the opening tag. If the open tag is
# "<abc>", the close tag is "</abc>". If the open tag is "<abc/>", the
# close tag is the empty string.
# If the tree structure is converted back into a string with
# tree_stringify(), the result can be compared to the input file
# as a regression test. The string should be identical to the original
# input document.
use strict;
use Getopt::Long;
# Getopt::Long behavior settings used for command-line parsing.
$Getopt::Long::ignorecase = 0; # options are case sensitive
$Getopt::Long::bundling = 1; # allow short options to be bundled
# Program identification strings: name, version, and implementation language.
my $PROG_NAME = "xmlformat";
my $PROG_VERSION = "1.04";
my $PROG_LANG = "Perl";
# ----------------------------------------------------------------------
package XMLFormat;
use strict;
# ----------------------------------------------------------------------
# Regular expressions for parsing document components. Based on REX.
# SPE = shallow parsing expression
# SE = scanning expression
# CE = completion expression
# RSB = right square brackets
# QM = question mark
# The patterns below are plain strings that are interpolated into one
# another, so each must be defined before the patterns that use it.
# Many constructs end in an optional ">" (">?"): an unterminated construct
# still matches, but the resulting token does not end in ">".

# Text: a run of one or more characters other than "<".
my $TextSE = "[^<]+";
# Any non-hyphen characters followed by a single hyphen.
my $UntilHyphen = "[^-]*-";
# Text up to and including the first two consecutive hyphens ("--").
my $Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-";
# Remainder of a comment after "<!--": through "--", then an optional ">".
my $CommentCE = "$Until2Hyphens>?";
# Text up to and including a run of two or more right square brackets.
my $UntilRSBs = "[^\\]]*\\](?:[^\\]]+\\])*\\]+";
# Remainder of a CDATA section after "<![CDATA[": through "]]>".
my $CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>";
# One or more whitespace characters (space, newline, tab, carriage return).
my $S = "[ \\n\\t\\r]+";
# Name start character: ASCII letter, "_", ":", or any non-ASCII character.
my $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]";
# Name character: as above, plus ASCII digits, "." and "-".
my $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]";
# A name: one name start character followed by any number of name characters.
my $Name = "(?:$NameStrt)(?:$NameChar)*";
# A double-quoted or single-quoted string.
my $QuoteSE = "\"[^\"]*\"|'[^']*'";
# Follows "DOCTYPE": whitespace and a name, then any number of
# whitespace-separated names or quoted strings.
my $DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*";
# Remainder of a markup declaration through ">"; quoted strings are
# consumed whole, so they may contain characters excluded elsewhere.
my $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>";
# Exactly one whitespace character.
my $S1 = "[\\n\\r\\t ]";
# Any non-"?" characters followed by one or more "?".
my $UntilQMs = "[^?]*\\?+";
# Tail of a processing instruction after its name: either "?>" immediately,
# or one whitespace character followed by text through "?>".
my $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>";
# One item inside the "[...]" part of a DOCTYPE: a comment, another "<!"
# declaration, a processing instruction, a "%name;" reference, or whitespace.
my $DT_ItemSE =
"<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S";
# Remainder of a DOCTYPE declaration after the "DOCTYPE" keyword, including
# an optional bracketed list of items and an optional closing ">".
my $DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*\\](?:$S)?)?>?";
# What may follow "<!": a comment, a CDATA section, or a DOCTYPE declaration.
my $DeclCE =
"--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?";
# What may follow "<?": a name and an optional processing instruction tail.
my $PI_CE = "$Name(?:$PI_Tail)?";
# What may follow "</": a name, optional whitespace, optional ">".
my $EndTagCE = "$Name(?:$S)?>?";
# A quoted attribute value that does not contain "<".
my $AttValSE = "\"[^<\"]*\"|'[^<']*'";
# What may follow "<" in an element tag: a name, any number of
# name=value attributes, optional whitespace, optional "/", optional ">".
my $ElemTagCE = "$Name(?:$S$Name(?:$S)?=(?:$S)?(?:$AttValSE))*(?:$S)?/?>?";
# Any markup token: "<" followed by one of the completions above (each
# completion is optional, so a lone "<" also matches).
my $MarkupSPE =
"<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)";
# A single token: either a text run or a markup token.
my $XML_SPE = "$TextSE|$MarkupSPE";
# ----------------------------------------------------------------------
# Allowable options and their possible values:
# - The keys of this hash are the allowable option names
# - The value for each key is list of allowable option values
# - If the value is undef, the option value must be numeric
# If any new formatting option is added to this program, it
# must be specified here, *and* a default value for it should
# be listed in the *DOCUMENT and *DEFAULT pseudo-element
# option hashes.
my %opt_list =
(
  # Options whose value must be one of a fixed set of words
  "format" => [ qw(block inline verbatim) ],
  "normalize" => [ qw(yes no) ],
  # Options whose value must be numeric (signified by undef)
  (map { ($_ => undef) }
    qw(subindent wrap-length entry-break exit-break element-break))
);
# Object creation: set up the default formatting configuration
# and variables for maintaining input and output document.
sub new
{
  # Constructor. Installs the built-in option sets for the *DOCUMENT and
  # *DEFAULT pseudo-elements, validates them, and returns the blessed
  # object. Dies if the built-in option sets are invalid or incomplete.
  my $class = shift;
  my $self = { elt_opts => { } }; # elt_opts: formatting options per element
  my $elt_opts = $self->{elt_opts};

  # Both pseudo-elements can be overridden from the configuration file,
  # but they are built in as well so that they always exist. Each must
  # supply a value for every option.

  # *DOCUMENT: options for the children of the document top level.
  # - entry-break must stay 0 (no extra newlines before the first element
  #   of output).
  # - exit-break must stay 1 (a newline after the final element of output).
  # - The others are best left alone too, except perhaps element-break.
  $elt_opts->{"*DOCUMENT"} = {
    "format" => "block",
    "normalize" => "no",
    "subindent" => 0,
    "wrap-length" => 0,
    "entry-break" => 0, # do not change
    "exit-break" => 1, # do not change
    "element-break" => 1
  };

  # *DEFAULT: options for every element not named explicitly in the
  # configuration file.
  $elt_opts->{"*DEFAULT"} = {
    "format" => "block",
    "normalize" => "no",
    "subindent" => 1,
    "wrap-length" => 0,
    "entry-break" => 1,
    "exit-break" => 1,
    "element-break" => 1
  };

  my $err_count = 0;

  # Pass 1: every built-in value must be accepted by the option checker.
  # Store the value it hands back.
  foreach my $elt_name (keys (%{$elt_opts}))
  {
    my $opts = $elt_opts->{$elt_name};
    foreach my $opt_name (keys (%{$opts}))
    {
      my ($checked_val, $err_msg) = check_option ($opt_name, $opts->{$opt_name});
      if (defined ($err_msg))
      {
        warn "LOGIC ERROR: $elt_name default option is invalid\n";
        warn "$err_msg\n";
        ++$err_count;
        next;
      }
      $opts->{$opt_name} = $checked_val;
    }
  }

  # Pass 2: every known option must be present in both pseudo-elements.
  foreach my $opt_name (keys (%opt_list))
  {
    foreach my $elt_name (keys (%{$elt_opts}))
    {
      next if exists ($elt_opts->{$elt_name}->{$opt_name});
      warn "LOGIC ERROR: $elt_name has no default '$opt_name' option\n";
      ++$err_count;
    }
  }

  die "Cannot continue; internal default formatting options must be fixed\n"
    if $err_count > 0;

  return bless ($self, $class);
}
# Initialize the variables that are used per-document
sub init_doc_vars
{
  # Reset all per-document state so the object can process a new document.
  my ($self) = @_;

  # Elements used in the document but not named in the configuration file
  $self->{unconf_elts} = { };

  # Output document built up by the formatting operations
  $self->{out_doc} = "";

  # Each of the following starts out as a fresh empty list:
  #   tokens            tokens for the current document
  #   line_num          line number for each token
  #   tree              document node tree (constructed from the token list)
  #   pending           pending tokens being held until flushed
  #   block_name_stack  names of enclosing blocks (for debugging)
  #   block_opts_stack  options of enclosing blocks; inline elements use the
  #                     normalize/wrap-length values of their enclosing block
  foreach my $list_name (qw(tokens line_num tree pending
                            block_name_stack block_opts_stack))
  {
    $self->{$list_name} = [ ];
  }

  # Stack holding each enclosing block's current break type
  $self->{block_break_type_stack} = [ ];
}
# Accessors for token list and resulting output document
sub tokens
{
  # Accessor: return the reference to the current document's token list.
  my ($self) = @_;
  return $self->{tokens};
}
sub out_doc
{
  # Accessor: return the output document string built so far.
  my ($self) = @_;
  return $self->{out_doc};
}
# Methods for adding strings to output document or
# to the pending output array
sub add_to_doc
{
  # Append a string to the end of the output document.
  my $self = shift;
  my $str = shift;
  $self->{out_doc} .= $str;
}
sub add_to_pending
{
  # Append a string to the end of the pending-output array.
  my $self = shift;
  my $str = shift;
  push (@{$self->{pending}}, $str);
}
# Block stack maintenance methods
# Push options onto or pop options off from the stack. When doing
# this, also push or pop an element onto the break-level stack.
sub begin_block
{
  # Enter a block: record its name and options on their stacks, and start
  # the block out with the "entry-break" break type.
  my $self = shift;
  my $name = shift;
  my $opts = shift;
  push (@{$self->{block_name_stack}}, $name);
  push (@{$self->{block_opts_stack}}, $opts);
  push (@{$self->{block_break_type_stack}}, "entry-break");
}
sub end_block
{
  # Leave the current block: drop the top entry from the name, options,
  # and break-type stacks.
  my ($self) = @_;
  pop (@{$self->{block_name_stack}});
  pop (@{$self->{block_opts_stack}});
  pop (@{$self->{block_break_type_stack}});
}
# Return the current block's normalization status or wrap length
sub block_normalize
{
  # True if the innermost block (top of the options stack) has its
  # normalize option set to "yes".
  my ($self) = @_;
  my $stack = $self->{block_opts_stack};
  my $top_opts = $stack->[@{$stack} - 1];
  return $top_opts->{normalize} eq "yes";
}
sub block_wrap_length
{
  # Return the wrap-length option of the innermost block (top of the
  # options stack).
  my ($self) = @_;
  my $stack = $self->{block_opts_stack};
  my $top_opts = $stack->[@{$stack} - 1];
  return $top_opts->{"wrap-length"};
}
# Set the current block's break type, or return the number of newlines
# for the block's break type
sub set_block_break_type
{
  # Replace the break type recorded for the innermost block (top of the
  # break-type stack).
  my $self = shift;
  my $type = shift;
  my $stack = $self->{block_break_type_stack};
  $stack->[@{$stack} - 1] = $type;
}
sub block_break_value
{
  # Return the innermost block's option value for its current break type,
  # i.e. the number of newlines to write for that break.
  my ($self) = @_;
  my $opts_stack = $self->{block_opts_stack};
  my $top_opts = $opts_stack->[@{$opts_stack} - 1];
  my $type_stack = $self->{block_break_type_stack};
  my $top_type = $type_stack->[@{$type_stack} - 1];
  return $top_opts->{$top_type};
}
# ----------------------------------------------------------------------
# Read configuration information. For each element, construct a hash
# containing a hash key and value for each option name and value.
# After reading the file, fill in missing option values for
# incomplete option structures using the *DEFAULT options.
sub read_config
{
  # Read the configuration file named by the argument into
  # $self->{elt_opts}, then complete every element's option set from the
  # *DEFAULT options. Dies with a "file:line:" message on a malformed or
  # illegal line, or if the file cannot be opened.
  my $self = shift;
  my $conf_file = shift;
  my @elt_names = ();
  my $err_msg;
  my $in_continuation = 0;
  my $saved_line = "";

  # Three-argument open with a lexical handle: the filename is never
  # interpreted as a mode or command, and the handle cannot collide with
  # another FH elsewhere in the program.
  open (my $fh, "<", $conf_file)
    or die "Cannot read config file $conf_file: $!\n";
  # Read the file one line at a time. ($. in the messages below is the
  # line number of the most recently read line.)
  while (defined (my $line = <$fh>))
  {
    chomp ($line);
    next if $line =~ /^\s*($|#)/; # skip blank lines, comments
    if ($in_continuation)
    {
      # Join this line onto the element-name line that ended with "\"
      $line = $saved_line . " " . $line;
      $saved_line = "";
      $in_continuation = 0;
    }
    if ($line !~ /^\s/)
    {
      # Line doesn't begin with whitespace, so it lists element names.
      # Names are separated by whitespace or commas, possibly followed
      # by a continuation character or a comment.
      if ($line =~ /\\$/)
      {
        $line =~ s/\\$//; # remove continuation character
        $saved_line = $line;
        $in_continuation = 1;
        next;
      }
      $line =~ s/\s*#.*$//; # remove any trailing comment
      @elt_names = split (/[\s,]+/, $line);
      # make sure each name has an entry in the elt_opts structure
      foreach my $elt_name (@elt_names)
      {
        $self->{elt_opts}->{$elt_name} = { }
          unless exists ($self->{elt_opts}->{$elt_name});
      }
    }
    else
    {
      # Line begins with whitespace, so it contains an option
      # to apply to the current element list, possibly followed by
      # a comment. First check that there is a current list.
      # Then parse the option name/value.
      die "$conf_file:$.: Option setting found before any "
        . "elements were named.\n"
        if !@elt_names;
      $line =~ s/\s*#.*$//;
      my ($opt_name, $opt_val) =
        $line =~ /^\s+(\S+)(?:\s+|\s*=\s*)(\S+)$/;
      die "$conf_file:$.: Malformed line: $line\n" unless defined ($opt_val);
      # Check option. If illegal, die with message. Otherwise,
      # add option to each element in current element list
      ($opt_val, $err_msg) = check_option ($opt_name, $opt_val);
      die "$conf_file:$.: $err_msg\n" if defined ($err_msg);
      foreach my $elt_name (@elt_names)
      {
        $self->{elt_opts}->{$elt_name}->{$opt_name} = $opt_val;
      }
    }
  }
  close ($fh);

  # For any element that has missing option values, fill in the values
  # using the options for the *DEFAULT pseudo-element. This speeds up
  # element option lookups later. It also makes it unnecessary to test
  # each option to see if it's defined: All element option structures
  # will have every option defined.
  my $def_opts = $self->{elt_opts}->{"*DEFAULT"};
  foreach my $elt_name (keys (%{$self->{elt_opts}}))
  {
    next if $elt_name eq "*DEFAULT";
    foreach my $opt_name (keys (%{$def_opts}))
    {
      next if exists ($self->{elt_opts}->{$elt_name}->{$opt_name}); # already set
      $self->{elt_opts}->{$elt_name}->{$opt_name} = $def_opts->{$opt_name};
    }
  }
}
# Check option name to make sure it's legal. Check the value to make sure
# that it's legal for the name. Return a two-element array:
# (value, undef) if the option name and value are legal.
# (undef, message) if an error was found; message contains error message.
# For legal values, the returned value should be assigned to the option,
# because it may get type-converted here.
sub check_option
{
  # Validate an option name and its value against %opt_list.
  # Arguments: option name, option value.
  # Returns (value, undef) if both are legal, or (undef, message) if not.
  my ($opt_name, $opt_val) = @_;

  # - Check option name to make sure it's a legal option
  # - Then check the value. If there is a list of values
  #   the value must be one of them. Otherwise, the value
  #   must be an integer.
  return (undef, "Unknown option name: $opt_name")
    unless exists ($opt_list{$opt_name});
  my $allowable_val = $opt_list{$opt_name};
  if (defined ($allowable_val))
  {
    # Compare as strings. Interpolating the value into a pattern would
    # treat it as a regular expression, so "bloc." or ".*" would be
    # accepted as a legal value, and a value such as "(" would abort
    # with a pattern compilation error instead of a proper message.
    return (undef, "Unknown '$opt_name' value: $opt_val")
      unless grep { $_ eq $opt_val } @{$allowable_val};
  }
  else # other options should be numeric
  {
    # "$opt_val" converts $opt_val to string for pattern match
    return (undef, "'$opt_name' value ($opt_val) should be an integer")
      unless "$opt_val" =~ /^\d+$/;
  }
  return ($opt_val, undef);
}
# Return hash of option values for a given element. If no options are found:
# - Add the element name to the list of unconfigured options.
# - Assign the default options to the element. (This way the test for the
# option fails only once.)
sub get_opts
{
  # Look up the option hash for the named element. An element with no
  # options is recorded in unconf_elts and is given the *DEFAULT option
  # hash, so the lookup fails only once per element.
  my ($self, $elt_name) = @_;
  my $opts = $self->{elt_opts}->{$elt_name};
  return $opts if defined ($opts);

  $self->{unconf_elts}->{$elt_name} = 1;
  $opts = $self->{elt_opts}->{"*DEFAULT"};
  $self->{elt_opts}->{$elt_name} = $opts;
  return $opts;
}
# Display contents of configuration options to be used to process document.
# For each element named in the elt_opts structure, display its format
# type, and those options that apply to the type.
sub display_config
{
  # Print the options of every element in elt_opts, sorted by element
  # name: the format type first, then the options that apply to that type.
  my ($self) = @_;

  # Additional options that apply to each format type
  my %extra_opts_for = (
    "block" => [ qw(entry-break element-break exit-break
                    subindent normalize wrap-length) ],
    "inline" => [ ],
    "verbatim" => [ ]
  );

  my $elt_opts = $self->{elt_opts};
  foreach my $elt_name (sort (keys (%{$elt_opts})))
  {
    my %elt = %{$elt_opts->{$elt_name}};
    my $format = $elt{format};
    print "$elt_name\n";
    print " format = $format\n";
    foreach my $opt_name (@{$extra_opts_for{$format}})
    {
      print " $opt_name = $elt{$opt_name}\n";
    }
    print "\n";
  }
}
# Display the list of elements that are used in the document but not
# configured in the configuration file.
# Then re-unconfigure the elements so that they won't be considered
# as configured for the next document, if there is one.
sub display_unconfigured_elements
{
  # Report the elements recorded in unconf_elts, then remove their entries
  # from elt_opts so they are not treated as configured for the next
  # document.
  my ($self) = @_;
  my @unconf = keys (%{$self->{unconf_elts}});

  if (@unconf)
  {
    print "The following document elements were assigned no formatting options:\n";
    # Sorted names joined into one string, passed through line_wrap
    my $name_list = join (" ", sort (@unconf));
    print "$_\n" foreach $self->line_wrap ([ $name_list ], 0, 0, 65);
  }
  else
  {
    print "The document contains no unconfigured elements.\n";
  }

  foreach my $elt_name (@unconf)
  {
    delete ($self->{elt_opts}->{$elt_name});
  }
}
# ----------------------------------------------------------------------
# Main document processing routine.
# - Argument is a string representing an input document
# - Return value is the reformatted document, or undef. An undef return
# signifies either that an error occurred, or that some option was
# given that suppresses document output. In either case, don't write
# any output for the document. Any error messages will already have
# been printed when this returns.
sub process_doc
{
  # Run one document through the pipeline: parse, error check, build tree,
  # canonize, format. Returns the reformatted document as a string, or
  # undef if an error was reported or one of the $check_parser,
  # $canonize_only, or $show_unconf_elts modes was requested.
  my ($self, $doc, $verbose, $check_parser, $canonize_only,
      $show_unconf_elts) = @_;

  $self->init_doc_vars ();

  # Lexical parse: split the document into a list of tokens
  warn "Parsing document...\n" if $verbose;
  $self->shallow_parse ($doc);

  if ($check_parser)
  {
    warn "Checking parser...\n" if $verbose;
    # The concatenated tokens should be identical to the original document
    my $rejoined = join ("", @{$self->tokens ()});
    if ($rejoined ne $doc)
    {
      print "PARSER ERROR: document token concatenation differs from document\n";
    }
    else
    {
      print "Parser is okay\n";
    }
    return undef;
  }

  # Record the input line number of each token
  $self->assign_line_numbers ();

  # Report any error tokens returned by the parser
  warn "Checking document for errors...\n" if $verbose;
  if ($self->report_errors () > 0)
  {
    warn "Cannot continue processing document.\n";
    return undef;
  }

  # Build the tree structure from the token list
  warn "Converting document tokens to tree...\n" if $verbose;
  if ($self->tokens_to_tree () > 0)
  {
    warn "Cannot continue processing document.\n";
    return undef;
  }

  # Integrity check (disabled): stringify the tree and compare the result
  # to the original document string; the two should be identical. A
  # mismatch means the to-tree or stringify operation does not work
  # properly.
  #$str = $self->tree_stringify ();
  #print $str;
  #warn "ERROR: mismatch between document and resulting string\n" if $doc ne $str;

  # Canonize the tree to remove extraneous whitespace
  warn "Canonizing document tree...\n" if $verbose;
  $self->tree_canonize ();

  if ($canonize_only)
  {
    print $self->tree_stringify () . "\n";
    return undef;
  }

  # Canonizing looks up the formatting options of each element in the
  # document, which builds the list of elements that have no explicit
  # configuration. Display that list and stop if the user asked for it.
  if ($show_unconf_elts)
  {
    $self->display_unconfigured_elements ();
    return undef;
  }

  # Format the tree to produce the formatted XML as a single string
  warn "Formatting document tree...\n" if $verbose;
  $self->tree_format ();

  # A non-empty result should already end with a newline if the *DOCUMENT
  # options have exit-break = 1. If it does not, add the newline, but warn
  # rather than fixing it silently.
  my $result = $self->out_doc ();
  if ($result ne "" && $result !~ /\n$/)
  {
    warn "LOGIC ERROR: trailing newline had to be added\n";
    $result .= "\n";
  }
  return $result;
}
# ----------------------------------------------------------------------
# Parse XML document into array of tokens and store array
sub shallow_parse
{
  # Split the document into tokens (every successive match of $XML_SPE)
  # and store the resulting list in the object.
  my $self = shift;
  my $xml_document = shift;
  my @token_list = ($xml_document =~ /$XML_SPE/g);
  $self->{tokens} = [ @token_list ];
}
# ----------------------------------------------------------------------
# Extract a tag name from a tag and return it.
# Dies if the tag cannot be found, because this is supposed to be
# called only with a legal tag.
sub extract_tag_name
{
  # Return the name found at the start of a tag, after "<" or "</".
  # Dies if there is none; callers are expected to pass only legal tags.
  my ($tag) = @_;
  if ($tag =~ /^<\/?($Name)/)
  {
    return $1;
  }
  die "Cannot find tag name in tag: $tag\n";
}
# ----------------------------------------------------------------------
# Assign an input line number to each token. The number indicates
# the line number on which the token begins.
sub assign_line_numbers
{
  # Fill $self->{line_num} with one entry per token: the input line on
  # which that token begins (the first line is line 1).
  my ($self) = @_;
  my $cur_line = 1;
  my $line_nums = $self->{line_num} = [ ];
  foreach my $idx (0 .. @{$self->{tokens}} - 1)
  {
    push (@{$line_nums}, $cur_line);
    # Advance by the number of newlines in this token (tr returns the
    # count of matched characters)
    my $token = $self->{tokens}->[$idx];
    $cur_line += ($token =~ tr/\n//);
  }
}
# ----------------------------------------------------------------------
# Check token list for errors and report any that are found. Error
# tokens are those that begin with "<" but do not end with ">".
# Returns the error count.
# Does not modify the original token list.
sub report_errors
{
  # Scan the token list for error tokens and warn about each one, giving
  # its line number (from $self->{line_num}) and 1-based token position.
  # Returns the number of error tokens found.
  my $self = shift;
  my $err_count = 0;
  for (my $i = 0; $i < @{$self->{tokens}}; $i++)
  {
    my $token = $self->{tokens}->[$i];
    # An error token is markup (begins with "<") that the parser could
    # not complete (does not end with ">"). Text tokens never begin with
    # "<" and so are never reported.
    next unless $token =~ /^</ && $token !~ />$/;
    my $line_num = $self->{line_num}->[$i];
    warn "Malformed token at line $line_num, token " . ($i+1) . ": $token\n";
    ++$err_count;
  }
  warn "Number of errors found: $err_count\n" if $err_count > 0;
  return $err_count;
}
# ----------------------------------------------------------------------
# Helper routine to print tag stack for tokens_to_tree
sub print_tag_stack
{
  # Write a labeled, numbered listing of the given stack entries as
  # warnings, or "<label>: none" if there are no entries.
  my $label = shift;
  my @entries = @_;
  if (@entries >= 1)
  {
    warn " $label:\n";
    my $pos = 0;
    foreach my $entry (@entries)
    {
      ++$pos;
      warn " ", $pos, ": ", $entry, "\n";
    }
  }
  else
  {
    warn " $label: none\n";
  }
}
# Convert the list of XML document tokens to a tree representation.
# The implementation uses a loop and a stack rather than recursion.
# Does not modify the original token list.
# Returns an error count.
sub tokens_to_tree
{
my $self = shift;
my @tag_stack = (); # stack for element tags
my @children_stack = (); # stack for lists of children
my $children = [ ]; # current list of children
my $err_count = 0;
for (my $i = 0; $i < @{$self->{tokens}}; $i++)
{
my $token = $self->{tokens}->[$i];
my $line_num = $self->{line_num}->[$i];
my $tok_err = "Error near line $line_num, token " . ($i+1) . " ($token)";
if ($token !~ /^) # text
{
push (@{$children}, text_node ($token));
}
elsif ($token =~ /^
I bought a new coffee cup!200421
]]>
Suppose further that you want it to look like this:
I bought a new coffee cup!200421
]]>
By happy coincidence, that happens to be exactly the default output
style produced by xmlformat. To reformat your
document, all you have to do is run xmlformat with
the document filename as the argument, saving the output in another
file:
% xmlformat doc1.xml > output
Note: % represents your shell prompt; do not type it
as part of the command.
If you are confident that the output style produced by
xmlformat will be as you desire, you can be reckless
and perform an in-place conversion:
% xmlformat -i doc1.xml
In this case, xmlformat reads the document from the
input file, reformats it, and writes it back out to the same file,
replacing the file's original contents. If you are not quite so
reckless, use -i in conjunction with a -b
option to make a backup file that contains the
original document. -b takes an argument that specifies
the suffix to add to the original filename to create the backup
filename. For example, to back up the original
doc1.xml file in a file named
doc1.xml.bak, use this command:
% xmlformat -i -b .bak doc1.xml
Using a Configuration File
In the preceding example, the desired output style for
doc1.xml was the same as what
xmlformat produces by default. But what if the
default style is not what you want? In that case,
you must tell xmlformat how to handle your document.
This is at once both the weakness and strength of
xmlformat. The weakness is that it is extra work to
instruct xmlformat how you want it to format a
document. The strength is that it's possible to do so. Other XML
formatters do not require any extra work, but that's because they are
not configurable.
Suppose doc2.xml looks like this:
Compiling and Running a ProgramTo compile and run the program,
use the following commands, where
source-file
is the name of the source file:ccsource-file./a.out
]]>
That's ugly, and you want to rewrite it like this:
Compiling and Running a Program
To compile and run the program, use the following commands,
where source-file is the name of
the source file:
ccsource-file./a.out
]]>
The key characteristics of this rewrite are as follows:
Child elements of the <example> element are
separated by blank lines, but not indented within it.
The text inside the <para> element is
reformatted, adjusted to 60 characters per line and indented.
The contents of the <screen> element are left
alone.
Unfortunately, if you run doc2.xml through
xmlformat, it comes out like this:
Compiling and Running a ProgramTo compile and run the program,
use the following commands, where
source-file
is the name of the source file:ccsource-file./a.out
]]>
This output is unsuitable. Among the offenses committed by
xmlformat, two are most notable:
The text of the <para> element has been left
alone, not reformatted.
The <screen> element content has been
reformatted, not left intact.
In these respects, it appears that xmlformat has done
exactly the opposite of what was wanted!
Furthermore, had you used the -i option to reformat the
file in place without using -b to make a backup, at
this point you would have a file containing a
<screen> element that you'd have to fix up by
hand to restore it to its original condition.
What a worthless, worthless program!
The rewriting of the <screen> element points to
an important lesson: Before trusting xmlformat with
your documents, it's best to run some tests and tune your configuration
as necessary to make sure it will produce the results you want.
Otherwise, you may produce changes that affect the integrity of your
documents. This is particularly true when they contain elements such as
<screen> or
<programlisting> that should be copied
verbatim, without change.
Configuring xmlformat amounts to writing a
configuration file that instructs it what to do. For
doc2.xml, that means telling
xmlformat to leave the
<screen> element alone, to normalize the text
of the paragraph to fill lines and wrap them to a given length, and to
put blank lines around sub-elements of the
<example> element.
Let's begin by creating a very basic configuration file. What should we
call it? xmlformat can read configuration settings
from a file named on the command line with a -f or
--config-file option. This means you can name the file
whatever you want. However, if you put the settings in a file named
xmlformat.conf in the current directory,
xmlformat will read the file automatically. That's an
easier approach, because you won't need to use a command-line option to
specify the configuration file. So create a file named
xmlformat.conf that contains the following two
lines:
screen
format = verbatim
These lines specify that <screen> elements
should be formatted as verbatim elements. That is,
xmlformat should reproduce their content in the
output exactly as it appears in the input, without modification. The
first line must begin in column 1 (no preceding spaces or tabs). The
second line must begin with at least one space or tab. Presence or
absence of whitespace is how xmlformat distinguishes
the names of elements to be formatted from the instructions that
indicate how to format them.
After creating xmlformat.conf, run
xmlformat again to process
doc2.xml. It reads the newly created configuration
file and produces this result:
Compiling and Running a ProgramTo compile and run the program,
use the following commands, where
source-file
is the name of the source file:ccsource-file./a.out
]]>
That's a little better: xmlformat has not destroyed
the <screen> element by reformatting it. But
problems remain: The paragraph content has not been reformatted, and
there are no blank lines between sub-elements.
Let's take care of the paragraph next. To set up its formatting, add a
section to xmlformat.conf for
<para> elements:
para
format = block
normalize = yes
wrap-length = 60
subindent = 1
screen
format = verbatim
The order of sections in the configuration file doesn't matter. Put them
in the order that makes most sense to you. The order of option lines
under the initial section line doesn't matter, either.
The first two options in the para section specify
that the <para> element is a block element, and
that text within it should be normalized. Turning on the
normalize option tells xmlformat
that it's okay to reformat the text within the element. This means that
runs of whitespace within the text are collapsed to single spaces, and
that whitespace at the beginning and end of the text can be adjusted
(typically to put the text on different lines than the element's opening
and closing tags). Enabling normalization also allows you to perform
text line-wrapping and indenting. The wrap-length
option specifies the maximum number of characters per line, and
subindent specifies the indenting of text and
sub-elements, relative to the element's own tags. Note that when
xmlformat performs line-wrapping, it includes the
currently prevailing indent as part of the line length. (For example, if
the prevailing indent is 20 spaces and wrap-length
value is 60, lines will contain at most 40 characters
following the indentation.)
After adding the para section to
xmlformat.conf, xmlformat
produces this result:
Compiling and Running a Program
To compile and run the program, use the following
commands, where
source-file
is the name of the source file:
ccsource-file./a.out
]]>
The paragraph now is wrapped and indented. However, it doesn't seem to
be wrapped quite correctly, because the
<replaceable> element actually would fit on the
previous line. This happens because no formatting options were specified
for <replaceable> in the configuration file. As
a result, it is treated as having the default element type of
block, using the default behavior that block elements
are written out beginning on a new line.
To fix this problem, we should configure
<replaceable> as an inline element. That will
cause it to be formatted inline with the other text (and thus
line-wrapped along with it). Modify the configuration file to include a
replaceable section:
para
format = block
normalize = yes
wrap-length = 60
subindent = 1
replaceable
format = inline
screen
format = verbatim
The resulting output after making this change is as follows:
Compiling and Running a Program
To compile and run the program, use the following
commands, where source-file is
the name of the source file:
ccsource-file./a.out
]]>
We're getting close now. All we need to do is space out the
<example> child elements with a blank line in
between. Sub-element spacing is controlled by three formatting
properties:
entry-break controls spacing after the opening tag of
an element (that is, the spacing upon entry into the element's content).
element-break controls the spacing between
sub-elements.
exit-break controls spacing before the closing tag of
an element (that is, the spacing upon exit from the element's content).
The value for each of these formatting options should be an integer
indicating the number of newlines to write. A value of
1 causes one newline, which acts simply to break to
the next line. To get a blank line, the break value needs to be
2. Modify the configuration file by adding a section
for <example> elements:
example
format = block
entry-break = 2
element-break = 2
exit-break = 2
subindent = 0
para
format = block
normalize = yes
wrap-length = 60
subindent = 1
replaceable
format = inline
screen
format = verbatim
The resulting output is:
Compiling and Running a Program
To compile and run the program, use the following commands,
where source-file is the name of
the source file:
ccsource-file./a.out
]]>
We're done!
You may be thinking, "Wow, that's a lot of messing around just to format
that tiny little document." That's true. However, the effort of setting
up configuration files tends to be "reusable," in the sense that you can
use the same file to format multiple documents that all should be
written using the same style. Also, if you have different projects
requiring different styles, it tends to be easiest to begin setting up
the configuration file for one project by beginning with a copy of the
file from another project.
Discovering "Inherited" Formatting Options
In the final formatting of doc2.xml, note that the
paragraph tags appear on separate lines preceding and following the
paragraph content. This occurs despite the fact that the configuration
file specifies no break values in the para section,
because if you omit formatting options for an element, it "inherits" the
default properties. In the case of the <para>
element, the relevant unspecified properties are the
entry-break and exit-break values.
For block elements, both have a value of 1 by default
(that is, one newline), which causes a line break after the opening tag
and before the closing tag.
If you want to see all the formatting options
xmlformat will use, run it with the
--show-config option. For example:
% xmlformat --show-config
*DEFAULT
format = block
entry-break = 1
element-break = 1
exit-break = 1
subindent = 1
normalize = no
wrap-length = 0
*DOCUMENT
format = block
entry-break = 0
element-break = 1
exit-break = 1
subindent = 0
normalize = no
wrap-length = 0
example
format = block
entry-break = 2
element-break = 2
exit-break = 2
subindent = 0
normalize = no
wrap-length = 0
para
format = block
entry-break = 1
element-break = 1
exit-break = 1
subindent = 1
normalize = yes
wrap-length = 60
replaceable
format = inline
screen
format = verbatim
No configuration file is specified on the command line, so
xmlformat reads the default configuration file,
xmlformat.conf. Then it displays the resulting
configuration options. You can see that the para
section has inherited break values from the *DEFAULT
section.
Checking for Unconfigured Elements
Any elements appearing in the input document that are not named in the
configuration file are formatted using the values of the
*DEFAULT section. If the file contains no
*DEFAULT section, xmlformat uses
built-in default values.
If you want to see whether there are any elements in the document for
which you haven't specified any formatting options, run
xmlformat with the
--show-unconfigured-elements option. For example:
% xmlformat --show-unconfigured-elements doc2.xml
The following document elements were assigned no formatting options:
title
As it happens, the title already formats in the desired fashion, so
there's no need to add anything more to the configuration file.
xmlformat-1.04/docs/doc-cmd 0000755 0001750 0001750 00000002024 10011017657 014451 0 ustar paul devel #! /usr/bin/perl -w
# vim:set ts=2 sw=2 expandtab:
# doc-cmd - filter to process embedded shell commands in XML document
# Suppose a document contains this:
#
#
# ... a bunch of stuff here
#
# Then you mark the lines in the file from the comment to the closing
# tag and shove it through doc-cmd. In vi, with the cursor
# on the comment line, you could do this with 4!!./doc-cmd. doc-cmd
# reads the first line, extracts the command, runs it, and writes as
# its output the original comment line plus the output from the command.
# This provides a way to embed within an XML document a command that
# produces part of a document, plus an easy way to re-run the command
# should the document fragment need to be regenerated.
use strict;
$_ = <>;
die "Did not find an XML comment containing a command\n"
unless /\
xmlformat-1.04/tests/wrap3.conf 0000644 0001750 0001750 00000000260 10002341015 015316 0 ustar paul devel *DEFAULT
format block
entry-break 1
element-break 1
exit-break 1
subindent 0
para
normalize yes
wrap-length 15
xmlformat-1.04/tests/inline1.res 0000644 0001750 0001750 00000000475 07777165540 015535 0 ustar paul devel
This is a normalized paragraph with an inline
element.
This is a non-normalized paragraph
with an inline
element.
xmlformat-1.04/tests/inline2.res 0000644 0001750 0001750 00000000435 07777165540 015532 0 ustar paul devel
This is a paragraph with
an inline
element in the middle.
xmlformat-1.04/tests/empty-doc.res 0000644 0001750 0001750 00000000000 07775563753 016065 0 ustar paul devel xmlformat-1.04/tests/inline3.res 0000644 0001750 0001750 00000001165 07777267630 015537 0 ustar paul devel
This is a normalized paragraph with a long longlong long inline element.
xmlformat-1.04/tests/data.conf 0000644 0001750 0001750 00000000254 07774643440 015233 0 ustar paul devel # This configuration file is empty. Because it contains no options
# to override the built-in default formatting values, the effect is
# to cause the defaults to be used.
xmlformat-1.04/tests/inline1.xml 0000644 0001750 0001750 00000000501 07777165540 015532 0 ustar paul devel
This is a normalized paragraph
with an inline
element.
This is a non-normalized paragraph
with an inline
element.
xmlformat-1.04/tests/inline2.xml 0000644 0001750 0001750 00000000436 07777111061 015527 0 ustar paul devel
This is a paragraph with an inline element
in the middle.
xmlformat-1.04/tests/empty-doc.xml 0000644 0001750 0001750 00000000000 07775563753 016074 0 ustar paul devel xmlformat-1.04/tests/inline3.xml 0000644 0001750 0001750 00000001174 07777267630 015546 0 ustar paul devel
This is a normalized paragraph
with a long long long long inline
element.
xmlformat-1.04/tests/xsltext.res 0000644 0001750 0001750 00000000505 07774643440 015700 0 ustar paul devel
x
x
x
xmlformat-1.04/tests/subelt2.conf 0000644 0001750 0001750 00000000373 10002341015 015647 0 ustar paul devel # entry/exit break of 1, no element break, no subindent
# subelts will all appear on single line between elt opening/closing tags.
elt
format block
entry-break 1
exit-break 1
element-break 0
subindent 0
xmlformat-1.04/tests/nobreak.conf 0000644 0001750 0001750 00000000411 10002341015 015701 0 ustar paul devel # Default options
*DEFAULT
format block
subindent 2
entry-break 2
exit-break 2
element-break 0
normalize no
para
format block
entry-break 1
exit-break 1
normalize yes
xmlformat-1.04/tests/subelt6.conf 0000644 0001750 0001750 00000000612 10002341015 015647 0 ustar paul devel # entry/exit break of 2 element break of 1, no subindent
# Each subelt will appear on its own line between elt opening/closing tags,
# with no blank line between them.
# There will also be a blank line between opening tag and subelts,
# and between subelts and closing tag.
elt
format block
entry-break 2
exit-break 2
element-break 1
subindent 0
xmlformat-1.04/tests/xsltext.xml 0000644 0001750 0001750 00000000475 07774643440 015715 0 ustar paul devel
x
x
x
xmlformat-1.04/tests/Makefile 0000644 0001750 0001750 00000000033 07775641504 015106 0 ustar paul devel all:
clean::
rm -f *.out
xmlformat-1.04/tests/inline1.conf 0000644 0001750 0001750 00000001057 10002341015 015626 0 ustar paul devel # Options for top-level document children
*DOCUMENT
format block
subindent 0
entry-break 0
exit-break 1
element-break 2
normalize no
# Default options
*DEFAULT
format block
subindent 0
entry-break 0
exit-break 0
element-break 0
normalize yes
wrap-length 0
para-norm
format block
normalize yes
wrap-length 60
para-no-norm
format block
normalize no
inline
format inline
xmlformat-1.04/tests/test1.conf 0000644 0001750 0001750 00000001005 10002341015 015320 0 ustar paul devel # Options for top-level document children
*DOCUMENT
format block
subindent 0
entry-break 0
exit-break 1
element-break 2
normalize no
# Default options
*DEFAULT
format block
subindent 2
entry-break 2
exit-break 2
element-break 2
normalize no
itemizedlist
format block
para
format block
entry-break 1
exit-break 1
normalize yes
wrap-length 72
xmlformat-1.04/tests/break1.conf 0000644 0001750 0001750 00000001203 10002341015 015425 0 ustar paul devel # Default options
*DEFAULT
format block
subindent 2
entry-break 2
exit-break 2
element-break 2
normalize no
# No entry/exit break, subindent = 0
item1
format block
entry-break 0
exit-break 0
element-break 2
subindent 0
# No entry/exit break, subindent = 2
item2
format block
entry-break 0
exit-break 0
element-break 2
subindent 2
title para todo remark
format block
entry-break 1
exit-break 1
normalize yes
subindent 2
wrap-length 72
xmlformat-1.04/tests/table1.conf 0000644 0001750 0001750 00000000247 10002341015 015437 0 ustar paul devel # Format everything on the same line
table row cell
format block
normalize no
entry-break 0
element-break 0
exit-break 0
xmlformat-1.04/tests/continuation.res 0000644 0001750 0001750 00000000075 10001261545 016654 0 ustar paul devel
xmlformat-1.04/tests/xsltext.conf 0000644 0001750 0001750 00000000611 10002341015 015775 0 ustar paul devel # Options for top-level document children
*DOCUMENT
format block
subindent 0
entry-break 0
exit-break 1
element-break 2
normalize no
# Default options
*DEFAULT
format block
subindent 2
entry-break 2
exit-break 2
element-break 0
normalize no
xsl:text
format verbatim
xmlformat-1.04/tests/data.res 0000644 0001750 0001750 00000001310 10000142105 015031 0 ustar paul devel
1-11-22-12-23-13-2
This is a paragraph with literal
text and emphasized text.
xmlformat-1.04/tests/empty-elt.conf 0000644 0001750 0001750 00000001406 10002341015 016205 0 ustar paul devel # Options for top-level document children
*DOCUMENT
format block
subindent 0
entry-break 0
exit-break 1
element-break 2
normalize no
# Default options
*DEFAULT
format block
subindent 2
entry-break 2
exit-break 2
element-break 2
normalize no
wrap-length 0
programlisting
format verbatim
itemizedlist orderedlist
format block
listitem
format block
# this causes following to be on same line
entry-break 0
element-break 2
exit-break 0
subindent 0
para
format block
entry-break 1
exit-break 1
normalize yes
wrap-length 72
xmlformat-1.04/tests/subelt3.conf 0000644 0001750 0001750 00000000362 10002341015 015646 0 ustar paul devel # entry/exit/element break of 1, no subindent
# Each subelt will appear on its own line between elt opening/closing tags.
elt
format block
entry-break 1
exit-break 1
element-break 1
subindent 0
xmlformat-1.04/tests/data.xml 0000644 0001750 0001750 00000001234 10000011540 015043 0 ustar paul devel
1-11-22-12-23-13-2
This is a paragraph with literal
text and emphasized text.
xmlformat-1.04/tests/subelt7.conf 0000644 0001750 0001750 00000000642 10002341015 015653 0 ustar paul devel # entry/exit break of 2, element break of 1, subindent of 4 spaces
# Each subelt will appear on its own line between elt opening/closing tags,
# with no blank line between them.
# There will also be a blank line between opening tag and subelts,
# and between subelts and closing tag.
# subelts will be indented
elt
format block
entry-break 2
exit-break 2
element-break 1
subindent 4
xmlformat-1.04/tests/nobreak.res 0000644 0001750 0001750 00000000150 07774643440 015602 0 ustar paul devel
para1
para2
para3
xmlformat-1.04/tests/continuation.xml 0000644 0001750 0001750 00000000072 10002341015 016650 0 ustar paul devel
xmlformat-1.04/tests/wrap1.conf 0000644 0001750 0001750 00000001052 10002341015 015314 0 ustar paul devel #Two types of paragraphs.
# Both have line-wrapping with subindent, but one has an entry/exit
# break and the other does not. This is to test the handling of the
# initial indent when the text is written immediately after the open
# tag vs. when it is written on the next line.
*DEFAULT
format block
normalize yes
wrap-length 40
subindent 2
para-with-break
entry-break 1
element-break 1
exit-break 1
para-without-break
entry-break 0
element-break 0
exit-break 0
xmlformat-1.04/tests/nobreak.xml 0000644 0001750 0001750 00000000116 07774643440 015613 0 ustar paul devel para1para2para3
xmlformat-1.04/tests/wrap1.res 0000644 0001750 0001750 00000000640 07774707271 015222 0 ustar paul devel
A paragraph with several lines. A
paragraph with several lines. A
paragraph with several lines. A
paragraph with several lines. A
paragraph with several lines.
A paragraph with several lines. A
paragraph with several lines. A
paragraph with several lines. A
paragraph with several lines. A
paragraph with several lines.
xmlformat-1.04/tests/wrap2.res 0000644 0001750 0001750 00000000614 10000011540 015157 0 ustar paul devel
A paragraph with several lines. A paragraph with several
lines. A paragraph with several lines. A paragraph with
several lines. A paragraph with several lines.
A paragraph with several lines.
A paragraph with several lines.
A paragraph with several lines.
A paragraph with several lines.
A paragraph with several lines.
xmlformat-1.04/tests/inline2.conf 0000644 0001750 0001750 00000000753 10002341015 015631 0 ustar paul devel # Options for top-level document children
*DOCUMENT
format block
subindent 0
entry-break 0
exit-break 1
element-break 2
normalize no
# Default options
*DEFAULT
format block
subindent 0
entry-break 0
exit-break 0
element-break 0
normalize yes
wrap-length 0
para
format block
normalize yes
wrap-length 60
inline
format inline
xmlformat-1.04/tests/wrap3.res 0000644 0001750 0001750 00000000632 10000621516 015172 0 ustar paul devel
this-is-a-very-long-word-at-the-beginning
short short
short
short
this-is-a-very-long-word-in-the-middle
short short
short
short short
short short
this-is-a-very-long-word-at-the-end
xmlformat-1.04/tests/Notes 0000644 0001750 0001750 00000000735 10002420647 014446 0 ustar paul devel For each test x, there are several files:
x.conf - the configuration file containing the formatting options
x.xml - the input XML document
x.res - the expected result from xmlformat
To run the test, type "./runtest x" in the parent directory.
This will generate output into the file x.out, compare it to
x.res, and display a diff if they are different. To run all
the tests, type "./runtest all".
runtest uses xmlformat.pl by default. To use xmlformat.rb,
use runtest -r.
xmlformat-1.04/tests/wrap1.xml 0000644 0001750 0001750 00000000621 07774647567 015243 0 ustar paul devel
A paragraph with several lines.
A paragraph with several lines.
A paragraph with several lines.
A paragraph with several lines.
A paragraph with several lines.
A paragraph with several lines.
A paragraph with several lines.
A paragraph with several lines.
A paragraph with several lines.
A paragraph with several lines.
xmlformat-1.04/tests/break2.conf 0000644 0001750 0001750 00000000411 10002341015 015426 0 ustar paul devel *DEFAULT
subindent 0
norm-elt
format block
normalize yes
entry-break 1
element-break 1
exit-break 1
non-norm-elt
format block
normalize no
programlisting
format verbatim
xmlformat-1.04/tests/wrap2.xml 0000644 0001750 0001750 00000000615 07777711117 015231 0 ustar paul devel
A paragraph with several lines.
A paragraph with several lines.
A paragraph with several lines.
A paragraph with several lines.
A paragraph with several lines.
A paragraph with several lines.
A paragraph with several lines.
A paragraph with several lines.
A paragraph with several lines.
A paragraph with several lines.
xmlformat-1.04/tests/wrap3.xml 0000644 0001750 0001750 00000000624 10000621516 015202 0 ustar paul devel
this-is-a-very-long-word-at-the-beginning short short shortshort this-is-a-very-long-word-in-the-middle short short shortshort short short short this-is-a-very-long-word-at-the-end
xmlformat-1.04/tests/table2.conf 0000644 0001750 0001750 00000000526 10002341015 015440 0 ustar paul devel # Begin each row on new line, format cells within row on same line.
# Indent rows within table by 2 spaces.
table row cell
format block
normalize no
table
entry-break 1
element-break 1
exit-break 1
subindent 2
row cell
entry-break 0
element-break 0
exit-break 0
xmlformat-1.04/tests/break1.res 0000644 0001750 0001750 00000000333 10000013236 015273 0 ustar paul devel
This is a paragraph.
This is a paragraph.
xmlformat-1.04/tests/break2.res 0000644 0001750 0001750 00000001302 10000013236 015271 0 ustar paul devel
This is text with a in
the middle.This is text with a
verbatim
element in
the middle.
This is text with a
in the middle.
This is text with a
verbatim
element
in the middle.
xmlformat-1.04/tests/norm.res 0000644 0001750 0001750 00000001374 07777362470 015151 0 ustar paul devel
This text should have all surrounding whitespace removedThis text should have leading/trailing whitespace removed, but leave a space around the literal element>This text should have leading/trailing whitespace removed
and also spaces adjacent
to the programlisting.The whitespace around
should not be removed entirely.
xmlformat-1.04/tests/break1.xml 0000644 0001750 0001750 00000000300 10000013236 015274 0 ustar paul devel This is a paragraph.This is a paragraph.
xmlformat-1.04/tests/break2.xml 0000644 0001750 0001750 00000001300 10000013236 015276 0 ustar paul devel
This is text with a in
the middle.This is text with a
verbatim
element in
the middle.This is text with a in
the middle.This is text with a
verbatim
element in
the middle.
xmlformat-1.04/tests/subelt4.conf 0000644 0001750 0001750 00000000440 10002341015 015644 0 ustar paul devel # entry/exit break of 1, element break of 2, no subindent
# Each subelt will appear on its own line between elt opening/closing tags,
# with a blank line between them.
elt
format block
entry-break 1
exit-break 1
element-break 2
subindent 0
xmlformat-1.04/tests/subelt1.res 0000644 0001750 0001750 00000000047 07775575712 015554 0 ustar paul devel
xmlformat-1.04/tests/subelt2.res 0000644 0001750 0001750 00000000051 07775575712 015550 0 ustar paul devel
xmlformat-1.04/tests/subelt3.res 0000644 0001750 0001750 00000000053 07775575712 015553 0 ustar paul devel
xmlformat-1.04/tests/subelt4.res 0000644 0001750 0001750 00000000055 07775575712 015556 0 ustar paul devel
xmlformat-1.04/tests/norm.xml 0000644 0001750 0001750 00000001425 07777362470 015155 0 ustar paul devel
This text should have all surrounding whitespace removed
This text should have leading/trailing
whitespace
removed, but leave a space around the literal element>
This text should have leading/trailing whitespace removed
and also spaces adjacent
to the programlisting.
The whitespace around should not be removed entirely.
xmlformat-1.04/tests/subelt5.res 0000644 0001750 0001750 00000000057 07775575712 015561 0 ustar paul devel
xmlformat-1.04/tests/subelt6.res 0000644 0001750 0001750 00000000055 07775641561 015554 0 ustar paul devel
xmlformat-1.04/tests/subelt7.res 0000644 0001750 0001750 00000000071 07775577475 015567 0 ustar paul devel
xmlformat-1.04/tests/subelt1.xml 0000644 0001750 0001750 00000000047 07775575712 015563 0 ustar paul devel
xmlformat-1.04/tests/subelt2.xml 0000644 0001750 0001750 00000000047 07775575712 015564 0 ustar paul devel
xmlformat-1.04/tests/wrap2.conf 0000644 0001750 0001750 00000001041 10002341015 015313 0 ustar paul devel #Two types of paragraphs.
# Both are listed as line-wrapped, but one is normalized and the other
# is not.
# - This tests the behavior that line-wrapping should be ignored
# in non-normalized paragraphs.
# - It also tests that no extra break should be added around
# non-normalized text.
*DEFAULT
format block
entry-break 1
element-break 1
exit-break 1
subindent 0
para-with-wrap
normalize yes
wrap-length 60
para-without-wrap
normalize no
wrap-length 60
xmlformat-1.04/tests/subelt3.xml 0000644 0001750 0001750 00000000047 07775575712 015565 0 ustar paul devel
xmlformat-1.04/tests/subelt4.xml 0000644 0001750 0001750 00000000047 07775575712 015566 0 ustar paul devel
xmlformat-1.04/tests/subelt5.xml 0000644 0001750 0001750 00000000047 07775575712 015567 0 ustar paul devel
xmlformat-1.04/tests/subelt6.xml 0000644 0001750 0001750 00000000047 07775641561 015564 0 ustar paul devel
xmlformat-1.04/tests/subelt7.xml 0000644 0001750 0001750 00000000047 07775577475 015601 0 ustar paul devel
xmlformat-1.04/tests/inline3.conf 0000644 0001750 0001750 00000000753 10002341015 015632 0 ustar paul devel # Options for top-level document children
*DOCUMENT
format block
subindent 0
entry-break 0
exit-break 1
element-break 2
normalize no
# Default options
*DEFAULT
format block
subindent 0
entry-break 0
exit-break 0
element-break 0
normalize yes
wrap-length 0
para
format block
normalize yes
wrap-length 60
inline
format inline
xmlformat-1.04/tests/indent1.conf 0000644 0001750 0001750 00000000410 10002341015 015621 0 ustar paul devel *DEFAULT
format block
entry-break 1
element-break 1
exit-break 1
subindent 0
para
entry-break 0
element-break 0
exit-break 0
normalize yes
wrap-length 40
subindent 2
xmlformat-1.04/tests/test1.res 0000644 0001750 0001750 00000000153 07774643440 015224 0 ustar paul devel
para with
para with
xmlformat-1.04/tests/indent1.res 0000644 0001750 0001750 00000001450 10000623676 015510 0 ustar paul devel
This is a line of text in the outer
para. This is a line of text in the
outer para. This is a line of text in
the outer para. This is a line of text
in the outer para.This is a line of text in the inner
para. This is a line of text in the
inner para. This is a line of text
in the inner para. This is a line of
text in the inner para.This is a line of text in the inner
para. This is a line of text in the
inner para. This is a line of text in
the inner para. This is a line of text
in the inner para.
xmlformat-1.04/tests/table3.conf 0000644 0001750 0001750 00000000566 10002341015 015445 0 ustar paul devel # Begin each element on new line, indenting sub elements two spaces.
# Leave cell contents on same line as cell tags
table row cell
format block
normalize no
entry-break 1
element-break 1
exit-break 1
subindent 2
# Override break values for cell
cell
entry-break 0
element-break 0
exit-break 0
xmlformat-1.04/tests/norm.conf 0000644 0001750 0001750 00000001060 10002341015 015234 0 ustar paul devel *DOCUMENT
format block
normalize no
entry-break 0
exit-break 1
element-break 1
subindent 0
wrap-length 0
# Default options
*DEFAULT
format block
subindent 0
entry-break 0
exit-break 1
element-break 1
normalize yes
wrap-length 0
programlisting
format verbatim
para
format block
normalize yes
entry-break 0
element-break 1
exit-break 0
literal
format inline
xmlformat-1.04/tests/test1.xml 0000644 0001750 0001750 00000000134 07774643440 015232 0 ustar paul devel para with para with
xmlformat-1.04/tests/table1.res 0000644 0001750 0001750 00000000172 07774643440 015335 0 ustar paul devel
xmlformat-1.04/tests/indent1.xml 0000644 0001750 0001750 00000001416 10000623676 015521 0 ustar paul devel
This is a line of text in the outer para.
This is a line of text in the outer para.
This is a line of text in the outer para.
This is a line of text in the outer para.
This is a line of text in the inner para.
This is a line of text in the inner para.
This is a line of text in the inner para.
This is a line of text in the inner para.
This is a line of text in the inner para.
This is a line of text in the inner para.
This is a line of text in the inner para.
This is a line of text in the inner para.
xmlformat-1.04/tests/table3.res 0000644 0001750 0001750 00000000245 07774643440 015340 0 ustar paul devel
123456
xmlformat-1.04/tests/subelt1.conf 0000644 0001750 0001750 00000000320 10002341015 015636 0 ustar paul devel # No entry/element/exit break
# (subindent doesn't matter)
# elt and subelts all will appear on a single line
elt
format block
entry-break 0
exit-break 0
element-break 0
xmlformat-1.04/tests/table1.xml 0000644 0001750 0001750 00000000175 07774643440 015347 0 ustar paul devel
123456
xmlformat-1.04/tests/subelt5.conf 0000644 0001750 0001750 00000000576 10002341015 015657 0 ustar paul devel # entry/exit/element break of 2, no subindent
# Each subelt will appear on its own line between elt opening/closing tags,
# with a blank line between them.
# There will also be a blank line between opening tag and subelts,
# and between subelts and closing tag.
elt
format block
entry-break 2
exit-break 2
element-break 2
subindent 0
xmlformat-1.04/tests/table2.xml 0000644 0001750 0001750 00000000175 07774643440 015350 0 ustar paul devel
xmlformat-1.04/ChangeLog 0000644 0001750 0001750 00000002472 10470215222 014043 0 ustar paul devel Version 1.04 (released 2006-08-14)
- Assign each token an input line number and display the line number in
error messages. This provides better information to the user about
the location of problems in input files.
- Print the token stack when an error occurs. This provides some idea of
the context of the element that is malformed or has malformed content.
Version 1.03 (released 2004-03-26)
- In xmlformat.rb, made some changes needed for Ruby 1.8:
- Convert @@xml_spe parsing expression to Regexp with Regexp.new().
scan() method doesn't work with string argument now, apparently.
- In parsing patterns, change literal ] to \\] to suppress warnings
- In xmlformat.pl:
- In parsing patterns, change literal ] to \\]. This isn't actually
necessary, but better preserves parallelism with Ruby version.
Version 1.02 (released 2004-02-06)
- Added --in-place/-i option for in-place reformatting. (Requires named
input file or files.)
- Added --backup/-b option for making backup of each input file (used with
--in-place).
- If multiple input files are named on the command line, they are processed
as separate documents, not as one combined input. (This was necessary
to make --in-place and --backup work correctly.)
- Added a tutorial document.
Version 1.01 (released 2004-01-18)
- Initial public release.
xmlformat-1.04/README 0000644 0001750 0001750 00000002055 10334242051 013145 0 ustar paul devel xmlformat - an XML document formatter
Paul DuBois
paul@kitebird.com
This is the distribution for xmlformat 1.04.
If you find bugs, please let me know.
The current version of xmlformat is always available at:
http://www.kitebird.com/software/
xmlformat is free software, distributed under a BSD-style license.
For specific licensing information, see the LICENSE file.
For installation instructions, see the INSTALL file. xmlformat has two
implementations, one in Ruby and one in Perl. They should produce
identical output in all cases.
Documentation is in the docs subdirectory.
Tests are in the tests directory, though you run them in the main
xmlformat directory:
- To run all tests:
make test
- To run all tests for the Ruby version:
./runtest all
- To run all tests for the Perl version:
./runtest -p all
- To run an individual test for the Ruby version:
./runtest testname
- To run an individual test for the Perl version:
./runtest -p testname
A test name is the name of its .xml file, minus the .xml suffix.
For more information, see tests/Notes.
xmlformat-1.04/bad1.res 0000644 0001750 0001750 00000000235 10334242107 013607 0 ustar paul devel Malformed token at line 3, token 5: