libxml-perl-0.08/ 0040755 0000764 0000764 00000000000 07745275112 012032 5 ustar ken ken libxml-perl-0.08/lib/ 0040755 0000764 0000764 00000000000 07745275112 012600 5 ustar ken ken libxml-perl-0.08/lib/XML/ 0040755 0000764 0000764 00000000000 07745275112 013240 5 ustar ken ken libxml-perl-0.08/lib/XML/Handler/ 0040755 0000764 0000764 00000000000 07745275112 014615 5 ustar ken ken libxml-perl-0.08/lib/XML/Handler/Subs.pm 0100644 0000764 0000764 00000007666 07745275111 016102 0 ustar ken ken #
# Copyright (C) 1999 Ken MacLeod
# XML::Handler::Subs is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# $Id: Subs.pm,v 1.2 1999/12/22 21:15:00 kmacleod Exp $
#
use strict;
package XML::Handler::Subs;
use UNIVERSAL;
use vars qw{ $VERSION };
# will be substituted by make-rel script
$VERSION = "0.08";
# Constructor.  Accepts either a single hash reference (which is
# shallow-copied, so the caller's hash is left untouched) or a flat list
# of key/value pairs, and returns the resulting hash blessed into the
# invoking class.
sub new {
    my ($class, @options) = @_;

    my %fields = (@options == 1) ? %{ $options[0] } : @options;

    return bless \%fields, $class;
}
# PerlSAX start_document handler: resets the element-name and
# element-node stacks so they are empty at the start of each document.
sub start_document {
    my $self = shift;

    $self->{Names} = [];
    return $self->{Nodes} = [];
}
# PerlSAX end_document handler: discards the name and node stacks and
# returns an empty list.
sub end_document {
    my $self = shift;

    delete @{$self}{qw(Names Nodes)};

    return ();
}
# PerlSAX start_element handler.  Pushes the element onto the Names and
# Nodes stacks, then dispatches to a method called "s_<name>" (with every
# character outside [a-zA-Z0-9_] replaced by "_") if the object has one.
# Returns 1 when such a method was called, 0 otherwise.
sub start_element {
    my ($self, $element) = @_;

    push @{ $self->{Names} }, $element->{Name};
    push @{ $self->{Nodes} }, $element;

    (my $method = "s_" . $element->{Name}) =~ s/[^a-zA-Z0-9_]/_/g;

    return 0 unless $self->can($method);

    $self->$method($element);
    return 1;
}
# PerlSAX end_element handler.  Dispatches to a method called "e_<name>"
# (with every character outside [a-zA-Z0-9_] replaced by "_") if the
# object has one, then pops the Names and Nodes stacks.  The stacks are
# popped only after the element method has run, so the method still sees
# the closing element on top.  Returns 1 if a method was called, else 0.
sub end_element {
    my ($self, $element) = @_;

    (my $method = "e_" . $element->{Name}) =~ s/[^a-zA-Z0-9_]/_/g;

    my $handled = $self->can($method) ? 1 : 0;
    $self->$method($element) if $handled;

    pop @{ $self->{Names} };
    pop @{ $self->{Nodes} };

    return $handled;
}
# Returns true if $name equals the name of the innermost currently open
# element, false otherwise.
#
# When no element is open (empty or missing Names stack) or $name is
# undefined, the answer is always false.  The stack is checked first so
# that the lookup neither compares against undef nor creates
# $self->{Names} as a side effect when called outside a document.
sub in_element {
    my ($self, $name) = @_;

    my $names = $self->{Names};
    return '' unless defined $name && $names && @$names;

    return ($names->[-1] eq $name);
}
# Returns how many currently open elements (entries on the Names stack)
# are called $name; 0 when there are none.
sub within_element {
    my ($self, $name) = @_;

    my $hits = 0;
    for my $open_name (@{ $self->{Names} }) {
        next unless $open_name eq $name;
        $hits++;
    }

    return $hits;
}
1;
__END__
=head1 NAME
XML::Handler::Subs - a PerlSAX handler base class for calling user-defined subs
=head1 SYNOPSIS
use XML::Handler::Subs;
package MyHandlers;
use vars qw{ @ISA };
sub s_NAME { my ($self, $element) = @_ };
sub e_NAME { my ($self, $element) = @_ };
$self->{Names}; # an array of names
$self->{Nodes}; # an array of $element nodes
$handler = MyHandlers->new();
$self->in_element($name);
$self->within_element($name);
=head1 DESCRIPTION
C<XML::Handler::Subs> is a base class for PerlSAX handlers.
C<XML::Handler::Subs> is subclassed to implement complete behavior and
to add element-specific handling.
Each time an element starts, a method by that name prefixed with `s_'
is called with the element to be processed. Each time an element
ends, a method with that name prefixed with `e_' is called. Any
special characters in the element name are replaced by underscores.
Subclassing XML::Handler::Subs in this way is similar to
XML::Parser's Subs style.
XML::Handler::Subs maintains a stack of element names,
`C<< $self->{Names} >>', and a stack of element nodes, `C<< $self->{Nodes} >>'
that can be used by subclasses. The current element is pushed on the
stacks before calling an element-name start method and popped off the
stacks after calling the element-name end method. The
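`C<in_element()>' and `C<within_element()>' calls use these stacks.
If the subclass implements `C<start_document()>', `C<end_document()>',
`C<start_element()>', and `C<end_element()>', be sure to use
`C<SUPER::>' to call the superclass methods also. See perlobj(1)
for details on SUPER::. `C<SUPER::start_element()>' and
`C<SUPER::end_element()>' return 1 if an element-name method is
called; they return 0 if no method was called.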
XML::Handler::Subs does not implement any other PerlSAX handlers.
XML::Handler::Subs supports the following methods:
=over 4
=item new( I<OPTIONS> )
A basic `C<new()>' method. `C<new()>' takes a list of key, value
pairs or a hash and creates and returns a hash with those options; the
hash is blessed into the subclass.
=item in_element($name)
Returns true if `C<$name>' is equal to the name of the innermost
currently opened element.
=item within_element($name)
Returns the number of times the `C<$name>' appears in Names.
=back
=head1 AUTHOR
Ken MacLeod, ken@bitsko.slc.ut.us
=head1 SEE ALSO
perl(1), PerlSAX.pod(3)
=cut
libxml-perl-0.08/lib/XML/Handler/CanonXMLWriter.pm 0100644 0000764 0000764 00000007074 07745275111 017773 0 ustar ken ken #
# Copyright (C) 1998, 1999 Ken MacLeod
# XML::Handler::CanonXMLWriter is free software; you can redistribute
# it and/or modify it under the same terms as Perl itself.
#
# $Id: CanonXMLWriter.pm,v 1.2 1999/12/22 21:15:00 kmacleod Exp $
#
use strict;
package XML::Handler::CanonXMLWriter;
use vars qw{ $VERSION %char_entities };
# will be substituted by make-rel script
$VERSION = "0.08";
# Replacement text used by _escape() for each character that must not
# appear literally in the output.  Tab, LF and CR are written as numeric
# character references; the markup-significant characters are written as
# the predefined XML entities.
%char_entities = (
    "\x09" => '&#9;',
    "\x0a" => '&#10;',
    "\x0d" => '&#13;',
    '&'    => '&amp;',
    '<'    => '&lt;',
    '>'    => '&gt;',
    '"'    => '&quot;',
);
# Constructor.  Takes a list of option key/value pairs (IOHandle,
# PrintComments, ...) and returns them as a hash blessed into $class.
sub new {
    my $class   = shift;
    my %options = @_;

    return bless \%options, $class;
}
# PerlSAX start_document handler: starts a fresh, empty buffer of output
# fragments (used when no IOHandle is configured).
sub start_document {
    my ($self, $document) = @_;

    return $self->{_text_array} = [];
}
# PerlSAX end_document handler.  When output went to an IOHandle there is
# nothing to return (empty list).  Otherwise the buffered fragments are
# joined into one string, the buffer is cleared, and the string is
# returned.
sub end_document {
    my ($self, $document) = @_;

    return () if defined $self->{IOHandle};

    my $xml = join '', @{ $self->{_text_array} };
    $self->{_text_array} = undef;

    return $xml;
}
# PerlSAX start_element handler: writes the start tag.  Attributes are
# written in sorted name order, each value passed through _escape() and
# wrapped in double quotes.
sub start_element {
    my ($self, $element) = @_;

    $self->_print('<' . $element->{Name});

    my $attributes = $element->{Attributes};
    for my $attr_name (sort keys %$attributes) {
        my $value = $self->_escape($attributes->{$attr_name});
        $self->_print(qq{ $attr_name="$value"});
    }

    $self->_print('>');
}
# PerlSAX end_element handler: writes the end tag "</Name>".
sub end_element {
    my ($self, $element) = @_;

    # The tag must open with "</"; without it the output is not
    # well-formed XML.
    $self->_print('</' . $element->{Name} . '>');
}
# PerlSAX characters handler: writes the character data after escaping.
sub characters {
    my ($self, $characters) = @_;

    my $escaped = $self->_escape($characters->{Data});
    $self->_print($escaped);
}
# PerlSAX ignorable_whitespace handler: treated exactly like character
# data, i.e. escaped and written out.
sub ignorable_whitespace {
    my ($self, $characters) = @_;

    my $escaped = $self->_escape($characters->{Data});
    $self->_print($escaped);
}
# PerlSAX processing_instruction handler: writes "<?Target Data?>".
sub processing_instruction {
    my ($self, $pi) = @_;

    # A processing instruction must open with "<?"; the target and data
    # are separated by a single space.
    $self->_print('<?' . $pi->{Target} . ' ' . $pi->{Data} . '?>');
}
# Entity events produce no output: entities don't occur in text.
sub entity {
    return;
}
# PerlSAX comment handler.  Comments are written as "<!--Data-->" only
# when the PrintComments option is true; otherwise nothing is written and
# an empty list is returned.
sub comment {
    my ($self, $comment) = @_;

    return () unless $self->{PrintComments};

    # The comment text is emitted verbatim between the delimiters, with
    # no added padding.
    $self->_print('<!--' . $comment->{Data} . '-->');
}
# Internal output routine.  With an IOHandle configured the string is
# printed to it and an empty list is returned; otherwise the string is
# appended to the in-memory fragment buffer.
sub _print {
    my ($self, $string) = @_;

    unless (defined $self->{IOHandle}) {
        return push @{ $self->{_text_array} }, $string;
    }

    $self->{IOHandle}->print($string);
    return ();
}
# Returns a copy of $string in which every tab, LF, CR, '&', '<', '>'
# and '"' has been replaced by its entry in %char_entities.
sub _escape {
    my ($self, $string) = @_;

    $string =~ s/([\x09\x0a\x0d&<>"])/$char_entities{$1}/ge;

    return $string;
}
1;
__END__
=head1 NAME
XML::Handler::CanonXMLWriter - output XML in canonical XML format
=head1 SYNOPSIS
use XML::Handler::CanonXMLWriter;
$writer = XML::Handler::CanonXMLWriter OPTIONS;
$parser->parse(Handler => $writer);
=head1 DESCRIPTION
C<XML::Handler::CanonXMLWriter> is a PerlSAX handler that will return
a string or write a stream of canonical XML for an XML instance and its
content.
C<XML::Handler::CanonXMLWriter> objects hold the options used for
writing the XML objects. Options can be supplied when the object
is created,
$writer = new XML::Handler::CanonXMLWriter PrintComments => 1;
or modified at any time before calling the parser's `C<parse()>' method:
$writer->{PrintComments} = 0;
=head1 OPTIONS
=over 4
=item IOHandle
IOHandle contains a handle for writing the canonical XML to. If an
IOHandle is not provided, the canonical XML string will be returned
from `C<parse()>'.
=item PrintComments
By default comments are not written to the output. Setting PrintComments
to a true value will include comments in the output.
=back
=head1 AUTHOR
Ken MacLeod, ken@bitsko.slc.ut.us
=head1 SEE ALSO
perl(1), PerlSAX
James Clark's Canonical XML definition
=cut
libxml-perl-0.08/lib/XML/Handler/Sample.pm 0100644 0000764 0000764 00000006151 07745275111 016373 0 ustar ken ken # This template file is in the Public Domain.
# You may do anything you want with this file.
#
# $Id: Sample.pm,v 1.4 1999/08/16 16:04:03 kmacleod Exp $
#
package XML::Handler::Sample;
use vars qw{ $AUTOLOAD };
# Constructor.  A single argument is taken to be a hash reference and is
# blessed as-is (it is NOT copied); any other argument list is treated as
# key/value pairs for a new hash.
sub new {
    my ($type, @args) = @_;

    my $self = (@args == 1) ? $args[0] : { @args };

    return bless $self, $type;
}
# Basic PerlSAX
# Each basic PerlSAX handler just prints the name of its own event.
sub start_document {
    print "start_document\n";
}

sub end_document {
    print "end_document\n";
}

sub start_element {
    print "start_element\n";
}

sub end_element {
    print "end_element\n";
}

sub characters {
    print "characters\n";
}

sub processing_instruction {
    print "processing_instruction\n";
}

sub ignorable_whitespace {
    print "ignorable_whitespace\n";
}
# Additional expat callbacks in XML::Parser::PerlSAX
# Handlers for the extra expat callbacks; each prints its event name.
sub comment {
    print "comment\n";
}

sub notation_decl {
    print "notation_decl\n";
}

sub unparsed_entity_decl {
    print "unparsed_entity_decl\n";
}

sub entity_decl {
    print "entity_decl\n";
}

sub element_decl {
    print "element_decl\n";
}

sub doctype_decl {
    print "doctype_decl\n";
}

sub xml_decl {
    print "xml_decl\n";
}
# Additional SP/nsgmls callbacks in XML::ESISParser
# Handlers for the extra SP/nsgmls callbacks.  Like every other handler
# in this module, each one prints the name of the event it received, so
# the trace shows which event actually fired.
sub start_subdoc              { print "start_subdoc\n"; }
sub end_subdoc                { print "end_subdoc\n"; }
sub appinfo                   { print "appinfo\n"; }
sub internal_entity_ref       { print "internal_entity_ref\n"; }
sub external_entity_ref       { print "external_entity_ref\n"; }
sub record_end                { print "record_end\n"; }
sub internal_entity_decl      { print "internal_entity_decl\n"; }
sub external_entity_decl      { print "external_entity_decl\n"; }
sub external_sgml_entity_decl { print "external_sgml_entity_decl\n"; }
sub subdoc_entity_decl        { print "subdoc_entity_decl\n"; }
sub notation                  { print "notation\n"; }
sub error                     { print "error\n"; }
sub conforming                { print "conforming\n"; }
# Others
# Catch-all for events this module has no handler for: prints
# "UNRECOGNIZED <method>".  Object destruction (DESTROY) is ignored
# silently.
sub AUTOLOAD {
    my $self = shift;

    (my $event = $AUTOLOAD) =~ s/.*:://;    # strip the package qualifier

    if ($event eq 'DESTROY') {
        return;
    }

    print "UNRECOGNIZED $event\n";
}
1;
__END__
=head1 NAME
XML::Handler::Sample - a trivial PerlSAX handler
=head1 SYNOPSIS
use XML::Parser::PerlSAX;
use XML::Handler::Sample;
$my_handler = XML::Handler::Sample->new;
XML::Parser::PerlSAX->new->parse(Source => { SystemId => 'REC-xml-19980210.xml' },
Handler => $my_handler);
=head1 DESCRIPTION
C<XML::Handler::Sample> is a trivial PerlSAX handler that prints out
the name of each event it receives. The source for
C<XML::Handler::Sample> lists all the currently known PerlSAX
handler methods.
C<XML::Handler::Sample> is intended for Perl module authors who wish
to look at example PerlSAX handler modules. C<XML::Handler::Sample>
can be used as a template for writing your own PerlSAX handler
modules. C<XML::Handler::Sample> is in the Public Domain and can be
used for any purpose without restriction.
=head1 AUTHOR
Ken MacLeod, ken@bitsko.slc.ut.us
=head1 SEE ALSO
perl(1), PerlSAX.pod(3)
=cut
libxml-perl-0.08/lib/XML/Handler/XMLWriter.pm 0100644 0000764 0000764 00000017660 07745275111 017016 0 ustar ken ken #
# Copyright (C) 1999 Ken MacLeod
# Portions derived from code in XML::Writer by David Megginson
# XML::Handler::XMLWriter is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# $Id: XMLWriter.pm,v 1.2 1999/12/22 21:15:00 kmacleod Exp $
#
use strict;
package XML::Handler::XMLWriter;
use XML::Handler::Subs;
use vars qw{ $VERSION @ISA $escapes };
# will be substituted by make-rel script
$VERSION = "0.08";
@ISA = qw{ XML::Handler::Subs };
# Replacement text for characters that must be escaped in character data
# and attribute values: each maps to its predefined XML entity.
$escapes = {
    '&' => '&amp;',
    '<' => '&lt;',
    '>' => '&gt;',
    '"' => '&quot;',
};
# PerlSAX start_document handler.  After the superclass has reset its
# stacks this selects where output goes:
#   - an explicit Output handle is used if one was given;
#   - otherwise, unless AsString is set, a temporary handle on standard
#     output is opened (it is discarded again at end_document);
#   - with AsString set, output fragments are collected in Strings.
# Finally a newline is written to the chosen destination(s).
sub start_document {
    my ($self, $document) = @_;

    $self->SUPER::start_document($document);

    if (defined $self->{Output}) {
        $self->{Output_} = $self->{Output};
    } elsif (!$self->{AsString}) {
        # create a temporary Output_ in case we're creating a standard
        # output file that we'll delete later.
        require IO::File;
        IO::File->import;
        $self->{Output_} = IO::File->new(">-");
    }

    $self->{Strings} = [] if $self->{AsString};

    $self->print("\n");

    # FIXME support Doctype declarations
}
# PerlSAX end_document handler.  Terminates the output with a newline,
# drops the working output handle, and -- when AsString is defined --
# returns everything collected as one string (undef otherwise).  The
# superclass is given the chance to clean up its stacks before returning.
sub end_document {
    my ($self, $document) = @_;

    if (defined $self->{Output_}) {
        $self->{Output_}->print("\n");
        delete $self->{Output_};
    }

    my $string;
    if (defined $self->{AsString}) {
        my $chunks = delete $self->{Strings};
        $string = join '', @{ $chunks || [] }, "\n";
    }

    $self->SUPER::end_document($document);

    return $string;
}
# PerlSAX start_element handler.  The superclass maintains the stacks and
# calls an "s_<name>" method if the subclass defines one; only when no
# such method handled the element is the start tag printed by default.
sub start_element {
    my ($self, $element) = @_;

    my $handled = $self->SUPER::start_element($element);
    $self->print_start_element($element) if $handled == 0;
}
# Prints the start tag for $element: the name, then the attributes in
# sorted name order with &, <, > and " in their values replaced via
# $escapes.  With the Newlines option set, a newline is inserted just
# before the closing ">".
sub print_start_element {
    my ($self, $element) = @_;

    my @parts = ("<$element->{Name}");

    if (defined $element->{Attributes}) {
        for my $attr (sort keys %{ $element->{Attributes} }) {
            (my $value = $element->{Attributes}{$attr})
                =~ s/([\&\<\>\"])/$escapes->{$1}/ge;
            push @parts, " $attr=\"$value\"";
        }
    }

    push @parts, "\n" if $self->{Newlines};
    push @parts, ">";

    $self->print(join '', @parts);
}
# PerlSAX end_element handler.  The superclass calls an "e_<name>" method
# if the subclass defines one and pops the stacks; only when no such
# method handled the element is the end tag printed by default.
sub end_element {
    my ($self, $element) = @_;

    my $handled = $self->SUPER::end_element($element);
    $self->print_end_element($element) if $handled == 0;
}
# Prints the end tag "</Name>" for $element.  With the Newlines option
# set, a newline is inserted just before the closing ">".
sub print_end_element {
    my ($self, $element) = @_;

    # The tag must open with "</"; without it the output is not
    # well-formed XML.
    my $output = "</$element->{Name}"
        . ($self->{Newlines} ? "\n" : "") . ">";

    $self->print($output);
}
# PerlSAX characters handler: prints the character data with &, < and >
# replaced via $escapes (double quotes are left alone in text).
sub characters {
    my ($self, $characters) = @_;

    (my $text = $characters->{Data}) =~ s/([\&\<\>])/$escapes->{$1}/ge;

    $self->print($text);
}
# PerlSAX processing_instruction handler.
#
#   XML mode:  "<?Target Data?>", or "<?Target?>" when there is no data.
#              A newline follows only when the PI is outside any element
#              (the Names stack is empty).
#   SGML mode: (IsSGML true) "<?Data>" followed by a newline.
sub processing_instruction {
    my ($self, $pi) = @_;

    my $nl = ($#{$self->{Names}} == -1) ? "\n" : "";

    my $output;
    if ($self->{IsSGML}) {
        $output = "<?$pi->{Data}>\n";
    } elsif (defined $pi->{Data} && length $pi->{Data}) {
        # Test for presence rather than truth so that data consisting
        # of "0" is not silently dropped.
        $output = "<?$pi->{Target} $pi->{Data}?>$nl";
    } else {
        $output = "<?$pi->{Target}?>$nl";
    }

    $self->print($output);
}
# PerlSAX ignorable_whitespace handler: the whitespace is passed through
# to the output unchanged.
sub ignorable_whitespace {
    my $self       = shift;
    my $whitespace = shift;

    $self->print($whitespace->{Data});
}
# PerlSAX comment handler: prints "<!--Data-->".  A newline follows only
# when the comment is outside any element (the Names stack is empty).
sub comment {
    my ($self, $comment) = @_;

    my $nl = ($#{$self->{Names}} == -1) ? "\n" : "";

    # The comment text is emitted verbatim between the delimiters so the
    # comment's content is preserved exactly.
    my $output = "<!--$comment->{Data}-->$nl";

    $self->print($output);
}
# Writes $output to the working output handle (if there is one) and/or
# appends it to the Strings buffer (if AsString is defined).
sub print {
    my ($self, $output) = @_;

    if (defined $self->{Output_}) {
        $self->{Output_}->print($output);
    }

    defined($self->{AsString})
        and push @{ $self->{Strings} }, $output;
}
1;
__END__
=head1 NAME
XML::Handler::XMLWriter - a PerlSAX handler for writing readable XML
=head1 SYNOPSIS
use XML::Parser::PerlSAX;
use XML::Handler::XMLWriter;
$my_handler = XML::Handler::XMLWriter->new( I<OPTIONS> );
XML::Parser::PerlSAX->new->parse(Source => { SystemId => 'REC-xml-19980210.xml' },
Handler => $my_handler);
=head1 DESCRIPTION
C<XML::Handler::XMLWriter> is a PerlSAX handler for writing readable
XML (in contrast to Canonical XML, for example).
XML::Handler::XMLWriter can be used with a parser to reformat XML,
with XML::DOM or XML::Grove to write out XML, or with other PerlSAX
modules that generate events.
C<XML::Handler::XMLWriter> is intended to be used with PerlSAX event
generators and does not perform any checking itself (for example,
matching start and end element events). If you want to generate XML
directly from your Perl code, use the XML::Writer module. XML::Writer
has an easy to use interface and performs many checks to make sure
that the XML you generate is well-formed.
C<XML::Handler::XMLWriter> is a subclass of C<XML::Handler::Subs>.
C<XML::Handler::XMLWriter> can be further subclassed to alter its
behavior or to add element-specific handling. In the subclass, each
time an element starts, a method by that name prefixed with `s_' is
called with the element to be processed. Each time an element ends, a
method with that name prefixed with `e_' is called. Any special
characters in the element name are replaced by underscores. If there
isn't a start or end method for an element, the default action is to
write the start or end tag. Start and end methods can use the
`C<print_start_element()>' and `C<print_end_element()>' methods to
print start or end tags. Subclasses can call the `C<print()>' method
to write additional output.
Subclassing XML::Handler::XMLWriter in this way is similar to
XML::Parser's Stream style.
XML::Handler::Subs maintains a stack of element names,
`C<< $self->{Names} >>', and a stack of element nodes, `C<< $self->{Nodes} >>'
that can be used by subclasses. The current element is pushed on the
stacks before calling an element-name start method and popped off the
stacks after calling the element-name end method.
See XML::Handler::Subs for additional methods.
In addition to the standard PerlSAX handler methods (see PerlSAX for
descriptions), XML::Handler::XMLWriter supports the following methods:
=over 4
=item new( I<OPTIONS> )
Creates and returns a new instance of XML::Handler::XMLWriter with the
given I<OPTIONS>. Options may be changed at any time by modifying
them directly in the hash returned. I<OPTIONS> can be a list of key,
value pairs or a hash. The following I<OPTIONS> are supported:
=over 4
=item Output
An IO::Handle or one of its subclasses (such as IO::File), if this
parameter is not present and the AsString option is not used, the
module will write to standard output.
=item AsString
Return the generated XML as a string from the `C<parse()>' method of
the PerlSAX event generator.
=item Newlines
A true or false value; if this parameter is present and its value is
true, then the module will insert an extra newline before the closing
delimiter of start, end, and empty tags to guarantee that the document
does not end up as a single, long line. If the parameter is not
present, the module will not insert the newlines.
=item IsSGML
A true or false value; if this parameter is present and its value is
true, then the module will generate SGML rather than XML.
=back
=item print_start_element($element)
Print a start tag for `C<$element>'. This is the default action for
the PerlSAX `C<start_element()>' handler, but subclasses may use this
if they define a start method for an element.
=item print_end_element($element)
Prints an end tag for `C<$element>'. This is the default action for
the PerlSAX `C<end_element()>' handler, but subclasses may use this
if they define an end method for an element.
=item print($output)
Write `C<$output>' to Output and/or append it to the string to be
returned. Subclasses may use this to write additional output.
=back
=head1 TODO
=over 4
=item *
An Elements option that provides finer control over newlines than the
Newlines option, where you can choose before and after newline for
element start and end tags. Inspired by the Python XMLWriter.
=item *
Support Doctype and XML declarations.
=back
=head1 AUTHOR
Ken MacLeod, ken@bitsko.slc.ut.us
This module is partially derived from XML::Writer by David Megginson.
=head1 SEE ALSO
perl(1), PerlSAX.pod(3)
=cut
libxml-perl-0.08/lib/XML/PatAct/ 0040755 0000764 0000764 00000000000 07745275112 014414 5 ustar ken ken libxml-perl-0.08/lib/XML/PatAct/ActionTempl.pm 0100644 0000764 0000764 00000006143 07745275111 017171 0 ustar ken ken # This template file is in the Public Domain.
# You may do anything you want with this file.
#
# $Id: ActionTempl.pm,v 1.2 1999/08/16 16:04:03 kmacleod Exp $
#
# replace all occurrences of ACTION with the name of your module!
use strict;
use UNIVERSAL;
package XML::PatAct::ACTION;
# Constructor.  Accepts a hash reference (shallow-copied) or a list of
# key/value pairs.  Dies with a usage message unless both the Matcher and
# the Patterns options are defined.
sub new {
    my ($type, @args) = @_;

    my $self = bless(((@args == 1) ? { %{ $args[0] } } : { @args }), $type);

    my $usage = <<'EOF';
usage: XML::PatAct::ACTION->new( Matcher => $matcher,
Patterns => $patterns );
EOF

    defined $self->{Matcher}
        or die "No Matcher specified\n$usage\n";
    defined $self->{Patterns}
        or die "No Patterns specified\n$usage\n";

    # perform additional initialization here

    return $self;
}
# PerlSAX start_document handler: hands this object to the matcher's
# initialize(), starts empty name and node lists for later match() calls,
# and records whether the document object is a Data::Grove.
sub start_document {
    my ($self, $document) = @_;

    # initialize the pattern module at the start of a document
    $self->{Matcher}->initialize($self);

    # create empty name and node lists for passing to `match()'
    @{$self}{qw(Names Nodes)} = ([ ], [ ]);

    # Knowing that a source is a tree can be useful information
    $self->{SourceIsGrove} = UNIVERSAL::isa($document, 'Data::Grove');
}
# PerlSAX end_document handler: tells the matcher the document is done,
# clears the per-document state and returns $value (the result handed
# back from `parse()'; undefined in this template).
sub end_document {
    my ($self, $document) = @_;

    # notify the pattern module that we're done
    $self->{Matcher}->finalize();

    my $value;

    # perform any finalization actions, use $value to return a result
    # from calling `parse()'

    # release all the info that is just used during event handling
    $self->{$_} = undef for qw(Nodes Names Matcher SourceIsGrove);

    return $value;
}
# PerlSAX start_element handler: pushes the element onto the name and
# node lists and asks the matcher which pattern (if any) applies.
sub start_element {
    my $self    = shift;
    my $element = shift;

    push @{ $self->{Names} }, $element->{Name};
    push @{ $self->{Nodes} }, $element;

    my $index = $self->{Matcher}->match($element,
                                        @{$self}{qw(Names Nodes)});

    # use $index to retrieve an action for this element
}
# PerlSAX end_element handler: pops the name and node pushed by the
# matching start_element.
sub end_element {
    my $self        = shift;
    my $end_element = shift;

    my $name    = pop @{ $self->{Names} };
    my $element = pop @{ $self->{Nodes} };

    # perform any finishing steps at the end of an element
}
# Template stub for the PerlSAX characters event: unpacks its arguments
# and does nothing else.  Fill in as needed.
sub characters {
my ($self, $characters) = @_;
}
# Template stub for the PerlSAX processing_instruction event: unpacks
# its arguments and does nothing else.  Fill in as needed.
sub processing_instruction {
my ($self, $pi) = @_;
}
# Template stub for the PerlSAX ignorable_whitespace event: unpacks its
# arguments and does nothing else.  Fill in as needed.
sub ignorable_whitespace {
my ($self, $characters) = @_;
}
1;
__END__
=head1 NAME
XML::PatAct::ACTION - An action module for
=head1 SYNOPSIS
use XML::PatAct::ACTION;
my $patterns = [ PATTERN => ACTION,
... ];
my $matcher = XML::PatAct::ACTION->new(Patterns => $patterns,
Matcher => $matcher );
=head1 DESCRIPTION
XML::PatAct::ACTION is a PerlSAX handler for applying pattern-action
lists to XML parses or trees. XML::PatAct::ACTION ...
New XML::PatAct::ACTION instances are created by calling `new()'.
Parameters can be passed as a list of key, value pairs or a hash.
The Patterns and Matcher options are required. Patterns is the
pattern-action list to apply. Matcher is an instance of the pattern
or query matching module.
DESCRIBE THE FORMAT OF YOUR ACTIONS HERE
=head1 AUTHOR
This template file was written by Ken MacLeod, ken@bitsko.slc.ut.us
=head1 SEE ALSO
perl(1)
``Using PatAct Modules'' and ``Creating PatAct Modules'' in libxml-perl.
=cut
libxml-perl-0.08/lib/XML/PatAct/PatternTempl.pm 0100644 0000764 0000764 00000003015 07745275111 017364 0 ustar ken ken # This template file is in the Public Domain.
# You may do anything you want with this file.
#
# $Id: PatternTempl.pm,v 1.2 1999/08/16 16:04:03 kmacleod Exp $
#
# replace all occurrences of PATTERN with the name of your module!
use strict;
package XML::PatAct::PATTERN;
# Constructor.  Accepts a hash reference (shallow-copied) or a list of
# key/value pairs and returns the options blessed into the class.
sub new {
    my ($type, @args) = @_;

    my %options = (@args == 1) ? %{ $args[0] } : @args;

    # perform any one-time initializations

    return bless \%options, $type;
}
# Called by the action module at the start of each document; remembers
# the calling driver.
sub initialize {
    my $self = shift;

    $self->{Driver} = shift;

    # perform initializations for each XML instance
}
# Called by the action module at the end of each document; forgets the
# driver recorded by initialize().
sub finalize {
    my ($self) = @_;

    # clean up any state information
    $self->{Driver} = undef;
}
# Template match routine.  Receives the current element plus the lists
# of open element names and nodes; this stub never matches and always
# returns undef.
sub match {
    my $self = shift;
    my ($element, $names, $nodes) = @_;

    # Use the Patterns list to match a pattern

    return undef;
}
1;
__END__
=head1 NAME
XML::PatAct::PATTERN - A pattern module for
=head1 SYNOPSIS
use XML::PatAct::PATTERN;
my $patterns = [ PATTERN => ACTION,
... ]
my $matcher = XML::PatAct::PATTERN->new( Patterns => $patterns );
=head1 DESCRIPTION
XML::PatAct::PATTERN is a pattern module for use with PatAct action
modules for applying pattern-action lists to XML parses or trees.
XML::PatAct::PATTERN ...
Parameters can be passed as a list of key, value pairs or a hash.
DESCRIBE THE FORMAT OR LANGUAGE OF YOUR PATTERNS HERE
=head1 AUTHOR
This template file was written by Ken MacLeod, ken@bitsko.slc.ut.us
=head1 SEE ALSO
perl(1)
``Using PatAct Modules'' and ``Creating PatAct Modules'' in libxml-perl.
=cut
libxml-perl-0.08/lib/XML/PatAct/Amsterdam.pm 0100644 0000764 0000764 00000012627 07745275111 016673 0 ustar ken ken #
# Copyright (C) 1999 Ken MacLeod
# XML::PatAct::Amsterdam is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# $Id: Amsterdam.pm,v 1.4 1999/12/22 21:15:00 kmacleod Exp $
#
use strict;
use UNIVERSAL;
package XML::PatAct::Amsterdam;
use vars qw{ $VERSION };
# will be substituted by make-rel script
$VERSION = "0.08";
# Constructor.  Accepts a hash reference (shallow-copied) or a list of
# key/value pairs.  Dies with a usage message unless both the Matcher and
# the Patterns options are defined.
sub new {
    my ($type, @args) = @_;

    my $self = bless(((@args == 1) ? { %{ $args[0] } } : { @args }), $type);

    my $usage = <<'EOF';
usage: XML::PatAct::Amsterdam->new( Matcher => $matcher,
Patterns => $patterns );
EOF

    defined $self->{Matcher}
        or die "No Matcher specified\n$usage\n";
    defined $self->{Patterns}
        or die "No Patterns specified\n$usage\n";

    # perform additional initialization here

    return $self;
}
# PerlSAX start_document handler.  Initializes the matcher, starts empty
# name, node and action stacks, and selects where output goes:
#   - an explicit Output handle is used if one was given;
#   - otherwise, unless AsString is set, a temporary handle on standard
#     output is opened (it is discarded again at end_document);
#   - with AsString set, output fragments are collected in Strings.
sub start_document {
    my ($self, $document) = @_;

    # initialize the pattern module at the start of a document
    $self->{Matcher}->initialize($self);

    # create empty name and node lists for passing to `match()'
    @{$self}{qw(Names Nodes ActionStack)} = ([ ], [ ], [ ]);

    if (defined $self->{Output}) {
        $self->{Output_} = $self->{Output};
    } elsif (!$self->{AsString}) {
        # create a temporary Output_ in case we're creating a standard
        # output file that we'll delete later.
        require IO::File;
        IO::File->import;
        $self->{Output_} = IO::File->new(">-");
    }

    $self->{AsString} and $self->{Strings} = [];
}
# PerlSAX end_document handler.  Finalizes the matcher, drops the working
# output handle, clears the per-document state, and -- when AsString is
# defined -- returns everything collected as one string (undef
# otherwise).
sub end_document {
    my ($self, $document) = @_;

    # notify the pattern module that we're done
    $self->{Matcher}->finalize();

    delete $self->{Output_} if defined $self->{Output_};

    my $string;
    if (defined $self->{AsString}) {
        $string = join '', @{ $self->{Strings} };
        delete $self->{Strings};
    }

    # release all the info that is just used during event handling
    $self->{$_} = undef for qw(Nodes Names Matcher ActionStack);

    return $string;
}
# PerlSAX start_element handler.  Pushes the element onto the stacks,
# asks the matcher for the index of the applicable pattern, and looks up
# the action paired with that pattern (the Patterns list alternates
# pattern, action).  The action -- possibly undef -- is remembered on
# ActionStack for end_element.  If the action has a Before string, each
# "[name]" in it is replaced by the attribute of that name ("[_element]"
# by the element name) and the result is printed.
sub start_element {
    my ($self, $element) = @_;

    push @{ $self->{Names} }, $element->{Name};
    push @{ $self->{Nodes} }, $element;

    my $index = $self->{Matcher}->match($element,
                                        $self->{Names},
                                        $self->{Nodes});

    my $action = defined($index)
        ? $self->{Patterns}[$index * 2 + 1]
        : undef;
    push @{ $self->{ActionStack} }, $action;

    if (defined($action)) {
        my $before = $action->{Before};
        if (defined $before) {
            my $atts = $element->{Attributes};
            $before =~ s/\[([\w.:]+)\]/($1 eq '_element') ? $element->{Name} : $atts->{$1}/eg;
            $self->print($before);
        }
    }
}
# PerlSAX end_element handler.  Pops the name, node and action pushed by
# the matching start_element.  If the action has an After string, each
# "[name]" in it is replaced by the attribute of that name ("[_element]"
# by the element name) -- taken from the element seen at start time --
# and the result is printed.
sub end_element {
    my ($self, $end_element) = @_;

    my $name    = pop @{ $self->{Names} };
    my $element = pop @{ $self->{Nodes} };
    my $action  = pop @{ $self->{ActionStack} };

    if (defined($action)) {
        my $after = $action->{After};
        if (defined $after) {
            my $atts = $element->{Attributes};
            $after =~ s/\[([\w.:]+)\]/($1 eq '_element') ? $element->{Name} : $atts->{$1}/eg;
            $self->print($after);
        }
    }
}
# PerlSAX characters handler: character data is copied to the output
# unchanged.
sub characters {
    my $self       = shift;
    my $characters = shift;

    $self->print($characters->{Data});
}
# Writes $output to the working output handle (if there is one) and/or
# appends it to the Strings buffer (if AsString is defined).
sub print {
    my ($self, $output) = @_;

    if (defined $self->{Output_}) {
        $self->{Output_}->print($output);
    }

    defined($self->{AsString})
        and push @{ $self->{Strings} }, $output;
}
1;
__END__
=head1 NAME
XML::PatAct::Amsterdam - An action module for simplistic style-sheets
=head1 SYNOPSIS
use XML::PatAct::Amsterdam;
my $patterns = [ PATTERN => { Before => 'before',
After => 'after' },
... ];
my $matcher = XML::PatAct::Amsterdam->new( I<OPTIONS> );
=head1 DESCRIPTION
XML::PatAct::Amsterdam is a PerlSAX handler for applying
pattern-action lists to XML parses or trees. XML::PatAct::Amsterdam
applies a very simple style sheet to an instance and outputs the
result. Amsterdam gets its name from the Amsterdam SGML Parser (ASP)
which inspired this module.
CAUTION: Amsterdam is a very simple style module, you will run into
its limitations quickly with even moderately complex XML instances,
be aware of and prepared to switch to more complete style modules.
New XML::PatAct::Amsterdam instances are created by calling `new()'.
Parameters can be passed as a list of key, value pairs or a hash.
The Patterns and Matcher options are required. The following I<OPTIONS>
are supported:
=over 4
=item Patterns
The pattern-action list to apply. The list is an anonymous array of
pattern, action pairs. Each action in the list contains either or
both a Before and an After string to copy to the output before and
after processing an XML element. The Before and After strings may
contain attribute names enclosed in square brackets (`C<[>' I<NAME>
`C<]>'), these are replaced with the value of the attribute with that
name. The special I<NAME> `C<_element>' will be replaced with the
element's name.
=item Matcher
An instance of the pattern or query matching module.
=item Output
An IO::Handle or one of its subclasses (such as IO::File), if this
parameter is not present and the AsString option is not used, the
module will write to standard output.
=item AsString
Return the generated output as a string from the `C<parse()>' method
of the PerlSAX event generator.
=back
=head1 AUTHOR
Ken MacLeod, ken@bitsko.slc.ut.us
=head1 SEE ALSO
perl(1)
``Using PatAct Modules'' and ``Creating PatAct Modules'' in libxml-perl.
=cut
libxml-perl-0.08/lib/XML/PatAct/MatchName.pm 0100644 0000764 0000764 00000004275 07745275111 016613 0 ustar ken ken #
# Copyright (C) 1999 Ken MacLeod
# XML::PatAct::MatchName is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# $Id: MatchName.pm,v 1.3 1999/12/22 21:15:00 kmacleod Exp $
#
use strict;
package XML::PatAct::MatchName;
use vars qw{ $VERSION };
# will be substituted by make-rel script
$VERSION = "0.08";
# Constructor.  Accepts a hash reference (shallow-copied) or a list of
# key/value pairs and returns the options blessed into the class.
sub new {
    my ($type, @args) = @_;

    my %options = (@args == 1) ? %{ $args[0] } : @args;

    return bless \%options, $type;
}
# This is functionally equivalent to PerlSAX `start_document()'
# Start-of-document hook: remembers the driver (the action module) that
# is using this matcher.
sub initialize {
    my $self = shift;

    $self->{Driver} = shift;
}
# This is functionally equivalent to PerlSAX `end_document()'
# End-of-document hook: forgets the driver recorded by initialize().
sub finalize {
    my ($self) = @_;

    $self->{Driver} = undef;
}
# This is functionally equivalent to a PerlSAX `start_element()'
# Finds the pattern that applies to the current element.
#
#   $element - the element being started (not inspected here)
#   $names   - array ref of open element names, outermost first
#   $nodes   - array ref of open element nodes (not inspected here)
#
# $self->{Patterns} is a flat list alternating pattern, action.  A
# pattern is a "/"-separated run of element names and matches when it
# equals the tail of the current path of names; it is interpolated into
# a regular expression as-is.
#
# Returns the zero-based index of the matching pattern/action pair, or
# undef if nothing matches.  When several patterns match, the most
# specific one -- the one naming the most path components -- wins, so
# the order of the patterns in the list does not matter; among equally
# specific matches the earliest in the list is used.
sub match {
    my ($self, $element, $names, $nodes) = @_;

    my $names_path = '/' . join('/', @$names);
    my $patterns   = $self->{Patterns};

    my ($best_index, $best_depth);
    for (my $ii = 0; $ii <= $#$patterns; $ii += 2) {
        my $pattern = $patterns->[$ii];
        next unless $names_path =~ m|/$pattern$|;

        # number of "/"-separated components named by the pattern
        my $depth = 1 + ($pattern =~ tr{/}{});
        if (!defined $best_depth || $depth > $best_depth) {
            ($best_index, $best_depth) = ($ii / 2, $depth);
        }
    }

    return $best_index;
}
1;
__END__
=head1 NAME
XML::PatAct::MatchName - A pattern module for matching element names
=head1 SYNOPSIS
use XML::PatAct::MatchName;
my $matcher = XML::PatAct::MatchName->new();
my $patterns = [ 'foo' => ACTION,
'bar/foo' => ACTION,
... ];
=head1 DESCRIPTION
XML::PatAct::MatchName is a pattern module for use with PatAct drivers
for applying pattern-action lists to XML parses or trees.
XML::PatAct::MatchName is a simple pattern module that uses just
element names to match on. If multiple names are supplied separated
by `C</>' characters, then all of the parent element names must match
as well.
The order of patterns in the list is not significant.
XML::PatAct::MatchName will use the most specific match. Using the
synopsis above as an example, if you have an element `C<foo>',
`C<bar/foo>' will match if `C<foo>' is in an element `C<bar>',
otherwise just the pattern with `C<foo>' will match.
=head1 AUTHOR
Ken MacLeod, ken@bitsko.slc.ut.us
=head1 SEE ALSO
perl(1)
``Using PatAct Modules'' and ``Creating PatAct Modules'' in libxml-perl.
=cut
libxml-perl-0.08/lib/XML/PatAct/ToObjects.pm 0100644 0000764 0000764 00000036212 07745275111 016646 0 ustar ken ken #
# Copyright (C) 1999 Ken MacLeod
# XML::PatAct::ToObjects is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# $Id: ToObjects.pm,v 1.5 1999/12/22 21:15:00 kmacleod Exp $
#
# The original XML::Grove::ToObjects actually generated and compiled a
# sub for matching actions, possibly a performance improvement of three
# or four times over all the comparisons made in start_element() and
# end_element().
use strict;
use UNIVERSAL;
package XML::PatAct::ToObjects;
use vars qw{ $VERSION $name_re };
# will be substituted by make-rel script
$VERSION = "0.08";
# FIXME I doubt this is a correct Perl RE for productions [4] and
# [5] in the XML 1.0 specification, especially considering Unicode chars
$name_re = '[A-Za-z_:][A-Za-z0-9._:-]*';
# Constructor.  Accepts a hash reference (shallow-copied) or a list of
# key/value pairs.  Dies with a usage message unless both the Matcher and
# the Patterns options are defined.
#
# The Patterns list alternates pattern, action.  Every action that is a
# reference is run through _parse_action() and the result appended to
# Actions; non-reference actions (code fragments) add nothing.
#
# NOTE(review): Actions therefore only lines up index-for-index with the
# pattern pairs when every action is a reference -- confirm against how
# the matcher's index is used.
# NOTE(review): the GroveBuilder option is replaced by a fresh
# XML::Grove::Builder only when the caller already supplied one; confirm
# that this, rather than "create one when absent", is intended.
sub new {
    my ($type, @args) = @_;

    my $self = bless(((@args == 1) ? { %{ $args[0] } } : { @args }), $type);

    my $usage = <<'EOF';
usage: XML::PatAct::ToObjects->new( Matcher => $matcher,
Patterns => $patterns );
EOF

    defined $self->{Matcher}
        or die "No Matcher specified\n$usage\n";
    defined $self->{Patterns}
        or die "No Patterns specified\n$usage\n";

    # Parse action items
    $self->{Actions} = [ ];
    my $patterns = $self->{Patterns};
    for (my $ii = 1; $ii <= $#$patterns; $ii += 2) {
        # anything that is not a reference is a code fragment
        next unless ref $patterns->[$ii];

        push @{ $self->{Actions} },
            $self->_parse_action($patterns->[$ii]);
    }

    if (defined $self->{GroveBuilder}) {
        require XML::Grove::Builder;
        XML::Grove::Builder->import;
        $self->{GroveBuilder} = XML::Grove::Builder->new();
    }

    return $self;
}
# PerlSAX start_document handler.  Initializes the matcher and resets all
# per-document state: the parent stack starts with one holder whose
# Contents list collects the results, the state stack starts in
# 'normal', and the name, node and action stacks start empty.  Unless a
# CharacterDataType was configured, Data::Grove is loaded and
# 'Data::Grove::Characters' is used.
sub start_document {
    my ($self, $document) = @_;

    $self->{Matcher}->initialize($self);

    $self->{Parents}     = [ { Contents => [ ] } ];
    $self->{ActionStack} = [ ];
    $self->{States}      = [ 'normal' ];
    $self->{Document}    = $document;
    @{$self}{qw(Names Nodes)} = ([ ], [ ]);
    $self->{Data}        = undef;

    $self->{SourceIsGrove} = UNIVERSAL::isa($document, 'Data::Grove');

    if (!defined $self->{CharacterDataType}) {
        require Data::Grove;
        Data::Grove->import;
        $self->{CharacterDataType} = 'Data::Grove::Characters';
    }
}
# PerlSAX end_document event: finalize the matcher, drop the per-parse
# bookkeeping (including the matcher itself) and return the array of
# top-level objects that were collected.
sub end_document {
    my ($self, $document) = @_;

    $self->{Matcher}->finalize();

    # FIXME check to make sure no other fields were assigned to
    my $collected = $self->{Parents}[0]{Contents};

    # release all the info that is just used during event handling
    foreach my $field (qw{ ActionStack Parents Matcher
                           Names Document States
                           SourceIsGrove Data Nodes }) {
        $self->{$field} = undef;
    }

    return $collected;
}
# PerlSAX start_element event.
#
# Records the element on the Names/Nodes stacks, asks the matcher which
# pattern (if any) applies, and pushes the corresponding action and a
# processing state for the element.  The state starts as a copy of the
# parent's state and is then refined from the action:
#
#   normal      children are processed by their own actions
#   pcdata      like normal, and character data is kept (-pcdata)
#   discarding  element and children are dropped (-ignore, -value)
#   as-string   character data is gathered into $self->{Data}
#   as-grove    events are forwarded to the grove builder
#
# Actions are only applied when the parent is in `normal' or `pcdata'
# state; inside any other state the element is just tracked.
#
# The lexical names $element and @args must not be renamed: the code
# string built from an action's -args is eval'ed here and refers to
# both of them (see _parse_action()).
sub start_element {
    my ($self, $element) = @_;

    push @{$self->{Names}}, $element->{Name};
    push @{$self->{Nodes}}, $element;

    my $index = $self->{Matcher}->match($element,
                                        $self->{Names},
                                        $self->{Nodes});

    my $action = defined($index) ? $self->{Actions}[$index] : undef;
    push @{$self->{ActionStack}}, $action;

    # the element inherits its parent's state until an action changes it
    my $state = $self->{States}[-1];
    push @{$self->{States}}, $state;

    if (($state eq 'as-grove') and !$self->{SourceIsGrove}) {
        $self->{GroveBuilder}->start_element($element);
    }

    return if (($state ne 'normal') && ($state ne 'pcdata'));

    if (defined($action) and defined($action->{PCData})) {
        $self->{States}[-1] = 'pcdata';
    }

    if (!defined($action) or $action->{Holder}) {
        # ignore this element but continue processing below
        return;
    }

    # -value is tested with defined() to agree with end_element();
    # otherwise a false value such as `-value 0' would fall through
    # and push a Parents entry that end_element() never pops.
    if ($action->{Ignore} or defined($action->{FieldValue})) {
        # ignore (discard) this element and its children
        $self->{States}[-1] = 'discarding';
        return;
    }

    if ($action->{AsString}) {
        $self->{Data} = [ ];
        $self->{States}[-1] = 'as-string';
        return;
    }

    if ($action->{AsGrove}) {
        $self->{States}[-1] = 'as-grove';
        if (!$self->{SourceIsGrove}) {
            $self->{GroveBuilder}->start_document( { } );
            $self->{GroveBuilder}->start_element($element);
        }
        return;
    }

    if (defined $action->{Make}) {
        my @args;
        if (defined $element->{Attributes}) {
            if (defined $self->{CopyAttributes}) {
                push @args, %{$element->{Attributes}};
            } elsif ($self->{CopyId} && defined($element->{Attributes}{ID})) {
                # FIXME use code from XML::Grove::IDs
                push (@args, ID => $element->{Attributes}{ID});
            }
        }

        if (defined $action->{Args}) {
            # the -args text is Perl code written by the pattern author;
            # it sees $element and pushes onto @args
            eval 'push (@args, (' . $action->{Args} . '))';
            if ($@) {
                warn "$@\nwhile processing pattern/action #$index\n";
            }
        }

        if ($action->{Make} eq 'HASH') {
            push @{$self->{Parents}}, { @args };
        } else {
            # The object is always blessed directly; calling
            # $action->{Make}->new(@args) when the package is loaded
            # was planned in the original but never enabled.
            push (@{$self->{Parents}},
                  bless ({ @args }, $action->{Make}));
        }

        if ($action->{ContentsAsGrove}) {
            $self->{States}[-1] = 'as-grove';
            if (!$self->{SourceIsGrove}) {
                $self->{GroveBuilder}->start_document( { } );
            }
        }

        return;
    }

    # Place to store all the rest of gathered contents
    push (@{$self->{Parents}}, { } );
}
# PerlSAX end_element event.
#
# Pops the bookkeeping pushed by start_element() and, if an action was
# applied to the element there, computes the element's value and stores
# it in the enclosing object:
#
#   -as-string       the character data gathered in $self->{Data}
#   -grove           the element itself (grove source) or the element
#                    built by the grove builder
#   -value           the literal value, with `%{NAME}' replaced by the
#                    value of attribute NAME
#   -make            the object created in start_element(), with
#                    Contents replaced by a grove for -grove-contents
#   otherwise        the Contents gathered from the children
#
# The value goes to the parent's -field / -push-field, or onto the
# parent's Contents when no field was named.
sub end_element {
    my ($self, $end_element) = @_;

    pop @{$self->{Names}};
    my $element = pop @{$self->{Nodes}};
    my $action = pop @{$self->{ActionStack}};
    my $state = pop @{$self->{States}};

    if ($state eq 'as-grove' and !$self->{SourceIsGrove}) {
        $self->{GroveBuilder}->end_element($end_element);
    }

    # Mirror the test in start_element(): when the parent is being
    # discarded, stringified or turned into a grove, start_element()
    # only tracked this element and never applied its action, so there
    # is nothing to finish here.  Applying the action anyway would pop
    # or overwrite entries on the Parents stack that belong to an
    # enclosing element.
    my $parent_state = $self->{States}[-1];
    if (defined($parent_state)
        and ($parent_state ne 'normal') and ($parent_state ne 'pcdata')) {
        return;
    }

    if (!defined($action) or $action->{Holder}) {
        return;
    }

    if ($action->{Ignore}) {
        return;
    }

    my $value;
    if ($action->{AsString}) {
        $value = join("", @{$self->{Data}});
    } elsif ($action->{AsGrove}) {
        if ($self->{SourceIsGrove}) {
            $value = $element;
        } else {
            # get just the root element of the document fragment
            $value = $self->{GroveBuilder}->end_document({ })->{Contents}[0];
        }
    } elsif (defined $action->{FieldValue}) {
        $value = $action->{FieldValue};
        $value =~ s/%\{($name_re)\}/$element->{Attributes}{$1}/ge;
    } elsif (defined $action->{Make}) {
        $value = pop @{$self->{Parents}};
        if ($action->{ContentsAsGrove}) {
            if ($self->{SourceIsGrove}) {
                $value->{Contents} = $element->{Contents};
            } else {
                $value->{Contents} =
                    $self->{GroveBuilder}->end_document({ })->{Contents};
            }
        }
    } else {
        $value = pop(@{$self->{Parents}})->{Contents};
    }

    if ($action->{FieldIsArray}) {
        push @{$self->{Parents}[-1]{$action->{Field}}}, $value;
    } elsif (defined $action->{Field}) {
        $self->{Parents}[-1]{$action->{Field}} = $value;
    } else {
        push @{$self->{Parents}[-1]{Contents}}, $value;
    }
}
# PerlSAX characters event.  What happens to the text depends on the
# state of the innermost open element:
#
#   as-string  the text is appended to the pieces in $self->{Data}
#   as-grove   the event is forwarded to the grove builder (unless the
#              source already is a grove)
#   pcdata     a CharacterDataType object is created from the event and
#              added to the current parent's Contents
#
# In every other state the text is dropped.
sub characters {
    my ($self, $characters) = @_;

    my $mode = $self->{States}[-1];

    return push (@{$self->{Data}}, $characters->{Data})
        if ($mode eq 'as-string');

    return $self->{GroveBuilder}->characters($characters)
        if (($mode eq 'as-grove') && !$self->{SourceIsGrove});

    if ($mode eq 'pcdata') {
        push (@{$self->{Parents}[-1]{Contents}},
              $self->{CharacterDataType}->new(%$characters));
    }
}
# we ignore processing instructions and ignorable whitespace by not
# defining those functions
###
### private functions
###
# Convert one action's option list into an action hash.
#
#   $source   array reference holding the options, for example
#             [ qw{ -make Schema::Column -push-field Columns } ]
#
# Returns a hash reference with the keys the event handlers test:
# Holder, Make, Args, Field, FieldIsArray, AsString, FieldValue,
# AsGrove, ContentsAsGrove, Ignore and PCData.  Dies on an unknown
# option.
#
# `%{NAME}' in an -args string is rewritten into Perl code that reads
# attribute NAME from $element; start_element() eval's the result.
# -grove and -grove-contents also set $self->{GroveBuilder} so that
# new() knows a grove builder is needed.
#
# The options are read from a copy of the list.  Shifting them off the
# caller's array would leave the pattern list with empty actions after
# the first handler has been built from it.
sub _parse_action {
    my $self = shift; my $source = shift;

    my @options = @$source;
    my $action = {};

    while (@options) {
        my $option = shift @options;

        if ($option eq '-holder') {
            $action->{Holder} = 1;
        } elsif ($option eq '-make') {
            $action->{Make} = shift @options;
        } elsif ($option eq '-args') {
            my $args = shift @options;
            $args =~ s/%\{($name_re)\}/(\$element->{Attributes}{'$1'})/g;
            $action->{Args} = $args;
        } elsif ($option eq '-field') {
            $action->{Field} = shift @options;
        } elsif ($option eq '-push-field') {
            $action->{Field} = shift @options;
            $action->{FieldIsArray} = 1;
        } elsif ($option eq '-as-string') {
            $action->{AsString} = 1;
        } elsif ($option eq '-value') {
            $action->{FieldValue} = shift @options;
        } elsif ($option eq '-grove') {
            $self->{GroveBuilder} = 1;
            $action->{AsGrove} = 1;
        } elsif ($option eq '-grove-contents') {
            $self->{GroveBuilder} = 1;
            $action->{ContentsAsGrove} = 1;
        } elsif ($option eq '-ignore') {
            $action->{Ignore} = 1;
        } elsif ($option eq '-pcdata') {
            $action->{PCData} = 1;
        } else {
            die "$option: undefined option\n";
        }
    }

    return $action;
}
1;
__END__
=head1 NAME
XML::PatAct::ToObjects - An action module for creating Perl objects
=head1 SYNOPSIS
use XML::PatAct::ToObjects;
my $patterns = [ PATTERN => [ OPTIONS ],
PATTERN => "PERL-CODE",
... ];
my $handler = XML::PatAct::ToObjects->new( Patterns => $patterns,
Matcher => $matcher,
CopyId => 1,
CopyAttributes => 1 );
=head1 DESCRIPTION
XML::PatAct::ToObjects is a PerlSAX handler for applying
pattern-action lists to XML parses or trees. XML::PatAct::ToObjects
creates Perl objects of the types and contents of the action items you
define.
New XML::PatAct::ToObjects instances are created by calling `new()'.
Parameters can be passed as a list of key, value pairs or a hash.
`new()' requires the Patterns and Matcher parameters, the rest are
optional:
=over 4
=item Patterns
The pattern-action list to apply.
=item Matcher
An instance of the pattern or query matching module.
=item CopyId
Causes the `ID' attribute, if any, in a source XML element to be
copied to an `ID' attribute in newly created objects. Note that IDs
may be lost if no pattern matches that element or an object is not
created (C<-make>) for that element.
=item CopyAttributes
Causes all attributes of the element to be copied to the newly created
objects.
=back
Each action can either be a list of options defined below or a string
containing a fragment of Perl code. If the action is a string of Perl
code then some simple substitutions are made as described further
below.
Options that can be used in an action item containing an option-list:
=over 4
=item B<-holder>
Ignore this element, but continue processing its children (compare to
B<-ignore>). C<-pcdata> may be used with this option.
=item B<-ignore>
Ignore (discard) this element and its children (compare to B<-holder>).
=item B<-pcdata>
Character data in this element should be copied to the C<Contents>
field.
=item B<-make> I<PACKAGE>
Create an object blessed into I<PACKAGE>, and continue processing this
element and its children. I<PACKAGE> may be the type `C<HASH>' to
simply create an anonymous hash.
=item B<-args> I<ARGUMENTS>
Use I<ARGUMENTS> in creating the object specified by B<-make>. This
is commonly used to copy element attributes into fields in the newly
created object. For example:
  -make => 'HASH', -args => 'URL => %{href}'
would copy the `C<href>' attribute in an element to the `C<URL>' field
of the newly created hash.
=item B<-field> I<FIELD>
Store this element, object, or children of this element in the parent
object's field named by I<FIELD>.
=item B<-push-field> I<FIELD>
Similar to B<-field>, except that I<FIELD> is an array and the
contents are pushed onto that array.
=item B<-value> I<VALUE>
Use I<VALUE> as a literal value to store in I<FIELD>, otherwise
ignoring this element and its children. Only valid with B<-field> or
B<-push-field>. `C<%{I<ATTRIBUTE>}>' notation can be used to
substitute the value of an attribute into the literal value.
=item B<-as-string>
Convert the contents of this element to a string (as in
C<XML::Grove::AsString>) and store in I<FIELD>. Only valid with
B<-field> or B<-push-field>.
=item B<-grove>
Copy this element to I<FIELD> without further processing. The element
can then be processed later as the Perl objects are manipulated. Only
valid with B<-field> or B<-push-field>. If ToObjects is used with
PerlSAX, this will use XML::Grove::Builder to build the grove element.
=item B<-grove-contents>
Used with B<-make>, B<-grove-contents> creates an object but then
takes all of the content of that element and stores it in Contents.
=back
If an action item is a string, that string is treated as a fragment of
Perl code. The following simple substitutions are performed on the
fragment to provide easy access to the information being converted:
=over 4
=item B<@ELEM@>
The object that caused this action to be called. If ToObjects is used
with PerlSAX this will be a hash with the element name and attributes,
with XML::Grove this will be the element object, with Data::Grove it
will be the matching object, and with XML::DOM it will be an
XML::DOM::Element.
=back
=head1 EXAMPLE
The example pattern-action list below will convert the following XML
representing a Database schema:
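    <schema>
      <table>
        <name>MyTable</name>
        <summary>A short summary</summary>
        <description>A long description that may
          contain a subset of HTML</description>
        <column>
          <name>MyColumn1</name>
          <summary>A short summary</summary>
          <description>A long description</description>
          <unique/>
          <non-null/>
          <default>42</default>
        </column>
      </table>
    </schema>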
into Perl objects looking like:
[
{ Name => "MyTable",
Summary => "A short summary",
Description => $grove_object,
Columns => [
{ Name => "MyColumn1",
Summary => "A short summary",
Description => $grove_object,
Unique => 1,
NonNull => 1,
Default => 42
}
]
}
]
Here is a Perl script and pattern-action list that will perform the
conversion using the simple name matching pattern module
XML::PatAct::MatchName. The script accepts a Schema XML file as an
argument (C<$ARGV[0]>) to the script. This script creates a grove as
one of its objects, so it requires the XML::Grove module.
use XML::Parser::PerlSAX;
use XML::PatAct::MatchName;
use XML::PatAct::ToObjects;
my $patterns = [
'schema' => [ qw{ -holder } ],
'table' => [ qw{ -make Schema::Table } ],
'name' => [ qw{ -field Name -as-string } ],
'summary' => [ qw{ -field Summary -as-string } ],
'description' => [ qw{ -field Description -grove } ],
'column' => [ qw{ -make Schema::Column -push-field Columns } ],
'unique' => [ qw{ -field Unique -value 1 } ],
'non-null' => [ qw{ -field NonNull -value 1 } ],
'default' => [ qw{ -field Default -as-string } ],
];
my $matcher = XML::PatAct::MatchName->new( Patterns => $patterns );
my $handler = XML::PatAct::ToObjects->new( Patterns => $patterns,
Matcher => $matcher);
my $parser = XML::Parser::PerlSAX->new( Handler => $handler );
my $schema = $parser->parse(Source => { SystemId => $ARGV[0] } );
=head1 TODO
=over 4
=item *
It'd be nice if patterns could be applied even in B<-as-string> and
B<-grove>.
=item *
Implement Perl code actions.
=item *
B<-as-xml> to write XML into the field.
=back
=head1 AUTHOR
Ken MacLeod, ken@bitsko.slc.ut.us
=head1 SEE ALSO
perl(1), Data::Grove(3)
``Using PatAct Modules'' and ``Creating PatAct Modules'' in libxml-perl.
=cut
libxml-perl-0.08/lib/XML/SAX2Perl.pm 0100644 0000764 0000764 00000014526 07745275111 015142 0 ustar ken ken #
# Copyright (C) 1998 Ken MacLeod
# XML::SAX2Perl is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# $Id: SAX2Perl.pm,v 1.4 2001/07/23 15:47:15 kmacleod Exp $
#
use strict;
package XML::SAX2Perl;
use vars qw{ $VERSION };
# will be substituted by make-rel script
$VERSION = "0.08";
# Construct an XML::SAX2Perl filter.  Options are given either as a
# list of key/value pairs or as one hash reference; a hash reference is
# blessed as it is, without being copied.
sub new {
    my ($class, @options) = @_;

    my $self = (@options == 1) ? $options[0] : { @options };

    return bless $self, $class;
}
# SAX setDocumentLocator(locator): remember the locator on this filter
# so that startDocument() can hand it on to the Perl handler.
#
# The assignment must go to the object itself; declaring the target
# with `my' would create a second, unrelated $self and the locator
# would be thrown away when the sub returns.
sub setDocumentLocator {
    my ($self, $locator) = @_;

    $self->{Locator} = $locator;
}
# SAX startDocument(): call the Perl handler's start_document().
#
# If a locator is known it is passed as the `Locator' property, the
# spelling used by this module's documentation and read by
# XML::Perl2SAX::start_document().  Returns whatever the handler
# returns.
sub startDocument {
    my $self = shift;

    my @properties;
    if (defined $self->{Locator}) {
        push @properties, Locator => $self->{Locator};
    }

    $self->{DocumentHandler}->start_document(@properties);
}
# SAX endDocument(): call the Perl handler's end_document() and return
# its result.
sub endDocument {
    my ($self) = @_;

    my $handler = $self->{DocumentHandler};
    return $handler->end_document;
}
# SAX startElement(name, atts): call the Perl handler's start_element()
# with the properties Name and Attributes.  The attribute value is
# passed through untranslated.
sub startElement {
    my ($self, $name, $attributes) = @_;

    # FIXME depends on how Perl SAX treats attributes
    my $handler = $self->{DocumentHandler};
    return $handler->start_element(Name => $name,
                                   Attributes => $attributes);
}
# SAX endElement(name): call the Perl handler's end_element() with the
# Name property.
sub endElement {
    my ($self, $name) = @_;

    my $handler = $self->{DocumentHandler};
    return $handler->end_element(Name => $name);
}
# SAX characters(ch, start, length): cut the reported range out of the
# buffer and pass it to the Perl handler's characters() as Data.
sub characters {
    my ($self, $ch, $start, $length) = @_;

    my $handler = $self->{DocumentHandler};
    return $handler->characters(Data => substr($ch, $start, $length));
}
# SAX ignorableWhitespace(ch, start, length): cut the reported range
# out of the buffer and pass it to the Perl handler's
# ignorable_whitespace() as Data.
sub ignorableWhitespace {
    my ($self, $ch, $start, $length) = @_;

    my $handler = $self->{DocumentHandler};
    return $handler->ignorable_whitespace(Data => substr($ch, $start, $length));
}
# SAX processingInstruction(target, data): call the Perl handler's
# processing_instruction() with the properties Target and Data.
sub processingInstruction {
    my ($self, $target, $data) = @_;

    my $handler = $self->{DocumentHandler};
    return $handler->processing_instruction(Target => $target,
                                            Data => $data);
}
1;
__END__
=head1 NAME
XML::SAX2Perl -- translate Java/CORBA style SAX methods to Perl methods
=head1 SYNOPSIS
use XML::SAX2Perl;
$sax2perl = XML::SAX2Perl->new(DocumentHandler => $my_handler);
$sax->setDocumentHandler($sax2perl);
=head1 DESCRIPTION
C<XML::SAX2Perl> is a SAX filter that translates Java/CORBA style SAX
methods to Perl style method calls. This man page summarizes the
specific options, handlers, and properties supported by
C<XML::SAX2Perl>; please refer to the Perl SAX standard C<XML::SAX>
for general usage information.
=head1 METHODS
=over 4
=item new
Creates a new parser object. Default options for parsing, described
below, are passed as key-value pairs or as a single hash. Options may
be changed directly in the parser object unless stated otherwise.
Options passed to `C<parse()>' override the default options in the
parser object for the duration of the parse.
=item parse
Parses a document. Options, described below, are passed as key-value
pairs or as a single hash. Options passed to `C<parse()>' override
default options in the parser object.
=item location
Returns the location as a hash:
ColumnNumber The column number of the parse.
LineNumber The line number of the parse.
PublicId A string containing the public identifier, or undef
if none is available.
SystemId A string containing the system identifier, or undef
if none is available.
=item SAX DocumentHandler Methods
The following methods are DocumentHandler methods that the SAX 1.0
parser will call and C<XML::SAX2Perl> will translate to Perl SAX
method calls. See SAX 1.0 for details.
setDocumentLocator(locator)
startDocument()
endDocument()
startElement(name, atts)
endElement(name)
characters(ch, start, length)
ignorableWhitespace(ch, start, length)
processingInstruction(target, data)
=back
=head1 OPTIONS
The following options are supported by C<XML::SAX2Perl>:
Handler default handler to receive events
DocumentHandler handler to receive document events
DTDHandler handler to receive DTD events
ErrorHandler handler to receive error events
EntityResolver handler to resolve entities
Locale locale to provide localisation for errors
Source hash containing the input source for parsing
If no handlers are provided then all events will be silently ignored,
except for `C<fatal_error()>' which will cause a `C<die()>' to be
called after calling `C<end_document()>'.
If a single string argument is passed to the `C<parse()>' method, it
is treated as if a `C<Source>' option was given with a `C<String>'
parameter.
The `C<Source>' hash may contain the following parameters:
ByteStream The raw byte stream (file handle) containing the
document.
String A string containing the document.
SystemId The system identifier (URI) of the document.
PublicId The public identifier.
Encoding A string describing the character encoding.
If more than one of `C<ByteStream>', `C<String>', or `C<SystemId>' is
given, then preference is given first to `C<ByteStream>', then
`C<String>', then `C<SystemId>'.
=head1 HANDLERS
The following handlers and properties are supported by
C<XML::SAX2Perl>:
=head2 DocumentHandler methods
=over 4
=item start_document
Receive notification of the beginning of a document.
Locator An object that can return the location of any SAX
document event.
=item end_document
Receive notification of the end of a document.
No properties defined.
=item start_element
Receive notification of the beginning of an element.
Name The element type name.
Attributes Attributes attached to the element, if any.
ALPHA WARNING: The `C<Attributes>' value is not translated from the
SAX 1.0 value, so it will contain an AttributeList object.
=item end_element
Receive notification of the end of an element.
Name The element type name.
=item characters
Receive notification of character data.
Data The characters from the XML document.
=item ignorable_whitespace
Receive notification of ignorable whitespace in element content.
Data The characters from the XML document.
=item processing_instruction
Receive notification of a processing instruction.
Target The processing instruction target.
Data The processing instruction data, if any.
=back
=head1 AUTHOR
Ken MacLeod
=head1 SEE ALSO
perl(1), XML::Perl2SAX(3).
Extensible Markup Language (XML)
Simple API for XML (SAX)
=cut
libxml-perl-0.08/lib/XML/Perl2SAX.pm 0100644 0000764 0000764 00000005020 07745275111 015127 0 ustar ken ken #
# Copyright (C) 1998 Ken MacLeod
# XML::Perl2SAX is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# $Id: Perl2SAX.pm,v 1.3 1999/12/22 21:15:00 kmacleod Exp $
#
use strict;
package XML::Perl2SAX;
use vars qw{ $VERSION };
# will be substituted by make-rel script
$VERSION = "0.08";
# Construct an XML::Perl2SAX filter.  Options are given either as a
# list of key/value pairs or as one hash reference; a hash reference is
# blessed as it is, without being copied.
sub new {
    my ($class, @options) = @_;

    my $self = (@options == 1) ? $options[0] : { @options };

    return bless $self, $class;
}
# Perl SAX start_document: accepts its properties as one hash reference
# or as a key/value list.  A true `Locator' property is first handed to
# the Java/CORBA style handler with setDocumentLocator(); then
# startDocument() is called and its result returned.
sub start_document {
    my ($self, @rest) = @_;

    my $properties = (@rest == 1) ? $rest[0] : { @rest };
    my $handler = $self->{DocumentHandler};

    $handler->setDocumentLocator($properties->{Locator})
        if ($properties->{Locator});

    return $handler->startDocument;
}
# Perl SAX end_document: call the Java/CORBA style handler's
# endDocument() and return its result.
sub end_document {
    my ($self) = @_;

    my $handler = $self->{DocumentHandler};
    return $handler->endDocument;
}
# Perl SAX start_element: call the Java/CORBA style handler's
# startElement(name, atts) with the Name and Attributes properties of
# the event hash.  The attribute value is passed through untranslated.
sub start_element {
    my ($self, $properties) = @_;

    # FIXME depends on how Perl SAX treats attributes
    my $handler = $self->{DocumentHandler};
    return $handler->startElement($properties->{Name},
                                  $properties->{Attributes});
}
# Perl SAX end_element: call the Java/CORBA style handler's
# endElement(name) with the Name property of the event hash.
sub end_element {
    my ($self, $properties) = @_;

    my $handler = $self->{DocumentHandler};
    return $handler->endElement($properties->{Name});
}
# Perl SAX characters: call the Java/CORBA style handler's
# characters(ch, start, length) with the whole Data string, i.e. with
# start 0 and the string's full length.
sub characters {
    my ($self, $properties) = @_;

    my $handler = $self->{DocumentHandler};
    my $size = length($properties->{Data});

    return $handler->characters($properties->{Data}, 0, $size);
}
# Perl SAX ignorable_whitespace: call the Java/CORBA style handler's
# ignorableWhitespace(ch, start, length) with the whole Data string,
# i.e. with start 0 and the string's full length.
sub ignorable_whitespace {
    my ($self, $properties) = @_;

    my $handler = $self->{DocumentHandler};
    my $size = length($properties->{Data});

    return $handler->ignorableWhitespace($properties->{Data}, 0, $size);
}
# Perl SAX processing_instruction: call the Java/CORBA style handler's
# processingInstruction(target, data) with the Target and Data
# properties of the event hash.
sub processing_instruction {
    my ($self, $properties) = @_;

    my $handler = $self->{DocumentHandler};
    return $handler->processingInstruction($properties->{Target},
                                           $properties->{Data});
}
1;
__END__
=head1 NAME
XML::Perl2SAX -- translate Perl SAX methods to Java/CORBA style methods
=head1 SYNOPSIS
use XML::Perl2SAX;
$perl2sax = XML::Perl2SAX->new(DocumentHandler => $java_style_handler);
=head1 DESCRIPTION
C<XML::Perl2SAX> is a SAX filter that translates Perl style SAX
methods to Java/CORBA style method calls. This module performs the
inverse operation from C<XML::SAX2Perl>.
C<XML::Perl2SAX> is a Perl SAX document handler. The `C<new>' method
takes a `C<DocumentHandler>' argument that is a Java/CORBA style
handler that the new Perl2SAX instance will call. The SAX interfaces
are defined by the SAX specification listed under SEE ALSO.
=head1 AUTHOR
Ken MacLeod
=head1 SEE ALSO
perl(1), XML::SAX2Perl(3).
Extensible Markup Language (XML)
Simple API for XML (SAX)
=cut
libxml-perl-0.08/lib/XML/ESISParser.pm 0100644 0000764 0000764 00000047000 07745275111 015513 0 ustar ken ken #
# Copyright (C) 1999 Ken MacLeod
# See the file COPYING for distribution terms.
#
# $Id: ESISParser.pm,v 1.9 2000/03/02 20:18:09 kmacleod Exp $
#
use strict;
use IO::File;
use UNIVERSAL;
package XML::ESISParser;
use vars qw{ $VERSION $NSGMLS_sgml $NSGMLS_FLAGS_sgml $NSGMLS_ENV_sgml
$NSGMLS_xml $NSGMLS_FLAGS_xml $NSGMLS_ENV_xml
$XML_DECL };
# will be substituted by make-rel script
$VERSION = "0.08";
# Defaults used by parse() to build the nsgmls command line when the
# caller gives no NSGMLSCommand option.  The *_sgml values are used
# when the IsSGML option is true, the *_xml values otherwise.

# program name and flags for SGML documents
$NSGMLS_sgml = 'nsgmls';
$NSGMLS_FLAGS_sgml = '-oentity -oempty -onotation-sysid -oincluded -oline -E0';
# NOTE(review): parse() does not appear to use this variable -- confirm
$NSGMLS_ENV_sgml = '';
# program name for XML documents
$NSGMLS_xml = 'nsgmls';
# declaration file passed to nsgmls for XML when the caller gives no
# Declaration option
$XML_DECL = '/usr/lib/sgml/declaration/xml.decl';
$NSGMLS_FLAGS_xml = '-oentity -oempty -onotation-sysid -oline -oincluded -wxml -E0 ';
# environment assignments placed in front of the XML command
$NSGMLS_ENV_xml = 'SP_CHARSET_FIXED=YES SP_ENCODING=XML';
# Construct an XML::ESISParser.  The arguments are key/value pairs that
# become the default options of the parser object.
sub new {
    my ($class, @options) = @_;

    my $self = { @options };

    return bless $self, $class;
}
# Parse a document and return the value produced by parse_fh(), i.e.
# the result of the document handler's end_document().
#
# Arguments are either a single plain string (the document itself), a
# single hash reference of options, or a list of key/value options.
# They are merged over the options stored in the parser object for the
# duration of this parse.
#
# The merged options are kept in $self->{ParseOptions} while the parse
# runs and are removed again on every way out, including when the parse
# dies, so that a failed parse does not leave the instance refusing all
# further work with "already parsing".
sub parse {
    my $self = shift;

    die "XML::ESISParser: parser instance ($self) already parsing\n"
        if (defined $self->{ParseOptions});

    # If there's one arg and it has no ref, it's a string
    my $args;
    if (scalar (@_) == 1 && !ref($_[0])) {
        $args = { Source => { String => shift } };
    } else {
        $args = (scalar (@_) == 1) ? shift : { @_ };
    }

    my $parse_options = { %$self, %$args };
    $self->{ParseOptions} = $parse_options;

    my $result;
    my $completed = eval {
        $result = $self->_parse_with_options ($parse_options);
        1;
    };
    my $error = $@;

    # clean up parser instance
    delete $self->{ParseOptions};
    die $error if (!$completed);

    # NOTE(review): these deletes also remove handlers that were given
    # to new(); kept as in the original -- confirm that is intended.
    delete $self->{DocumentHandler};
    delete $self->{DTDHandler};
    delete $self->{ErrorHandler};

    return $result;
}

# Check the merged options, build the nsgmls command and parse whichever
# source was given.  The sources are tried in the order ESISStream,
# ByteStream, String, SystemId.  Dies when there is no source or the
# parse fails.
sub _parse_with_options {
    my ($self, $parse_options) = @_;

    # ensure that we have at least one source
    my $source = $parse_options->{Source};
    if (!defined $source
        || !(defined $source->{String}
             || defined $source->{ByteStream}
             || defined $source->{SystemId}
             || defined $source->{ESISStream})) {
        die "XML::ESISParser: no source defined for parse\n";
    }

    # assign default Handler to any undefined handlers
    if (defined $parse_options->{Handler}) {
        foreach my $kind (qw{ DocumentHandler DTDHandler ErrorHandler }) {
            $parse_options->{$kind} = $parse_options->{Handler}
                if (!defined $parse_options->{$kind});
        }
    }

    # create the NSGMLS command
    my ($nsgmls_command, $nsgmls, $nsgmls_flags);
    if (defined $parse_options->{NSGMLSCommand}) {
        $nsgmls_command = $parse_options->{NSGMLSCommand};
        # only used in diagnostics below
        $nsgmls = $nsgmls_command;
    } elsif (defined $parse_options->{IsSGML}
             && $parse_options->{IsSGML}) {
        my $declaration = (defined $parse_options->{Declaration})
            ? " " . $parse_options->{Declaration} : "";
        $nsgmls = $parse_options->{NSGMLS} = $NSGMLS_sgml;
        $nsgmls_flags = $parse_options->{NSGMLS_FLAGS} = $NSGMLS_FLAGS_sgml;
        $nsgmls_command = $parse_options->{NSGMLS_COMMAND} = "$nsgmls $nsgmls_flags $declaration";
    } else {
        my $declaration = (defined $parse_options->{Declaration})
            ? $parse_options->{Declaration} : $XML_DECL;
        $nsgmls = $parse_options->{NSGMLS} = $NSGMLS_xml;
        $nsgmls_flags = $parse_options->{NSGMLS_FLAGS} = $NSGMLS_FLAGS_xml;
        $nsgmls_command = $parse_options->{NSGMLS_COMMAND} = "$NSGMLS_ENV_xml $nsgmls $nsgmls_flags $declaration";
    }

    my $result;

    if (defined $source->{ESISStream}) {
        # read ESIS stream directly
        my $system_id = (defined $source->{SystemId})
            ? "\`$source->{SystemId}'" : 'ESIS Stream';

        eval { $result = $self->parse_fh ($source->{ESISStream}) };
        my $retval = $@;

        if ($retval) {
            die "XML::ESISParser::parse: unable to parse \`$system_id'\n$retval";
        }
    } elsif (defined $source->{ByteStream}) {
        # feed the file handle to nsgmls
        # FIXME special case stdin?
        my $system_id = (defined $source->{SystemId})
            ? "\`$source->{SystemId}'" : 'Byte Stream';

        $result = $self->_parse_piped_source ($source, $system_id,
                                              $nsgmls, $nsgmls_command);
    } elsif (defined $source->{String}) {
        # feed the literal string to nsgmls
        my $system_id = (defined $source->{SystemId})
            ? "\`$source->{SystemId}'" : 'String';

        $result = $self->_parse_piped_source ($source, $system_id,
                                              $nsgmls, $nsgmls_command);
    } elsif (defined $source->{SystemId}) {
        # if SystemId is a file, call nsgmls with file name
        # otherwise, open stream on SystemId and do ByteStream
        # FIXME this only handles file SystemIds right now

        # 20% speed increase if grep swipes implieds (only 8% if
        # we do it in `parse').  XXX use a C routine or patch SP
        my $system_id = $source->{SystemId};

        # the name goes through the shell inside single quotes, so any
        # single quote in it has to be escaped
        (my $quoted_id = $system_id) =~ s/'/'\\''/g;

        my ($fh) = IO::File->new
            ("$nsgmls_command '$quoted_id' 2>&1 | egrep -v '^A.* IMPLIED\$' |");
        die "XML::ESISParser::parse: can't run \`$nsgmls' on \`$system_id'\n"
            if (!defined $fh);

        eval { $result = $self->parse_fh ($fh) };
        my $retval = $@;

        close ($fh);

        if ($retval) {
            die "XML::ESISParser::parse: unable to parse \`$system_id'\n$retval";
        }
    }

    return $result;
}

# Parse a document that has to be written to nsgmls' standard input: a
# ByteStream (read line by line with getline()) or a String.
#
# A child process is forked whose standard output is the ESIS handle
# read by the parent.  The child starts nsgmls, copies the document to
# it and exits; it never returns into the caller's code, even on
# failure, where it reports on STDERR and exits with status 1.
sub _parse_piped_source {
    my ($self, $source, $system_id, $nsgmls, $nsgmls_command) = @_;

    my $pid = open (ESIS, "-|");
    die "XML::ESISParser::parse: can't fork to parse \`$system_id': $!\n"
        if (!defined $pid);

    if ($pid == 0) {
        # child process
        my $fed = eval {
            # 20% speed increase if grep swipes implieds (only 8% if
            # we do it in `parse_fh').  XXX use a C routine or patch SP
            open (SGML, "| $nsgmls_command 2>&1 | egrep -v '^A.* IMPLIED\$'")
                or die "XML::ESISParser::parse: can't run \`$nsgmls' on \`$system_id'\n";

            if (defined $source->{ByteStream}) {
                my $byte_stream = $source->{ByteStream};
                while (defined (my $chunk = $byte_stream->getline ())) {
                    print SGML $chunk;
                }
            } else {
                print SGML $source->{String};
            }

            close (SGML)
                or die "XML::ESISParser::parse: can't run \`$nsgmls' on \`$system_id'\n";
            1;
        };
        print STDERR $@ if (!$fed);
        exit ($fed ? 0 : 1);
    }

    # parent process
    my $result;
    eval { $result = $self->parse_fh (*ESIS) };
    my $retval = $@;

    wait;                       # clean up that process
    close (ESIS);
    $source->{ByteStream}->close ()
        if (defined $source->{ByteStream});

    if ($retval) {
        die "XML::ESISParser::parse: unable to parse \`$system_id'\n$retval";
    }

    return $result;
}
#
# Parse the `ESIS' information coming from `file'
#
# $file is a file handle or an object with a getline() method that
# yields the output of `nsgmls', one command per line.  The first
# character of a line selects the command and the rest is its data.
# Document events are sent to the DocumentHandler, declarations to the
# DTDHandler and problems to the ErrorHandler found in
# $self->{ParseOptions}; a handler method is only called if the handler
# implements it.
#
# Returns what the document handler's end_document() returns, or an
# empty list if it has no such method.  Dies on an error if the error
# handler has no error() method.
sub parse_fh {
    my ($self, $file) = @_;
    my (@attributes, @properties, $files);

    my $doc_h = $self->{ParseOptions}{DocumentHandler};
    my $dtd_h = $self->{ParseOptions}{DTDHandler};
    my $err_h = $self->{ParseOptions}{ErrorHandler};

    # we cache these most commonly used `can()' calls
    my $can_start_element = $doc_h->can('start_element');
    my $can_end_element = $doc_h->can('end_element');
    my $can_characters = $doc_h->can('characters');
    my $can_record_end = $doc_h->can('record_end');

    # messages written by nsgmls itself start with the program's name
    my $nsgmls_name = $self->{ParseOptions}{NSGMLS};
    $nsgmls_name = '' if (!defined $nsgmls_name);

    my $line = 0;

    # send a piece of character data, or the entity reference an SDATA
    # section stands for, to the document handler; empty pieces are
    # dropped
    my $emit_data = sub {
        my ($text, $is_sdata) = @_;

        return if ($text eq '');
        if ($is_sdata) {
            $doc_h->internal_entity_ref({ Name => $self->{'internal_entities_by_value'}{$text} })
                if ($doc_h->can('internal_entity_ref'));
        } else {
            $doc_h->characters({ Data => $text })
                if ($can_characters);
        }
    };

    # send a declaration to the DTD handler together with the system
    # id, public id and file names collected for it, then start
    # collecting afresh
    my $emit_decl = sub {
        my ($method, @decl) = @_;

        if (defined $files) {
            push (@properties, GeneratedId => $files);
        }
        $dtd_h->$method ({ @decl, @properties })
            if ($dtd_h->can($method));
        @properties = (); undef $files;
    };

    # report a problem to the error handler, or die with $fatal_text if
    # the handler cannot take it
    my $report_error = sub {
        my ($message, $fatal_text) = @_;

        if ($err_h->can('error')) {
            $err_h->error ({ Message => $message });
        } else {
            die $fatal_text;
        }
    };

    $doc_h->start_document( { } )
        if ($doc_h->can('start_document'));

    # 30% speed improvement by breaking the encapsulation
    my ($is_filehandle) = (ref ($file) eq "FileHandle"
                           || ref ($file) eq "IO::File");

    # The record is kept in a lexical so that the caller's $_ is left
    # alone, and the loop tests for definedness so that a final line
    # consisting of `0' is not mistaken for the end of input.
    while (defined (my $record = ($is_filehandle ? <$file> : $file->getline()))) {
        # chomp, not chop: the last line may lack its newline
        chomp $record;

        if ($record =~ /^A/) {  # attribute
            # Note: the output of `nsgmls' is `grep -v'ed to get rid of
            # IMPLIED attributes, if we do it here we only get an 8%
            # speed boost
            my ($name, $type, $value) = split (/\s/, $', 3);
            push (@attributes, $name => $value);
            next;
        }

        if ($record =~ /^\(/) { # start element
            # break the encapsulation for an 8% boost
            if ($#attributes >= 0) {
                push (@properties, Attributes => { @attributes });
            }
            $doc_h->start_element ({ Name => $', @properties })
                if ($can_start_element);
            @properties = (); @attributes = ();
            next;
        }

        if ($record =~ /^\)/) { # end element
            $doc_h->end_element ({ Name => $' })
                if ($can_end_element);
            next;
        }

        if ($record =~ /^L/) {  # line number
            $line = $';
            next;
        }

        if ($record =~ /^-/) {  # data (including sdata entities)
            # This section is derived from David Megginson's SGMLSpm
            my $sdata_flag = 0;
            my $out = '';
            my $data = $';

            while ($data =~ /\\(\\|n|\||[0-7]{1,3})/) {
                my $escape = $1;
                $out .= $`;
                $data = $';

                if ($escape eq '|') {
                    # beginning or end of SDATA
                    $emit_data->($out, $sdata_flag);
                    $out = '';
                    $sdata_flag = !$sdata_flag;
                } elsif ($escape eq 'n') {
                    # record end
                    $emit_data->($out, $sdata_flag);
                    $out = '';

                    if ($can_record_end) {
                        $doc_h->record_end( { } );
                    } else {
                        $doc_h->characters({ Data => "\n" })
                            if ($can_characters);
                    }
                } elsif ($escape eq '\\') {
                    $out .= '\\';
                } else {
                    # octal character code
                    $out .= chr(oct($escape));
                }
            }
            $out .= $data;
            $emit_data->($out, $sdata_flag);
            next;
        }

        if ($record =~ /^s/) {  # sysid
            push (@properties, SystemId => $');
            next;
        }

        if ($record =~ /^p/) {  # pubid
            push (@properties, PublicId => $');
            next;
        }

        if ($record =~ /^f/) {  # file
            if (!defined $files) {
                $files = $';
            } elsif (!ref $files) {
                $files = [ $files, $' ];
            } else {
                push (@$files, $');
            }
            next;
        }

        if ($record =~ /^E/) {  # external entity definition
            my ($entity_data) = $';
            $entity_data =~ /^(\S+) (\S+) (\S+)$/
                or die "XML::ESISParser::parse_fh: bad external entity event data: $entity_data\n";
            my ($name,$type,$notation) = ($1,$2,$3);
            $emit_decl->('external_entity_decl',
                         Name => $name, Type => $type,
                         Notation => $notation);
            next;
        }

        if ($record =~ /^I/) {  # internal entity definition
            my ($name, $type, $value) = split (/\s/, $', 3);
            $self->{'internal_entities_by_value'}{$value} = $name;
            $dtd_h->internal_entity_decl ({ Name => $name, Type => $type,
                                            Value => $value })
                if ($dtd_h->can('internal_entity_decl'));
            next;
        }

        if ($record =~ /^&/) {  # external entity reference
            my ($name) = $';
            $doc_h->external_entity_ref({ Name => $name })
                if ($doc_h->can('external_entity_ref'));
            next;
        }

        if ($record =~ /^\?/) { # processing instruction (PI)
            my ($data) = $';
            if ($self->{ParseOptions}{IsSGML}) {
                $doc_h->processing_instruction({ Data => $data })
                    if ($doc_h->can('processing_instruction'));
            } else {
                my ($target, $pi_data) = split (/\s+/, $data, 2);
                $doc_h->processing_instruction({ Target => $target, Data => $pi_data })
                    if ($doc_h->can('processing_instruction'));
            }
            next;
        }

        if ($record =~ /^N/) {  # notation definition
            $emit_decl->('notation_decl', Name => $');
            next;
        }

        if ($record =~ /^S/) {  # subdoc definition
            $emit_decl->('subdoc_entity_decl', Name => $');
            next;
        }

        if ($record =~ /^T/) {  # external SGML text entity definition
            $emit_decl->('external_sgml_entity_decl', Name => $');
            next;
        }

        if ($record =~ /^D/) {  # data attribute
            # FIXME
            my $message = "XML::ESISParser: can't handle data attributes yet\n";
            $report_error->($message, "$message");
            next;
        }

        # nsgmls announces link attributes with `a'; this branch used
        # to test for `D' a second time and could never be reached
        if ($record =~ /^a/) {  # link attribute
            # FIXME
            my $message = "XML::ESISParser: can't handle link attributes yet\n";
            $report_error->($message, "$message");
            next;
        }

        if ($record =~ /^\{/) { # subdoc start
            my ($name) = $';
            $doc_h->start_subdoc ({ Name => $name })
                if ($doc_h->can('start_subdoc'));
            next;
        }

        if ($record =~ /^\}/) { # subdoc end
            my ($name) = $';
            $doc_h->end_subdoc ({ Name => $name })
                if ($doc_h->can('end_subdoc'));
            next;
        }

        if ($record =~ /^#/) {  # appinfo
            my ($text) = $';
            $doc_h->appinfo ({ Text => $text })
                if ($doc_h->can('appinfo'));
            next;
        }

        if ($record =~ /^i/) {  # next element is an included subelement
            push (@properties, IncludedSubelement => 1);
            next;
        }

        if ($record =~ /^e/) {  # next element is declared empty
            push (@properties, Empty => 1);
            next;
        }

        if ($record =~ /^C/) {  # conforming
            $doc_h->conforming({})
                if ($doc_h->can('conforming'));
            next;
        }

        # the program name is matched literally, not as a pattern
        if ($record =~ /^\Q$nsgmls_name\E:/) { # `nsgmls' error
            my $message = $record;
            $report_error->($message, "$message\n");
            next;
        }

        my ($op) = substr ($record, 0, 1);
        my $message = "XML::ESISParser::parse_fh: ESIS command character \`$op' not recognized when reading line \`$record' around line $line ($.)";
        $report_error->($message, "$message");
    }

    if ($doc_h->can('end_document')) {
        return $doc_h->end_document({});
    } else {
        return ();
    }
}
1;
__END__
=head1 NAME
XML::ESISParser - Perl SAX parser using nsgmls
=head1 SYNOPSIS
use XML::ESISParser;
$parser = XML::ESISParser->new( [OPTIONS] );
$result = $parser->parse( [OPTIONS] );
$result = $parser->parse($string);
=head1 DESCRIPTION
C<XML::ESISParser> is a Perl SAX parser using the `nsgmls' command of
James Clark's SGML Parser (SP), a validating XML and SGML parser.
This man page summarizes the specific options, handlers, and
properties supported by C<XML::ESISParser>; please refer to the Perl
SAX standard in `C<PerlSAX.pod>' for general usage information.
C<XML::ESISParser> defaults to parsing XML and has an option for
parsing SGML.
`C<nsgmls>' source, and binaries for some platforms, is available from
<http://www.jclark.com/>. `C<nsgmls>' is included in both the SP and
Jade packages.
=head1 METHODS
=over 4
=item new
Creates a new parser object. Default options for parsing, described
below, are passed as key-value pairs or as a single hash. Options may
be changed directly in the parser object unless stated otherwise.
Options passed to `C<parse()>' override the default options in the
parser object for the duration of the parse.
=back
=head1 OPTIONS
The following options are supported by C<XML::ESISParser>:
Handler default handler to receive events
DocumentHandler handler to receive document events
DTDHandler handler to receive DTD events
ErrorHandler handler to receive error events
Source hash containing the input source for parsing
IsSGML the document to be parsed is in SGML
If no handlers are provided then all events will be silently ignored.
If a single string argument is passed to the `C<parse()>' method, it
is treated as if a `C<Source>' option was given with a `C<String>'
parameter.
The `C<Source>' hash may contain the following parameters:
ByteStream The raw byte stream (file handle) containing the
document.
String A string containing the document.
SystemId The system identifier (URI) of the document.
If more than one of `C<ByteStream>', `C<String>', or `C<SystemId>' is
given, then preference is given first to `C<ByteStream>', then
`C<String>', then `C<SystemId>'.
=head1 HANDLERS
The following handlers and properties are supported by
C<XML::ESISParser>:
=head2 DocumentHandler methods
=over 4
=item start_document
Receive notification of the beginning of a document.
No properties defined.
=item end_document
Receive notification of the end of a document.
No properties defined.
=item start_element
Receive notification of the beginning of an element.
Name The element type name.
Attributes A hash containing the attributes attached to the
element, if any.
IncludedSubelement This element is an included subelement.
Empty This element is declared empty.
The `C<Attributes>' hash contains only string values. The `C<Empty>'
flag is not set for an element that merely has no content, it is set
only if the DTD declares it empty.
BETA: Attribute values currently do not expand SData entities into
entity objects, they are still in the system data notation used by
nsgmls (inside `|'). A future version of XML::ESISParser will also
convert other types of attributes into their respective objects,
currently just their notation or entity names are given.
=item end_element
Receive notification of the end of an element.
Name The element type name.
=item characters
Receive notification of character data.
Data The characters from the document.
=item record_end
Receive notification of a record end sequence. XML applications
should convert this to a new-line.
=item processing_instruction
Receive notification of a processing instruction.
Target The processing instruction target in XML.
Data The processing instruction data, if any.
=item internal_entity_ref
Receive notification of a system data (SData) internal entity
reference.
Name The name of the internal entity reference.
=item external_entity_ref
Receive notification of an external entity reference.
Name The name of the external entity reference.
=item start_subdoc
Receive notification of the start of a sub document.
Name The name of the external entity reference.
=item end_subdoc
Receive notification of the end of a sub document.
Name The name of the external entity reference.
=item conforming
Receive notification that the document just parsed conforms to its
document type declaration (DTD).
No properties defined.
=back
=head2 DTDHandler methods
=over 4
=item external_entity_decl
Receive notification of an external entity declaration.
Name The entity's entity name.
Type The entity's type (CDATA, NDATA, etc.)
SystemId The entity's system identifier.
PublicId The entity's public identifier, if any.
GeneratedId Generated system identifiers, if any.
=item internal_entity_decl
Receive notification of an internal entity declaration.
Name The entity's entity name.
Type The entity's type (CDATA, NDATA, etc.)
Value The entity's character value.
=item notation_decl
Receive notification of a notation declaration.
Name The notation's name.
SystemId The notation's system identifier.
PublicId The notation's public identifier, if any.
GeneratedId Generated system identifiers, if any.
=item subdoc_entity_decl
Receive notification of a subdocument entity declaration.
Name The entity's entity name.
SystemId The entity's system identifier.
PublicId The entity's public identifier, if any.
GeneratedId Generated system identifiers, if any.
=item external_sgml_entity_decl
Receive notification of an external SGML-entity declaration.
Name The entity's entity name.
SystemId The entity's system identifier.
PublicId The entity's public identifier, if any.
GeneratedId Generated system identifiers, if any.
=back
=head1 AUTHOR
Ken MacLeod, ken@bitsko.slc.ut.us
=head1 SEE ALSO
perl(1), PerlSAX.pod(3)
Extensible Markup Language (XML)
SAX 1.0: The Simple API for XML
SGML Parser (SP)
=cut
libxml-perl-0.08/lib/XML/Parser/ 0040755 0000764 0000764 00000000000 07745275112 014474 5 ustar ken ken libxml-perl-0.08/lib/XML/Parser/PerlSAX.pm 0100644 0000764 0000764 00000055255 07745275111 016320 0 ustar ken ken #
# Copyright (C) 1999 Ken MacLeod
# XML::Parser::PerlSAX is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# $Id: PerlSAX.pm,v 1.7 1999/12/22 21:15:00 kmacleod Exp $
#
use strict;
package XML::Parser::PerlSAX;
use XML::Parser;
use UNIVERSAL;
use vars qw{ $VERSION $name_re };
# will be substituted by make-rel script
$VERSION = "0.08";
# FIXME I doubt this is a correct Perl RE for productions [4] and
# [5] in the XML 1.0 specification, especially considering Unicode chars
$name_re = '[A-Za-z_:][A-Za-z0-9._:-]*';
# Construct a parser object.
#
# Default parse options are supplied either as one hash reference or as
# a key/value list.  A lone hash reference is not copied: that very hash
# is blessed and becomes the parser object.
sub new {
    my ($class, @options) = @_;

    my $parser;
    if (scalar(@options) == 1) {
        $parser = $options[0];
    } else {
        $parser = { @options };
    }

    return bless $parser, $class;
}
# Parse one document and return the value XML::Parser hands back (the
# result of the Final handler, i.e. of the DocumentHandler's
# end_document(), when one is registered).
#
# Arguments: a single plain string (the document text), a single hash
# reference of options, or a key/value option list.  These options are
# layered over the defaults stored in the parser object for the duration
# of this call only.
#
# Dies when the instance is already parsing, when no usable Source is
# given, or with whatever XML::Parser or a handler raises.
#
# All per-parse state is removed from the object before returning *or*
# re-raising, so a failed parse no longer leaves the instance stuck in
# the "already parsing" state, and handler defaults that were given to
# new() are put back instead of being deleted after the first parse.
sub parse {
    my $self = shift;

    die "XML::Parser::PerlSAX: parser instance ($self) already parsing\n"
        if (defined $self->{ParseOptions});

    # If there's one arg and it has no ref, it's a string
    my $args;
    if (scalar (@_) == 1 && !ref($_[0])) {
        $args = { Source => { String => shift } };
    } else {
        $args = (scalar (@_) == 1) ? shift : { @_ };
    }

    # remember the object's own defaults; the slots below are reused as
    # a per-parse cache for the callbacks
    my @cached_keys = qw(DocumentHandler DTDHandler EntityResolver);
    my %defaults;
    foreach my $key (@cached_keys) {
        $defaults{$key} = $self->{$key} if (exists $self->{$key});
    }

    my $parse_options = { %$self, %$args };
    $self->{ParseOptions} = $parse_options;

    my $result;
    my $ok = eval {
        # ensure that we have at least one source
        if (!defined $parse_options->{Source}
            || !(defined $parse_options->{Source}{String}
                 || defined $parse_options->{Source}{ByteStream}
                 || defined $parse_options->{Source}{SystemId})) {
            die "XML::Parser::PerlSAX: no source defined for parse\n";
        }

        # assign default Handler to any undefined handlers
        if (defined $parse_options->{Handler}) {
            $parse_options->{DocumentHandler} = $parse_options->{Handler}
                if (!defined $parse_options->{DocumentHandler});
            $parse_options->{DTDHandler} = $parse_options->{Handler}
                if (!defined $parse_options->{DTDHandler});
            $parse_options->{EntityResolver} = $parse_options->{Handler}
                if (!defined $parse_options->{EntityResolver});
        }

        # Only register an XML::Parser callback when the handler object
        # actually implements the corresponding PerlSAX method.
        my @handlers;
        if (defined $parse_options->{DocumentHandler}) {
            # cache DocumentHandler in self for callbacks
            $self->{DocumentHandler} = $parse_options->{DocumentHandler};
            my $doc_h = $parse_options->{DocumentHandler};

            push (@handlers, Init => sub { $self->_handle_init(@_) } )
                if (UNIVERSAL::can($doc_h, 'start_document'));
            push (@handlers, Final => sub { $self->_handle_final(@_) } )
                if (UNIVERSAL::can($doc_h, 'end_document'));
            push (@handlers, Start => sub { $self->_handle_start(@_) } )
                if (UNIVERSAL::can($doc_h, 'start_element'));
            push (@handlers, End => sub { $self->_handle_end(@_) } )
                if (UNIVERSAL::can($doc_h, 'end_element'));
            push (@handlers, Char => sub { $self->_handle_char(@_) } )
                if (UNIVERSAL::can($doc_h, 'characters'));
            push (@handlers, Proc => sub { $self->_handle_proc(@_) } )
                if (UNIVERSAL::can($doc_h, 'processing_instruction'));
            push (@handlers, Comment => sub { $self->_handle_comment(@_) } )
                if (UNIVERSAL::can($doc_h, 'comment'));
            push (@handlers, CdataStart => sub { $self->_handle_cdatastart(@_) } )
                if (UNIVERSAL::can($doc_h, 'start_cdata'));
            push (@handlers, CdataEnd => sub { $self->_handle_cdataend(@_) } )
                if (UNIVERSAL::can($doc_h, 'end_cdata'));
            if (UNIVERSAL::can($doc_h, 'entity_reference')) {
                push (@handlers, Default => sub { $self->_handle_default(@_) } );
                $self->{UseEntRefs} = 1;
            }
        }

        if (defined $parse_options->{DTDHandler}) {
            # cache DTDHandler in self for callbacks
            $self->{DTDHandler} = $parse_options->{DTDHandler};
            my $dtd_h = $parse_options->{DTDHandler};

            push (@handlers, Notation => sub { $self->_handle_notation(@_) } )
                if (UNIVERSAL::can($dtd_h, 'notation_decl'));
            push (@handlers, Unparsed => sub { $self->_handle_unparsed(@_) } )
                if (UNIVERSAL::can($dtd_h, 'unparsed_entity_decl'));
            # entity declarations are also needed to answer
            # entity_reference() events
            push (@handlers, Entity => sub { $self->_handle_entity(@_) } )
                if ($self->{UseEntRefs}
                    || UNIVERSAL::can($dtd_h, 'entity_decl'));
            push (@handlers, Element => sub { $self->_handle_element(@_) } )
                if (UNIVERSAL::can($dtd_h, 'element_decl'));
            push (@handlers, Attlist => sub { $self->_handle_attlist(@_) } )
                if (UNIVERSAL::can($dtd_h, 'attlist_decl'));
            push (@handlers, Doctype => sub { $self->_handle_doctype(@_) } )
                if (UNIVERSAL::can($dtd_h, 'doctype_decl'));
            push (@handlers, XMLDecl => sub { $self->_handle_xmldecl(@_) } )
                if (UNIVERSAL::can($dtd_h, 'xml_decl'));
        }

        if (defined $parse_options->{EntityResolver}) {
            # cache EntityResolver in self for callbacks
            $self->{EntityResolver} = $parse_options->{EntityResolver};
            my $er = $parse_options->{EntityResolver};

            push (@handlers, ExternEnt => sub { $self->_handle_extern_ent(@_) } )
                if (UNIVERSAL::can($er, 'resolve_entity'));
        }

        my @xml_parser_options;
        if ($self->{UseEntRefs}) {
            @xml_parser_options = ( NoExpand => 1,
                                    Handlers => { @handlers } );
        } else {
            @xml_parser_options = ( Handlers => { @handlers } );
        }

        push (@xml_parser_options,
              ProtocolEncoding => $parse_options->{Source}{Encoding})
            if (defined $parse_options->{Source}{Encoding});

        # explicit class-method call; the indirect-object form only
        # parses correctly by luck of what has been loaded
        my $parser = XML::Parser->new(@xml_parser_options);

        # preference order: ByteStream, then String, then SystemId
        if (defined $parse_options->{Source}{ByteStream}) {
            $result = $parser->parse($parse_options->{Source}{ByteStream});
        } elsif (defined $parse_options->{Source}{String}) {
            $result = $parser->parse($parse_options->{Source}{String});
        } elsif (defined $parse_options->{Source}{SystemId}) {
            $result = $parser->parsefile($parse_options->{Source}{SystemId});
        }

        1;
    };
    my $error = $@;

    # clean up parser instance, on success and on failure alike
    delete $self->{ParseOptions};
    delete $self->{Expat};
    delete $self->{UseEntRefs};
    delete $self->{EntRefs};
    foreach my $key (@cached_keys) {
        if (exists $defaults{$key}) {
            $self->{$key} = $defaults{$key};
        } else {
            delete $self->{$key};
        }
    }

    if (!$ok) {
        die $error
            if (ref($error) || (defined($error) && length($error)));
        die "XML::Parser::PerlSAX: parse failed\n";
    }

    return $result;
}
# Return the current parse position as a hash reference with the keys
# ColumnNumber, LineNumber, BytePosition and Base (all read from the
# Expat instance stored by _handle_init), plus PublicId and SystemId
# when the input source defines them.
#
# The identifiers are taken from the Source in effect for the running
# parse (the merged options in ParseOptions), falling back to a Source
# stored directly in the parser object.  Looking them up never creates
# `Source' or `ParseOptions' entries in the object as a side effect.
sub location {
    my $self = shift;

    my $expat = $self->{Expat};

    my @properties = ( ColumnNumber => $expat->current_column,
                       LineNumber => $expat->current_line,
                       BytePosition => $expat->current_byte,
                       Base => $expat->base );

    # guard each level so nothing gets autovivified in $self
    my $source;
    $source = $self->{ParseOptions}{Source}
        if (UNIVERSAL::isa($self->{ParseOptions}, 'HASH'));
    $source = $self->{Source}
        if (!defined $source);

    # FIXME these locations change while parsing external entities
    if (UNIVERSAL::isa($source, 'HASH')) {
        push (@properties, PublicId => $source->{PublicId})
            if (defined $source->{PublicId});
        push (@properties, SystemId => $source->{SystemId})
            if (defined $source->{SystemId});
    }

    return { @properties };
}
###
### DocumentHandler methods
###
# XML::Parser `Init' callback.
#
# Stores the Expat instance in the parser object, offers the parser
# itself as a locator when the DocumentHandler implements
# set_document_locator(), and then reports start_document().
sub _handle_init {
    my ($self, $expat) = @_;

    $self->{Expat} = $expat;

    $self->{DocumentHandler}->set_document_locator( { Locator => $self } )
        if ($self->{DocumentHandler}->can('set_document_locator'));

    return $self->{DocumentHandler}->start_document( { } );
}
# XML::Parser `Final' callback.
#
# Drops the entity-reference bookkeeping kept in the parser object and
# returns whatever the DocumentHandler's end_document() returns.
sub _handle_final {
    my ($self) = @_;

    foreach my $key ('UseEntRefs', 'EntRefs') {
        delete $self->{$key};
    }

    return $self->{DocumentHandler}->end_document( { } );
}
# XML::Parser `Start' callback: reports start_element() with the element
# Name and its Attributes hash.
#
# The arguments after the element name are the attribute name/value
# pairs.  When the UseAttributeOrder parse option is true two more
# properties are added: AttributeOrder (the attribute names in the order
# received) and Defaulted (half of Expat's specified_attr(), since that
# count covers names and values).
sub _handle_start {
    my ($self, $expat, $element, @attributes) = @_;

    my @properties;
    if ($self->{ParseOptions}{UseAttributeOrder}) {
        # attribute names sit at the even positions of the list
        my @name_positions = grep { $_ % 2 == 0 } 0 .. $#attributes - 1;
        my @order = @attributes[@name_positions];

        push @properties, 'AttributeOrder', \@order;

        # Divide by two because XML::Parser counts both attribute name
        # and value within its index
        push @properties, 'Defaulted', ($expat->specified_attr() / 2);
    }

    my %event = ( Name => $element,
                  Attributes => { @attributes },
                  @properties );

    return $self->{DocumentHandler}->start_element( \%event );
}
# XML::Parser `End' callback: reports end_element() with the element
# type name as `Name'.
sub _handle_end {
    my ($self, $expat, $element) = @_;

    my %event = ( Name => $element );
    return $self->{DocumentHandler}->end_element( \%event );
}
# XML::Parser `Char' callback: reports characters() with the text as
# `Data'.
sub _handle_char {
    my ($self, $expat, $string) = @_;

    my %event = ( Data => $string );
    return $self->{DocumentHandler}->characters( \%event );
}
# XML::Parser `Proc' callback: reports processing_instruction() with
# `Target' and `Data'.
sub _handle_proc {
    my ($self, $expat, $target, $data) = @_;

    my %event = ( Target => $target,
                  Data => $data );
    return $self->{DocumentHandler}->processing_instruction( \%event );
}
# XML::Parser `Comment' callback: reports comment() with the comment
# text as `Data'.
sub _handle_comment {
    my ($self, $expat, $data) = @_;

    my %event = ( Data => $data );
    return $self->{DocumentHandler}->comment( \%event );
}
# XML::Parser `CdataStart' callback: reports start_cdata() with an empty
# property hash.
sub _handle_cdatastart {
    my ($self, $expat) = @_;

    my $doc_handler = $self->{DocumentHandler};
    return $doc_handler->start_cdata( { } );
}
# XML::Parser `CdataEnd' callback: reports end_cdata() with an empty
# property hash.
sub _handle_cdataend {
    my ($self, $expat) = @_;

    my $doc_handler = $self->{DocumentHandler};
    return $doc_handler->end_cdata( { } );
}
# XML::Parser `Default' callback.
#
# Everything no other callback claimed arrives here, so most input is
# ignored; only a string that is exactly one `&NAME;' sequence is acted
# on.  For such a string entity_reference() is reported with the
# declaration recorded for NAME by _handle_entity, or with a bare
# { Name => NAME } hash when no declaration was recorded.
sub _handle_default {
    my ($self, $expat, $string) = @_;

    if ($string =~ /^&($name_re);$/) {
        my $entity_name = $1;

        my $ent_ref = $self->{EntRefs}{$entity_name};
        $ent_ref = { Name => $entity_name }
            unless (defined $ent_ref);

        $self->{DocumentHandler}->entity_reference($ent_ref);
    }
}
###
### DTDHandler methods
###
# XML::Parser `Notation' callback: reports notation_decl() with `Name'
# and, for each one that is defined, `Base', `SystemId' and `PublicId'.
sub _handle_notation {
    my ($self, $expat, $notation, $base, $sysid, $pubid) = @_;

    my %decl = ( Name => $notation );
    $decl{Base} = $base if (defined $base);
    $decl{SystemId} = $sysid if (defined $sysid);
    $decl{PublicId} = $pubid if (defined $pubid);

    return $self->{DTDHandler}->notation_decl( \%decl );
}
# XML::Parser `Unparsed' callback: reports unparsed_entity_decl() with
# `Name' and `SystemId' (always present) plus `Base' and `PublicId' when
# they are defined.
sub _handle_unparsed {
    my ($self, $expat, $entity, $base, $sysid, $pubid) = @_;

    my %decl = ( Name => $entity,
                 SystemId => $sysid );
    $decl{Base} = $base if (defined $base);
    $decl{PublicId} = $pubid if (defined $pubid);

    return $self->{DTDHandler}->unparsed_entity_decl( \%decl );
}
# XML::Parser `Entity' callback.
#
# Builds the declaration hash (`Name', plus `Value', `PublicId',
# `SystemId' and `Notation' when defined).  While entity references are
# being reported (UseEntRefs) the hash is also remembered under the
# entity name so _handle_default can hand it to entity_reference().
# entity_decl() is reported only if the DTDHandler implements it.
sub _handle_entity {
    my ($self, $expat, $name, $val, $sysid, $pubid, $ndata) = @_;

    my $properties = { Name => $name };
    $properties->{Value} = $val if (defined $val);
    $properties->{PublicId} = $pubid if (defined $pubid);
    $properties->{SystemId} = $sysid if (defined $sysid);
    $properties->{Notation} = $ndata if (defined $ndata);

    $self->{EntRefs}{$name} = $properties
        if ($self->{UseEntRefs});

    if ($self->{DTDHandler}->can('entity_decl')) {
        $self->{DTDHandler}->entity_decl( $properties );
    }
}
# XML::Parser `Element' callback: reports element_decl() with the
# element `Name' and its content `Model'.
sub _handle_element {
    my ($self, $expat, $name, $model) = @_;

    my %decl = ( Name => $name,
                 Model => $model );
    return $self->{DTDHandler}->element_decl( \%decl );
}
# XML::Parser `Attlist' callback: reports attlist_decl() for one
# attribute with `ElementName', `AttributeName', `Type', `Default' and
# `Fixed'.  All five keys are always present, defined or not.
sub _handle_attlist {
    my ($self, $expat, $elname, $attname, $type, $default, $fixed) = @_;

    my %decl = ( ElementName => $elname,
                 AttributeName => $attname,
                 Type => $type,
                 Default => $default,
                 Fixed => $fixed );
    return $self->{DTDHandler}->attlist_decl( \%decl );
}
# XML::Parser `Doctype' callback: reports doctype_decl() with `Name'
# and, for each one that is defined, `SystemId', `PublicId' and
# `Internal' (the internal subset).
sub _handle_doctype {
    my ($self, $expat, $name, $sysid, $pubid, $internal) = @_;

    my %decl = ( Name => $name );
    $decl{SystemId} = $sysid if (defined $sysid);
    $decl{PublicId} = $pubid if (defined $pubid);
    $decl{Internal} = $internal if (defined $internal);

    return $self->{DTDHandler}->doctype_decl( \%decl );
}
# XML::Parser `XMLDecl' callback: reports xml_decl() with `Version'
# (always present) plus `Encoding' and `Standalone' when defined.
sub _handle_xmldecl {
    my ($self, $expat, $version, $encoding, $standalone) = @_;

    my %decl = ( Version => $version );
    $decl{Encoding} = $encoding if (defined $encoding);
    $decl{Standalone} = $standalone if (defined $standalone);

    return $self->{DTDHandler}->xml_decl( \%decl );
}
###
### EntityResolver methods
###
# XML::Parser `ExternEnt' callback.
#
# Asks the EntityResolver's resolve_entity() about the entity, passing
# `SystemId' (always) plus `Base' and `PublicId' when defined.
#
# Returns the replacement input for XML::Parser: the `ByteStream' handle
# or the `String' text of the hash the resolver returned, or undef when
# the resolver did not return a hash.
#
# A `String' is accepted whenever it is defined, so an empty (or "0")
# entity text is passed on instead of being rejected as an invalid
# source.
#
# Dies if the resolver's hash only names a `SystemId' (opening it is not
# implemented) or contains no usable source at all.
sub _handle_extern_ent {
    my ($self, $expat, $base, $sysid, $pubid) = @_;

    my @properties = (SystemId => $sysid);
    push (@properties, Base => $base)
        if (defined $base);
    push (@properties, PublicId => $pubid)
        if (defined $pubid);

    my $result = $self->{EntityResolver}->resolve_entity( { @properties } );

    if (UNIVERSAL::isa($result, 'HASH')) {
        if ($result->{ByteStream}) {
            return $result->{ByteStream};
        } elsif (defined $result->{String}) {
            return $result->{String};
        } elsif ($result->{SystemId}) {
            # FIXME must be able to resolve SystemIds, XML::Parser's
            # default can :-(
            die "PerlSAX: automatic opening of SystemIds from \`resolve_entity' not implemented, contact the author\n";
        } else {
            # FIXME
            die "PerlSAX: invalid source returned from \`resolve_entity'\n";
        }
    }

    # kept as an explicit single undef value, as before
    return undef;
}
1;
__END__
=head1 NAME
XML::Parser::PerlSAX - Perl SAX parser using XML::Parser
=head1 SYNOPSIS
use XML::Parser::PerlSAX;
$parser = XML::Parser::PerlSAX->new( [OPTIONS] );
$result = $parser->parse( [OPTIONS] );
$result = $parser->parse($string);
=head1 DESCRIPTION
C<XML::Parser::PerlSAX> is a PerlSAX parser using the XML::Parser
module. This man page summarizes the specific options, handlers, and
properties supported by C<XML::Parser::PerlSAX>; please refer to the
PerlSAX standard in `C<PerlSAX.pod>' for general usage information.
=head1 METHODS
=over 4
=item new
Creates a new parser object. Default options for parsing, described
below, are passed as key-value pairs or as a single hash. Options may
be changed directly in the parser object unless stated otherwise.
Options passed to `C<parse()>' override the default options in the
parser object for the duration of the parse.
=item parse
Parses a document. Options, described below, are passed as key-value
pairs or as a single hash. Options passed to `C<parse()>' override
default options in the parser object.
=item location
Returns the location as a hash:
ColumnNumber The column number of the parse.
LineNumber The line number of the parse.
BytePosition The current byte position of the parse.
PublicId A string containing the public identifier, or undef
if none is available.
SystemId A string containing the system identifier, or undef
if none is available.
Base The current value of the base for resolving relative
URIs.
ALPHA WARNING: The `C<SystemId>' and `C<PublicId>' properties returned
are the system and public identifiers of the document passed to
`C<parse()>', not the identifiers of the currently parsing external
entity. The column, line, and byte positions I<are> of the current
entity being parsed.
=head1 OPTIONS
The following options are supported by C<XML::Parser::PerlSAX>:
Handler default handler to receive events
DocumentHandler handler to receive document events
DTDHandler handler to receive DTD events
ErrorHandler handler to receive error events
EntityResolver handler to resolve entities
Locale locale to provide localisation for errors
Source hash containing the input source for parsing
UseAttributeOrder set to true to provide AttributeOrder and Defaulted
properties in `start_element()'
If no handlers are provided then all events will be silently ignored,
except for `C<fatal_error()>' which will cause a `C<die()>' to be
called after calling `C<end_document()>'.
If a single string argument is passed to the `C<parse()>' method, it
is treated as if a `C<Source>' option was given with a `C<String>'
parameter.
The `C<Source>' hash may contain the following parameters:
ByteStream The raw byte stream (file handle) containing the
document.
String A string containing the document.
SystemId The system identifier (URI) of the document.
PublicId The public identifier.
Encoding A string describing the character encoding.
If more than one of `C<ByteStream>', `C<String>', or `C<SystemId>' is
given, then preference is given first to `C<ByteStream>', then
`C<String>', then `C<SystemId>'.
=head1 HANDLERS
The following handlers and properties are supported by
C<XML::Parser::PerlSAX>:
=head2 DocumentHandler methods
=over 4
=item start_document
Receive notification of the beginning of a document.
No properties defined.
=item end_document
Receive notification of the end of a document.
No properties defined.
=item start_element
Receive notification of the beginning of an element.
Name The element type name.
Attributes A hash containing the attributes attached to the
element, if any.
The `C<Attributes>' hash contains only string values.
If the `C<UseAttributeOrder>' parser option is true, the following
properties are also passed to `C<start_element>':
AttributeOrder An array of attribute names in the order they were
specified, followed by the defaulted attribute
names.
Defaulted The index number of the first defaulted attribute in
`AttributeOrder'. If this index is equal to the
length of `AttributeOrder', there were no defaulted
values.
Note to C<XML::Parser> users: `C<Defaulted>' will be half the value of
C<XML::Parser::Expat>'s `C<specified_attr()>' function because only
attribute names are provided, not their values.
=item end_element
Receive notification of the end of an element.
Name The element type name.
=item characters
Receive notification of character data.
Data The characters from the XML document.
=item processing_instruction
Receive notification of a processing instruction.
Target The processing instruction target.
Data The processing instruction data, if any.
=item comment
Receive notification of a comment.
Data The comment data, if any.
=item start_cdata
Receive notification of the start of a CDATA section.
No properties defined.
=item end_cdata
Receive notification of the end of a CDATA section.
No properties defined.
=item entity_reference
Receive notification of an internal entity reference. If this handler
is defined, internal entities will not be expanded and not passed to
the `C<characters()>' handler. If this handler is not defined,
internal entities will be expanded if possible and passed to the
`C<characters()>' handler.
Name The entity reference name
Value The entity reference value
=back
=head2 DTDHandler methods
=over 4
=item notation_decl
Receive notification of a notation declaration event.
Name The notation name.
PublicId The notation's public identifier, if any.
SystemId The notation's system identifier, if any.
Base The base for resolving a relative URI, if any.
=item unparsed_entity_decl
Receive notification of an unparsed entity declaration event.
Name The unparsed entity's name.
SystemId The entity's system identifier.
PublicId The entity's public identifier, if any.
Base The base for resolving a relative URI, if any.
=item entity_decl
Receive notification of an entity declaration event.
Name The entity name.
Value The entity value, if any.
PublicId The notation's public identifier, if any.
SystemId The notation's system identifier, if any.
Notation The notation declared for this entity, if any.
For internal entities, the `C<Value>' parameter will contain the value
and the `C<PublicId>', `C<SystemId>', and `C<Notation>' will be
undefined. For external entities, the `C<Value>' parameter will be
undefined, the `C<SystemId>' parameter will have the system id, the
`C<PublicId>' parameter will have the public id if it was provided (it
will be undefined otherwise), the `C<Notation>' parameter will contain
the notation name for unparsed entities. If this is a parameter entity
declaration, then a '%' will be prefixed to the entity name.
Note that `C<entity_decl()>' and `C<unparsed_entity_decl()>' overlap.
If both methods are implemented by a handler, then this handler will
not be called for unparsed entities.
=item element_decl
Receive notification of an element declaration event.
Name The element type name.
Model The content model as a string.
=item attlist_decl
Receive notification of an attribute list declaration event.
This handler is called for each attribute in an ATTLIST declaration
found in the internal subset. So an ATTLIST declaration that has
multiple attributes will generate multiple calls to this handler.
ElementName The element type name.
AttributeName The attribute name.
Type The attribute type.
Fixed True if this is a fixed attribute.
The `C<Default>' property is the default value, which will either be
"#REQUIRED", "#IMPLIED" or a quoted string (i.e. the returned string
will begin and end with a quote character).
=item doctype_decl
Receive notification of a DOCTYPE declaration event.
Name The document type name.
SystemId The document's system identifier.
PublicId The document's public identifier, if any.
Internal The internal subset as a string, if any.
Internal will contain all whitespace, comments, processing
instructions, and declarations seen in the internal subset. The
declarations will be there whether or not they have been processed by
another handler (except for unparsed entities processed by the
Unparsed handler). However, comments and processing instructions will
not appear if they've been processed by their respective handlers.
=item xml_decl
Receive notification of an XML declaration event.
Version The version.
Encoding The encoding string, if any.
Standalone True, false, or undefined if not declared.
=back
=head2 EntityResolver
=over 4
=item resolve_entity
Allow the handler to resolve external entities.
Name The notation name.
SystemId The notation's system identifier.
PublicId The notation's public identifier, if any.
Base The base for resolving a relative URI, if any.
`C<resolve_entity()>' should return undef to request that the parser
open a regular URI connection to the system identifier or a hash
describing the new input source. This hash has the same properties as
the `C<Source>' parameter to `C<parse()>':
PublicId The public identifier of the external entity being
referenced, or undef if none was supplied.
SystemId The system identifier of the external entity being
referenced.
String String containing XML text
ByteStream An open file handle.
CharacterStream
An open file handle.
Encoding The character encoding, if known.
=back
=head1 AUTHOR
Ken MacLeod, ken@bitsko.slc.ut.us
=head1 SEE ALSO
perl(1), PerlSAX.pod(3)
Extensible Markup Language (XML)
SAX 1.0: The Simple API for XML
=cut
libxml-perl-0.08/lib/Data/ 0040755 0000764 0000764 00000000000 07745275112 013451 5 ustar ken ken libxml-perl-0.08/lib/Data/Grove/ 0040755 0000764 0000764 00000000000 07745275112 014533 5 ustar ken ken libxml-perl-0.08/lib/Data/Grove/Parent.pm 0100644 0000764 0000764 00000020161 07745275111 016316 0 ustar ken ken #
# Copyright (C) 1998,1999 Ken MacLeod
# Data::Grove::Parent is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# $Id: Parent.pm,v 1.2 1999/12/22 21:15:00 kmacleod Exp $
#
###
### WARNING
###
###
### This code has a bug in it that renders it useless. In the FETCH
### routines, the new object created should have a reference to the
### tied object that has $self as the underlying value. As of
### this version, I don't know of a way to get to the tied object.
###
# Search for places marked `VALIDATE' to see where validation hooks
# may be added in the future.
use strict;
#--------------------------------------------------------------------------
# Data::Grove::Parent
#--------------------------------------------------------------------------
package Data::Grove::Parent;
use UNIVERSAL;
use Carp;
use vars qw{ $VERSION };
# will be substituted by make-rel script
$VERSION = "0.08";
# Wrap a raw grove object in a tied hash that also carries a parent.
#
# $raw that already is a Data::Grove::Parent is returned unchanged.
# Otherwise an empty hash blessed into $raw's own class is tied to $type
# with `Raw' (and `Parent', when one is given) and returned.
sub new {
    my ($type, $raw, $parent) = @_;

    return $raw
        if (UNIVERSAL::isa($raw, 'Data::Grove::Parent'));

    my @properties = ( Raw => $raw );
    push (@properties, Parent => $parent)
        if (defined $parent);

    # the stand-in keeps the raw object's class so its methods still apply
    my $stand_in = bless {}, ref($raw);
    tie %$stand_in, $type, @properties;

    return $stand_in;
}
# tie() constructor: the key/value pairs given to tie() (`Raw' and
# optionally `Parent') become the tie object's own hash.
sub TIEHASH {
    my ($type, @properties) = @_;

    my %state = @properties;
    return bless \%state, $type;
}
# Tied-hash STORE.
#
# Keys the tie object itself holds (such as `Parent') are updated in
# place.  Any other key is written to the raw object; a value that is a
# Data::Grove::Parent or Data::Grove::ParentList is replaced by the raw
# data it wraps first.
sub STORE {
    my ($self, $key, $value) = @_;

    if (!exists $self->{$key}) {
        # VALIDATE
        if (UNIVERSAL::isa($value, 'Data::Grove::Parent')) {
            $value = $value->{Raw};
        } elsif (UNIVERSAL::isa($value, 'Data::Grove::ParentList')) {
            $value = $value->[0];
        }
        $self->{Raw}{$key} = $value;
    } else {
        $self->{$key} = $value;
    }
}
# Tied-hash FETCH.
#
# Keys the tie object itself holds are answered directly.  Everything
# else is read from the raw object; an unblessed array value comes back
# wrapped in a Data::Grove::ParentList whose parent is this tie object.
sub FETCH {
    my ($self, $key) = @_;

    return $self->{$key}
        if (exists $self->{$key});

    my $value = $self->{Raw}{$key};
    $value = Data::Grove::ParentList->new($value, $self)
        if (ref($value) eq 'ARRAY');

    return $value;
}
# Tied-hash FIRSTKEY: start a new iteration.
#
# Iteration covers the raw object's keys first and the tie object's own
# keys afterwards; the `__each_in_raw' marker tells NEXTKEY which of the
# two is currently being walked.
#
# When the raw hash is empty the iteration now starts directly with the
# tie object's own keys.  Previously it ended immediately, hiding those
# keys, and left the marker behind as a visible key.
sub FIRSTKEY {
    my $self = shift;
    my $raw = $self->{Raw};

    keys %$raw;                 # rewind the raw hash's iterator
    my $first = each %$raw;
    if (defined $first) {
        $self->{'__each_in_raw'} = 1;
        return $first;
    }

    # nothing in the raw hash: continue with our own keys right away
    delete $self->{'__each_in_raw'};
    keys %$self;                # rewind our own iterator
    my $own = each %$self;
    return $own;
}
# Tied-hash NEXTKEY: continue the iteration begun by FIRSTKEY.
#
# While the `__each_in_raw' marker is set, keys come from the raw
# object.  Once those run out the marker is removed, the tie object's
# own iterator is rewound, and its keys follow.
sub NEXTKEY {
    my $self = shift;

    if ($self->{'__each_in_raw'}) {
        my @entry = each %{ $self->{Raw} };
        return $entry[0]
            if (@entry);

        delete $self->{'__each_in_raw'};
        keys %$self;            # rewind our own iterator
    }

    return each %$self;
}
# Tied-hash EXISTS: true when the key is present in the raw object or in
# the tie object itself.
sub EXISTS {
    my ($self, $key) = @_;

    return 1
        if (exists $self->{Raw}{$key});
    return exists $self->{$key};
}
# Tied-hash DELETE.
#
# Keys not held by the tie object are deleted from the raw object.  Keys
# of the tie object are deleted from it, except `Parent' and `Raw',
# which croak instead.  Returns what delete() returns.
sub DELETE {
    my ($self, $key) = @_;

    return delete $self->{'Raw'}{$key}
        unless (exists $self->{$key});

    croak "can't delete \`Parent' or \`Raw' properties\n"
        if ($key eq 'Parent' || $key eq 'Raw');

    return delete $self->{$key};
}
# Tied-hash CLEAR: empties the raw object in place.  The tie object's
# own keys (`Raw', `Parent') are kept.
sub CLEAR {
    my ($self) = @_;

    %{ $self->{Raw} } = ();
}
#--------------------------------------------------------------------------
# Data::Grove::ParentList
#--------------------------------------------------------------------------
package Data::Grove::ParentList;
use UNIVERSAL;
# Wrap a raw array in a tied array that remembers the parent node.
#
# $raw that already is a Data::Grove::ParentList is returned unchanged;
# otherwise a fresh array tied to $type with ($raw, $parent) is
# returned.
sub new {
    my ($type, $raw, $parent) = @_;

    return $raw
        if (UNIVERSAL::isa($raw, 'Data::Grove::ParentList'));

    my @stand_in;
    tie @stand_in, $type, $raw, $parent;

    return \@stand_in;
}
# tie() constructor: the tie object is an array holding the arguments
# given to tie(), i.e. the raw array in slot 0 and the parent in slot 1.
sub TIEARRAY {
    my ($type, @slots) = @_;

    return bless [ @slots ], $type;
}
# Tied-array FETCHSIZE: number of elements in the raw array.
sub FETCHSIZE {
    my ($self) = @_;

    my $items = $self->[0];
    return scalar @$items;
}
# Tied-array STORESIZE: resize the raw array to $count elements.
sub STORESIZE {
    my ($self, $count) = @_;

    $#{ $self->[0] } = $count - 1;
}
# Tied-array STORE: write $value at $index of the raw array.  A
# Data::Grove::Parent or Data::Grove::ParentList value is replaced by
# the raw data it wraps first.
sub STORE {
    my ($self, $index, $value) = @_;

    # VALIDATE
    if (UNIVERSAL::isa($value, 'Data::Grove::Parent')) {
        $value = $value->{Raw};
    } elsif (UNIVERSAL::isa($value, 'Data::Grove::ParentList')) {
        $value = $value->[0];
    }

    my $items = $self->[0];
    $items->[$index] = $value;
}
# Tied-array FETCH.
#
# Undefined elements are returned as they are.  A reference is handed to
# Data::Grove::Parent->new together with the list's parent; a plain
# scalar is first put into a { Data => ... } hash and wrapped the same
# way.
sub FETCH {
    my ($self, $index) = @_;

    my $value = $self->[0][$index];
    return $value
        unless (defined $value);

    my $node = ref($value) ? $value : { Data => $value };
    return Data::Grove::Parent->new($node, $self->[1]);
}
# Tied-array CLEAR: empties the raw array in place.
sub CLEAR {
    my ($self) = @_;

    @{ $self->[0] } = ();
}
# Tied-array POP: removes and returns the last element of the raw array
# (the raw value, not a wrapped one).
sub POP {
    my ($self) = @_;

    return pop(@{ $self->[0] });
}
# Tied-array PUSH: append values to the raw array and return its new
# length.  Data::Grove::Parent and Data::Grove::ParentList values are
# replaced by the raw data they wrap.
#
# The unwrapping is done on a copy of the argument list: the elements of
# @_ are aliases of the caller's variables, so rewriting them in place
# would silently change what the caller passed in.
sub PUSH {
    my $o = shift;

    my @values = @_;
    foreach my $value (@values) {
        # VALIDATE
        if (UNIVERSAL::isa($value, 'Data::Grove::Parent')) {
            $value = $value->{Raw};
        } elsif (UNIVERSAL::isa($value, 'Data::Grove::ParentList')) {
            $value = $value->[0];
        }
    }

    return push(@{$o->[0]}, @values);
}
# Tied-array SHIFT: removes and returns the first element of the raw
# array (the raw value, not a wrapped one).
sub SHIFT {
    my ($self) = @_;

    return shift(@{ $self->[0] });
}
# Tied-array UNSHIFT: prepend values to the raw array and return its new
# length.  Data::Grove::Parent and Data::Grove::ParentList values are
# replaced by the raw data they wrap.
#
# The unwrapping is done on a copy of the argument list: the elements of
# @_ are aliases of the caller's variables, so rewriting them in place
# would silently change what the caller passed in.
sub UNSHIFT {
    my $o = shift;

    my @values = @_;
    foreach my $value (@values) {
        # VALIDATE
        if (UNIVERSAL::isa($value, 'Data::Grove::Parent')) {
            $value = $value->{Raw};
        } elsif (UNIVERSAL::isa($value, 'Data::Grove::ParentList')) {
            $value = $value->[0];
        }
    }

    return unshift(@{$o->[0]}, @values);
}
# Tied-array SPLICE: splice() on the raw array.
#
# Arguments after the object are the offset (default 0; a negative
# offset counts from the end), the length (default: everything from the
# offset on) and the replacement values.  Data::Grove::Parent and
# Data::Grove::ParentList replacements are replaced by the raw data they
# wrap.  Returns what splice() returns, i.e. the removed raw elements.
#
# The unwrapping is done on a copy of the replacement list: the elements
# of @_ are aliases of the caller's variables, so rewriting them in
# place would silently change what the caller passed in.
sub SPLICE
{
    my $ob = shift;
    my $sz = $ob->FETCHSIZE;
    my $off = @_ ? shift : 0;
    $off += $sz if $off < 0;
    my $len = @_ ? shift : $sz-$off;

    my @values = @_;
    foreach my $value (@values) {
        # VALIDATE
        if (UNIVERSAL::isa($value, 'Data::Grove::Parent')) {
            $value = $value->{Raw};
        } elsif (UNIVERSAL::isa($value, 'Data::Grove::ParentList')) {
            $value = $value->[0];
        }
    }

    return splice(@{$ob->[0]},$off,$len,@values);
}
#--------------------------------------------------------------------------
# Data::Grove
#--------------------------------------------------------------------------
package Data::Grove;
# Return the root of the grove: this node when it has no defined
# `Parent', otherwise whatever the parent's root() returns (extra
# arguments are passed along).
sub root {
    my $self = shift;

    if (defined $self->{Parent}) {
        return $self->{Parent}->root(@_);
    }

    return $self;
}
# Return the list of nodes from the root down to and including this
# node: the parent's rootpath() followed by this node, or just this node
# when it has no defined `Parent'.
sub rootpath {
    my ($self) = @_;

    return ($self)
        unless (defined $self->{Parent});

    return ($self->{Parent}->rootpath, $self);
}
# Return this node wrapped by Data::Grove::Parent->new, with the
# optional $parent as the wrapper's parent.
sub add_magic {
    my ($self, $parent) = @_;

    return Data::Grove::Parent->new($self, $parent);
}
1;
__END__
=head1 NAME
Data::Grove::Parent - provide parent properties to Data::Grove objects
=head1 SYNOPSIS
use Data::Grove::Parent;
$root = $object->root;
$rootpath = $object->rootpath;
$tied = $object->add_magic([ $parent ]);
$node = Data::Grove::Parent->new($hash [, $parent]);
$node_list = Data::Grove::ParentList->new($array [, $parent]);
=head1 DESCRIPTION
Data::Grove::Parent is an extension to Data::Grove that adds
`C<Parent>' and `C<Raw>' properties to Data::Grove objects and methods
for returning the root node of a grove, a list of nodes between and
including the root node and the current node, and a method that
creates parented nodes.
Data::Grove::Parent works by creating a Perl ``tied'' object that
contains a parent reference (`C<Parent>') and a reference to the
original Data::Grove object (`C<Raw>'). Tying-magic is used so that
every time you reference the Data::Grove::Parent object it actually
references the underlying raw object.
When you retrieve a list or a property of the Raw object,
Data::Grove::Parent automatically adds magic to the returned list or
node. This means you only call `add_magic()' once to create the first
Data::Grove::Parent object and then use the grove objects like you
normally would.
The most obvious use of this is so you don't have to call a
`C<delete>' method when you want to release a grove or part of a
grove; since Data::Grove and Data::Grove::Parent objects have no
cyclic references, Perl can garbage collect them normally.
A secondary use is to allow you to reuse grove or property set
fragments in multiple trees. WARNING: Data::Grove currently does not
protect you from creating your B<own> cyclic references! This could
lead to infinite loops if you don't take care to avoid them.
=head1 METHODS
=over 4
=item $object->root()
=item $object->rootpath()
`C<root()>' returns the root node if `C<$object>' is a
`C<Data::Grove::Parent>' object. `C<rootpath()>' returns an array of
all the nodes between and including the root node and `C<$object>'.
=item $tied = $object->add_magic([ $parent ])
`C<add_magic()>' returns a C<Data::Grove::Parent> object with
`C<$object>' as its `C<Raw>' object. If `C<$parent>' is given, that
becomes the tied object's parent object.
=back
=head1 AUTHOR
Ken MacLeod, ken@bitsko.slc.ut.us
=head1 SEE ALSO
perl(1), Data::Grove(3)
=cut
libxml-perl-0.08/lib/Data/Grove/Visitor.pm 0100644 0000764 0000764 00000013005 07745275111 016523 0 ustar ken ken #
# Copyright (C) 1998,1999 Ken MacLeod
# Data::Grove::Visitor is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# $Id: Visitor.pm,v 1.6 2000/03/20 23:06:45 kmacleod Exp $
#
use strict;
# Minimum Perl version required by this module.
use 5.005;
# This package only carries the module's version number; the visitor
# methods themselves are defined in package Data::Grove further down.
package Data::Grove::Visitor;
use vars qw{ $VERSION };
# will be substituted by make-rel script
$VERSION = "0.08";
# The following methods extend Data::Grove
package Data::Grove;
# Dispatch this object to the visitor method that matches its type.
#
# The type is read from the package variable $type_name of the
# object's own class (ref($self)); parent classes are not consulted.
# If that variable is defined and the visitor can `visit_<type_name>',
# that method is called as
#
#     $visitor->visit_<type_name>($self, @extra_args)
#
# and its result is returned.  Otherwise nothing is done and an empty
# list is returned.
sub accept {
    my $self = shift;
    my $visitor = shift;

    my $package = ref($self);

    # Read $<package>::type_name through a symbolic reference.  A
    # string eval would give the same value, but it recompiles code on
    # every visited node and resets $@ as a side effect.
    my $type_name = do {
        no strict 'refs';
        ${"${package}::type_name"};
    };
    if (!defined $type_name) {
        return ();              # no action
    }

    my $method_name = 'visit_' . $type_name;
    if (!$visitor->can($method_name)) {
        return ();              # no action
    }
    return $visitor->$method_name($self, @_);
}
# Dispatch this object to a visitor method chosen by the object's name.
#
# For an object with a defined `Name', every non-word character of the
# name is replaced by `_' and the visitor is asked for
# `visit_name_<name>'.  If the visitor has that method it is called as
#
#     $visitor->visit_name_<name>($self, @extra_args)
#
# Objects without a `Name', and visitors without the name-specific
# method, fall back to the type-based accept().
sub accept_name {
    my $self = shift;

    if (!defined $self->{Name}) {
        return $self->accept(@_);
    }

    my $visitor = shift;
    my $name = $self->{Name};
    $name =~ s/\W/_/g;
    my $name_method = "visit_name_$name";

    # Ask the visitor on every call.  Remembering the answer inside the
    # node would tie it to the first visitor that came by, so a later
    # visitor without the method would be sent a call it cannot handle,
    # and the node would end up carrying CODE references as data.
    if (!$visitor->can($name_method)) {
        return $self->accept($visitor, @_);
    }
    return $visitor->$name_method($self, @_);
}
# Visit the value of attribute $attr of this object.
#
#   - No `Attributes' entry at all: no action, returns an empty list.
#   - Attribute value is an array reference: each item is visited via
#     _children_accept() and the collected results are returned.
#   - Anything else: the value is wrapped in a new
#     XML::Grove::Characters object (as its `Data') and passed to the
#     visitor's visit_characters(), if the visitor has one; otherwise
#     nothing is done.
sub attr_accept {
    my $self = shift; my $attr = shift; my $visitor = shift;

    if (!defined $self->{Attributes}) {
        return ();              # no action
    }

    my $attrs = $self->{Attributes}{$attr};
    if (ref($attrs) eq 'ARRAY') {
        return $self->_children_accept ($attrs, $visitor, @_);
    }

    # Ask the visitor on every call.  Remembering the answer inside the
    # node would tie it to the first visitor that came by, so a later
    # visitor without visit_characters() would be sent a call it cannot
    # handle.
    return if (!$visitor->can('visit_characters'));

    # FIXME should be some other generic than XML::Grove::Characters
    return $visitor->visit_characters (XML::Grove::Characters->new(Data => $attrs), @_);
}
# Visit every item in this object's `Contents' by type.
#
# Hands `Contents' plus the caller's arguments (visitor first) to
# _children_accept() and returns its result.  An object without a
# defined `Contents' yields an empty list and no visitor calls.
sub children_accept {
    my $node = shift;

    return ()                   # no action
        unless defined $node->{Contents};

    return $node->_children_accept ($node->{Contents}, @_);
}
# Visit every item in this object's `Contents' by name.
#
# Hands `Contents' plus the caller's arguments (visitor first) to
# _children_accept_name() and returns its result.  An object without a
# defined `Contents' yields an empty list and no visitor calls.
sub children_accept_name {
    my $node = shift;

    return ()                   # no action
        unless defined $node->{Contents};

    return $node->_children_accept_name ($node->{Contents}, @_);
}
# Call accept() on each item of the array referenced by $array, in
# index order, passing the visitor and any extra arguments along.
#
# Returns the concatenation of everything the accept() calls returned
# (in scalar context, the number of collected values).
sub _children_accept {
    # Peel off the three fixed arguments; whatever is left in @_ is
    # forwarded untouched to every accept() call.
    my ($self, $array, $visitor) = splice(@_, 0, 3);

    my @results;
    my $pos = 0;
    # The upper bound is re-read on every pass, so the loop follows the
    # array's current length.
    while ($pos <= $#$array) {
        push @results, $array->[$pos]->accept ($visitor, @_);
        $pos ++;
    }
    return @results;
}
# Call accept_name() on each item of the array referenced by $array,
# in index order, passing the visitor and any extra arguments along.
#
# Returns the concatenation of everything the accept_name() calls
# returned (in scalar context, the number of collected values).
sub _children_accept_name {
    # Peel off the three fixed arguments; whatever is left in @_ is
    # forwarded untouched to every accept_name() call.
    my ($self, $array, $visitor) = splice(@_, 0, 3);

    my @results;
    my $pos = 0;
    # The upper bound is re-read on every pass, so the loop follows the
    # array's current length.
    while ($pos <= $#$array) {
        push @results, $array->[$pos]->accept_name ($visitor, @_);
        $pos ++;
    }
    return @results;
}
1;
__END__
=head1 NAME
Data::Grove::Visitor - add visitor/callback methods to Data::Grove objects
=head1 SYNOPSIS
use Data::Grove::Visitor;
@results = $object->accept ($visitor, ...);
@results = $object->accept_name ($visitor, ...);
@results = $object->children_accept ($visitor, ...);
@results = $object->children_accept_name ($visitor, ...);
=head1 DESCRIPTION
Data::Grove::Visitor adds visitor methods (callbacks) to Data::Grove
objects. A ``visitor'' is a class (a package) you write that has
methods (subs) corresponding to the objects in the classes being
visited. You use the visitor methods by creating an instance of your
visitor class, and then calling `C<accept($visitor)>' on the
top-most object you want to visit, that object will in turn call your
visitor back with `C<visit_I<OBJECT>>', where I<OBJECT> is the type name of the object being visited.
ContentHandler
Object to receive document content events. The
ContentHandler, with additional events defined below, is the
class of events described in Basic
SAX Handler. If the application does not register a content handler
or content event handlers on the default handler, content events
reported by the SAX parser will be silently ignored.
DTDHandler
Object to receive basic DTD events. If the application does not
register a DTD handler or DTD event handlers on the default handler,
DTD events reported by the SAX parser will be silently
ignored.
EntityResolver
Object to resolve external entities. If the application does not
register an entity resolver or entity events on the default handler,
the SAX parser will perform its own default resolution.
ErrorHandler
Object to receive error-message events. If the application does not
register an error handler or error event handlers on the default
handler, all error events reported by the SAX parser will be silently
ignored; however, normal processing may not continue. It is highly
recommended that all SAX applications implement an error handler to
avoid unexpected bugs.
Source
A hash containing information about the XML instance to be parsed.
See Input Sources below. Note that
Source cannot be changed during the parse.
Features
A hash containing Feature information, as described below.
Features can be set at runtime but not directly on the Features
hash (at least, not reliably. You can do it, but the results
might not be what you expect as it doesn't give the parser a
chance to look at what you've set so that it can't react properly
to errors, or Features that it doesn't support). You should use
the set_feature() method instead.
Features are as defined in SAX2: Features
and Properties, but not of course limited to those. You may add
your own Features. Also, Java has an artificial distinction between
Features and Properties which is unnecessary. In Perl, both have been
merged under the same name.
Features can be passed as options when creating a parser or calling
a parse() method. They may also be set using the
set_feature().
When performing namespace processing, Perl SAX parsers always provide
both the raw tag name in Name and the namespace names in
NamespaceURI, LocalName, and Prefix.
Therefore, the
"http://xml.org/sax/features/namespace-prefixes" Feature is
ignored.
Also, Features are things that are supposed to be turned
on, and thus should normally be off by default, especially if
the parser doesn't support turning them off. Due to backwards
compatibility problems, the one exception to this rule is the
"http://xml.org/sax/features/namespaces" Feature which is on by
default and which a number of parsers may not be able to turn off. Thus,
a parser claiming to support this Feature (and all SAX2 parsers must
support it) may in fact only support turning it on. This is only a minor
problem as turning it off basically amounts to returning to SAX1, which
can be accomplished by a filter (eg XML::Filter::SAX2toSAX1).
In addition to the Features described in the SAX spec
itself, a number of new ones may be defined for Perl. An example of
this would be http://xmlns.perl.org/sax/node-factory which
when supported by the parser would be settable to a NodeFactory object
that would be in charge of creating SAX nodes different from those that
are normally received by event handlers. See
http://xmlns.perl.org/ (currently
in alpha state) for details on how to register Features.
The following methods are used to get and set features:
get_feature(name)
Look up the value of a feature.
The feature name is any fully-qualified URI. It is possible for an
SAX parser to recognize a feature name but to be unable to return its
value; this is especially true in the case of an adapter for a SAX1
Parser, which has no way of knowing whether the underlying parser is
validating, for example.
Some feature values may be available only in specific contexts,
such as before, during, or after a parse.
get_feature() returns the value of the feature, which is usually
either a boolean or an object, and will throw
XML::SAX::Exception::NotRecognized when the SAX parser does not
recognize the feature name and XML::SAX::Exception::NotSupported
when the SAX parser recognizes the feature name but cannot determine its
value at this time.
set_feature(name,
value)
Set the state of a feature.
The feature name is any fully-qualified URI. It is possible for an
SAX parser to recognize a feature name but to be unable to set its
value; this is especially true in the case of an adapter for a SAX1
Parser, which has no way of affecting whether the underlying parser is
validating, for example.
Some feature values may be immutable or mutable only in specific
contexts, such as before, during, or after a parse.
set_feature() will throw XML::SAX::Exception::NotRecognized
when the SAX parser does not recognize the feature name and
XML::SAX::Exception::NotSupported when the SAX parser recognizes the
feature name but cannot set the requested value.
This method is also the standard mechanism for setting extended handlers,
such as "http://xml.org/sax/handlers/DeclHandler".
get_features()
Look up all Features that this parser claims to support.
This method returns a hash of Features which the parser
claims to support. The value of the hash is currently
unspecified though it may be used later. This method is meant
to be inherited so that Features supported by the base parser
class (XML::SAX::Base) are declared to be supported by
subclasses.
Calling this method is probably only moderately useful to end
users. It is mostly meant for use by XML::SAX, so that it can
query parsers for Feature support and return an appropriate
parser depending on the Features that are required.
Input sources may be provided to parser objects or are returned by
entity resolvers. An input source is a hash with these
properties:
PublicId
The public identifier of this input source.
The public identifier is always optional: if the application writer
includes one, it will be provided as part of the location
information.
SystemId
The system identifier (URI) of this input source.
The system identifier is optional if there is a byte stream or a
character stream, but it is still useful to provide one, since the
application can use it to resolve relative URIs and can include it in
error messages and warnings (the parser will attempt to open a
connection to the URI only if there is no byte stream or character
stream specified).
If the application knows the character encoding of the object
pointed to by the system identifier, it can register the encoding
using the Encoding property.
ByteStream
The byte stream for this input source.
The SAX parser will ignore this if there is also a character stream
specified, but it will use a byte stream in preference to opening a
URI connection itself.
If the application knows the character encoding of the byte stream, it
should set the Encoding property.
CharacterStream
The character stream for this input source.
If there is a character stream specified, the SAX parser will
ignore any byte stream and will not attempt to open a URI connection
to the system identifier.
Note: A CharacterStream is a filehandle that does not need any encoding
translation done on it. This is implemented as a regular filehandle
and only works under Perl 5.7.2 or higher using PerlIO. To get a single
character, or number of characters from it, use the perl core read()
function. To get a single byte from it (or number of bytes), you can
use sysread(). The encoding of the stream should be in the Encoding
entry for the Source.
Encoding
The character encoding, if known.
The encoding must be a string acceptable for an XML encoding
declaration (see section 4.3.3 of the XML 1.0 recommendation).
This property has no effect when the application provides a character
stream.
SAX supports several classes of event handlers: content handlers,
declaration handlers, DTD handlers, error handlers, entity resolvers,
and other extensions. This section defines each of these classes of
events.
Content Events
This is the main interface that most SAX applications implement: if
the application needs to be informed of basic parsing events, it
implements this interface and registers an instance with the SAX
parser using the ContentHandler property. The parser uses
the instance to report basic document-related events like the start
and end of elements and character data.
The order of events in this interface is very important, and
mirrors the order of information in the document itself. For example,
all of an element's content (character data, processing instructions,
and/or subelements) will appear, in order, between the
start_element event and the corresponding
end_element event.
set_document_locator(locator)
Receive an object for locating the origin of SAX document events.
SAX parsers are strongly encouraged (though not absolutely
required) to supply a locator: if it does so, it must supply the
locator to the application by invoking this method before invoking any
of the other methods in the ContentHandler interface.
The locator allows the application to determine the end position of
any document-related event, even if the parser is not reporting an
error. Typically, the application will use this information for
reporting its own errors (such as character content that does not
match an application's business rules). The information provided by
the locator is probably not sufficient for use with a search
engine.
Note that the locator will provide correct information only during
the invocation of the events in this interface. The application should
not attempt to use it at any other time.
The locator is a hash with these properties:
ColumnNumber
The column number of the end of the text where the exception
occurred.
LineNumber
The line number of the end of the text where the exception
occurred.
PublicId
The public identifier of the entity where the exception
occurred.
SystemId
The system identifier of the entity where the exception
occurred.
start_prefix_mapping(mapping)
Begin the scope of a prefix-URI Namespace mapping.
The information from this event is not necessary for normal
Namespace processing: the SAX XML reader will automatically replace
prefixes for element and attribute names when the
"http://xml.org/sax/features/namespaces" feature is true (the
default).
There are cases, however, when applications need to use prefixes in
character data or in attribute values, where they cannot safely be
expanded automatically; the start/end_prefix_mapping event supplies the
information to the application to expand prefixes in those contexts
itself, if necessary.
Note that start/end_prefix_mapping() events are
not guaranteed to be properly nested relative to each-other: all
start_prefix_mapping() events will occur before the
corresponding start_element() event, and all
end_prefix_mapping events will occur after the corresponding
end_element() event, but their order is not
guaranteed.
mapping is a hash with these properties:
Prefix
The Namespace prefix being declared.
NamespaceURI
The Namespace URI the prefix is mapped to.
end_prefix_mapping(mapping)
End the scope of a prefix-URI mapping.
See start_prefix_mapping() for details. This event will
always occur after the corresponding end_element event, but
the order of end_prefix_mapping events is not otherwise
guaranteed.
mapping is a hash with this property:
Prefix
The Namespace prefix that was being mapped.
processing_instruction(pi)
Receive notification of a processing instruction.
The Parser will invoke this method once for each processing
instruction found: note that processing instructions may occur before
or after the main document element.
A SAX parser should never report an XML declaration (XML 1.0,
section 2.8) or a text declaration (XML 1.0, section 4.3.1) using this
method.
pi is a hash with these properties:
Target
The processing instruction target.
Data
The processing instruction data, or null if none was
supplied.
skipped_entity(entity)
Receive notification of a skipped entity.
The Parser will invoke this method once for each entity skipped.
Non-validating processors may skip entities if they have not seen the
declarations (because, for example, the entity was declared in an
external DTD subset). All processors may skip external entities,
depending on the values of the
"http://xml.org/sax/features/external-general-entities" and the
"http://xml.org/sax/features/external-parameter-entities"
Features.
entity is a hash with these properties:
Name
The name of the skipped entity. If it is a parameter
entity, the name will begin with '%'.
Declaration Events
This is an optional extension handler for SAX2 to provide
information about DTD declarations in an XML document. XML readers are
not required to support this handler.
Note that data-related DTD declarations (unparsed entities and
notations) are already reported through the DTDHandler interface.
If you are using the declaration handler together with a lexical
handler, all of the events will occur between the start_dtd
and the end_dtd events.
To set a separate DeclHandler for an XML reader, set the
"http://xml.org/sax/handlers/DeclHandler" Feature with the
object to receive declaration events. If the reader does not support
declaration events, it will throw a XML::SAX::Exception::NotRecognized
or a XML::SAX::Exception::NotSupported when you attempt to register
the handler. Declaration event handlers on the default handler are
automatically recognized and used.
element_decl(element)
Report an element type declaration.
The content model will consist of the string "EMPTY", the string
"ANY", or a parenthesised group, optionally followed by an occurrence
indicator. The model will be normalized so that all whitespace is
removed, and will include the enclosing parentheses.
element is a hash with these properties:
Name
The element type name.
Model
The content model as a normalized string.
attribute_decl(attribute)
Report an attribute type declaration.
Only the effective (first) declaration for an attribute will be
reported. The type will be one of the strings "CDATA",
"ID", "IDREF", "IDREFS",
"NMTOKEN", "NMTOKENS", "ENTITY",
"ENTITIES", or "NOTATION", or a parenthesized token
group with the separator "|" and all whitespace removed.
attribute is a hash with these properties:
eName
The name of the associated element.
aName
The name of the attribute.
Type
A string representing the attribute type.
ValueDefault
A string representing the attribute default ("#IMPLIED",
"#REQUIRED", or "#FIXED") or undef if none of these
applies.
Value
A string representing the attribute's default value, or null if
there is none.
internal_entity_decl(entity)
Report an internal entity declaration.
Only the effective (first) declaration for each entity will be
reported.
entity is a hash with these properties:
Name
The name of the entity. If it is a parameter entity, the name will
begin with '%'.
Value
The replacement text of the entity.
external_entity_decl(entity)
Report a parsed external entity declaration.
Only the effective (first) declaration for each entity will be
reported.
entity is a hash with these properties:
Name
The name of the entity. If it is a parameter entity, the name will
begin with '%'.
PublicId
The public identifier of the entity, or undef if none was
declared.
SystemId
The system identifier of the entity.
DTD Events
If a SAX application needs information about notations and unparsed
entities, then the application implements this interface. The parser
uses the instance to report notation and unparsed entity declarations
to the application.
The SAX parser may report these events in any order, regardless of
the order in which the notations and unparsed entities were declared;
however, all DTD events must be reported after the document handler's
start_document() event, and before the first
start_element() event.
It is up to the application to store the information for future use
(perhaps in a hash table or object tree). If the application
encounters attributes of type "NOTATION", "ENTITY",
or "ENTITIES", it can use the information that it obtained
through this interface to find the entity and/or notation
corresponding with the attribute value.
notation_decl(notation)
Receive notification of a notation declaration event.
It is up to the application to record the notation for later
reference, if necessary.
If a system identifier is present, and it is a URL, the SAX parser
must resolve it fully before passing it to the application.
notation is a hash with these properties:
Name
The notation name.
PublicId
The public identifier of the entity, or undef if none was
declared.
SystemId
The system identifier of the entity, or undef if none was
declared.
unparsed_entity_decl(entity)
Receive notification of an unparsed entity declaration event.
Note that the notation name corresponds to a notation reported by
the notation_decl() event. It is up to the application to
record the entity for later reference, if necessary.
If the system identifier is a URL, the parser must resolve it fully
before passing it to the application.
entity is a hash with these properties:
Name
The unparsed entity's name.
PublicId
The public identifier of the entity, or undef if none was
declared.
SystemId
The system identifier of the entity.
Notation
The name of the associated notation.
Entity Resolver
If a SAX application needs to implement customized handling for
external entities, it must implement this interface.
The parser will then allow the application to intercept any
external entities (including the external DTD subset and external
parameter entities, if any) before including them.
Many SAX applications will not need to implement this interface,
but it will be especially useful for applications that build XML
documents from databases or other specialised input sources, or for
applications that use URI types that are either not URLs, or that
have schemes unknown to the parser.
resolve_entity(entity)
Allow the application to resolve external entities.
The Parser will call this method before opening any external entity
except the top-level document entity (including the external DTD
subset, external entities referenced within the DTD, and external
entities referenced within the document element): the application may
request that the parser resolve the entity itself, that it use an
alternative URI, or that it use an entirely different input
source.
Application writers can use this method to redirect external system
identifiers to secure and/or local URIs, to look up public identifiers
in a catalogue, or to read an entity from a database or other input
source (including, for example, a dialog box).
If the system identifier is a URL, the SAX parser must resolve it
fully before reporting it to the application.
entity is a hash with these properties:
PublicId
The public identifier of the entity being referenced, or
undef if none was declared.
SystemId
The system identifier of the entity being referenced.
Error Events
If a SAX application needs to implement customized error handling,
it must implement this interface. The parser will then report all
errors and warnings through this interface.
The parser shall use this interface to report errors instead of or in
addition to throwing an exception: for errors and warnings the recommended
approach is to let the application throw its own exceptions and to not
throw them in the parser. For fatal errors however, it is not uncommon that
the parser will throw an exception after having reported the error as it
renders any continuation of parsing impossible.
All error handlers receive a hash, exception, with the
properties defined in Exceptions.
warning(exception)
Receive notification of a warning.
SAX parsers will use this method to report conditions that are not
errors or fatal errors as defined by the XML 1.0 recommendation. The
default behaviour is to take no action.
The SAX parser must continue to provide normal parsing events after
invoking this method: it should still be possible for the application
to process the document through to the end.
error(exception)
Receive notification of a recoverable error.
This corresponds to the definition of "error" in section 1.2 of the
W3C XML 1.0 Recommendation. For example, a validating parser would use
this callback to report the violation of a validity constraint. The
default behaviour is to take no action.
The SAX parser must continue to provide normal parsing events after
invoking this method: it should still be possible for the application
to process the document through to the end. If the application cannot
do so, then the parser should report a fatal error even if the XML 1.0
recommendation does not require it to do so.
fatal_error(exception)
Receive notification of a non-recoverable error.
This corresponds to the definition of "fatal error" in section 1.2
of the W3C XML 1.0 Recommendation. For example, a parser would use
this callback to report the violation of a well-formedness
constraint.
The application must assume that the document is unusable after the
parser has invoked this method, and should continue (if at all) only
for the sake of collecting additional error messages: in fact, SAX
parsers are free to stop reporting any other events once this method
has been invoked.
Lexical Events
This is an optional extension handler for SAX2 to provide lexical
information about an XML document, such as comments and CDATA section
boundaries; XML readers are not required to support this handler.
The events in the lexical handler apply to the entire document, not
just to the document element, and all lexical handler events must
appear between the content handler's start_document() and
end_document() events.
To set the LexicalHandler for an XML reader, set the Feature
"http://xml.org/sax/handlers/LexicalHandler" on the parser to
the object to receive lexical events. If the reader does not support
lexical events, it will throw a XML::SAX::Exception::NotRecognized or
a XML::SAX::Exception::NotSupported when you attempt to register the
handler.
start_dtd(dtd)
Report the start of DTD declarations, if any.
Any declarations are assumed to be in the internal subset unless
otherwise indicated by a start_entity event.
Note that the start/end_dtd() events will appear
within the start/end_document() events from Content
Handler and before the first start_element() event.
dtd is a hash with these properties:
Name
The document type name.
PublicId
The declared public identifier for the external DTD subset, or
undef if none was declared.
SystemId
The declared system identifier for the external DTD subset, or
undef if none was declared.
end_dtd(dtd)
Report the end of DTD declarations.
No properties are defined for this event (dtd is
empty).
start_entity(entity)
Report the beginning of an entity in content.
NOTE: entity references in attribute values -- and the start
and end of the document entity -- are never reported.
The start and end of the external DTD subset are reported using the
pseudo-name "[dtd]". All other events must be properly nested within
start/end entity events.
Note that skipped entities will be reported through the
skipped_entity() event, which is part of the ContentHandler
interface.
entity is a hash with these properties:
Name
The name of the entity. If it is a parameter entity, the
name will begin with '%'.
end_entity(entity)
Report the end of an entity.
entity is a hash with these properties:
Name
The name of the entity that is ending.
start_cdata(cdata)
Report the start of a CDATA section.
The contents of the CDATA section will be reported through the
regular characters event.
No properties are defined for this event (cdata is
empty).
end_cdata(cdata)
Report the end of a CDATA section.
No properties are defined for this event (cdata is
empty).
comment(comment)
Report an XML comment anywhere in the document.
This callback will be used for comments inside or outside the
document element, including comments in the external DTD subset (if
read).
An XML filter is like an XML event generator, except that it
obtains its events from another XML event generator rather than a
primary source like an XML document or database. Filters can modify a
stream of events as they pass on to the final application.
Parent
The parent reader.
This Feature allows the application to link the filter to a parent
event generator (which may be another filter).
See the XML::SAX::Base module for more on filters. It is meant to be
used as a base class for filters and drivers, and makes them much
easier to implement.
The Perl SAX 2.0 binding differs from the Java binding in these ways:
Takes parameters to new(), to parse(), and to be
set directly in the object, instead of requiring set/get calls (see
below).
Allows a default Handler parameter to be used for all
handlers.
No base classes are enforced. Instead, parsers dynamically
check the handlers for what methods they support. Note however that
using XML::SAX::Base as your base class for Drivers and Filters will
make your code a lot simpler, less error prone, and probably much more
correct with regard to this spec. Only reimplement that functionality
if you really need to.
The Attribute, InputSource, and SAXException (XML::SAX::Exception)
classes are only described as hashes (see below).
Handlers are passed a hash (Node) containing properties as an
argument instead of positional arguments.
parse() methods return the value returned by calling the
end_document() handler.
Method names have been converted to lower-case with underscores.
Parameters are all mixed case with initial upper-case.
If compatibility is a problem for you consider writing a Filter that
converts from this style to the one you want. It is likely that such
a Filter will be available from CPAN in the not distant future.
libxml-perl-0.08/doc/UsingPatActModules.pod 0100644 0000764 0000764 00000006315 06754116317 017020 0 ustar ken ken =head1 Using PatAct Modules
This document is targeted towards people who want to write scripts or
modules that use pattern and action modules. If you want to create a
new pattern or action module, please see ``Creating PatAct Modules.''
You would want to use pattern/action modules if you want to apply a
complex set of patterns or queries against an XML instance and perform
actions associated with those patterns or queries. To be able to use
pattern/action modules you will need a pattern-matching module that
supports the format of the pattern or query language you can use and
an action module that will perform the types of actions you need to
perform.
Available pattern-matching modules are:
XML::PatAct::
::MatchName Simple element name, element hierarchy matching
Available action modules are:
XML::PatAct::
::ToObjects Convert XML instances into Perl objects
::Amsterdam Simplistic style-sheet using before/after strings
Using pattern/action modules involves loading the modules, creating a
pattern/action list, creating instances of the pattern and matching
modules, and then starting a parse using the matching module as a
handler:
use XML::Parser::PerlSAX;
use XML::PatAct::MatchName;
use XML::PatAct::ToObjects;
my $patterns = [
'schema' => [ qw{ -holder } ],
'table' => [ qw{ -make Schema::Table } ],
'name' => [ qw{ -field Name -as-string } ],
];
my $matcher = XML::PatAct::MatchName->new( Patterns => $patterns );
my $handler = XML::PatAct::ToObjects->new( Patterns => $patterns,
Matcher => $matcher);
my $parser = XML::Parser::PerlSAX->new( Handler => $handler );
my $schema = $parser->parse(Source => { SystemId => $ARGV[0] } );
The example above uses the MatchName and ToObjects pattern and action
modules. The pattern list contains pairs of patterns and actions in
the format specified by MatchName and ToObjects, other modules will
use other formats. The patterns that MatchName supports are a simple
element name or a hierarchy of element names. The actions that
ToObjects supports describe how to create Perl objects from the XML
instances.
The $matcher object is an instance of XML::PatAct::MatchName.
$matcher is created and associated with the pattern/action list that
will be matched against. The $handler object is an instance of
XML::PatAct::ToObjects. $handler is created and associated with the
pattern/action list to be matched against as well as the pattern
matching instance $matcher.
$handler is a PerlSAX event handler. XML::Parser::PerlSAX is used as
the source of XML events. Other PerlSAX event generators include
XML::Grove::PerlSAX and XML::ESISParser. $parser is created with the
$handler object as its Handler.
The `parse()' method of $parser is called to run the handler (the
matching object) to produce the output from XML::PatAct::ToObjects,
which is a Perl object converted from XML, $schema.
The above example is an abbreviated version. A complete example of
usage of the MatchName and ToObjects modules, including source XML, is
in the documentation for the XML::PatAct::ToObjects module. The
script and source XML are also in the examples directory.
libxml-perl-0.08/doc/PerlSAX.pod 0100644 0000764 0000764 00000056020 06756066006 014562 0 ustar ken ken =head1 SAX for Perl
=head2 What is SAX?
SAX (Simple API for XML) is a common parser interface for XML
parsers. It allows application writers to write applications that use
XML parsers, but are independent of which parser is actually used.
This document describes a version of SAX used by Perl modules. The
original version of SAX, for Java, is described at
<http://www.megginson.com/SAX/>.
There are two basic interfaces in the Perl version of SAX, the parser
interface and the handler interface. The parser interface creates new
parser instances, initiates parsing, and provides additional
information to handlers on request. The handler interface is used to
receive parse events from the parser.
=head2 Deviations from the Java version
=over 4
=item *
Takes parameters to `C<new()>' instead of using `set*' calls.
=item *
Allows a default Handler parameter to be used for all handlers.
=item *
No base classes are implemented. Instead, parsers dynamically check
the handlers for what methods they support.
=item *
The AttributeList, InputSource, and SAXException classes have been
replaced by anonymous hashes.
=item *
Handlers are passed a hash containing properties as an argument in
place of positional arguments.
=item *
`C<parse()>' returns the value returned by calling the
`C<end_document()>' handler.
=item *
Method names have been converted to lower-case with underscores.
Parameters are all mixed case with initial upper-case.
=back
=head1 Parser Interface
SAX parsers are reusable but not re-entrant: the application may reuse
a parser object (possibly with a different input source) once the
first parse has completed successfully, but it may not invoke the
`C<parse()>' methods recursively within a parse.
Parser objects contain the following options. A new or different
handler option may be provided in the middle of a parse, and the SAX
parser must begin using the new handler immediately. The `C<Locale>'
option must not be changed in the middle of a parse. If an
application does not provide a handler for a particular set of events,
those events will be silently ignored unless otherwise stated. If an
`C<EntityResolver>' is not provided, the parser will resolve system
identifiers and open connections to entities itself.
Handler default handler to receive events
DocumentHandler handler to receive document events
DTDHandler handler to receive DTD events
ErrorHandler handler to receive error events
EntityResolver handler to resolve entities
Locale locale to provide localisation for errors
If no handlers are provided then all events will be silently ignored,
except for `C<fatal_error()>' which will cause a `C<die()>' to be
called after calling `C<end_document()>'.
All handler methods are called with a single hash argument containing
the parameters for that method. `C' methods can be called with
a hash or a list of key-value pairs containing the parameters.
All SAX parsers must implement this basic interface: it allows
applications to provide handlers for different types of events and to
initiate a parse from a URI, a byte stream, or a character stream.
=over 4
=item new( I<OPTIONS> )
Creates a Parser that will be used to parse XML sources. Any
parameters passed to `C<new()>' will be used for subsequent parses.
I<OPTIONS> may be a list of key, value pairs or a hash.
=item parse( I<OPTIONS> )
Parse an XML document.
The application can use this method to instruct the SAX parser to
begin parsing an XML document from any valid input source (a character
stream, a byte stream, or a URI). I<OPTIONS> may be a list of key,
value pairs or a hash. I<OPTIONS> passed to `C<parse()>' override
options given when the parser instance was created with `C<new()>'.
Applications may not invoke this method while a parse is in progress
(they should create a new Parser instead for each additional XML
document). Once a parse is complete, an application may reuse the same
Parser object, possibly with a different input source.
`C<parse()>' returns the result of calling the handler method
`C<end_document()>'.
A `C<Source>' parameter must have been provided to either the
`C<parse()>' or `C<new()>' methods. The `C<Source>' parameter is a
hash containing the following parameters:
=over 4
=item PublicId
The public identifier for this input source.
The public identifier is always optional: if the application writer
includes one, it will be provided as part of the location information.
=item SystemId
The system identifier for this input source.
The system identifier is optional if there is a byte stream, a
character stream, or a string, but it is still useful to provide one,
since the application can use it to resolve relative URIs and can
include it in error messages and warnings (the parser will attempt to
open a connection to the URI only if there is no byte stream or
character stream specified).
If the application knows the character encoding of the object pointed
to by the system identifier, it can provide the encoding using the
`C<Encoding>' parameter.
If the system ID is a URL, it must be fully resolved.
=item String
A scalar value containing XML text to be parsed.
The SAX parser will ignore this if there is also a byte or character
stream, but it will use a string in preference to opening a URI
connection.
=item ByteStream
The byte stream (file handle) for this input source.
The SAX parser will ignore this if there is also a character stream
specified, but it will use a byte stream in preference to opening a
URI connection itself or using `C<String>'.
If the application knows the character encoding of the byte stream, it
should set it with the `C<Encoding>' parameter.
=item CharacterStream
FOR FUTURE USE ONLY -- Perl does not currently support any character
streams, only use the `C<ByteStream>', `C<String>', or `C<SystemId>'
parameters.
The character stream (file handle) for this input source.
If there is a character stream specified, the SAX parser will ignore
any byte stream and will not attempt to open a URI connection to the
system identifier.
=item Encoding
The character encoding, if known.
The encoding must be a string acceptable for an XML encoding
declaration (see section 4.3.3 of the XML 1.0 recommendation).
This parameter has no effect when the application provides a character
stream.
=back
=back
=head2 Locator
Interface for associating a SAX event with a document location.
If a SAX parser provides location information to the SAX application,
it does so by implementing the following methods and then calling the
`C<set_document_locator()>' handler method. The handler can use the
object to obtain the location of any other document handler event in
the XML source document.
Note that the results returned by the object will be valid only during
the scope of each document handler method: the application will
receive unpredictable results if it attempts to use the locator at any
other time.
SAX parsers are not required to supply a locator, but they are very
strongly encouraged to do so.
=over 4
=item location()
Return the location information for the current event.
Returns a hash containing the following parameters:
ColumnNumber The column number, or undef if none is available.
LineNumber The line number, or undef if none is available.
PublicId A string containing the public identifier, or undef if
none is available.
SystemId A string containing the system identifier, or undef if
none is available.
=back
=head1 Handler Interfaces
SAX handler methods are grouped into four interfaces: the document
handler for receiving normal document events, the DTD handler for
receiving notation and unparsed entity events, the error handler for
receiving errors and warnings, and the entity resolver for redirecting
external system identifiers.
The application may choose to implement each interface in one package
or in separate packages, as long as the objects provided as parameters
to the parser provide the matching interface.
Parsers may implement additional methods in each of these categories,
refer to the parser documentation for further information.
All handlers are called with a single hash argument containing the
parameters for that handler.
Application writers who do not want to implement the entire interface
can leave those methods undefined. Events whose handler methods are
undefined will be ignored unless otherwise stated.
=head2 DocumentHandler
This is the main interface that most SAX applications implement: if
the application needs to be informed of basic parsing events, it
implements this interface and provides an instance with the SAX parser
using the `C<DocumentHandler>' parameter. The parser uses the instance
to report basic document-related events like the start and end of
elements and character data.
The order of events in this interface is very important, and mirrors
the order of information in the document itself. For example, all of
an element's content (character data, processing instructions, and/or
subelements) will appear, in order, between the `C<start_element()>'
event and the corresponding `C<end_element()>' event.
The application can find the location of any event using the Locator
interface supplied by the Parser through the
`C<set_document_locator()>' method.
=over 4
=item set_document_locator( { Locator => $locator } )
Receive an object for locating the origin of SAX document events.
SAX parsers are strongly encouraged (though not absolutely required)
to supply a locator: if it does so, it must supply the locator to the
application by invoking this method before invoking any of the other
methods in the DocumentHandler interface.
The locator allows the application to determine the end position of
any document-related event, even if the parser is not reporting an
error. Typically, the application will use this information for
reporting its own errors (such as character content that does not
match an application's business rules). The information returned by
the locator is probably not sufficient for use with a search engine.
Note that the locator will return correct information only during the
invocation of the events in this interface. The application should not
attempt to use it at any other time.
Parameters:
Locator An object that can return the location of any SAX document
event.
=item start_document( { } )
Receive notification of the beginning of a document.
The SAX parser will invoke this method only once, before any other
methods in this interface or in DTDHandler.
=item end_document( { } )
Receive notification of the end of a document, no parameters are
passed for the end of a document.
The SAX parser will invoke this method only once, and it will be the
last method invoked during the parse. The parser shall not invoke
this method until it has either abandoned parsing (because of an
unrecoverable error) or reached the end of input.
The value returned by calling `C<end_document()>' will be the value
returned by `C<parse()>'.
=item start_element( { Name => $name, Attributes => $attributes } )
Receive notification of the beginning of an element.
The Parser will invoke this method at the beginning of every element
in the XML document; there will be a corresponding `C<end_element()>'
event for every `C<start_element()>' event (even when the element is
empty). All of the element's content will be reported, in order,
before the corresponding `C<end_element()>' event.
If the element name has a namespace prefix, the prefix will still be
attached. Note that the attribute list provided will contain only
attributes with explicit values (specified or defaulted): #IMPLIED
attributes will be omitted.
Parameters:
Name The element type name.
Attributes The attributes attached to the element, if any.
=item end_element( { Name => $name } )
Receive notification of the end of an element.
The SAX parser will invoke this method at the end of every element in
the XML document; there will be a corresponding `C<start_element()>'
event for every `C<end_element()>' event (even when the element is
empty).
If the element name has a namespace prefix, the prefix will still be
attached to the name.
Parameters:
Name The element type name.
=item characters( { Data => $characters } )
Receive notification of character data.
The Parser will call this method to report each chunk of character
data. SAX parsers may return all contiguous character data in a
single chunk, or they may split it into several chunks; however, all
of the characters in any single event must come from the same external
entity, so that the Locator provides useful information.
Note that some parsers will report whitespace using the
`C<ignorable_whitespace()>' method rather than this one (validating
parsers must do so).
Parameters:
Data The characters from the XML document.
=item ignorable_whitespace( { Data => $whitespace } )
Receive notification of ignorable whitespace in element content.
Validating Parsers must use this method to report each chunk of
ignorable whitespace (see the W3C XML 1.0 recommendation, section
2.10): non-validating parsers may also use this method if they are
capable of parsing and using content models.
SAX parsers may return all contiguous whitespace in a single chunk, or
they may split it into several chunks; however, all of the characters
in any single event must come from the same external entity, so that
the Locator provides useful information.
The application must not attempt to read from the array outside of the
specified range.
Data The characters from the XML document.
=item processing_instruction ( { Target => $target, Data => $data } )
Receive notification of a processing instruction.
The Parser will invoke this method once for each processing
instruction found: note that processing instructions may occur before
or after the main document element.
A SAX parser should never report an XML declaration (XML 1.0, section
2.8) or a text declaration (XML 1.0, section 4.3.1) using this method.
Parameters:
Target The processing instruction target.
Data The processing instruction data, if any.
=back
=head2 ErrorHandler
Basic interface for SAX error handlers.
If a SAX application needs to implement customized error handling, it
must implement this interface and then provide an instance to the SAX
parser using the parser's `C<ErrorHandler>' parameter. The parser
will then report all errors and warnings through this interface.
The parser shall use this interface instead of throwing an exception:
it is up to the application whether to throw an exception for
different types of errors and warnings. Note, however, that there is
no requirement that the parser continue to provide useful information
after a call to `C<fatal_error()>' (in other words, a SAX driver class
could catch an exception and report a fatalError).
All error handlers receive the following I. The
`C<PublicId>', `C<SystemId>', `C<LineNumber>', and `C<ColumnNumber>'
are provided only if the parser has that information available.
Message The error or warning message, or undef to use the message
from the `C<EvalError>' parameter
PublicId The public identifier of the entity that generated the
error or warning.
SystemId The system identifier of the entity that generated the
error or warning.
LineNumber The line number of the end of the text that caused the
error or warning.
ColumnNumber The column number of the end of the text that caused the
error or warning.
EvalError The error value returned from a lower level interface.
Application writers who do not want to implement the entire interface
can leave those methods undefined. If not defined, calls to the
`C<warning()>' and `C<error()>' handlers will be ignored and
processing will be terminated (going straight to `C<end_document()>')
after the call to `C<fatal_error()>'.
=over 4
=item warning( { I } )
Receive notification of a warning.
SAX parsers will use this method to report conditions that are not
errors or fatal errors as defined by the XML 1.0 recommendation. The
default behaviour is to take no action.
The SAX parser must continue to provide normal parsing events after
invoking this method: it should still be possible for the application
to process the document through to the end.
=item error( { I } )
Receive notification of a recoverable error.
This corresponds to the definition of "error" in section 1.2 of the
W3C XML 1.0 Recommendation. For example, a validating parser would use
this callback to report the violation of a validity constraint. The
default behaviour is to take no action.
The SAX parser must continue to provide normal parsing events after
invoking this method: it should still be possible for the application
to process the document through to the end. If the application cannot
do so, then the parser should report a fatal error even if the XML 1.0
recommendation does not require it to do so.
=item fatal_error( { I } )
Receive notification of a non-recoverable error.
This corresponds to the definition of "fatal error" in section 1.2 of
the W3C XML 1.0 Recommendation. For example, a parser would use this
callback to report the violation of a well-formedness constraint.
The application must assume that the document is unusable after the
parser has invoked this method, and should continue (if at all) only
for the sake of collecting additional error messages: in fact, SAX
parsers are free to stop reporting any other events once this method
has been invoked.
=back
=head2 DTDHandler
Receive notification of basic DTD-related events.
If a SAX application needs information about notations and unparsed
entities, then the application implements this interface and provides
an instance to the SAX parser using the parser's `C<DTDHandler>'
parameter. The parser uses the instance to report notation and
unparsed entity declarations to the application.
The SAX parser may report these events in any order, regardless of the
order in which the notations and unparsed entities were declared;
however, all DTD events must be reported after the document handler's
`C<start_document()>' event, and before the first `C<start_element()>'
event.
It is up to the application to store the information for future use
(perhaps in a hash table or object tree). If the application
encounters attributes of type "NOTATION", "ENTITY", or "ENTITIES", it
can use the information that it obtained through this interface to
find the entity and/or notation corresponding with the attribute
value.
Application writers who do not want to implement the entire interface
can leave those methods undefined. Events whose handler methods are
undefined will be ignored.
=over 4
=item notation_decl( { I } )
Receive notification of a notation declaration event.
It is up to the application to record the notation for later
reference, if necessary.
If a system identifier is present, and it is a URL, the SAX parser
must resolve it fully before passing it to the application.
I:
Name The notation name.
PublicId The notation's public identifier, or undef if none was given.
SystemId The notation's system identifier, or undef if none was given.
=item unparsed_entity_decl( { I } )
Receive notification of an unparsed entity declaration event.
Note that the notation name corresponds to a notation reported by the
`C<notation_decl()>' event. It is up to the application to record the
entity for later reference, if necessary.
If the system identifier is a URL, the parser must resolve it fully
before passing it to the application.
I:
Name The unparsed entity's name.
PublicId The entity's public identifier, or undef if none was given.
SystemId The entity's system identifier (it must always have one).
NotationName The name of the associated notation.
=back
=head2 EntityResolver
Basic interface for resolving entities.
If a SAX application needs to implement customized handling for
external entities, it must implement this interface and provide an
instance with the SAX parser using the parser's `C<EntityResolver>'
parameter.
The parser will then allow the application to intercept any external
entities (including the external DTD subset and external parameter
entities, if any) before including them.
Many SAX applications will not need to implement this interface, but
it will be especially useful for applications that build XML documents
from databases or other specialised input sources, or for applications
that use URI types other than URLs.
The application can also use this interface to redirect system
identifiers to local URIs or to look up replacements in a catalog
(possibly by using the public identifier).
=over 4
=item resolve_entity( { PublicId => $public_id, SystemId => $system_id } )
Allow the application to resolve external entities.
The Parser will call this method before opening any external entity
except the top-level document entity (including the external DTD
subset, external entities referenced within the DTD, and external
entities referenced within the document element): the application may
request that the parser resolve the entity itself, that it use an
alternative URI, or that it use an entirely different input source.
Application writers can use this method to redirect external system
identifiers to secure and/or local URIs, to look up public identifiers
in a catalogue, or to read an entity from a database or other input
source (including, for example, a dialog box).
If the system identifier is a URL, the SAX parser must resolve it
fully before reporting it to the application.
Parameters:
PublicId The public identifier of the external entity being
referenced, or undef if none was supplied.
SystemId The system identifier of the external entity being
referenced.
`C<resolve_entity()>' returns undef to request that the parser open a
regular URI connection to the system identifier or returns a hash
containing the same parameters as the `C<Source>' parameter to
Parser's `C<parse()>' method, summarized here:
PublicId The public identifier of the external entity being
referenced, or undef if none was supplied.
SystemId The system identifier of the external entity being
referenced.
String String containing XML text
ByteStream An open file handle.
CharacterStream
An open file handle.
Encoding The character encoding, if known.
See Parser's `C<parse()>' method for complete details on how these
parameters interact.
=back
=head1 Contributors
SAX was developed collaboratively by
the members of the XML-DEV mailing list. Please see the ``SAX History
and Contributors'' page for the people who did the real work behind
SAX. Much of the content of this document was copied from the SAX 1.0
Java Implementation documentation.
The SAX for Python specification was helpful in creating this
specification.
Thanks to the following people who contributed to Perl SAX.
Eduard (Enno) Derksen
Ken MacLeod
Eric Prud'hommeaux
Larry Wall
libxml-perl-0.08/doc/mirror.sh 0100644 0000764 0000764 00000003147 07054574600 014444 0 ustar ken ken #! /bin/sh
#
# NAME
# mirror -- update web page with a libxml-perl release
#
# SYNOPSIS
usage="mirror RELEASE DESTDIR"
#
# DESCRIPTION
# `mirror' creates a web mirror using a libxml-perl release tar
# file.
#
# `mirror' pulls files from the tar file to create the web page.
# `mirror' searches HTML files for the string @VERSION@ and
# replaces it with RELEASE. `mirror' searches for all *.pm and
# *.pod files and converts them to HTML. It also copies a few
# hardcoded files.
#
# `mirror' installs the web pages in DESTDIR.
#
# CAUTION: `mirror' removes the contents of DESTDIR before
# copying files to it.
#
# The release tar file libxml-perl-RELEASE.tar.gz is read from the
# current directory. It is checked for existence BEFORE DESTDIR is
# removed, so a mistyped RELEASE does not destroy an existing mirror.
#
# AUTHOR
# Ken MacLeod
#
# $Id: mirror.sh,v 1.2 2000/02/22 21:02:56 kmacleod Exp $
#

# PWD_CMD and TR are not referenced anywhere in this script; they are
# kept only so the set of defined variables is unchanged.
PWD_CMD="/bin/pwd"
SED="sed"
TR="/usr/bin/tr"

if [ $# -ne 2 ]; then
    echo "usage: $usage" >&2
    exit 1
fi

RELEASE="$1"
DESTDIR="$2"

# Reject empty arguments: an empty DESTDIR would make the
# rm/mkdir/cd sequence below act on the wrong directory.
if [ -z "$RELEASE" ] || [ -z "$DESTDIR" ]; then
    echo "usage: $usage" >&2
    exit 1
fi

SRCDIR="libxml-perl-${RELEASE}"
TARBALL="${SRCDIR}.tar.gz"

# Fail early, while the old DESTDIR is still intact.
if [ ! -f "$TARBALL" ]; then
    echo "mirror: cannot find $TARBALL in the current directory" >&2
    exit 1
fi

set -e
set -x

# All expansions are quoted so that paths containing whitespace or
# glob characters are passed through unchanged.
rm -rf "$DESTDIR"
mkdir -p "$DESTDIR"
cp "$TARBALL" "$DESTDIR"
cd "$DESTDIR"

tar xzvf "$TARBALL"

# Substitute the release number into each HTML page shipped in doc/.
# NOTE: RELEASE is used verbatim as a sed replacement, so it must not
# contain `/' or `&'.
for ii in "$SRCDIR"/doc/*.html; do
    # An unmatched glob is left as a literal pattern; skip it.
    [ -f "$ii" ] || continue
    outfile=$(basename "$ii")
    $SED <"$ii" >"$outfile" \
        -e "s/@VERSION@/$RELEASE/g"
done

# Convert every POD document in doc/ to HTML.  `&&' keeps the glob
# from being expanded in the wrong directory if the cd fails.
for ii in $(cd "$SRCDIR/doc" && echo *.pod); do
    [ -f "$SRCDIR/doc/$ii" ] || continue
    outfile="$(basename "$ii" .pod).html"
    pod2html "$SRCDIR/doc/$ii" >"$outfile"
done

# Convert every module under lib/ to HTML, naming the output after the
# package (XML/Handler/Subs.pm -> XML::Handler::Subs.html).
for ii in $(cd "$SRCDIR/lib" && echo */*.pm */*/*.pm); do
    [ -f "$SRCDIR/lib/$ii" ] || continue
    dstfile=$(echo "$ii" | $SED -e 's|/|::|g')
    outfile="$(basename "$dstfile" .pm).html"
    pod2html "$SRCDIR/lib/$ii" >"$outfile"
done

mv "$SRCDIR/README" "$SRCDIR.readme"
mv "$SRCDIR/doc/modules.xml" .

# Remove the unpacked tree and the cache files pod2html leaves behind.
rm -rf "$SRCDIR" pod2html-dircache pod2html-itemcache
libxml-perl-0.08/doc/sax-2.0.html 0100644 0000764 0000764 00000027352 07423065107 014555 0 ustar ken ken
Perl SAX 2.0 Binding
Perl SAX 2.0 Binding
SAX (Simple API for XML) is a common parser interface for XML
parsers. It allows application writers to write applications that use
XML parsers, but are independent of which parser is actually used.
This document describes the version of SAX used by Perl modules.
The original version of SAX 2.0, for Java, is described at http://sax.sourceforge.net/.
There are two basic interfaces in the Perl version of SAX, the
parser interface and the handler interface. The parser interface
creates new parser instances, starts parsing, and provides additional
information to handlers on request. The handler interface is used to
receive parse events from the parser. This pattern is also commonly
called "Producer and Consumer" or "Generator and Sink". Note that the
parser doesn't have to be an XML parser, all it needs to do is provide
a stream of events to the handler as if it were parsing XML. But the
actual data from which the events are generated can be anything, a Perl
object, a CSV file, a database table...
SAX is typically used like this:
my $handler = MyHandler->new();
my $parser = AnySAXParser->new( Handler => $handler );
$parser->parse($uri);
Handlers are typically written like this:
package MyHandler;

# Example SAX handler: prints one line for every element start,
# element end, and chunk of character data it is given.

# Constructor. Returns an empty hash blessed into the invoking class.
sub new {
    my ($type) = @_;
    my $self = {};
    return bless $self, $type;
}

# Prints the Name property of the element hash passed as the event.
sub start_element {
    my $self    = shift;
    my $element = shift;
    print "Starting element $element->{Name}\n";
}

# Prints the Name property of the element hash passed as the event.
sub end_element {
    my $self    = shift;
    my $element = shift;
    print "Ending element $element->{Name}\n";
}

# Prints the Data property of the characters hash passed as the event.
sub characters {
    my $self       = shift;
    my $characters = shift;
    print "characters: $characters->{Data}\n";
}

1;
Basic SAX Parser
These methods and options are the most commonly used with SAX
parsers and event generators.
Applications may not invoke a parse() method again while a
parse is in progress (they should create a new SAX parser instead for
each nested XML document). Once a parse is complete, an application
may reuse the same parser object, possibly with a different input
source.
During the parse, the parser will provide information about the XML
document through the registered event handlers. Note that an event that
hasn't been registered (ie that doesn't have its corresponding method in
the handler's class) will not be called. This allows one to only
get the events one is interested in.
parse(uri [, options])
Parses the XML instance identified by uri (a system
identifier). options can be a list of option, value pairs
or a hash. Options include Handler, features and properties,
and advanced SAX parser options. parse() returns the result
of calling the end_document() handler. The options supported
by parse() may vary slightly if what is being "parsed" isn't
XML.
parse_file(stream [, options])
Parses the XML instance in the already opened stream, an
IO::Handle or similar. options are the same as for parse(). parse_file() returns the result
of calling the end_document() handler.
parse_string(string [, options])
Parses the XML instance in string. options are
the same as for parse().
parse_string() returns the result of calling the
end_document() handler.
Handler
The default handler object to receive all events from the parser.
Applications may change Handler in the middle of the parse
and the SAX parser will begin using the new handler
immediately. The Advanced SAX document
lists a number of more specialized handlers that can be used should you
wish to dispatch different types of events to different objects.
These methods are the most commonly used by SAX handlers.
start_document(document)
Receive notification of the beginning of a document.
The SAX parser will invoke this method only once, before any other
methods (except for set_document_locator() in advanced SAX
handlers).
No properties are defined for this event (document is
empty).
end_document(document)
Receive notification of the end of a document.
The SAX parser will invoke this method only once, and it will be
the last method invoked during the parse. The parser shall not invoke
this method until it has either abandoned parsing (because of an
unrecoverable error) or reached the end of input.
No properties are defined for this event (document is
empty).
The return value of end_document() is returned by the
parser's parse() methods.
start_element(element)
Receive notification of the start of an element.
The Parser will invoke this method at the beginning of every
element in the XML document; there will be a corresponding
end_element() event for every start_element() event (even when the
element is empty). All of the element's content will be reported, in
order, before the corresponding end_element() event.
element is a hash with these properties:
Name
The element type name (including prefix).
Attributes
The attributes attached to the element, if any.
If namespace processing is turned on (which is the default), these
properties are also available:
NamespaceURI
The namespace of this element.
Prefix
The namespace prefix used on this element.
LocalName
The local name of this element.
Attributes is a hash keyed by JClark namespace notation. That
is, the keys are of the form "{NamespaceURI}LocalName". If the attribute
has no NamespaceURI, then it is simply "{}LocalName". Each attribute is
a hash with these properties:
Name
The attribute name (including prefix).
Value
The normalized value of the attribute.
NamespaceURI
The namespace of this attribute.
Prefix
The namespace prefix used on this attribute.
LocalName
The local name of this attribute.
end_element(element)
Receive notification of the end of an element.
The SAX parser will invoke this method at the end of every element
in the XML document; there will be a corresponding start_element() event for every end_element() event (even when the element is
empty).
element is a hash with these properties:
Name
The element type name (including prefix).
If namespace processing is turned on (which is the default), these
properties are also available:
NamespaceURI
The namespace of this element.
Prefix
The namespace prefix used on this element.
LocalName
The local name of this element.
characters(characters)
Receive notification of character data.
The Parser will call this method to report each chunk of character
data. SAX parsers may return all contiguous character data in a
single chunk, or they may split it into several chunks (however, all
of the characters in any single event must come from the same external
entity so that the Locator provides useful information).
characters is a hash with this property:
Data
The characters from the XML document.
ignorable_whitespace(characters)
Receive notification of ignorable whitespace in element content.
Validating Parsers must use this method to report each chunk of
ignorable whitespace (see the W3C XML 1.0 recommendation, section
2.10): non-validating parsers may also use this method if they are
capable of parsing and using content models.
SAX parsers may return all contiguous whitespace in a single chunk,
or they may split it into several chunks; however, all of the
characters in any single event must come from the same external
entity, so that the Locator provides useful information.
Conformant XML parsers are required to abort processing when
well-formedness or validation errors occur. In Perl, SAX parsers use
die() to signal these errors. To catch these errors and prevent
them from killing your program, use eval{}:
libxml-perl-0.08/doc/UsingPerlSAX.pod 0100644 0000764 0000764 00000004734 06715130346 015567 0 ustar ken ken =head1 Using PerlSAX
Working with PerlSAX involves using two classes (packages), a PerlSAX
parser that generates parsing events and a class that you write that
will receive those parsing events, the ``handler''. This guide will
use the XML::Parser::PerlSAX parser that uses Clark Cooper's
XML::Parser module.
The handler class implements the PerlSAX handler methods that you are
interested in. The following example, MyHandler.pm, prints a message
every time an element starts or ends:
package MyHandler;
sub new {
my ($type) = @_;
return bless {}, $type;
}
sub start_element {
my ($self, $element) = @_;
print "Start element: $element->{Name}\n";
}
sub end_element {
my ($self, $element) = @_;
print "End element: $element->{Name}\n";
}
1;
To use your handler you will need to have a script, myhandler.pl, that
loads and creates your handler and the parser, and then calls the
parser to parse the XML instance and send events to your handler:
use XML::Parser::PerlSAX;
use MyHandler;
my $my_handler = MyHandler->new;
my $parser = XML::Parser::PerlSAX->new( Handler => $my_handler );
foreach my $instance (@ARGV) {
$parser->parse(Source => { SystemId => $instance });
}
Given this XML instance, myhandler.xml:
<article>
  <title>Using PerlSAX</title>
  <paragraph>Working with PerlSAX ...</paragraph>
</article>
Running myhandler.pl like this:
perl myhandler.pl myhandler.xml
will produce this output:
Start element: article
Start element: title
End element: title
Start element: paragraph
End element: paragraph
End element: article
=head2 For More Information
PerlSAX.pod describes the PerlSAX interface. Each parser module
describes its individual capabilities. XML::Parser::PerlSAX is the
most commonly used PerlSAX implementation.
The files described in this doc are in the `examples' directory. A
more complete implementation of the very simple handler above is in
the module XML::Handler::Sample. Other, more complex handlers are in
the XML::Handler directory as well.
Another hands-on doc for PerlSAX is the XML-Parser-and-PerlSAX.pod.
This doc describes the difference between and the purpose of PerlSAX
with respect to XML::Parser.
This document was inspired by and uses the code examples from David
Megginson's ``Quick Start for SAX Application Writers.''
libxml-perl-0.08/doc/index.html 0100644 0000764 0000764 00000034210 07070670426 014567 0 ustar ken ken
libxml-perl
libxml-perl
Current version is @VERSION@
libxml-perl is a collection of Perl modules,
scripts, and documents for working with XML in Perl. libxml-perl
software works in combination with XML::Parser, PerlSAX, XML::DOM,
XML::Grove, and others.
Questions about how to use this library should be directed to the
comp.lang.perl.modules USENET Newsgroup. Bug reports and
suggestions for improvements can be sent to the
<perl-xml@activestate.com> mailing list. This mailing list is
also the place for general discussions and development of the
libxml-perl package.
To join the Perl-XML mailing list, send an email message to
ListManager@ActiveState.com with the following text in the body:
Subscribe Perl-XML
Source
libxml-perl source is available on CPAN in the XML module
directory. This link goes through the CPAN redirector so if the
site gives you any problems, just click it again and you will be
redirected to a different site.
Modules
The following modules are part of libxml-perl. Below they are marked with their release status:
STABLE
has been in use for a while with few or no outstanding bugs
BETA
interfaces are stable but there may still be bugs
ALPHA
interfaces are changing, there may be lots of bugs, and there may not be docs available yet
XML::Parser::PerlSAX
BETA
XML::Parser::PerlSAX is a PerlSAX parser using XML::Parser (which uses James Clark's Expat XML Parser).
XML::Handler::XMLWriter
BETA
A PerlSAX handler for writing readable XML (in contrast to Canonical
XML, for example). XMLWriter is also subclassable and supports
calling start and end methods by element-names (subclassed from
XML::Handler::Subs). XMLWriter is similar to XML::Parser's Stream
style.
XML::Handler::Subs
BETA
A PerlSAX handler base class that calls start and end methods by
element-names. Subs is similar to XML::Parser's Subs style.
XML::Handler::Sample
BETA
XML::Handler::Sample is a PerlSAX handler that simply prints out the
event names as they are parsed by a PerlSAX parser. It can be used for
debugging or as a template for building new handlers.
XML::Handler::Sample contains handlers for all known parser events.
XML::ESISParser
BETA
XML::ESISParser is a validating PerlSAX parser using James Clark's
`nsgmls' SGML/XML Parser. ESISParser supports both XML and SGML
document instances. Unless you need validation, you should probably
be using XML::Parser::PerlSAX or XML::Parser.
XML::ESISParser with XML::Grove obsolete the SGML::SPGroveBuilder and SGML::Grove modules.
XML::SAX2Perl, XML::Perl2SAX
ALPHA
SAX2Perl and Perl2SAX are SAX Parser<->DocumentHandler filters. These
modules translate parse events between the Java/CORBA style SAX
methods and PerlSAX style methods.
The following modules will very likely be renamed in the next release.
XML::PatAct::MatchName
ALPHA
MatchName is a pattern matching module that can be used with PatAct
action modules. MatchName uses simple element names or element name
lists to match names to actions.
XML::PatAct::ToObjects
ALPHA
ToObjects is a PatAct action module. ToObjects can be used to create
application-ready Perl objects from XML instances.
XML::PatAct::Amsterdam
ALPHA
Amsterdam is a PatAct action module. Amsterdam can be used to apply a
very simple form of style-sheet to an XML instance by using ``before''
and ``after'' strings that are output before and after the contents of
elements.
XML::PatAct::PatternTempl, XML::PatAct::ActionTempl
BETA
PatternTempl and ActionTempl are template files that pattern/action
module writers can copy to create new modules. See Creating PatAct
Modules for more information.
Documents
PerlSAX
This document defines a Perl binding to SAX 1.0. PerlSAX-based parser
modules implement and possibly extend the interface described in
PerlSAX.
Using PerlSAX
UsingPerlSAX is a brief introduction to PerlSAX using the
XML::Parser::PerlSAX module.
Using PatAct Modules
Describes how to use pattern/action modules to transform XML
instances.
Creating PatAct Modules
A document for module writers who are writing new pattern/action
modules.
modules.xml
modules.xml contains a listing of all Perl XML packages and their
public modules categorized by several topics.
XML::ESISParser: report record end as
characters if no record_end() handler
-
XML::Parser::PerlSAX: For attribute
list declarations, now correctly calls the attlist_decl() method and
passes the ElementName property, it used to call entity_decl() passing
EntityName. Reported by Enno
Derksen and Colin
Muller
Added pattern/action modules for name matching, converting to objects, and applying simple styles -- XML::PatAct::MatchName, XML::PatAct::ToObjects, and XML::PatAct::Amsterdam.
-
Added ``Using PatAct Modules'' and ``Creating PatAct Modules'' docs.
-
XML::Parser::PerlSAX and XML::ESISParser were not passing a hash for `start_document()' and `end_document()' per spec.
moved PerlSAX.pod and interface-style.pod to `doc/'
-
renamed Data::Grove::Tied to Data::Grove::Parent
Contributors
The following have shared their code, documents, comments, and/or suggestions for libxml-perl:
Clark Cooper
Eduard (Enno) Derksen
Michael Koehne
KangChan Lee
Ken MacLeod
Colin Muller
Eric Prud'hommeaux
Larry Wall
libxml-perl-0.08/Changes 0100644 0000764 0000764 00000011142 07745254162 013323 0 ustar ken ken Revision history for Perl extension libxml
Backwards incompatible changes are marked with a `*'.
ToDo
- XML::ESISParser: include Robert Braddock's update for OpenSP,
in email 25Jul
- XML::Parser::PerlSAX doesn't pass ParseParamEnt to
XML::Parser, inspired by a request by Paul Mahoney
- switch Data::Grove::Visitor to use UNIVERSAL::can instead of
$self->{'has'}, suggested by Mike Richardson
- no modules are yet supporting SAX2
- XML::Parser::PerlSAX doesn't implement ErrorHandler, it
should at least call fatal_error() if XML::Parser dies;
reported by Craig N. Caroon
0.08 Tue Oct 21 10:54:18 CDT 2003
- added Perl SAX 2.0 Binding
- XML::ESISParser: add -E0 to nsgmls options so that nsgmls
doesn't quit after 200 errors. Add more detail to command
character error message. Suggested by Charles Thayer
.
- fixes
- Data::Grove::Visitor: children_accept_name was not
returning any data in some cases; reported by Laurent
CAPRANI
- XML::SAX2Perl: typo in startElement; reported by Mark
A. Hershberger
- t/stream.t Test 11 fails due to 8-bit characters on Perl
5.6, first reported by Ed Arnold
0.07 Tue Feb 22 14:24:52 CST 2000
- doc/index.html: libxml-perl site index
- doc/mirror.sh: creates a libxml-perl mirror site
- fixes
- all modules: release script didn't insert version numbers
in Perl modules. Reported by Enno Derksen
- doc/modules.xml: well-formedness errors. Reported by
KangChan Lee
0.06 Wed Dec 22 15:14:39 CST 1999
- all modules: add $VERSION. Suggested by Michael Koehne
- XML::Parser::PerlSAX: add UseAttributeOrder option and
AttributeOrder and Defaulted properties to start_element()
handler. Suggested by Enno Derksen
- XML::Parser::PerlSAX: add start_cdata, end_cdata, and
entity_reference events
- XML::PatAct::Amsterdam: added Output and AsString options,
added support for replacing attributes
- Data::Grove: add a Data::Grove::Characters class to act as a
default grove object for containing characters.
- fixes
- XML::PatAct::ToObjects: removed leftover debugging statement
- XML::ESISParser: report record end as characters if no
record_end() handler
- XML::Parser::PerlSAX: For attribute list declarations, now
correctly calls the attlist_decl() method and passes the
ElementName property, it used to call entity_decl()
passing EntityName. Reported by Enno Derksen
and Colin Muller
0.05 Mon Aug 16 11:02:32 CDT 1999
- Major update to PerlSAX.pod
- added an introduction
- added a ``Deviations from the Java version'' section
* re-added the `set_document_locator()' handler method
- added arguments to method synopses
- attributed most of the content to the SAX 1.0 JavaDoc
- minor typos
- XML::Handler::XMLWriter: a new PerlSAX handler for writing
readable XML (in contrast to Canonical XML)
- XML::Handler::Subs: a new PerlSAX handler base class for
calling user-defined subs
- XML::Handler::Sample: this is a template for creating
PerlSAX handlers, it is now in the Public Domain
- XML::PatAct::ToObjects: add CopyAttributes option, add
-grove-contents option
- all PatAct modules can now take parameters as either a list
of key, value pairs or a hash
- fixes
- XML::ESISParser wasn't testing handlers for what methods
they support
- XML::Parser::PerlSAX wasn't capturing XML::Parser Element
events
0.04 Wed Aug 11 10:03:00 CDT 1999
- README: updated with PatAct modules
- added Creating PatAct Modules and Using PatAct Modules docs
- added XML::PatAct::ActionTempl, XML::PatAct::Amsterdam,
XML::PatAct::MatchName, XML::PatAct::PatternTempl,
XML::PatAct::ToObjects
- added schema.pl and schema.xml examples
- added schema.t test
- fixes
- XML::Parser::PerlSAX and XML::ESISParser were not passing
a hash for start_document() or end_document() per spec
- t/canon_xml_writer.t, t/xp_sax.t: added CVS ID
0.03 Wed May 26 19:49:46 CDT 1999
- added XML::Handler::CanonXMLWriter and test
0.02 Mon May 24 18:02:00 CDT 1999
- renamed package from `libxml' to `libxml-perl'
- added doc/modules.xml
- added doc/UsingPerlSAX.pod and example files
- moved PerlSAX.pod and interface-style.pod to `doc/'
- renamed Data::Grove::Tied to Data::Grove::Parent
0.01 Fri May 7 14:59:07 CDT 1999
- original version
libxml-perl-0.08/MANIFEST 0100644 0000764 0000764 00000001643 07745275111 013163 0 ustar ken ken ChangeLog
Changes
MANIFEST
Makefile.PL
README
libxml-perl.spec
libxml-perl-0.08.spec
doc/CreatingPatActModules.pod
doc/PerlSAX.pod
doc/UsingPatActModules.pod
doc/UsingPerlSAX.pod
doc/index.html
doc/interface-style.pod
doc/mirror.sh
doc/modules.xml
doc/sax-2.0.html
doc/sax-2.0-adv.html
lib/Data/Grove.pm
lib/Data/Grove/Parent.pm
lib/Data/Grove/Visitor.pm
lib/XML/ESISParser.pm
lib/XML/Perl2SAX.pm
lib/XML/SAX2Perl.pm
lib/XML/Handler/CanonXMLWriter.pm
lib/XML/Handler/Sample.pm
lib/XML/Handler/Subs.pm
lib/XML/Handler/XMLWriter.pm
lib/XML/Parser/PerlSAX.pm
lib/XML/PatAct/ActionTempl.pm
lib/XML/PatAct/Amsterdam.pm
lib/XML/PatAct/MatchName.pm
lib/XML/PatAct/PatternTempl.pm
lib/XML/PatAct/ToObjects.pm
examples/MyHandler.pm
examples/esis-test.pl
examples/myhandler.pl
examples/myhandler.xml
examples/perlsax-test.pl
examples/schema.pl
examples/schema.xml
t/amsterdam.t
t/canon_xml_writer.t
t/schema.t
t/stream.t
t/subs.t
t/xp_sax.t
libxml-perl-0.08/examples/ 0040755 0000764 0000764 00000000000 07745275112 013650 5 ustar ken ken libxml-perl-0.08/examples/schema.xml 0100644 0000764 0000764 00000000625 06754116426 015633 0 ustar ken ken
MyTableA short summaryA long description that may
contain a subset of HTMLMyColumn1A short summaryA long description42
libxml-perl-0.08/examples/MyHandler.pm 0100644 0000764 0000764 00000000523 07745275111 016065 0 ustar ken ken # This is the example module in doc/UsingPerlSAX.pod
package MyHandler;

use strict;
use warnings;

# A minimal PerlSAX handler: it prints one line for every element start
# and element end event it receives.

# new() - construct a handler object.
#
# The object carries no state of its own; it is an empty hash blessed
# into the class the method was invoked on.
sub new {
    my ($type) = @_;

    return bless {}, $type;
}

# start_element($element) - report the start of an element.
#
# $element is the event hash; only its Name property is used.
sub start_element {
    my ($self, $element) = @_;

    print "Start element: $element->{Name}\n";
}

# end_element($element) - report the end of an element.
#
# $element is the event hash; only its Name property is used.
sub end_element {
    my ($self, $element) = @_;

    print "End element: $element->{Name}\n";
}

1;
libxml-perl-0.08/examples/myhandler.xml 0100644 0000764 0000764 00000000171 06715130346 016344 0 ustar ken ken
Using PerlSAXWorking with PerlSAX ...
libxml-perl-0.08/examples/myhandler.pl 0100644 0000764 0000764 00000000436 06715130346 016163 0 ustar ken ken # This is the example script in doc/UsingPerlSAX.pod
use strict;
use warnings;

use XML::Parser::PerlSAX;
use MyHandler;

# One handler and one parser are created up front and reused for every
# file named on the command line.
my $my_handler = MyHandler->new;
my $parser = XML::Parser::PerlSAX->new( Handler => $my_handler );

# Parse each XML instance in turn; the parser sends its events to
# $my_handler, which prints the element start/end messages.
foreach my $instance (@ARGV) {
    $parser->parse(Source => { SystemId => $instance });
}
libxml-perl-0.08/examples/schema.pl 0100644 0000764 0000764 00000002627 06754116426 015452 0 ustar ken ken # This template file is in the Public Domain.
# You may do anything you want with this file.
#
# $Id: schema.pl,v 1.1 1999/08/10 21:43:50 kmacleod Exp $
#
# This is the example script in the XML::PatAct::ToObjects module doc,
# it also uses XML::PatAct::MatchName and is an example of using PatAct
# modules.
use strict;
use warnings;

use XML::Parser::PerlSAX;
use XML::PatAct::MatchName;
use XML::PatAct::ToObjects;

# The XML instance to convert is taken from the first argument.
@ARGV or die "usage: schema.pl FILE\n";

# Element name => ToObjects action words.  The same list is handed to
# both the matcher and the action module below.
my $patterns =
    [
     'schema'      => [ qw{ -holder } ],
     'table'       => [ qw{ -make Schema::Table } ],
     'name'        => [ qw{ -field Name -as-string } ],
     'summary'     => [ qw{ -field Summary -as-string } ],
     'description' => [ qw{ -field Description -grove } ],
     'column'      => [ qw{ -make Schema::Column -push-field Columns } ],
     'unique'      => [ qw{ -field Unique -value 1 } ],
     'non-null'    => [ qw{ -field NonNull -value 1 } ],
     'default'     => [ qw{ -field Default -as-string } ],
     ];

my $matcher = XML::PatAct::MatchName->new( Patterns => $patterns );
my $handler = XML::PatAct::ToObjects->new( Patterns => $patterns,
                                           Matcher  => $matcher);

my $parser = XML::Parser::PerlSAX->new( Handler => $handler );

# $schema is deliberately a package variable rather than a `my' variable:
# dumpvar() below is given the package name and the variable *name*, so
# the value has to be reachable as $main::schema.
our $schema = $parser->parse(Source => { SystemId => $ARGV[0] } );

require 'dumpvar.pl';
dumpvar('main', 'schema');
libxml-perl-0.08/examples/perlsax-test.pl 0100644 0000764 0000764 00000000416 06714406533 016634 0 ustar ken ken use XML::Parser::PerlSAX;
use XML::Handler::Sample;
# Exactly one argument is required: the XML file to parse.
if (@ARGV != 1) {
    die "usage: perlsax-test FILE\n";
}

my $file = shift @ARGV;

# The sample handler receives every event produced while parsing $file.
my $my_handler = XML::Handler::Sample->new;

XML::Parser::PerlSAX->new->parse(Source  => { SystemId => $file },
                                 Handler => $my_handler);
libxml-perl-0.08/examples/esis-test.pl 0100644 0000764 0000764 00000000571 06714400506 016115 0 ustar ken ken use XML::ESISParser;
use XML::Handler::Sample;
# Extra key/value options appended to the parse() call below.
my @additional_args;

# An optional leading `--sgml' switch asks the parser to treat the input
# as SGML.  @ARGV is checked first so that running the script without any
# argument does not compare against an undefined value.
if (@ARGV && $ARGV[0] eq '--sgml') {
    push @additional_args, IsSGML => 1;
    shift @ARGV;
}

# After the optional switch exactly one argument, the file to parse,
# must remain.
if (@ARGV != 1) {
    die "usage: esis-test FILE\n";
}

my $file = shift @ARGV;

# The sample handler receives every event produced while parsing $file.
my $my_handler = XML::Handler::Sample->new;

XML::ESISParser->new->parse(Source  => { SystemId => $file },
                            Handler => $my_handler,
                            @additional_args);
libxml-perl-0.08/t/ 0040755 0000764 0000764 00000000000 07745275112 012275 5 ustar ken ken libxml-perl-0.08/t/schema.t 0100644 0000764 0000764 00000006123 06754116317 013721 0 ustar ken ken # Hey Emacs, this is -*- perl -*- !
#
# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl test.pl'
#
# $Id: schema.t,v 1.1 1999/08/10 21:42:39 kmacleod Exp $
#
######################### We start with some black magic to print on failure.
# Change 1..1 below to 1..last_test_to_print .
# (It may become useful if the test is moved to ./t subdirectory.)
BEGIN { $| = 1; print "1..2\n"; }
END {print "not ok 1\n" unless $loaded;}
use XML::Parser::PerlSAX;
use XML::PatAct::MatchName;
use XML::PatAct::ToObjects;
$loaded = 1;
print "ok 1\n";
# Pattern/action list used by this test: element names paired with the
# ToObjects action words to apply to them.  The checks at the end of this
# script show the result that is expected from these actions -- for
# example the first top-level object is a Schema::Table, its Columns list
# holds Schema::Column objects, and Name/Summary/Default end up as plain
# field values.
my $patterns = [
'schema' => [ qw{ -holder } ],
'table' => [ qw{ -make Schema::Table } ],
'name' => [ qw{ -field Name -as-string } ],
'summary' => [ qw{ -field Summary -as-string } ],
'description' => [ qw{ -field Description -as-string } ],
'column' => [ qw{ -make Schema::Column -push-field Columns } ],
'unique' => [ qw{ -field Unique -value 1 } ],
'non-null' => [ qw{ -field NonNull -value 1 } ],
'default' => [ qw{ -field Default -as-string } ],
];
# The matcher is built from the pattern list; the ToObjects handler is
# given both the same list and that matcher.
my $matcher = XML::PatAct::MatchName->new( Patterns => $patterns );
my $handler = XML::PatAct::ToObjects->new( Patterns => $patterns,
Matcher => $matcher);
# The parser sends its events to the ToObjects handler.
my $parser = XML::Parser::PerlSAX->new( Handler => $handler );
$schema = $parser->parse(Source => { String => <<'EOF' } );
MyTableA short summaryA long description that may
contain a subset of HTMLMyColumn1A short summaryA long description42
EOF
# One flag per expectation about the object tree in $schema; a true flag
# means that expectation was NOT met.  The expressions are evaluated in
# the order listed.
my @mismatches = (
    # the first top-level object is a Schema::Table ...
    (!defined($schema)) || (ref($schema->[0]) ne 'Schema::Table'),

    # ... with the expected name, summary and some description
    (!defined($schema->[0]{Name})) || ($schema->[0]{Name} ne 'MyTable'),
    (!defined($schema->[0]{Summary}))
        || ($schema->[0]{Summary} ne 'A short summary'),
    (!defined($schema->[0]{Description})),

    # its first column is a Schema::Column ...
    (!defined($schema->[0]{Columns}))
        || (ref($schema->[0]{Columns}[0]) ne 'Schema::Column'),

    # ... with the expected name, summary and some description
    (!defined($schema->[0]{Columns}[0]{Name}))
        || ($schema->[0]{Columns}[0]{Name} ne 'MyColumn1'),
    (!defined($schema->[0]{Columns}[0]{Summary}))
        || ($schema->[0]{Columns}[0]{Summary} ne 'A short summary'),
    !defined($schema->[0]{Columns}[0]{Description}),

    # ... and the expected flag and default values
    (!defined($schema->[0]{Columns}[0]{Unique}))
        || ($schema->[0]{Columns}[0]{Unique} != 1),
    (!defined($schema->[0]{Columns}[0]{NonNull}))
        || ($schema->[0]{Columns}[0]{NonNull} != 1),
    (!defined($schema->[0]{Columns}[0]{Default}))
        || ($schema->[0]{Columns}[0]{Default} != 42),
);

# Test 2 passes only when no expectation was missed.
$not_ok = (grep { $_ } @mismatches) ? 1 : 0;

print $not_ok ? "not ok 2\n" : "ok 2\n";
libxml-perl-0.08/t/stream.t 0100644 0000764 0000764 00000005152 07745254162 013757 0 ustar ken ken # Hey Emacs, this is -*- perl -*- !
#
# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl test.pl'
#
# $Id: stream.t,v 1.2 2003/10/21 16:01:54 kmacleod Exp $
#
######################### We start with some black magic to print on failure.
# Change 1..1 below to 1..last_test_to_print .
# (It may become useful if the test is moved to ./t subdirectory.)
BEGIN { $| = 1; print "1..11\n"; }
END {print "not ok 1\n" unless $loaded;}
use XML::Parser::PerlSAX;
use XML::Handler::XMLWriter;
$loaded = 1;
print "ok 1\n";
my $subs = MySubs->new( AsString => 1 );
my $parser = XML::Parser::PerlSAX->new( Handler => $subs );
$string = $parser->parse(Source => { Encoding => 'ISO-8859-1',
String => <<"EOF;" } );
]>
First line in foo
Fran is &fran; and Zoe is &zoe;
1st line in bar
2nd line in bar
3rd line in bar
This, '\240', would be a bad character in UTF-8.
EOF;
# Report tests 2..10: a test passed when the handler stored a true value
# in the slot of its Tests array with the same number.
foreach $test (2..10) {
    my $verdict = $subs->{Tests}[$test] ? "ok" : "not ok";
    print "$verdict $test\n";
}
$expected = <<"EOF;";
First line in foo
Fran is fran-def and Zoe is zoe.ent
1st line in bar
2nd line in bar
3rd line in bar
This, '\240', would be a bad character in UTF-8.
EOF;
print (($string eq $expected) ? "ok 11\n" : "not ok 11\n");
package MySubs;

# Handler used by this test: a subclass of XML::Handler::XMLWriter whose
# s_zap/e_zap subs record in $self->{Tests} which expectations held at
# the moment they were called.
use vars qw{ @ISA };
BEGIN { @ISA = qw{ XML::Handler::XMLWriter }; };

# s_zap($element) - start-of-element sub for `zap'.
#
# Records tests 2..7, adds a `fubar' attribute to the element and then
# writes the start tag through print_start_element(), whose result is
# returned.
sub s_zap {
    my ($self, $element) = @_;

    # Test 2: we got here.
    $self->{Tests}[2] = 1;

    # Test number => whether its expectation holds right now.
    my @checks = (
        [ 3, $element->{Name} eq 'zap' ],                # right element
        [ 4, $element->{Name} eq $self->{Names}[-1] ],   # name is on top of the name stack
        [ 5, $element == $self->{Nodes}[-1] ],           # element is on top of the node stack
        [ 6, $#{$self->{Names}} == 1 ],                  # name stack holds two entries
        [ 7, $#{$self->{Nodes}} == 1 ],                  # node stack holds two entries
    );
    foreach my $check (@checks) {
        my ($number, $held) = @$check;
        $self->{Tests}[$number] = 1 if $held;
    }

    $element->{Attributes}{'fubar'} = 1;

    return $self->print_start_element($element);
}

# e_zap($element) - end-of-element sub for `zap'.
#
# Records tests 8..10 and then writes the end tag through
# print_end_element(), whose result is returned.
sub e_zap {
    my ($self, $element) = @_;

    # Test 8: we got here.
    $self->{Tests}[8] = 1;

    if ($self->in_element('zap')) {
        $self->{Tests}[9] = 1;
    }
    if ($self->within_element('zap') == 1) {
        $self->{Tests}[10] = 1;
    }

    return $self->print_end_element($element);
}
libxml-perl-0.08/t/xp_sax.t 0100644 0000764 0000764 00000010511 06766050424 013756 0 ustar ken ken # Hey Emacs, this is -*- perl -*- !
#
# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl test.pl'
#
# $Id: xp_sax.t,v 1.4 1999/09/10 00:30:12 kmacleod Exp $
#
######################### We start with some black magic to print on failure.
# Change 1..1 below to 1..last_test_to_print .
# (It may become useful if the test is moved to ./t subdirectory.)
BEGIN { $| = 1; print "1..15\n"; }
END {print "not ok 1\n" unless $loaded;}
use XML::Parser::PerlSAX;
$loaded = 1;
print "ok 1\n";
######################### End of black magic.
# Insert your test code below (better if it prints "ok 13"
# (correspondingly "not ok 13") depending on the success of chunk 13
# of the test code):
# Test Plan:
#
# * done; standard loading test
# * not done; parse a document with data for all events
# * not done; check all properties returned from events
# * not done; check location
#
# The following is copied from XML::Parser by Clark Cooper
#
# Create the zoe.ent file that TestHandler::resolve_entity opens later in
# this script.  Failing to create or write it would make the run below
# meaningless, so stop right away in that case.
open(my $zoe_fh, '>', 'zoe.ent')
    or die "Couldn't create zoe.ent: $!";
print $zoe_fh "'cute'";
close($zoe_fh)
    or die "Couldn't write zoe.ent: $!";
# XML string for tests
my $xmlstring =<<"End_of_XML;";
]>
First line in foo
Fran is &fran; and Zoe is &zoe;
1st line in bar
2nd line in bar
3rd line in bar
This, '\240', would be a bad character in UTF-8.
End_of_XML;
# Handlers
# @tests collects the results: handlers bump the slot of the test they
# prove, and the report loop further down prints one line per slot.
my @tests;
my $pos ='';

# Test 2: a parser object could be created.  Nothing else can run
# without one.
my $parser = XML::Parser::PerlSAX->new;
unless ($parser) {
    print "not ok 2\n";
    exit;
}
print "ok 2\n";

# Parse the first document with TestHandler, whose methods count the
# events they receive in @tests.
eval {
    $parser->parse(
        Source  => { String   => $xmlstring,
                     Encoding => 'ISO-8859-1' },
        Handler => TestHandler->new( Tests => \@tests ),
    );
};
warn $@ if $@;

# Test 3: the parse finished without dying.
if ($@) {
    print "Parse error:\n$@";
}
else {
    $tests[3] ++;
}

# The entity file is no longer needed.
unlink('zoe.ent') if -f 'zoe.ent';
$xmlstring = <<'EOF;';
]>
&anEntRef;
EOF;
# Parse the entity-reference document twice, first with the handler that
# has no entity_reference method and then with the one that has it.  A
# failing parse is reported as a warning and does not stop the script.
foreach my $handler_class (qw( NoEntRefsHandler EntRefsHandler )) {
    eval {
        $parser->parse( Source  => { String => $xmlstring },
                        Handler => $handler_class->new( Tests => \@tests ) );
    };
    warn $@ if $@;
}

# Report tests 3..15: a test passed when its slot in @tests is true.
foreach my $number (3 .. 15) {
    my $prefix = $tests[$number] ? '' : 'not ';
    print "${prefix}ok $number\n";
}

exit;
package TestHandler;

# Handler for the first parse in this script.  Every event method bumps
# the counter of "its" test in the array passed to new() as Tests.

# new(KEY => VALUE, ...) - the arguments become the object's fields.
sub new {
    my ($class, @fields) = @_;

    return bless { @fields }, $class;
}

# Test 4: character data was reported.
sub characters {
    my ($self) = @_;

    $self->{Tests}[4]++;
}

# Test 5: an element start was reported.
sub start_element {
    my ($self) = @_;

    $self->{Tests}[5]++;
}

# Test 6: an element end was reported.
sub end_element {
    my ($self) = @_;

    $self->{Tests}[6]++;
}

# Test 7: a processing instruction was reported.
sub processing_instruction {
    my ($self) = @_;

    $self->{Tests}[7]++;
}

# Test 8: a notation declaration was reported.
sub notation_decl {
    my ($self) = @_;

    $self->{Tests}[8]++;
}

# Test 9: an unparsed entity declaration was reported.
sub unparsed_entity_decl {
    my ($self) = @_;

    $self->{Tests}[9]++;
}

# Test 12: the start of a CDATA section was reported.
sub start_cdata {
    my ($self) = @_;

    $self->{Tests}[12]++;
}

# Test 13: the end of a CDATA section was reported.
sub end_cdata {
    my ($self) = @_;

    $self->{Tests}[13]++;
}

# resolve_entity($entity) - supply the content of the two entities this
# test knows about, chosen by the SystemId property of $entity:
#
#   fran-def  test 10; returns a String source
#   zoe.ent   test 11; returns a ByteStream source reading that file
#
# For any other system id no source is returned.
sub resolve_entity {
    my ($self, $entity) = @_;

    my $system_id = $entity->{SystemId};

    if ($system_id eq 'fran-def') {
        $self->{Tests}[10]++;
        return { String => 'pretty' };
    } elsif ($system_id eq 'zoe.ent') {
        $self->{Tests}[11]++;
        # The glob is localized so that each call gets its own handle.
        local(*FOO);
        open(FOO, '<', $system_id) or die "Couldn't open $system_id";
        return { ByteStream => *FOO };
    }
}
package NoEntRefsHandler;

# Handler without an entity_reference method.  Test 14 is recorded when
# the text `The Ent Ref' arrives as ordinary character data.

# new(KEY => VALUE, ...) - the arguments become the object's fields.
sub new {
    my ($class, @fields) = @_;

    return bless { @fields }, $class;
}

# characters($characters) - bump test 14 when the Data property is
# exactly `The Ent Ref'.
sub characters {
    my ($self, $characters) = @_;

    $self->{Tests}[14]++ if $characters->{Data} eq 'The Ent Ref';
}
package EntRefsHandler;

# Handler with an entity_reference method.  The text `The Ent Ref' must
# reach it through entity_reference (test 15) and never through
# characters.

# new(KEY => VALUE, ...) - the arguments become the object's fields.
sub new {
    my ($class, @fields) = @_;

    return bless { @fields }, $class;
}

# characters($characters) - dies when the Data property is exactly
# `The Ent Ref'; any other data is ignored.
sub characters {
    my ($self, $characters) = @_;

    die "shouldn't have made it here"
        if $characters->{Data} eq 'The Ent Ref';
}

# entity_reference($ent_ref) - bump test 15 when both the Name and the
# Value property carry the expected text.
sub entity_reference {
    my ($self, $ent_ref) = @_;

    $self->{Tests}[15]++
        if ($ent_ref->{Name} eq 'anEntRef')
            && ($ent_ref->{Value} eq 'The Ent Ref');
}
libxml-perl-0.08/t/subs.t 0100644 0000764 0000764 00000003027 06756033163 013434 0 ustar ken ken # Hey Emacs, this is -*- perl -*- !
#
# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl test.pl'
#
# $Id: subs.t,v 1.1 1999/08/16 16:04:03 kmacleod Exp $
#
######################### We start with some black magic to print on failure.
# Change 1..1 below to 1..last_test_to_print .
# (It may become useful if the test is moved to ./t subdirectory.)
BEGIN { $| = 1; print "1..10\n"; }
END {print "not ok 1\n" unless $loaded;}
use XML::Parser::PerlSAX;
use XML::Handler::Subs;
$loaded = 1;
print "ok 1\n";
my $subs = MySubs->new( );
my $parser = XML::Parser::PerlSAX->new( Handler => $subs );
$parser->parse(Source => { String => <<'EOF' } );
EOF
# Report tests 2..10: a test passed when the handler stored a true value
# in the slot of its Tests array with the same number.
foreach $test (2..10) {
    my $verdict = $subs->{Tests}[$test] ? "ok" : "not ok";
    print "$verdict $test\n";
}
package MySubs;

# Handler used by this test: a subclass of XML::Handler::Subs.  The
# element name checked below is `foo:-it'; the sub names use `foo__it'
# because characters outside [a-zA-Z0-9_] become `_' in sub names.
use vars qw{ @ISA };
BEGIN { @ISA = qw{ XML::Handler::Subs }; };

# s_foo__it($element) - start-of-element sub; records tests 2..7.
sub s_foo__it {
    my ($self, $element) = @_;

    # Test 2: we got here.
    $self->{Tests}[2] = 1;

    # Test 3: the element carries the original, unmodified name.
    if ($element->{Name} eq 'foo:-it') {
        $self->{Tests}[3] = 1;
    }
    # Tests 4 and 5: name and element are on top of their stacks.
    if ($element->{Name} eq $self->{Names}[-1]) {
        $self->{Tests}[4] = 1;
    }
    if ($element == $self->{Nodes}[-1]) {
        $self->{Tests}[5] = 1;
    }
    # Tests 6 and 7: both stacks hold exactly one entry.
    if ($#{$self->{Names}} == 0) {
        $self->{Tests}[6] = 1;
    }
    if ($#{$self->{Nodes}} == 0) {
        $self->{Tests}[7] = 1;
    }
}

# e_foo__it($element) - end-of-element sub; records tests 8..10.
sub e_foo__it {
    my ($self, $element) = @_;

    # Test 8: we got here.
    $self->{Tests}[8] = 1;

    # Test 9: the element is still the current one.
    if ($self->in_element('foo:-it')) {
        $self->{Tests}[9] = 1;
    }
    # Test 10: it is open exactly once.
    if ($self->within_element('foo:-it') == 1) {
        $self->{Tests}[10] = 1;
    }
}
libxml-perl-0.08/t/canon_xml_writer.t 0100644 0000764 0000764 00000007467 06754116317 016047 0 ustar ken ken # Hey Emacs, this is -*- perl -*- !
#
# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl test.pl'
#
# $Id: canon_xml_writer.t,v 1.2 1999/08/10 21:42:39 kmacleod Exp $
#
######################### We start with some black magic to print on failure.
# Change 1..1 below to 1..last_test_to_print .
# (It may become useful if the test is moved to ./t subdirectory.)
BEGIN { $| = 1; print "1..5\n"; }
END {print "not ok 1\n" unless $loaded;}
use XML::Parser::PerlSAX;
use XML::Handler::CanonXMLWriter;
$loaded = 1;
print "ok 1\n";
######################### End of black magic.
# Insert your test code below (better if it prints "ok 13"
# (correspondingly "not ok 13") depending on the success of chunk 13
# of the test code):
# The parser and the writer are shared by every test below.
my $parser = XML::Parser::PerlSAX->new;
my $writer = XML::Handler::CanonXMLWriter->new;

# Test 2: a writer object could be created.  Nothing else can run
# without one.
unless ($writer) {
    print "not ok 2\n";
    exit;
}
print "ok 2\n";
#
# The following XML is copied from XML::Parser by Clark Cooper
#
# XML string for tests
my $xmlstring =<<"End_of_XML;";
]>
First line in foo
1st line in bar
2nd line in bar
3rd line in bar
End_of_XML;
###
### plain test
###
$expected_result = <<'End_of_XML;';
First line in foo
1st line in bar
2nd line in bar
3rd line in bar
End_of_XML;
$expected_result =~ s/\n$//s;
$canon_xml = $parser->parse( Source => { String => $xmlstring },
Handler => $writer );
if ($canon_xml eq $expected_result) {
print "ok 3\n";
} else {
warn "---- expected result ----\n";
warn "$expected_result\n";
warn "---- actual result ----\n";
warn "$canon_xml\n";
print "not ok 3\n";
}
###
### Test PrintComments option
###
$expected_result = <<'End_of_XML;';
First line in foo
1st line in bar
2nd line in bar
3rd line in bar
End_of_XML;
$expected_result =~ s/\n$//s;
$writer->{PrintComments} = 1;
$canon_xml = $parser->parse( Source => { String => $xmlstring },
Handler => $writer );
if ($canon_xml eq $expected_result) {
print "ok 4\n";
} else {
warn "---- expected result ----\n";
warn "$expected_result\n";
warn "---- actual result ----\n";
warn "$canon_xml\n";
print "not ok 4\n";
}
undef $writer->{PrintComments};
###
### Test James Clark's XML test suite
###
# Root directory of the XML test suite: $XMLTEST when set, otherwise
# `xmltest' below the home directory.
$xml_test = (defined $ENV{XMLTEST}) ? $ENV{XMLTEST} : "$ENV{HOME}/xmltest";

# allow test to skip if directory does not exist and MUST_TEST isn't set
if (!-d $xml_test && !defined($ENV{MUST_TEST})) {
    print "ok 5\n";
    exit;
}

# Every document in valid/sa is written through the canonical writer and
# compared with the reference file of the same name in valid/sa/out.
$tested_file = 0;
foreach $file (glob("$xml_test/valid/sa/*.xml")) {
    $tested_file = 1;
    $canon_xml = $parser->parse( Source  => { SystemId => $file },
                                 Handler => $writer );

    # add the `out' dir to get the corresponding canon xml
    ($out_file = $file) =~ s|/([^/]+)$|/out/$1|;

    # Read the complete reference document.  The lines must actually be
    # read from the handle: joining an empty list would compare every
    # result against the empty string.
    open(my $canon_fh, '<', $out_file)
        or die "$out_file: $!\n";
    $expected_result = join('', <$canon_fh>);
    close($canon_fh);

    if ($canon_xml ne $expected_result) {
        warn "---- expected result for $file ----\n";
        warn "$expected_result\n";
        warn "---- actual result ----\n";
        warn "$canon_xml\n";
        $not_ok = 1;
    }
}

# Test 5 fails when no document was found at all or when any document
# did not match its reference.
if (!$tested_file || $not_ok) {
    print "not ok 5\n";
} else {
    print "ok 5\n";
}
libxml-perl-0.08/t/amsterdam.t 0100644 0000764 0000764 00000002374 06762020221 014425 0 ustar ken ken # Hey Emacs, this is -*- perl -*- !
#
# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl test.pl'
#
# $Id: amsterdam.t,v 1.1 1999/08/28 17:46:57 kmacleod Exp $
#
######################### We start with some black magic to print on failure.
# Change 1..1 below to 1..last_test_to_print .
# (It may become useful if the test is moved to ./t subdirectory.)
BEGIN { $| = 1; print "1..2\n"; }
END {print "not ok 1\n" unless $loaded;}
use XML::Parser::PerlSAX;
use XML::PatAct::MatchName;
use XML::PatAct::Amsterdam;
$loaded = 1;
print "ok 1\n";
# Pattern/action list used by this test: element names paired with the
# strings to output before and after the element's content.  Judging by
# the expected output at the end of this script, `[attr]' in a string
# stands for the value of the element's `attr' attribute.
$patterns =
[
'outer' => { Before => "Outer-before, '[attr]'",
After => "Outer-after\n" },
'inner' => { Before => "Inner" },
];
# The matcher is built from the pattern list; the Amsterdam handler is
# given both the same list and that matcher.
my $matcher = XML::PatAct::MatchName->new( Patterns => $patterns );
# AsString is switched on; the result of parse() is compared with the
# expected string at the end of this script.
my $handler = XML::PatAct::Amsterdam->new( Patterns => $patterns,
Matcher => $matcher,
AsString => 1 );
# The parser sends its events to the Amsterdam handler.
my $parser = XML::Parser::PerlSAX->new( Handler => $handler );
$string = $parser->parse(Source => { String => <<'EOF;' } );
EOF;
$expected = <<"EOF;";
Outer-before, 'an attr'
Inner
Outer-after
EOF;
print (($string eq $expected) ? "ok 2\n" : "not ok 2\n");
libxml-perl-0.08/ChangeLog 0100644 0000764 0000764 00000017410 07745254161 013605 0 ustar ken ken 2003-10-21 Ken MacLeod
* t/stream.t: fixed test 11 for Perl >= 5.6
2001-07-23 Ken MacLeod
* lib/XML/SAX2Perl.pm (startElement): typo; reported by
mhershb@mcdermott.com (Mark A. Hershberger)
2000-03-30 Ken MacLeod
* doc/index.html (Contributors): added Clark Cooper
* MANIFEST (doc/sax-2.0.html, doc/sax-2.0-adv.html): added
2000-03-20 Ken MacLeod
* lib/Data/Grove/Visitor.pm (_children_accept_name): add return
@return; reported by Laurent CAPRANI
2000-03-07 Ken MacLeod
* doc/sax-2.0.html, doc/sax-2.0-adv.html: added
2000-03-02 Ken MacLeod
* lib/XML/ESISParser.pm: add -E0 to NSGMLS_FLAGS to not limit the
number of errors reported; suggested by Charles Thayer
(parse_fh): report line and line number on command character
errors; also suggested by Charles
2000-02-22 Ken MacLeod
* doc/index.html (Contributors): added Michael Koehne, KangChan
Lee, and Colin Muller
* doc/mirror.sh, doc/index.html: added
2000-02-17 Ken MacLeod
* doc/modules.xml: fixed several well-formedness errors; reported
by KangChan Lee
1999-12-22 Ken MacLeod
* lib/Data/Grove.pm, lib/Data/Grove/Parent.pm,
lib/Data/Grove/Visitor.pm, lib/XML/Handler/XMLWriter.pm,
lib/XML/Handler/CanonXMLWriter.pm, lib/XML/Handler/Subs.pm,
lib/XML/SAX2Perl.pm, lib/XML/Perl2SAX.pm, lib/XML/ESISParser.pm,
lib/XML/Parser/PerlSAX.pm, lib/XML/PatAct/Amsterdam.pm,
lib/XML/PatAct/MatchName.pm, lib/XML/PatAct/ToObjects.pm: added
$VERSION
* lib/XML/Parser/PerlSAX.pm (_handle_start): support
UseAttributeOrder option
(_handle_attlist): Changed EntityName to ElementName (re 9/28
entry)
1999-09-28 Ken MacLeod
* lib/XML/Parser/PerlSAX.pm (_handle_attlist): typo: was calling
entity_decl
1999-09-09 Ken MacLeod
* lib/XML/Parser/PerlSAX.pm: add start_cdata, end_cdata, and
entity_reference events
1999-08-28 Ken MacLeod
* lib/XML/PatAct/Amsterdam.pm: added Output and AsString options,
added support for attribute replacement
* t/amsterdam.t: added
1999-08-18 Ken MacLeod
* lib/Data/Grove.pm: added Data::Grove::Characters
* lib/XML/ESISParser.pm (parse_fh): report newline as characters
if no record_end() handler
* lib/XML/PatAct/ToObjects.pm (_parse_action): removed debugging
statement
1999-08-16 Ken MacLeod
* README: updated
* doc/modules.xml (libxml-perl): updated
* doc/PerlSAX.pod (Parameters): missing '>'
* release 0.05
* lib/XML/Parser/PerlSAX.pm (_handle_init): call set_document_locator
* lib/XML/PatAct/ActionTempl.pm, lib/XML/PatAct/Amsterdam.pm,
lib/XML/PatAct/MatchName.pm, lib/XML/PatAct/PatternTempl.pm (new):
Accept both key, value pairs and hash options
* lib/XML/PatAct/ToObjects.pm (new):
* lib/XML/Handler/Subs.pm: added
* t/subs.t: added
* t/stream.t: added
1999-08-15 Ken MacLeod
* lib/XML/Handler/XMLWriter.pm: added
* lib/XML/Handler/Sample.pm: Placed in public domain
1999-08-14 Ken MacLeod
* doc/PerlSAX.pod: added an introduction, a ``Deviations from the Java version'' section, added `set_document_locator()' handler method
* lib/XML/PatAct/ToObjects.pm: add CopyAttributes option, add
-grove-contents action
1999-08-12 Ken MacLeod
* lib/XML/ESISParser.pm (parse_fh): dynamically test event handler
existence
* lib/XML/Parser/PerlSAX.pm (parse): wasn't capturing XML::Parser
Element events
1999-08-10 Ken MacLeod
* README, doc/modules.xml: updated with PatAct modules
* lib/XML/PatAct/ActionTempl.pm, lib/XML/PatAct/Amsterdam.pm,
lib/XML/PatAct/MatchName.pm, lib/XML/PatAct/PatternTempl.pm,
lib/XML/PatAct/ToObjects.pm: added
* t/xp_sax.t, t/canon_xml_writer.t: added CVS ID
* t/schema.t: added
* examples/schema.xml, examples/schema.pl: added
* doc/UsingPatActModules.pod, doc/CreatingPatActModules.pod: added
* lib/XML/Parser/PerlSAX.pm (_handle_extern_ent): change "Perl
SAX" to "PerlSAX" in doc
1999-08-09 Ken MacLeod
* lib/XML/ESISParser.pm (parse_fh): was not passing an empty hash
* lib/XML/Parser/PerlSAX.pm (_handle_init, _handle_final): was not
passing an empty hash
1999-05-26 Ken MacLeod
* lib/XML/Handler/CanonXMLWriter.pm, t/canon_xml_writer.t: added
1999-05-23 Ken MacLeod
* lib/Data/Grove/Tied.pm: renamed to Parent.pm
* README (DOCUMENTS): added
renamed libxml to libxml-perl
* libxml.spec: renamed libxml-perl.spec
1999-05-17 Ken MacLeod
* libxml.spec: files in `doc/' go into top-dir of /usr/doc/$PKG
* PerlSAX.pod: moved to doc/PerlSAX.pod
1999-05-09 Ken MacLeod
* doc/modules.xml: added
1999-05-08 Ken MacLeod
* doc/UsingPerlSAX.pod, examples/MyHandler.pm,
examples/myhandler.pl, examples/myhandler.xml: added
1999-05-07 Ken MacLeod
* lib/XML/ESISParser.pm, lib/Data/Grove.pm,
lib/XML/Handler/Sample.pm: added POD
1999-05-06 Ken MacLeod
* lib/Data/Grove/Visitor.pm: remove XML::Grove extensions and make
generic
* lib/XML/Parser/SAXPerl.pm: renamed PerlSAX.pm
* lib/XML/Handler/Sample.pm: added
* examples/perlsax-test.pl: added
* examples/esis-test.pl: updated for new XML::ESISParser, moved
handler (Receiver) to XML::Handler::Sample, added command line
option for SGML
1999-04-30 Ken MacLeod
* Makefile.PL: added PREREQ_PM for XML::Parser
1999-04-15 Ken MacLeod
* lib/Data/Grove/Visitor.pm (accept): change XML:: to Data::
* lib/Data/Grove.pm (new): %{ shift } was being read as %shift
1999-02-18 Ken MacLeod
* lib/Data/Grove/Visitor.pm: was XML::Grove::Visitor
* lib/Data/Grove/Tied.pm: was XML::Grove::Node
* lib/Data/Grove.pm: created from XML::Grove
1999-02-15 Ken MacLeod
* lib/XML/Parser/SAXPerl.pm (parse): add comments
* lib/XML/ESISParser.pm: major changes for support of both XML and
SGML, and ongoing Perl SAX updates
* SAX.pod (end_document): noted that the return value of
end_document() is the return value of parse()
* README: added reference to FAQ, added module statuses, more
clearly described ESISParser, require Perl 5.005
1999-02-13 Ken MacLeod
* lib/XML/ESISParser.pm: start move to Perl SAX
1999-02-12 Ken MacLeod
* lib/XML/SAX2Perl.pm, lib/XML/Perl2SAX.pm, lib/XML/ESISParser.pm:
update to new Perl SAX
* lib/XML/Parser/SAXPerl.pm (new): allow hash or key/value pairs
1999-02-12 Ken MacLeod
* interface-style.pod: note still undecided items
* lib/XML/Parser/SAXPerl.pm: fixes shown by xp_sax.t
* t/xp_sax.t: added
* lib/XML/Parser/SAXPerl.pm: added pod
many changes for Perl SAX and XML::Parser::Expat
1999-02-11 Ken MacLeod
* SAX.pod: suggestions from Eric Prud'hommeaux and Enno Derksen
* interface-style.pod: suggestions from Larry Wall
1999-02-01 Ken MacLeod
* MANIFEST: updated
* lib/XML/Parser/SAXPerl.pm: modified more towards Perl SAX
* SAX.pod: added
1999-01-31 Ken MacLeod
* interface-style.pod: added
1998-12-10 Ken MacLeod
* lib/XML/Parser/SAXPerl.pm: added
1998-12-08 Ken MacLeod
* MANIFEST: added
libxml-perl-0.08/libxml-perl.spec 0100644 0000764 0000764 00000004413 06756034003 015125 0 ustar ken ken Summary: Collection of Perl modules for working with XML
Name: libxml-perl
Version: @VERSION@
Release: 1
Source: http://www.perl.com/CPAN/modules/by-module/XML/libxml-perl-@VERSION@.tar.gz
Copyright: Artistic or GPL
Group: Applications/Publishing/XML
URL: http://www.perl.com/
Packager: ken@bitsko.slc.ut.us (Ken MacLeod)
BuildRoot: /tmp/libxml-perl
#
# $Id: libxml-perl.spec,v 1.4 1999/08/16 16:10:43 kmacleod Exp $
#
%description
libxml-perl is a collection of Perl modules for working with XML.
%prep
# Unpack the source tarball, then generate the Makefile here in the prep
# stage; INSTALLDIRS=perl is passed through to Makefile.PL.
%setup
perl Makefile.PL INSTALLDIRS=perl
%build
# Build with the Makefile generated during prep.
make
%install
# Install the modules under ${RPM_ROOT_DIR}/usr.
# NOTE(review): a BuildRoot is declared in the header above, but the paths
# here are derived from RPM_ROOT_DIR rather than RPM_BUILD_ROOT -- confirm
# this is intended.
make PREFIX="${RPM_ROOT_DIR}/usr" pure_install
# Documentation directory for this package; examples go in a subdirectory.
DOCDIR="${RPM_ROOT_DIR}/usr/doc/libxml-perl-@VERSION@-1"
mkdir -p "$DOCDIR/examples"
# Copy the listed files from doc/ into the top level of DOCDIR, world-readable.
for ii in PerlSAX.pod UsingPerlSAX.pod interface-style.pod modules.xml; do
cp doc/$ii "$DOCDIR/$ii"
chmod 644 "$DOCDIR/$ii"
done
# Copy the top-level text files and every example, keeping the examples/
# prefix so the examples land in DOCDIR/examples.
for ii in README Changes examples/*; do
cp $ii "$DOCDIR/$ii"
chmod 644 "$DOCDIR/$ii"
done
%files
/usr/doc/libxml-perl-@VERSION@-1
/usr/lib/perl5/Data/Grove.pm
/usr/lib/perl5/Data/Grove/Parent.pm
/usr/lib/perl5/Data/Grove/Visitor.pm
/usr/lib/perl5/XML/ESISParser.pm
/usr/lib/perl5/XML/Handler/CanonXMLWriter.pm
/usr/lib/perl5/XML/Handler/Sample.pm
/usr/lib/perl5/XML/Handler/Subs.pm
/usr/lib/perl5/XML/Handler/XMLWriter.pm
/usr/lib/perl5/XML/SAX2Perl.pm
/usr/lib/perl5/XML/Perl2SAX.pm
/usr/lib/perl5/XML/Parser/PerlSAX.pm
/usr/lib/perl5/XML/PatAct/ActionTempl.pm
/usr/lib/perl5/XML/PatAct/Amsterdam.pm
/usr/lib/perl5/XML/PatAct/MatchName.pm
/usr/lib/perl5/XML/PatAct/PatternTempl.pm
/usr/lib/perl5/XML/PatAct/ToObjects.pm
/usr/lib/perl5/man/man3/Data::Grove.3
/usr/lib/perl5/man/man3/Data::Grove::Parent.3
/usr/lib/perl5/man/man3/Data::Grove::Visitor.3
/usr/lib/perl5/man/man3/XML::Handler::CanonXMLWriter.3
/usr/lib/perl5/man/man3/XML::Handler::Sample.3
/usr/lib/perl5/man/man3/XML::Handler::Subs.3
/usr/lib/perl5/man/man3/XML::Handler::XMLWriter.3
/usr/lib/perl5/man/man3/XML::ESISParser.3
/usr/lib/perl5/man/man3/XML::SAX2Perl.3
/usr/lib/perl5/man/man3/XML::Perl2SAX.3
/usr/lib/perl5/man/man3/XML::Parser::PerlSAX.3
/usr/lib/perl5/man/man3/XML::PatAct::ActionTempl.3
/usr/lib/perl5/man/man3/XML::PatAct::Amsterdam.3
/usr/lib/perl5/man/man3/XML::PatAct::MatchName.3
/usr/lib/perl5/man/man3/XML::PatAct::PatternTempl.3
/usr/lib/perl5/man/man3/XML::PatAct::ToObjects.3
libxml-perl-0.08/libxml-perl-0.08.spec 0100644 0000764 0000764 00000004367 07745275111 015526 0 ustar ken ken Summary: Collection of Perl modules for working with XML
Name: libxml-perl
Version: 0.08
Release: 1
Source: http://www.perl.com/CPAN/modules/by-module/XML/libxml-perl-0.08.tar.gz
Copyright: Artistic or GPL
Group: Applications/Publishing/XML
URL: http://www.perl.com/
Packager: ken@bitsko.slc.ut.us (Ken MacLeod)
BuildRoot: /tmp/libxml-perl
#
# $Id: libxml-perl.spec,v 1.4 1999/08/16 16:10:43 kmacleod Exp $
#
%description
libxml-perl is a collection of Perl modules for working with XML.
%prep
# Unpack the source tarball, then generate the Makefile here in the prep
# stage; INSTALLDIRS=perl is passed through to Makefile.PL.
%setup
perl Makefile.PL INSTALLDIRS=perl
%build
# Build with the Makefile generated during prep.
make
%install
# Install the modules under ${RPM_ROOT_DIR}/usr.
# NOTE(review): a BuildRoot is declared in the header above, but the paths
# here are derived from RPM_ROOT_DIR rather than RPM_BUILD_ROOT -- confirm
# this is intended.
make PREFIX="${RPM_ROOT_DIR}/usr" pure_install
# Documentation directory for this package; examples go in a subdirectory.
DOCDIR="${RPM_ROOT_DIR}/usr/doc/libxml-perl-0.08-1"
mkdir -p "$DOCDIR/examples"
# Copy the listed files from doc/ into the top level of DOCDIR, world-readable.
for ii in PerlSAX.pod UsingPerlSAX.pod interface-style.pod modules.xml; do
cp doc/$ii "$DOCDIR/$ii"
chmod 644 "$DOCDIR/$ii"
done
# Copy the top-level text files and every example, keeping the examples/
# prefix so the examples land in DOCDIR/examples.
for ii in README Changes examples/*; do
cp $ii "$DOCDIR/$ii"
chmod 644 "$DOCDIR/$ii"
done
%files
/usr/doc/libxml-perl-0.08-1
/usr/lib/perl5/Data/Grove.pm
/usr/lib/perl5/Data/Grove/Parent.pm
/usr/lib/perl5/Data/Grove/Visitor.pm
/usr/lib/perl5/XML/ESISParser.pm
/usr/lib/perl5/XML/Handler/CanonXMLWriter.pm
/usr/lib/perl5/XML/Handler/Sample.pm
/usr/lib/perl5/XML/Handler/Subs.pm
/usr/lib/perl5/XML/Handler/XMLWriter.pm
/usr/lib/perl5/XML/SAX2Perl.pm
/usr/lib/perl5/XML/Perl2SAX.pm
/usr/lib/perl5/XML/Parser/PerlSAX.pm
/usr/lib/perl5/XML/PatAct/ActionTempl.pm
/usr/lib/perl5/XML/PatAct/Amsterdam.pm
/usr/lib/perl5/XML/PatAct/MatchName.pm
/usr/lib/perl5/XML/PatAct/PatternTempl.pm
/usr/lib/perl5/XML/PatAct/ToObjects.pm
/usr/lib/perl5/man/man3/Data::Grove.3
/usr/lib/perl5/man/man3/Data::Grove::Parent.3
/usr/lib/perl5/man/man3/Data::Grove::Visitor.3
/usr/lib/perl5/man/man3/XML::Handler::CanonXMLWriter.3
/usr/lib/perl5/man/man3/XML::Handler::Sample.3
/usr/lib/perl5/man/man3/XML::Handler::Subs.3
/usr/lib/perl5/man/man3/XML::Handler::XMLWriter.3
/usr/lib/perl5/man/man3/XML::ESISParser.3
/usr/lib/perl5/man/man3/XML::SAX2Perl.3
/usr/lib/perl5/man/man3/XML::Perl2SAX.3
/usr/lib/perl5/man/man3/XML::Parser::PerlSAX.3
/usr/lib/perl5/man/man3/XML::PatAct::ActionTempl.3
/usr/lib/perl5/man/man3/XML::PatAct::Amsterdam.3
/usr/lib/perl5/man/man3/XML::PatAct::MatchName.3
/usr/lib/perl5/man/man3/XML::PatAct::PatternTempl.3
/usr/lib/perl5/man/man3/XML::PatAct::ToObjects.3
libxml-perl-0.08/Makefile.PL 0100644 0000764 0000764 00000001071 07745275111 013777 0 ustar ken ken #
# Copyright (C) 1998 Ken MacLeod
# This library is free software; you can redistribute it and/or modify
# it under the same terms as Perl itself.
#
# $Id: Makefile.PL,v 1.3 1999/05/24 23:25:02 kmacleod Exp $
#
use ExtUtils::MakeMaker;

# Release version.  MakeMaker reads it back out of this very file (see the
# VERSION_FROM entry below), so the assignment stays on a line of its own.
$VERSION = '0.08';

# See lib/ExtUtils/MakeMaker.pm for details of how to influence
# the contents of the Makefile that is written.
my @makefile_args = (
    NAME         => 'libxml-perl',
    VERSION_FROM => 'Makefile.PL',
    PREREQ_PM    => { 'XML::Parser' => '2.19' },
    dist         => { COMPRESS => 'gzip', SUFFIX => '.gz' },
);

WriteMakefile(@makefile_args);
libxml-perl-0.08/README 0100644 0000764 0000764 00000014435 07745254162 012720 0 ustar ken ken $Id: README,v 1.10 2003/10/21 16:01:54 kmacleod Exp $
libxml-perl
Collection of Perl modules for working with XML.
INTRODUCTION
libxml-perl is a collection of smaller Perl modules, scripts, and
documents for working with XML in Perl. libxml-perl software
works in combination with XML::Parser, PerlSAX, XML::DOM,
XML::Grove and others.
See the file Changes for user-visible changes and ChangeLog for
detailed changes. See the `examples' directory for examples. POD
style documentation is included in all non-alpha modules and
scripts. You should also be able to use the 'perldoc' utility to
extract documentation from the module files directly. HTML
formatted docs are available at the libxml-perl home page
<http://bitsko.slc.ut.us/libxml-perl/>.
Newer versions of this module can be found on CPAN at
<http://www.perl.com/CPAN/modules/by-module/XML/>. To join the
Perl-XML mailing list, send an email message to
ListManager@ActiveState.com with the following text in the body:
Subscribe Perl-XML
View the Perl XML FAQ at
<http://www.perlxml.com/faq/perl-xml-faq.html>.
Copyright (C) 1998 Ken MacLeod and others
This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
MODULES
The following modules are marked with their release status:
STABLE -- has been in use for a while with few or no outstanding
bugs
BETA -- interfaces are stable but there may still be bugs
ALPHA -- interfaces are changing, there may be lots of bugs, and
there may not be docs available yet
XML::Parser::PerlSAX STABLE
XML::Parser::PerlSAX is a PerlSAX parser using XML::Parser
(which uses James Clark's Expat XML Parser).
XML::Handler::Sample STABLE
XML::Handler::Sample is a PerlSAX handler that simply prints
out the event names as they are parsed by a PerlSAX parser.
It can be used for debugging or as a template for building new
handlers. XML::Handler::Sample contains handlers for all
known parser events.
XML::ESISParser STABLE
XML::ESISParser is a validating PerlSAX parser using James
Clark's `nsgmls' SGML/XML Parser. ESISParser supports both
XML and SGML document instances. Unless you need validation,
you should probably be using XML::Parser::PerlSAX or
XML::Parser.
XML::ESISParser with XML::Grove obsolete the
SGML::SPGroveBuilder and SGML::Grove modules.
XML::Handler::XMLWriter STABLE
A PerlSAX handler for writing readable XML (in contrast to
Canonical XML, for example). XMLWriter is also subclassable
and supports calling start and end methods by element-names
(subclassed from XML::Handler::Subs). XMLWriter is similar to
XML::Parser's Stream style.
XML::Handler::Subs STABLE
A PerlSAX handler base class that calls start and end methods
by element-names. Subs is similar to XML::Parser's Subs
style.
XML::Handler::CanonXMLWriter STABLE
A PerlSAX handler that outputs in Canonical XML
<http://www.jclark.com/xml/canonxml.html>. This module is
generally only used for debugging.
Data::Grove STABLE
Data::Grove::Parent STABLE
Data::Grove::Visitor STABLE
Data::Grove and its helpers provide a base class for deeply
nested or directed graph structures. Used by XML::Grove (and
others soon).
XML::SAX2Perl ALPHA
XML::Perl2SAX ALPHA
SAX2Perl and Perl2SAX are SAX Parser<->DocumentHandler
filters. These modules translate parse events between the
Java/CORBA style SAX methods and PerlSAX style methods.
XML::PatAct::MatchName ALPHA
MatchName is a pattern matching module that can be used with
PatAct action modules. MatchName uses simple element names or
element name lists to match names to actions.
XML::PatAct::ToObjects ALPHA
ToObjects is a PatAct action module. ToObjects can be used to
create application-ready Perl objects from XML instances.
XML::PatAct::Amsterdam ALPHA
Amsterdam is a PatAct action module. Amsterdam can be used to
apply a very simple form of style-sheet to an XML instance by
using ``before'' and ``after'' strings that are output before
and after the contents of elements.
XML::PatAct::PatternTempl BETA
XML::PatAct::ActionTempl BETA
PatternTempl and ActionTempl are template files that
pattern/action module writers can copy to create new modules.
See Creating PatAct Modules for more information.
DOCUMENTS
sax-2.0.html, sax-2.0-adv.html
PerlSAX 2.0 bindings. Maintained by Robin Berjon and the
XML-Perl mailing list.
PerlSAX
This document defines a Perl binding to SAX 1.0. PerlSAX-
based parser modules implement and possibly extend the
interface described in PerlSAX.
UsingPerlSAX
A brief introduction to PerlSAX using the XML::Parser::PerlSAX
module.
UsingPatActModules
Describes how to use pattern/action modules to transform XML
instances.
CreatingPatActModules
A document for module writers who are writing new pattern/
action modules.
modules.xml
modules.xml contains a listing of all Perl XML packages and
their public modules categorized by several topics.
INSTALLATION
In order to use this package you will need Perl version 5.005 or
better. Several other modules may also be required to use some
modules in libxml-perl, including XML::Parser, XML::DOM, and
XML::Grove. These are all available in the XML module directory
on CPAN.
You install libxml-perl, as you would install any perl module
library, by running these commands:
perl Makefile.PL
make
make test
make install
If you want to install a private copy of libxml-perl in your home
directory, then you should try to produce the initial Makefile
with something like this command:
perl Makefile.PL PREFIX=~/perl