libxml-perl-0.08/0040755000076400007640000000000007745275112012032 5ustar kenkenlibxml-perl-0.08/lib/0040755000076400007640000000000007745275112012600 5ustar kenkenlibxml-perl-0.08/lib/XML/0040755000076400007640000000000007745275112013240 5ustar kenkenlibxml-perl-0.08/lib/XML/Handler/0040755000076400007640000000000007745275112014615 5ustar kenkenlibxml-perl-0.08/lib/XML/Handler/Subs.pm0100644000076400007640000000766607745275111016102 0ustar kenken# # Copyright (C) 1999 Ken MacLeod # XML::Handler::XMLWriter is free software; you can redistribute it and/or # modify it under the same terms as Perl itself. # # $Id: Subs.pm,v 1.2 1999/12/22 21:15:00 kmacleod Exp $ # use strict; package XML::Handler::Subs; use UNIVERSAL; use vars qw{ $VERSION }; # will be substituted by make-rel script $VERSION = "0.08"; sub new { my $type = shift; my $self = ($#_ == 0) ? { %{ (shift) } } : { @_ }; return bless $self, $type; } sub start_document { my ($self, $document) = @_; $self->{Names} = []; $self->{Nodes} = []; } sub end_document { my ($self, $document) = @_; delete $self->{Names}; delete $self->{Nodes}; return(); } sub start_element { my ($self, $element) = @_; push @{$self->{Names}}, $element->{Name}; push @{$self->{Nodes}}, $element; my $el_name = "s_" . $element->{Name}; $el_name =~ s/[^a-zA-Z0-9_]/_/g; if ($self->can($el_name)) { $self->$el_name($element); return 1; } return 0; } sub end_element { my ($self, $element) = @_; my $called_sub = 0; my $el_name = "e_" . 
$element->{Name}; $el_name =~ s/[^a-zA-Z0-9_]/_/g; if ($self->can(${el_name})) { $self->$el_name($element); $called_sub = 1; } pop @{$self->{Names}}; pop @{$self->{Nodes}}; return $called_sub; } sub in_element { my ($self, $name) = @_; return ($self->{Names}[-1] eq $name); } sub within_element { my ($self, $name) = @_; my $count = 0; foreach my $el_name (@{$self->{Names}}) { $count ++ if ($el_name eq $name); } return $count; } 1; __END__ =head1 NAME XML::Handler::Subs - a PerlSAX handler base class for calling user-defined subs =head1 SYNOPSIS use XML::Handler::Subs; package MyHandlers; use vars qw{ @ISA }; sub s_NAME { my ($self, $element) = @_ }; sub e_NAME { my ($self, $element) = @_ }; $self->{Names}; # an array of names $self->{Nodes}; # an array of $element nodes $handler = MyHandlers->new(); $self->in_element($name); $self->within_element($name); =head1 DESCRIPTION C is a base class for PerlSAX handlers. C is subclassed to implement complete behavior and to add element-specific handling. Each time an element starts, a method by that name prefixed with `s_' is called with the element to be processed. Each time an element ends, a method with that name prefixed with `e_' is called. Any special characters in the element name are replaced by underscores. Subclassing XML::Handler::Subs in this way is similar to XML::Parser's Subs style. XML::Handler::Subs maintains a stack of element names, `C<$self->{Names}', and a stack of element nodes, `C<$self->{Nodes}>' that can be used by subclasses. The current element is pushed on the stacks before calling an element-name start method and popped off the stacks after calling the element-name end method. The `C' and `C' calls use these stacks. If the subclass implements `C', `C', `C', and `C', be sure to use `C' to call the the superclass methods also. See perlobj(1) for details on SUPER::. `C' and `C' return 1 if an element-name method is called, they return 0 if no method was called. 
XML::Handler::Subs does not implement any other PerlSAX handlers. XML::Handler::Subs supports the following methods: =over 4 =item new( I ) A basic `C' method. `C' takes a list of key, value pairs or a hash and creates and returns a hash with those options; the hash is blessed into the subclass. =item in_element($name) Returns true if `C<$name>' is equal to the name of the innermost currently opened element. =item within_element($name) Returns the number of times the `C<$name>' appears in Names. =back =head1 AUTHOR Ken MacLeod, ken@bitsko.slc.ut.us =head1 SEE ALSO perl(1), PerlSAX.pod(3) =cut libxml-perl-0.08/lib/XML/Handler/CanonXMLWriter.pm0100644000076400007640000000707407745275111017773 0ustar kenken# # Copyright (C) 1998, 1999 Ken MacLeod # XML::Handler::CanonXMLWriter is free software; you can redistribute # it and/or modify it under the same terms as Perl itself. # # $Id: CanonXMLWriter.pm,v 1.2 1999/12/22 21:15:00 kmacleod Exp $ # use strict; package XML::Handler::CanonXMLWriter; use vars qw{ $VERSION %char_entities }; # will be substituted by make-rel script $VERSION = "0.08"; %char_entities = ( "\x09" => ' ', "\x0a" => ' ', "\x0d" => ' ', '&' => '&', '<' => '<', '>' => '>', '"' => '"', ); sub new { my ($class, %args) = @_; my $self = \%args; return bless $self, $class; } sub start_document { my $self = shift; my $document = shift; $self->{'_text_array'} = []; } sub end_document { my $self = shift; my $document = shift; if (defined $self->{IOHandle}) { return (); } else { my $text = join ('', @{$self->{'_text_array'}}); undef $self->{'_text_array'}; return $text; } } sub start_element { my $self = shift; my $element = shift; $self->_print('<' . $element->{Name}); my $key; my $attrs = $element->{Attributes}; foreach $key (sort keys %$attrs) { $self->_print(" $key=\"" . $self->_escape($attrs->{$key}) . '"'); } $self->_print('>'); } sub end_element { my $self = shift; my $element = shift; $self->_print('{Name} . 
'>'); } sub characters { my $self = shift; my $characters = shift; $self->_print($self->_escape($characters->{Data})); } sub ignorable_whitespace { my $self = shift; my $characters = shift; $self->_print($self->_escape($characters->{Data})); } sub processing_instruction { my $self = shift; my $pi = shift; $self->_print('{Target} . ' ' . $pi->{Data} . '?>'); } sub entity { # entities don't occur in text return (); } sub comment { my $self = shift; my $comment = shift; if ($self->{PrintComments}) { $self->_print(''); } else { return (); } } sub _print { my $self = shift; my $string = shift; if (defined $self->{IOHandle}) { $self->{IOHandle}->print($string); return (); } else { push @{$self->{'_text_array'}}, $string; } } sub _escape { my $self = shift; my $string = shift; $string =~ s/([\x09\x0a\x0d&<>"])/$char_entities{$1}/ge; return $string; } 1; __END__ =head1 NAME XML::Handler::CanonXMLWriter - output XML in canonical XML format =head1 SYNOPSIS use XML::Handler::CanonXMLWriter; $writer = XML::Handler::CanonXMLWriter OPTIONS; $parser->parse(Handler => $writer); =head1 DESCRIPTION C is a PerlSAX handler that will return a string or write a stream of canonical XML for an XML instance and it's content. C objects hold the options used for writing the XML objects. Options can be supplied when the the object is created, $writer = new XML::Handler::CanonXMLWriter PrintComments => 1; or modified at any time before calling the parser's `C' method: $writer->{PrintComments} = 0; =head1 OPTIONS =over 4 =item IOHandle IOHandle contains a handle for writing the canonical XML to. If an IOHandle is not provided, the canonical XML string will be returned from `C'. =item PrintComments By default comments are not written to the output. Setting comment to a true value will include comments in the output. 
=back =head1 AUTHOR Ken MacLeod, ken@bitsko.slc.ut.us =head1 SEE ALSO perl(1), PerlSAX James Clark's Canonical XML definition =cut libxml-perl-0.08/lib/XML/Handler/Sample.pm0100644000076400007640000000615107745275111016373 0ustar kenken# This template file is in the Public Domain. # You may do anything you want with this file. # # $Id: Sample.pm,v 1.4 1999/08/16 16:04:03 kmacleod Exp $ # package XML::Handler::Sample; use vars qw{ $AUTOLOAD }; sub new { my $type = shift; my $self = ( $#_ == 0 ) ? shift : { @_ }; return bless $self, $type; } # Basic PerlSAX sub start_document { print "start_document\n"; } sub end_document { print "end_document\n"; } sub start_element { print "start_element\n"; } sub end_element { print "end_element\n"; } sub characters { print "characters\n"; } sub processing_instruction { print "processing_instruction\n"; } sub ignorable_whitespace { print "ignorable_whitespace\n"; } # Additional expat callbacks in XML::Parser::PerlSAX sub comment { print "comment\n"; } sub notation_decl { print "notation_decl\n"; } sub unparsed_entity_decl { print "unparsed_entity_decl\n"; } sub entity_decl { print "entity_decl\n"; } sub element_decl { print "element_decl\n"; } sub doctype_decl { print "doctype_decl\n"; } sub xml_decl { print "xml_decl\n"; } # Additional SP/nsgmls callbacks in XML::ESISParser sub start_subdoc { print "start_subdoc\n"; } sub end_subdoc { print "start_subdoc\n"; } sub appinfo { print "appinfo\n"; } sub internal_entity_ref { print "sdata\n"; } sub external_entity_ref { print "sdata\n"; } sub record_end { print "record_end\n"; } sub internal_entity_decl { print "internal_entity_decl\n"; } sub external_entity_decl { print "external_entity_decl\n"; } sub external_sgml_entity_decl { print "external_sgml_entity_decl\n"; } sub subdoc_entity_decl { print "subdoc_entity_decl\n"; } sub notation { print "notation\n"; } sub error { print "error\n"; } sub conforming { print "conforming\n"; } # Others sub AUTOLOAD { my $self = shift; my $method = 
$AUTOLOAD; $method =~ s/.*:://; return if $method eq 'DESTROY'; print "UNRECOGNIZED $method\n"; } 1; __END__ =head1 NAME XML::Handler::Sample - a trivial PerlSAX handler =head1 SYNOPSIS use XML::Parser::PerlSAX; use XML::Handler::Sample; $my_handler = XML::Handler::Sample->new; XML::Parser::PerlSAX->new->parse(Source => { SystemId => 'REC-xml-19980210.xml' }, Handler => $my_handler); =head1 DESCRIPTION C is a trivial PerlSAX handler that prints out the name of each event it receives. The source for C lists all the currently known PerlSAX handler methods. C is intended for Perl module authors who wish to look at example PerlSAX handler modules. C can be used as a template for writing your own PerlSAX handler modules. C is in the Public Domain and can be used for any purpose without restriction. =head1 AUTHOR Ken MacLeod, ken@bitsko.slc.ut.us =head1 SEE ALSO perl(1), PerlSAX.pod(3) =cut libxml-perl-0.08/lib/XML/Handler/XMLWriter.pm0100644000076400007640000001766007745275111017016 0ustar kenken# # Copyright (C) 1999 Ken MacLeod # Portions derived from code in XML::Writer by David Megginson # XML::Handler::XMLWriter is free software; you can redistribute it and/or # modify it under the same terms as Perl itself. # # $Id: XMLWriter.pm,v 1.2 1999/12/22 21:15:00 kmacleod Exp $ # use strict; package XML::Handler::XMLWriter; use XML::Handler::Subs; use vars qw{ $VERSION @ISA $escapes }; # will be substituted by make-rel script $VERSION = "0.08"; @ISA = qw{ XML::Handler::Subs }; $escapes = { '&' => '&', '<' => '<', '>' => '>', '"' => '"' }; sub start_document { my ($self, $document) = @_; $self->SUPER::start_document($document); # create a temporary Output_ in case we're creating a standard # output file that we'll delete later. 
if (!$self->{AsString} && !defined($self->{Output})) { require IO::File; import IO::File; $self->{Output_} = new IO::File(">-"); } elsif (defined($self->{Output})) { $self->{Output_} = $self->{Output}; } if ($self->{AsString}) { $self->{Strings} = []; } $self->print("\n"); # FIXME support Doctype declarations } sub end_document { my ($self, $document) = @_; if (defined($self->{Output_})) { $self->{Output_}->print("\n"); delete $self->{Output_}; } my $string = undef; if (defined($self->{AsString})) { push @{$self->{Strings}}, "\n"; $string = join('', @{$self->{Strings}}); delete $self->{Strings}; } $self->SUPER::end_document($document); return($string); } sub start_element { my ($self, $element) = @_; if ($self->SUPER::start_element($element) == 0) { $self->print_start_element($element); } } sub print_start_element { my ($self, $element) = @_; my $output = "<$element->{Name}"; if (defined($element->{Attributes})) { foreach my $name (sort keys %{$element->{Attributes}}) { my $esc_value = $element->{Attributes}{$name}; $esc_value =~ s/([\&\<\>\"])/$escapes->{$1}/ge; $output .= " $name=\"$esc_value\""; } } if ($self->{Newlines}) { $output .= "\n"; } $output .= ">"; $self->print($output); } sub end_element { my ($self, $element) = @_; if ($self->SUPER::end_element($element) == 0) { $self->print_end_element($element); } } sub print_end_element { my ($self, $element) = @_; my $output = "{Name}" . ($self->{Newlines} ? "\n" : "") . ">"; $self->print($output); } sub characters { my ($self, $characters) = @_; my $output = $characters->{Data}; $output =~ s/([\&\<\>])/$escapes->{$1}/ge; $self->print($output); } sub processing_instruction { my ($self, $pi) = @_; my $nl = ($#{$self->{Names}} == -1) ? 
"\n" : ""; my $output; if ($self->{IsSGML}) { $output = "{Data}>\n"; } else { if ($pi->{Data}) { $output = "{Target} $pi->{Data}?>$nl"; } else { $output = "{Target}?>$nl"; } } $self->print($output); } sub ignorable_whitespace { my ($self, $whitespace) = @_; $self->print($whitespace->{Data}); } sub comment { my ($self, $comment) = @_; my $nl = ($#{$self->{Names}} == -1) ? "\n" : ""; my $output = "$nl"; $self->print($output); } sub print { my ($self, $output) = @_; $self->{Output_}->print($output) if (defined($self->{Output_})); push(@{$self->{Strings}}, $output) if (defined($self->{AsString})); } 1; __END__ =head1 NAME XML::Handler::XMLWriter - a PerlSAX handler for writing readable XML =head1 SYNOPSIS use XML::Parser::PerlSAX; use XML::Handler::XMLWriter; $my_handler = XML::Handler::XMLWriter->new( I ); XML::Parser::PerlSAX->new->parse(Source => { SystemId => 'REC-xml-19980210.xml' }, Handler => $my_handler); =head1 DESCRIPTION C is a PerlSAX handler for writing readable XML (in contrast to Canonical XML, for example). XML::Handler::XMLWriter can be used with a parser to reformat XML, with XML::DOM or XML::Grove to write out XML, or with other PerlSAX modules that generate events. C is intended to be used with PerlSAX event generators and does not perform any checking itself (for example, matching start and end element events). If you want to generate XML directly from your Perl code, use the XML::Writer module. XML::Writer has an easy to use interface and performs many checks to make sure that the XML you generate is well-formed. C is a subclass of C. C can be further subclassed to alter it's behavior or to add element-specific handling. In the subclass, each time an element starts, a method by that name prefixed with `s_' is called with the element to be processed. Each time an element ends, a method with that name prefixed with `e_' is called. Any special characters in the element name are replaced by underscores. 
If there isn't a start or end method for an element, the default action is to write the start or end tag. Start and end methods can use the `C' and `C' methods to print start or end tags. Subclasses can call the `C' method to write additional output. Subclassing XML::Handler::XMLWriter in this way is similar to XML::Parser's Stream style. XML::Handler::Subs maintains a stack of element names, `C<$self->{Names}', and a stack of element nodes, `C<$self->{Nodes}>' that can be used by subclasses. The current element is pushed on the stacks before calling an element-name start method and popped off the stacks after calling the element-name end method. See XML::Handler::Subs for additional methods. In addition to the standard PerlSAX handler methods (see PerlSAX for descriptions), XML::Handler::XMLWriter supports the following methods: =over 4 =item new( I ) Creates and returns a new instance of XML::Handler::XMLWriter with the given I. Options may be changed at any time by modifying them directly in the hash returned. I can be a list of key, value pairs or a hash. The following I are supported: =over 4 =item Output An IO::Handle or one of it's subclasses (such as IO::File), if this parameter is not present and the AsString option is not used, the module will write to standard output. =item AsString Return the generated XML as a string from the `C' method of the PerlSAX event generator. =item Newlines A true or false value; if this parameter is present and its value is true, then the module will insert an extra newline before the closing delimiter of start, end, and empty tags to guarantee that the document does not end up as a single, long line. If the paramter is not present, the module will not insert the newlines. =item IsSGML A true or false value; if this parameter is present and its value is true, then the module will generate SGML rather than XML. =back =item print_start_element($element) Print a start tag for `C<$element>'. 
This is the default action for the PerlSAX `C' handler, but subclasses may use this if they define a start method for an element. =item print_end_element($element) Prints an end tag for `C<$element>'. This is the default action for the PerlSAX `C' handler, but subclasses may use this if they define a start method for an element. =item print($output) Write `C<$output>' to Output and/or append it to the string to be returned. Subclasses may use this to write additional output. =back =head1 TODO =over 4 =item * An Elements option that provides finer control over newlines than the Newlines option, where you can choose before and after newline for element start and end tags. Inspired by the Python XMLWriter. =item * Support Doctype and XML declarations. =back =head1 AUTHOR Ken MacLeod, ken@bitsko.slc.ut.us This module is partially derived from XML::Writer by David Megginson. =head1 SEE ALSO perl(1), PerlSAX.pod(3) =cut libxml-perl-0.08/lib/XML/PatAct/0040755000076400007640000000000007745275112014414 5ustar kenkenlibxml-perl-0.08/lib/XML/PatAct/ActionTempl.pm0100644000076400007640000000614307745275111017171 0ustar kenken# This template file is in the Public Domain. # You may do anything you want with this file. # # $Id: ActionTempl.pm,v 1.2 1999/08/16 16:04:03 kmacleod Exp $ # # replace all occurrences of ACTION with the name of your module! use strict; use UNIVERSAL; package XML::PatAct::ACTION; sub new { my $type = shift; my $self = ($#_ == 0) ? 
{ %{ (shift) } } : { @_ }; bless $self, $type; my $usage = <<'EOF'; usage: XML::PatAct::ACTION->new( Matcher => $matcher, Patterns => $patterns ); EOF die "No Matcher specified\n$usage\n" if !defined $self->{Matcher}; die "No Patterns specified\n$usage\n" if !defined $self->{Patterns}; # perform additional initialization here return $self; } sub start_document { my ($self, $document) = @_; # initialize the pattern module at the start of a document $self->{Matcher}->initialize($self); # create empty name and node lists for passing to `match()' $self->{Names} = [ ]; $self->{Nodes} = [ ]; # Knowing that a source is a tree can be useful information $self->{SourceIsGrove} = UNIVERSAL::isa($document, 'Data::Grove'); } sub end_document { my ($self, $document) = @_; # notify the pattern module that we're done $self->{Matcher}->finalize(); my $value; # perform any finalization actions, use $value to return a result # from calling `parse()' # release all the info that is just used during event handling $self->{Matcher} = $self->{Names} = $self->{Nodes} = undef; $self->{SourceIsGrove} = undef; return $value; } sub start_element { my ($self, $element) = @_; push @{$self->{Names}}, $element->{Name}; push @{$self->{Nodes}}, $element; my $index = $self->{Matcher}->match($element, $self->{Names}, $self->{Nodes}); # use $index to retrieve an action for this element } sub end_element { my ($self, $end_element) = @_; my $name = pop @{$self->{Names}}; my $element = pop @{$self->{Nodes}}; # perform any finishing steps at the end of an element } sub characters { my ($self, $characters) = @_; } sub processing_instruction { my ($self, $pi) = @_; } sub ignorable_whitespace { my ($self, $characters) = @_; } 1; __END__ =head1 NAME XML::PatAct::ACTION - An action module for =head1 SYNOPSIS use XML::PatAct::ACTION; my $patterns = [ PATTERN => ACTION, ... 
]; my $matcher = XML::PatAct::ACTION->new(Patterns => $patterns, Matcher => $matcher ); =head1 DESCRIPTION XML::PatAct::ACTION is a PerlSAX handler for applying pattern-action lists to XML parses or trees. XML::PatAct::ACTION ... New XML::PatAct::ACTION instances are creating by calling `new()'. A Parameters can be passed as a list of key, value pairs or a hash. Patterns and Matcher options are required. Patterns is the pattern-action list to apply. Matcher is an instance of the pattern or query matching module. DESCRIBE THE FORMAT OF YOUR ACTIONS HERE =head1 AUTHOR This template file was written by Ken MacLeod, ken@bitsko.slc.ut.us =head1 SEE ALSO perl(1) ``Using PatAct Modules'' and ``Creating PatAct Modules'' in libxml-perl. =cut libxml-perl-0.08/lib/XML/PatAct/PatternTempl.pm0100644000076400007640000000301507745275111017364 0ustar kenken# This template file is in the Public Domain. # You may do anything you want with this file. # # $Id: PatternTempl.pm,v 1.2 1999/08/16 16:04:03 kmacleod Exp $ # # replace all occurrences of PATTERN with the name of your module! use strict; package XML::PatAct::PATTERN; sub new { my $type = shift; my $self = ($#_ == 0) ? { %{ (shift) } } : { @_ }; # perform any one-time initializations return bless $self, $type; } sub initialize { my ($self, $driver) = @_; $self->{Driver} = $driver; # perform initializations for each XML instance } sub finalize { my $self = shift; # clean up any state information $self->{Driver} = undef; } sub match { my ($self, $element, $names, $nodes) = @_; # Use the Patterns list to match a pattern return undef; } 1; __END__ =head1 NAME XML::PatAct::PATTERN - A pattern module for =head1 SYNOPSIS use XML::PatAct::PATTERN; my $patterns = [ PATTERN => ACTION, ... ] my $matcher = XML::PatAct::PATTERN->new( Patterns => $patterns ); =head1 DESCRIPTION XML::PatAct::PATTERN is a pattern module for use with PatAct action modules for applying pattern-action lists to XML parses or trees. XML::PatAct::PATTERN ... 
Parameters can be passed as a list of key, value pairs or a hash. DESCRIBE THE FORMAT OR LANGUAGE OF YOUR PATTERNS HERE =head1 AUTHOR This template file was written by Ken MacLeod, ken@bitsko.slc.ut.us =head1 SEE ALSO perl(1) ``Using PatAct Modules'' and ``Creating PatAct Modules'' in libxml-perl. =cut libxml-perl-0.08/lib/XML/PatAct/Amsterdam.pm0100644000076400007640000001262707745275111016673 0ustar kenken# # Copyright (C) 1999 Ken MacLeod # XML::PatAct::Amsterdam is free software; you can redistribute it and/or # modify it under the same terms as Perl itself. # # $Id: Amsterdam.pm,v 1.4 1999/12/22 21:15:00 kmacleod Exp $ # use strict; use UNIVERSAL; package XML::PatAct::Amsterdam; use vars qw{ $VERSION }; # will be substituted by make-rel script $VERSION = "0.08"; sub new { my $type = shift; my $self = ($#_ == 0) ? { %{ (shift) } } : { @_ }; bless $self, $type; my $usage = <<'EOF'; usage: XML::PatAct::Amsterdam->new( Matcher => $matcher, Patterns => $patterns ); EOF die "No Matcher specified\n$usage\n" if !defined $self->{Matcher}; die "No Patterns specified\n$usage\n" if !defined $self->{Patterns}; # perform additional initialization here return $self; } sub start_document { my ($self, $document) = @_; # initialize the pattern module at the start of a document $self->{Matcher}->initialize($self); # create empty name and node lists for passing to `match()' $self->{Names} = [ ]; $self->{Nodes} = [ ]; $self->{ActionStack} = [ ]; # create a temporary Output_ in case we're creating a standard # output file that we'll delete later. 
if (!$self->{AsString} && !defined($self->{Output})) { require IO::File; import IO::File; $self->{Output_} = new IO::File(">-"); } elsif (defined($self->{Output})) { $self->{Output_} = $self->{Output}; } if ($self->{AsString}) { $self->{Strings} = []; } } sub end_document { my ($self, $document) = @_; # notify the pattern module that we're done $self->{Matcher}->finalize(); if (defined($self->{Output_})) { delete $self->{Output_}; } my $string = undef; if (defined($self->{AsString})) { $string = join('', @{$self->{Strings}}); delete $self->{Strings}; } # release all the info that is just used during event handling $self->{Matcher} = $self->{Names} = $self->{Nodes} = undef; $self->{ActionStack} = undef; return($string); } sub start_element { my ($self, $element) = @_; push @{$self->{Names}}, $element->{Name}; push @{$self->{Nodes}}, $element; my $index = $self->{Matcher}->match($element, $self->{Names}, $self->{Nodes}); my $action; if (!defined $index) { $action = undef; } else { $action = $self->{Patterns}[$index * 2 + 1]; } push @{$self->{ActionStack}}, $action; if (defined($action)) { my $before = $action->{Before}; if (defined $before) { my $atts = $element->{Attributes}; $before =~ s/\[([\w.:]+)\]/ ($1 eq '_element') ? $element->{Name} : $atts->{$1} /eg; $self->print($before); } } } sub end_element { my ($self, $end_element) = @_; my $name = pop @{$self->{Names}}; my $element = pop @{$self->{Nodes}}; my $action = pop @{$self->{ActionStack}}; if (defined($action)) { my $after = $action->{After}; if (defined $after) { my $atts = $element->{Attributes}; $after =~ s/\[([\w.:]+)\]/ ($1 eq '_element') ? 
$element->{Name} : $atts->{$1} /eg; $self->print($after); } } } sub characters { my ($self, $characters) = @_; $self->print($characters->{Data}); } sub print { my ($self, $output) = @_; $self->{Output_}->print($output) if (defined($self->{Output_})); push(@{$self->{Strings}}, $output) if (defined($self->{AsString})); } 1; __END__ =head1 NAME XML::PatAct::Amsterdam - An action module for simplistic style-sheets =head1 SYNOPSIS use XML::PatAct::Amsterdam; my $patterns = [ PATTERN => { Before => 'before', After => 'after' }, ... ]; my $matcher = XML::PatAct::Amsterdam->new( I ); =head1 DESCRIPTION XML::PatAct::Amsterdam is a PerlSAX handler for applying pattern-action lists to XML parses or trees. XML::PatAct::Amsterdam applies a very simple style sheet to an instance and outputs the result. Amsterdam gets it's name from the Amsterdam SGML Parser (ASP) which inspired this module. CAUTION: Amsterdam is a very simple style module, you will run into it's limitations quickly with even moderately complex XML instances, be aware of and prepared to switch to more complete style modules. New XML::PatAct::Amsterdam instances are creating by calling `new()'. Parameters can be passed as a list of key, value pairs or a hash. A Patterns and Matcher options are required. The following I are supported: =over 4 =item Patterns The pattern-action list to apply. The list is an anonymous array of pattern, action pairs. Each action in the list contains either or both a Before and an After string to copy to the output before and after processing an XML element. The Before and After strings may contain attribute names enclosed in square brackets (`C<[>' I `C<]>'), these are replaced with the value of the attribute with that name. The special I `C<_element>' will be replaced with the element's name. =item Matcher An instance of the pattern or query matching module. 
=item Output An IO::Handle or one of it's subclasses (such as IO::File), if this parameter is not present and the AsString option is not used, the module will write to standard output. =item AsString Return the generated output as a string from the `C' method of the PerlSAX event generator. =back =head1 AUTHOR Ken MacLeod, ken@bitsko.slc.ut.us =head1 SEE ALSO perl(1) ``Using PatAct Modules'' and ``Creating PatAct Modules'' in libxml-perl. =cut libxml-perl-0.08/lib/XML/PatAct/MatchName.pm0100644000076400007640000000427507745275111016613 0ustar kenken# # Copyright (C) 1999 Ken MacLeod # XML::PatAct::MatchName is free software; you can redistribute it and/or # modify it under the same terms as Perl itself. # # $Id: MatchName.pm,v 1.3 1999/12/22 21:15:00 kmacleod Exp $ # use strict; package XML::PatAct::MatchName; use vars qw{ $VERSION }; # will be substituted by make-rel script $VERSION = "0.08"; sub new { my $type = shift; my $self = ($#_ == 0) ? { %{ (shift) } } : { @_ }; return bless $self, $type; } # This is functionally equivalent to PerlSAX `start_document()' sub initialize { my ($self, $driver) = @_; $self->{Driver} = $driver; } # This is functionally equivalent to PerlSAX `end_document()' sub finalize { my $self = shift; $self->{Driver} = undef; } # This is functionally equivalent to a PerlSAX `start_element()' sub match { my ($self, $element, $names, $nodes) = @_; my $names_path = '/' . join('/', @$names); my $patterns = $self->{Patterns}; my $ii = 0; while ($ii <= $#$patterns) { my $pattern = $patterns->[$ii]; if ($names_path =~ m|/$pattern$|) { return $ii / 2; } $ii += 2; } return undef; } 1; __END__ =head1 NAME XML::PatAct::MatchName - A pattern module for matching element names =head1 SYNOPSIS use XML::PatAct::MatchName; my $matcher = XML::PatAct::MatchName->new(); my $patterns = [ 'foo' => ACTION, 'bar/foo' => ACTION, ... 
];

=head1 DESCRIPTION

XML::PatAct::MatchName is a pattern module for use with PatAct drivers
for applying pattern-action lists to XML parses or trees.
XML::PatAct::MatchName is a simple pattern module that uses just
element names to match on.  If multiple names are supplied separated
by `C' characters, then all of the parent element names must match as
well.

The order of patterns in the list is not significant.
XML::PatAct::MatchName will use the most specific match.  Using the
synopsis above as an example, if you have an element `C', `C' will
match if `C' is in an element `C', otherwise just the pattern with `C'
will match.

=head1 AUTHOR

Ken MacLeod, ken@bitsko.slc.ut.us

=head1 SEE ALSO

perl(1)

``Using PatAct Modules'' and ``Creating PatAct Modules'' in libxml-perl.

=cut libxml-perl-0.08/lib/XML/PatAct/ToObjects.pm0100644000076400007640000003621207745275111016646 0ustar kenken
#
# Copyright (C) 1999 Ken MacLeod
# XML::PatAct::ToObjects is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# $Id: ToObjects.pm,v 1.5 1999/12/22 21:15:00 kmacleod Exp $
#

# The original XML::Grove::ToObjects actually generated and compiled a
# sub for matching actions, possibly a performance improvement of three
# or four times over all the comparisons made in start_element() and
# end_element().

use strict;

use UNIVERSAL;

package XML::PatAct::ToObjects;

use vars qw{ $VERSION $name_re };

# will be substituted by make-rel script
$VERSION = "0.08";

# FIXME I doubt this is a correct Perl RE for productions [4] and
# [5] in the XML 1.0 specification, especially considering Unicode chars
$name_re = '[A-Za-z_:][A-Za-z0-9._:-]*';

# new(Matcher => $matcher, Patterns => $patterns, ...)
# new(\%options)
#
# Builds a handler from a list of options or a single hash reference
# (which is copied, not blessed in place).  Dies with a usage message
# unless both `Matcher' and `Patterns' are supplied.  `Patterns' is a
# flat list of PATTERN => ACTION pairs; every ACTION that is an array
# reference is parsed into an action record stored in `Actions'.
sub new {
    my $type = shift;
    my $self = ($#_ == 0) ? { %{ (shift) } } : { @_ };

    bless $self, $type;

    my $usage = <<'EOF';
usage: XML::PatAct::ToObjects->new( Matcher => $matcher,
                                    Patterns => $patterns );
EOF

    die "No Matcher specified\n$usage\n"
        if !defined $self->{Matcher};
    die "No Patterns specified\n$usage\n"
        if !defined $self->{Patterns};

    # Parse action items.  One slot is stored per PATTERN => ACTION
    # pair so that `Actions' stays aligned with the pair index used in
    # start_element(); Perl code fragments are not implemented yet and
    # get an undefined slot, which start_element() treats as "no
    # action".
    $self->{Actions} = [ ];
    my $patterns = $self->{Patterns};
    my $ii = 1;
    while ($ii <= $#$patterns) {
        if (ref $patterns->[$ii]) {
            push @{$self->{Actions}}, $self->_parse_action($patterns->[$ii]);
        } else {
            # is a code fragment
            push @{$self->{Actions}}, undef;
        }
        $ii += 2;
    }

    # _parse_action() sets `GroveBuilder' to a true flag when any
    # action needs a grove; replace the flag with a real builder.
    if (defined $self->{GroveBuilder}) {
        require XML::Grove::Builder;
        XML::Grove::Builder->import;
        $self->{GroveBuilder} = XML::Grove::Builder->new();
    }

    return $self;
}

# Resets all per-parse state and lets the matcher initialize itself.
# `CharacterDataType' defaults to Data::Grove::Characters, which is
# loaded on demand.
sub start_document {
    my ($self, $document) = @_;

    $self->{Matcher}->initialize($self);

    # The bottom entry collects the top-level objects of the parse.
    $self->{Parents} = [ { Contents => [ ] } ];
    $self->{ActionStack} = [ ];
    $self->{States} = [ 'normal' ];
    $self->{Document} = $document;
    $self->{Names} = [ ];
    $self->{Nodes} = [ ];
    $self->{Data} = undef;
    $self->{SourceIsGrove} = UNIVERSAL::isa($document, 'Data::Grove');

    if (!defined $self->{CharacterDataType}) {
        require Data::Grove;
        Data::Grove->import;
        $self->{CharacterDataType} = 'Data::Grove::Characters';
    }
}

# Finalizes the matcher and returns the array reference of top-level
# objects gathered during the parse.
sub end_document {
    my ($self, $document) = @_;

    $self->{Matcher}->finalize();

    # FIXME check to make sure no other fields were assigned to
    my $value = $self->{Parents}[0]{Contents};

    # release all the info that is just used during event handling
    # NOTE(review): this also drops `Matcher', so the handler cannot be
    # reused for a second parse -- confirm whether that is intended.
    $self->{Matcher} = $self->{Parents} = $self->{ActionStack} = undef;
    $self->{States} = $self->{Document} = $self->{Names} = undef;
    $self->{Nodes} = $self->{Data} = $self->{SourceIsGrove} = undef;

    return $value;
}

# Looks up the action for the element and prepares whatever that
# action needs (string buffer, grove builder, new parent object).
# Every call pushes exactly one entry on `Names', `Nodes', `States'
# and `ActionStack'; end_element() pops them again.
sub start_element {
    my ($self, $element) = @_;

    push @{$self->{Names}}, $element->{Name};
    push @{$self->{Nodes}}, $element;

    my $index = $self->{Matcher}->match($element,
                                        $self->{Names},
                                        $self->{Nodes});

    # a new element starts out in the state of its parent
    my $state = $self->{States}[-1];
    push @{$self->{States}}, $state;

    # Inside an `as-string', `as-grove' or `discarding' region patterns
    # are not applied.  Recording "no action" here keeps end_element()
    # from running the matched action of a nested element, which would
    # read buffers and pop parents that were never set up for it.
    my $patterns_apply = ($state eq 'normal') || ($state eq 'pcdata');

    my $action;
    if ($patterns_apply && defined $index) {
        $action = $self->{Actions}[$index];
    }
    push @{$self->{ActionStack}}, $action;

    if (($state eq 'as-grove') and !$self->{SourceIsGrove}) {
        $self->{GroveBuilder}->start_element($element);
    }

    return if !$patterns_apply;

    if (defined($action) and defined($action->{PCData})) {
        $self->{States}[-1] = 'pcdata';
    }

    if (!defined($action) or $action->{Holder}) {
        # ignore this element but continue processing below
        return;
    }

    if ($action->{Ignore} or $action->{FieldValue}) {
        # ignore (discard) this element and its children
        $self->{States}[-1] = 'discarding';
        return;
    }

    if ($action->{AsString}) {
        $self->{Data} = [ ];
        $self->{States}[-1] = 'as-string';
        return;
    }

    if ($action->{AsGrove}) {
        $self->{States}[-1] = 'as-grove';
        if (!$self->{SourceIsGrove}) {
            $self->{GroveBuilder}->start_document( { } );
            $self->{GroveBuilder}->start_element($element);
        }
        return;
    }

    if (defined $action->{Make}) {
        my @args;

        if (defined $element->{Attributes}) {
            if (defined $self->{CopyAttributes}) {
                push @args, %{$element->{Attributes}};
            } elsif ($self->{CopyId}
                     && defined($element->{Attributes}{ID})) {
                # FIXME use code from XML::Grove::IDs
                push (@args, ID => $element->{Attributes}{ID});
            }
        }

        if (defined $action->{Args}) {
            # The `-args' text is Perl source written by the pattern
            # author; _parse_action() rewrote its %{name} references to
            # use the lexicals `$element' and `@args' of this sub, so
            # those two names must not be changed.  Never build
            # patterns from untrusted input.
            eval 'push (@args, (' . $action->{Args} . '))';
            if ($@) {
                warn "$@\nwhile processing pattern/action #$index\n";
            }
        }

        if ($action->{Make} eq 'HASH') {
            push @{$self->{Parents}}, { @args };
        } else {
            # The package is not asked to construct the object (no
            # `new' is called); the hash is blessed directly.
            push (@{$self->{Parents}}, bless ({ @args }, $action->{Make}));
        }

        if ($action->{ContentsAsGrove}) {
            $self->{States}[-1] = 'as-grove';
            if (!$self->{SourceIsGrove}) {
                $self->{GroveBuilder}->start_document( { } );
            }
        }

        return;
    }

    # Place to store all the rest of gathered contents
    push (@{$self->{Parents}}, { } );

    return;
}

# Completes the action recorded by start_element(): computes the value
# the element produced and stores it in the parent object, either in
# the action's field (assigned or pushed) or in the parent's
# `Contents'.
sub end_element {
    my ($self, $end_element) = @_;

    my $name = pop @{$self->{Names}};
    my $element = pop @{$self->{Nodes}};
    my $action = pop @{$self->{ActionStack}};
    my $state = pop @{$self->{States}};

    if ($state eq 'as-grove' and !$self->{SourceIsGrove}) {
        $self->{GroveBuilder}->end_element($end_element);
    }

    if (!defined($action) or $action->{Holder}) {
        return;
    }

    if ($action->{Ignore}) {
        return;
    }

    my $value;
    if ($action->{AsString}) {
        $value = join("", @{$self->{Data}});
    } elsif ($action->{AsGrove}) {
        if ($self->{SourceIsGrove}) {
            $value = $element;
        } else {
            # get just the root element of the document fragment
            $value = $self->{GroveBuilder}->end_document({ })->{Contents}[0];
        }
    } elsif (defined $action->{FieldValue}) {
        $value = $action->{FieldValue};
        # substitute %{attr} references; a missing attribute yields ''
        $value =~ s{%\{($name_re)\}}{
            my $attribute = $element->{Attributes}{$1};
            defined($attribute) ? $attribute : '';
        }ge;
    } elsif (defined $action->{Make}) {
        $value = pop @{$self->{Parents}};
        if ($action->{ContentsAsGrove}) {
            if ($self->{SourceIsGrove}) {
                $value->{Contents} = $element->{Contents};
            } else {
                $value->{Contents}
                    = $self->{GroveBuilder}->end_document({ })->{Contents};
            }
        }
    } else {
        $value = pop(@{$self->{Parents}})->{Contents};
    }

    if ($action->{FieldIsArray}) {
        push @{$self->{Parents}[-1]{$action->{Field}}}, $value;
    } elsif (defined $action->{Field}) {
        $self->{Parents}[-1]{$action->{Field}} = $value;
    } else {
        push @{$self->{Parents}[-1]{Contents}}, $value;
    }

    return;
}

# Routes character data according to the current state: buffered for
# `as-string', forwarded to the grove builder for `as-grove', wrapped
# in a `CharacterDataType' object for `pcdata', and dropped otherwise.
sub characters {
    my ($self, $characters) = @_;

    my $state = $self->{States}[-1];
    if ($state eq 'as-string') {
        push @{$self->{Data}}, $characters->{Data};
    } elsif ($state eq 'as-grove' and !$self->{SourceIsGrove}) {
        $self->{GroveBuilder}->characters($characters);
    } elsif ($state eq 'pcdata') {
        push (@{$self->{Parents}[-1]{Contents}},
              $self->{CharacterDataType}->new(%$characters));
    }

    return;
}

# we ignore processing instructions and ignorable whitespace by not
# defining those functions

###
### private functions
###

# Turns one option list (an array reference such as
# [ qw{ -field Name -as-string } ]) into an action record.  Dies on an
# unknown option.  `-grove' and `-grove-contents' also flag the
# handler as needing a grove builder.
sub _parse_action {
    my $self = shift;
    my $source = shift;

    # Work on a copy: the option list belongs to the caller, who may
    # hand the same patterns to a matcher or to another handler.
    my @options = @$source;

    my $action = {};

    while (@options) {
        my $option = shift @options;

        if ($option eq '-holder') {
            $action->{Holder} = 1;
        } elsif ($option eq '-make') {
            $action->{Make} = shift @options;
        } elsif ($option eq '-args') {
            my $args = shift @options;
            # rewrite %{name} into Perl source that reads the attribute
            # inside start_element()
            $args =~ s/%\{($name_re)\}/(\$element->{Attributes}{'$1'})/g;
            $action->{Args} = $args;
        } elsif ($option eq '-field') {
            $action->{Field} = shift @options;
        } elsif ($option eq '-push-field') {
            $action->{Field} = shift @options;
            $action->{FieldIsArray} = 1;
        } elsif ($option eq '-as-string') {
            $action->{AsString} = 1;
        } elsif ($option eq '-value') {
            $action->{FieldValue} = shift @options;
        } elsif ($option eq '-grove') {
            $self->{GroveBuilder} = 1;
            $action->{AsGrove} = 1;
        } elsif ($option eq '-grove-contents') {
            $self->{GroveBuilder} = 1;
            $action->{ContentsAsGrove} = 1;
        } elsif ($option eq '-ignore') {
            $action->{Ignore} = 1;
        } elsif ($option eq '-pcdata') {
            $action->{PCData} = 1;
        } else {
            die "$option: undefined option\n";
        }
    }

    return $action;
}

1;

__END__

=head1 NAME

XML::PatAct::ToObjects - An action module for creating Perl objects

=head1 SYNOPSIS

 use XML::PatAct::ToObjects;

 my $patterns = [ PATTERN => [ OPTIONS ],
                  PATTERN => "PERL-CODE",
                  ... ];

 my $handler = XML::PatAct::ToObjects->new( Patterns => $patterns,
                                            Matcher => $matcher,
                                            CopyId => 1,
                                            CopyAttributes => 1 );

=head1 DESCRIPTION

XML::PatAct::ToObjects is a PerlSAX handler for applying
pattern-action lists to XML parses or trees.  XML::PatAct::ToObjects
creates Perl objects of the types and contents of the action items you
define.

New XML::PatAct::ToObjects instances are created by calling `new()'.
Parameters can be passed as a list of key, value pairs or a hash.
`new()' requires the Patterns and Matcher parameters, the rest are
optional:

=over 4

=item Patterns

The pattern-action list to apply.

=item Matcher

An instance of the pattern or query matching module.

=item CopyId

Causes the `ID' attribute, if any, in a source XML element to be
copied to an `ID' attribute in newly created objects.  Note that IDs
may be lost if no pattern matches that element or an object is not
created (C<-make>) for that element.

=item CopyAttributes

Causes all attributes of the element to be copied to the newly created
objects.

=back

Each action can either be a list of options defined below or a string
containing a fragment of Perl code.  If the action is a string of Perl
code then some simple substitutions are made as described further
below.

Options that can be used in an action item containing an option-list:

=over 4

=item B<-holder>

Ignore this element, but continue processing its children (compare to
B<-ignore>).  C<-pcdata> may be used with this option.

=item B<-ignore>

Ignore (discard) this element and its children (compare to
B<-holder>).

=item B<-pcdata>

Character data in this element should be copied to the C<Contents>
field.

=item B<-make> I<PACKAGE>

Create an object blessed into I<PACKAGE>, and continue processing this
element and its children.  I<PACKAGE> may be the type `C<HASH>' to
simply create an anonymous hash.

=item B<-args> I<ARGUMENTS>

Use I<ARGUMENTS> in creating the object specified by B<-make>.  This
is commonly used to copy element attributes into fields in the newly
created object.  For example:

  -make => 'HASH', -args => 'URL => %{href}'

would copy the `C<href>' attribute in an element to the `C<URL>' field
of the newly created hash.

=item B<-field> I<FIELD>

Store this element, object, or children of this element in the parent
object's field named by I<FIELD>.

=item B<-push-field> I<FIELD>

Similar to B<-field>, except that I<FIELD> is an array and the
contents are pushed onto that array.
=item B<-value> I<VALUE>

Use I<VALUE> as a literal value to store in I<FIELD>, otherwise
ignoring this element and its children.  Only valid with B<-field> or
B<-push-field>.  `C<%{I<ATTRIBUTE>}>' notation can be used to
substitute the value of an attribute into the literal value.

=item B<-as-string>

Convert the contents of this element to a string (as in C) and store
in I<FIELD>.  Only valid with B<-field> or B<-push-field>.

=item B<-grove>

Copy this element to I<FIELD> without further processing.  The element
can then be processed later as the Perl objects are manipulated.  Only
valid with B<-field> or B<-push-field>.  If ToObjects is used with
PerlSAX, this will use XML::Grove::Builder to build the grove element.

=item B<-grove-contents>

Used with B<-make>, B<-grove-contents> creates an object but then
takes all of the content of that element and stores it in Contents.

=back

If an action item is a string, that string is treated as a fragment of
Perl code.  The following simple substitutions are performed on the
fragment to provide easy access to the information being converted:

=over 4

=item B<@ELEM@>

The object that caused this action to be called.  If ToObjects is used
with PerlSAX this will be a hash with the element name and attributes,
with XML::Grove this will be the element object, with Data::Grove it
will be the matching object, and with XML::DOM it will be an
XML::DOM::Element.

=back

=head1 EXAMPLE

The example pattern-action list below will convert the following XML
representing a Database schema:

    <schema>
      <table>
        <name>MyTable</name>
        <summary>A short summary</summary>
        <description>A long description that may
          contain a subset of HTML</description>
        <column>
          <name>MyColumn1</name>
          <summary>A short summary</summary>
          <description>A long description</description>
          <unique/>
          <non-null/>
          <default>42</default>
        </column>
      </table>
    </schema>
into Perl objects looking like:

    [
      { Name => "MyTable",
        Summary => "A short summary",
        Description => $grove_object,
        Columns => [
          { Name => "MyColumn1",
            Summary => "A short summary",
            Description => $grove_object,
            Unique => 1,
            NonNull => 1,
            Default => 42
          }
        ]
      }
    ]

Here is a Perl script and pattern-action list that will perform the
conversion using the simple name matching pattern module
XML::PatAct::MatchName.  The script accepts a Schema XML file as an
argument (C<$ARGV[0]>) to the script.  This script creates a grove as
one of its objects, so it requires the XML::Grove module.

    use XML::Parser::PerlSAX;
    use XML::PatAct::MatchName;
    use XML::PatAct::ToObjects;

    my $patterns = [
      'schema'      => [ qw{ -holder                                  } ],
      'table'       => [ qw{ -make Schema::Table                      } ],
      'name'        => [ qw{ -field Name -as-string                   } ],
      'summary'     => [ qw{ -field Summary -as-string                } ],
      'description' => [ qw{ -field Description -grove                } ],
      'column'      => [ qw{ -make Schema::Column -push-field Columns } ],
      'unique'      => [ qw{ -field Unique -value 1                   } ],
      'non-null'    => [ qw{ -field NonNull -value 1                  } ],
      'default'     => [ qw{ -field Default -as-string                } ],
    ];

    my $matcher = XML::PatAct::MatchName->new( Patterns => $patterns );
    my $handler = XML::PatAct::ToObjects->new( Patterns => $patterns,
                                               Matcher => $matcher);

    my $parser = XML::Parser::PerlSAX->new( Handler => $handler );
    my $schema = $parser->parse(Source => { SystemId => $ARGV[0] } );

=head1 TODO

=over 4

=item *

It'd be nice if patterns could be applied even in B<-as-string> and
B<-grove>.

=item *

Implement Perl code actions.

=item *

B<-as-xml> to write XML into the field.

=back

=head1 AUTHOR

Ken MacLeod, ken@bitsko.slc.ut.us

=head1 SEE ALSO

perl(1), Data::Grove(3)

``Using PatAct Modules'' and ``Creating PatAct Modules'' in libxml-perl.
=cut libxml-perl-0.08/lib/XML/SAX2Perl.pm0100644000076400007640000001452607745275111015142 0ustar kenken
#
# Copyright (C) 1998 Ken MacLeod
# XML::SAX2Perl is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# $Id: SAX2Perl.pm,v 1.4 2001/07/23 15:47:15 kmacleod Exp $
#

use strict;

package XML::SAX2Perl;

use vars qw{ $VERSION };

# will be substituted by make-rel script
$VERSION = "0.08";

# new(DocumentHandler => $handler, ...)
# new(\%options)
#
# A single hash reference argument is blessed in place; a list of
# key/value pairs is collected into a new hash.  `Handler' is used as
# the document handler when `DocumentHandler' is not given.
sub new {
    my $type = shift;
    my $self = ($#_ == 0) ? shift : { @_ };

    if (!defined $self->{DocumentHandler} && defined $self->{Handler}) {
        $self->{DocumentHandler} = $self->{Handler};
    }

    return bless $self, $type;
}

# Remembers the locator so that startDocument() can pass it on.
sub setDocumentLocator {
    my $self = shift;

    # (`my $self->{Locator} = shift' used to declare a second,
    # throw-away $self here, so the locator never reached the object.)
    $self->{Locator} = shift;

    return;
}

# Every event below is forwarded as one hash reference, which is how
# the Perl SAX handlers in this distribution receive their events.

# Forwards as start_document(), with a `Locator' property if
# setDocumentLocator() was called.
sub startDocument {
    my $self = shift;

    my %properties;
    if (defined $self->{Locator}) {
        $properties{Locator} = $self->{Locator};
    }

    return $self->{DocumentHandler}->start_document(\%properties);
}

# Forwards as end_document().
sub endDocument {
    my $self = shift;

    return $self->{DocumentHandler}->end_document({ });
}

# Forwards as start_element() with `Name' and `Attributes'.
sub startElement {
    my $self = shift;
    my $name = shift;
    my $attributes = shift;

    # FIXME depends on how Perl SAX treats attributes
    return $self->{DocumentHandler}->start_element({ Name => $name,
                                                     Attributes => $attributes });
}

# Forwards as end_element() with `Name'.
sub endElement {
    my $self = shift;
    my $name = shift;

    return $self->{DocumentHandler}->end_element({ Name => $name });
}

# Forwards the $length characters of $ch starting at $start as
# characters() with `Data'.
sub characters {
    my $self = shift;
    my $ch = shift;
    my $start = shift;
    my $length = shift;

    return $self->{DocumentHandler}->characters({ Data => substr($ch, $start, $length) });
}

# Forwards the $length characters of $ch starting at $start as
# ignorable_whitespace() with `Data'.
sub ignorableWhitespace {
    my $self = shift;
    my $ch = shift;
    my $start = shift;
    my $length = shift;

    return $self->{DocumentHandler}->ignorable_whitespace({ Data => substr($ch, $start, $length) });
}

# Forwards as processing_instruction() with `Target' and `Data'.
sub processingInstruction {
    my $self = shift;
    my $target = shift;
    my $data = shift;

    return $self->{DocumentHandler}->processing_instruction({ Target => $target,
                                                              Data => $data });
}

1;

__END__

=head1 NAME

XML::SAX2Perl -- translate Java/CORBA style SAX methods to Perl methods

=head1 SYNOPSIS

 use XML::SAX2Perl;

 $sax2perl = XML::SAX2Perl->new(Handler => $my_handler);
 $sax->setDocumentHandler($sax2perl);

=head1 DESCRIPTION

C<XML::SAX2Perl> is a SAX filter that translates
Java/CORBA style SAX methods to Perl style method calls.  This man
page summarizes the specific options, handlers, and properties
supported by C<XML::SAX2Perl>; please refer to the Perl SAX standard C
for general usage information.

=head1 METHODS

=over 4

=item new

Creates a new parser object.  Default options for parsing, described
below, are passed as key-value pairs or as a single hash.  Options may
be changed directly in the parser object unless stated otherwise.
Options passed to `C<parse()>' override the default options in the
parser object for the duration of the parse.

=item parse

Parses a document.  Options, described below, are passed as key-value
pairs or as a single hash.  Options passed to `C<parse()>' override
default options in the parser object.

=item location

Returns the location as a hash:

  ColumnNumber    The column number of the parse.
  LineNumber      The line number of the parse.
  PublicId        A string containing the public identifier, or undef
                  if none is available.
  SystemId        A string containing the system identifier, or undef
                  if none is available.

=item SAX DocumentHandler Methods

The following methods are DocumentHandler methods that the SAX 1.0
parser will call and C<XML::SAX2Perl> will translate to Perl SAX
method calls.  See SAX 1.0 for details.

 setDocumentLocator(locator)
 startDocument()
 endDocument()
 startElement(name, atts)
 endElement(name)
 characters(ch, start, length)
 ignorableWhitespace(ch, start, length)
 processingInstruction(target, data)

=back

=head1 OPTIONS

The following options are supported by C<XML::SAX2Perl>:

 Handler          default handler to receive events
 DocumentHandler  handler to receive document events
 DTDHandler       handler to receive DTD events
 ErrorHandler     handler to receive error events
 EntityResolver   handler to resolve entities
 Locale           locale to provide localisation for errors
 Source           hash containing the input source for parsing

If no handlers are provided then all events will be silently ignored,
except for `C' which will cause a `C' to be called after calling `C'.
If a single string argument is passed to the `C<parse()>' method, it
is treated as if a `C<Source>' option was given with a `C<String>'
parameter.

The `C<Source>' hash may contain the following parameters:

 ByteStream       The raw byte stream (file handle) containing the
                  document.
 String           A string containing the document.
 SystemId         The system identifier (URI) of the document.
 PublicId         The public identifier.
 Encoding         A string describing the character encoding.

If more than one of `C<ByteStream>', `C<String>', or `C<SystemId>' is
given, then preference is given first to `C<ByteStream>', then
`C<String>', then `C<SystemId>'.

=head1 HANDLERS

The following handlers and properties are supported by
C<XML::SAX2Perl>:

=head2 DocumentHandler methods

=over 4

=item start_document

Receive notification of the beginning of a document.

 Locator          An object that can return the location of any SAX
                  document event.

=item end_document

Receive notification of the end of a document.

No properties defined.

=item start_element

Receive notification of the beginning of an element.

 Name             The element type name.
 Attributes       Attributes attached to the element, if any.

ALPHA WARNING: The `C<Attributes>' value is not translated from the
SAX 1.0 value, so it will contain an AttributeList object.

=item end_element

Receive notification of the end of an element.

 Name             The element type name.

=item characters

Receive notification of character data.

 Data             The characters from the XML document.

=item ignorable_whitespace

Receive notification of ignorable whitespace in element content.

 Data             The characters from the XML document.

=item processing_instruction

Receive notification of a processing instruction.

 Target           The processing instruction target.
 Data             The processing instruction data, if any.

=back

=head1 AUTHOR

Ken MacLeod

=head1 SEE ALSO

perl(1), XML::Perl2SAX(3).
Extensible Markup Language (XML) Simple API for XML (SAX) =cut libxml-perl-0.08/lib/XML/Perl2SAX.pm0100644000076400007640000000502007745275111015127 0ustar kenken# # Copyright (C) 1998 Ken MacLeod # XML::Perl2SAX is free software; you can redistribute it and/or # modify it under the same terms as Perl itself. # # $Id: Perl2SAX.pm,v 1.3 1999/12/22 21:15:00 kmacleod Exp $ # use strict; package XML::Perl2SAX; use vars qw{ $VERSION }; # will be substituted by make-rel script $VERSION = "0.08"; sub new { my $type = shift; my $self = ($#_ == 0) ? shift : { @_ }; return bless $self, $type; } sub start_document { my $self = shift; my $properties = ($#_ == 0) ? shift : { @_ }; if ($properties->{Locator}) { $self->{DocumentHandler}->setDocumentLocator($properties->{Locator}); } $self->{DocumentHandler}->startDocument; } sub end_document { my $self = shift; $self->{DocumentHandler}->endDocument; } sub start_element { my $self = shift; my $properties = shift; # FIXME depends on how Perl SAX treats attributes $self->{DocumentHandler}->startElement($properties->{Name}, $properties->{Attributes}); } sub end_element { my $self = shift; my $properties = shift; $self->{DocumentHandler}->endElement($properties->{Name}); } sub characters { my $self = shift; my $properties = shift; $self->{DocumentHandler}->characters($properties->{Data}, 0, length($properties->{Data})); } sub ignorable_whitespace { my $self = shift; my $properties = shift; $self->{DocumentHandler}->ignorableWhitespace($properties->{Data}, 0, length($properties->{Data})); } sub processing_instruction { my $self = shift; my $properties = shift; $self->{DocumentHandler}->processingInstruction($properties->{Target}, $properties->{Data}); } 1; __END__ =head1 NAME XML::SAX2Perl -- translate Perl SAX methods to Java/CORBA style methods =head1 SYNOPSIS use XML::Perl2SAX; $perl2sax = XML::Perl2SAX(handler => $java_style_handler); =head1 DESCRIPTION C is a SAX filter that translates Perl style SAX methods to Java/CORBA style 
method calls.  This module performs the inverse operation from
C<XML::SAX2Perl>.

C<XML::Perl2SAX> is a Perl SAX document handler.  The `C<new>' method
takes a `C<DocumentHandler>' argument that is a Java/CORBA style
handler that the new Perl2SAX instance will call.  The SAX interfaces
are defined at .

=head1 AUTHOR

Ken MacLeod

=head1 SEE ALSO

perl(1), XML::SAX2Perl(3).

 Extensible Markup Language (XML)
 Simple API for XML (SAX)

=cut libxml-perl-0.08/lib/XML/ESISParser.pm0100644000076400007640000004700007745275111015513 0ustar kenken
#
# Copyright (C) 1999 Ken MacLeod
# See the file COPYING for distribution terms.
#
# $Id: ESISParser.pm,v 1.9 2000/03/02 20:18:09 kmacleod Exp $
#

use strict;

use IO::File;
use UNIVERSAL;

package XML::ESISParser;

use vars qw{ $VERSION $NSGMLS_sgml $NSGMLS_FLAGS_sgml $NSGMLS_ENV_sgml
             $NSGMLS_xml $NSGMLS_FLAGS_xml $NSGMLS_ENV_xml $XML_DECL };

# will be substituted by make-rel script
$VERSION = "0.08";

$NSGMLS_sgml = 'nsgmls';
$NSGMLS_FLAGS_sgml = '-oentity -oempty -onotation-sysid -oincluded -oline -E0';
$NSGMLS_ENV_sgml = '';

$NSGMLS_xml = 'nsgmls';
$XML_DECL = '/usr/lib/sgml/declaration/xml.decl';
$NSGMLS_FLAGS_xml = '-oentity -oempty -onotation-sysid -oline -oincluded -wxml -E0 ';
$NSGMLS_ENV_xml = 'SP_CHARSET_FIXED=YES SP_ENCODING=XML';

# new(OPTIONS)
#
# Stores the key/value options as the defaults for later parses.
sub new {
    my $type = shift;

    return bless { @_ }, $type;
}

# parse(OPTIONS) / parse(\%options) / parse($string)
#
# Parses one document and returns what parse_fh() returns (the
# document handler's end_document() value).  Options given here
# override the defaults given to new().  A lone non-reference argument
# is the document text itself.  Dies if the instance is already
# parsing, if no source is given, or if parsing fails.
sub parse {
    my $self = shift;

    die "XML::ESISParser: parser instance ($self) already parsing\n"
        if (defined $self->{ParseOptions});

    # If there's one arg and it has no ref, it's a string
    my $args;
    if (scalar (@_) == 1 && !ref($_[0])) {
        $args = { Source => { String => shift } };
    } else {
        $args = (scalar (@_) == 1) ? shift : { @_ };
    }

    my $parse_options = { %$self, %$args };
    $self->{ParseOptions} = $parse_options;

    my $result;
    my $parsed = eval {
        $result = $self->_parse_source($parse_options);
        1;
    };
    if (!$parsed) {
        my $error = $@;
        # Clear the "already parsing" marker so that a failed parse
        # does not make the instance unusable.
        delete $self->{ParseOptions};
        die $error;
    }

    # clean up parser instance
    # NOTE(review): the three handler deletes also remove handlers
    # that were given to new(), so a second parse must pass them again
    # (or rely on `Handler') -- kept as found, confirm intent.
    delete $self->{ParseOptions};
    delete $self->{DocumentHandler};
    delete $self->{DTDHandler};
    delete $self->{ErrorHandler};

    return $result;
}

# Checks the source, fills in default handlers, builds the nsgmls
# command and runs the parse for the first source present, tried in
# the order ESISStream, ByteStream, String, SystemId.
sub _parse_source {
    my ($self, $parse_options) = @_;

    my $source = $parse_options->{Source};

    # ensure that we have at least one source
    if (!defined $source
        || !(defined $source->{String}
             || defined $source->{ByteStream}
             || defined $source->{SystemId}
             || defined $source->{ESISStream})) {
        die "XML::ESISParser: no source defined for parse\n";
    }

    # assign default Handler to any undefined handlers
    if (defined $parse_options->{Handler}) {
        foreach my $role (qw(DocumentHandler DTDHandler ErrorHandler)) {
            $parse_options->{$role} = $parse_options->{Handler}
                if (!defined $parse_options->{$role});
        }
    }

    my ($nsgmls, $nsgmls_command) = _build_nsgmls_command($parse_options);

    # name used in messages; a complete NSGMLSCommand has no separate
    # program name
    my $program = defined($nsgmls) ? $nsgmls : $nsgmls_command;

    my $result;

    if (defined $source->{ESISStream}) {
        # read ESIS stream directly
        my $system_id = (defined $source->{SystemId})
            ? "\`$source->{SystemId}'" : 'ESIS Stream';

        eval { $result = $self->parse_fh ($source->{ESISStream}) };
        my $retval = $@;

        if ($retval) {
            die "XML::ESISParser::parse: unable to parse \`$system_id'\n$retval";
        }
    } elsif (defined $source->{ByteStream}) {
        # feed the document from a Perl file handle to nsgmls
        # FIXME special case stdin?
        my $system_id = (defined $source->{SystemId})
            ? "\`$source->{SystemId}'" : 'Byte Stream';
        my $byte_stream = $source->{ByteStream};

        my $copy_stream = sub {
            my ($sgml) = @_;
            my $buffer;
            while (read ($byte_stream, $buffer, 8192)) {
                print {$sgml} $buffer;
            }
        };

        my $retval;
        ($result, $retval)
            = $self->_parse_piped ($copy_stream, $system_id,
                                   $program, $nsgmls_command);
        $byte_stream->close ();

        if ($retval) {
            die "XML::ESISParser::parse: unable to parse \`$system_id'\n$retval";
        }
    } elsif (defined $source->{String}) {
        # feed the literal string to nsgmls
        my $system_id = (defined $source->{SystemId})
            ? "\`$source->{SystemId}'" : 'String';
        my $string = $source->{String};

        my $retval;
        ($result, $retval)
            = $self->_parse_piped (sub { print { $_[0] } $string },
                                   $system_id, $program, $nsgmls_command);

        if ($retval) {
            die "XML::ESISParser::parse: unable to parse \`$system_id'\n$retval";
        }
    } elsif (defined $source->{SystemId}) {
        # if SystemId is a file, call nsgmls with file name
        # otherwise, open stream on SystemId and do ByteStream
        # FIXME this only handles file SystemIds right now

        # 20% speed increase if grep swipes implieds (only 8% if
        # we do it in `parse').  XXX use a C routine or patch SP
        my $system_id = $source->{SystemId};

        # The name goes through the shell inside single quotes; close
        # and reopen the quoting around any quote it contains.
        (my $quoted_id = $system_id) =~ s/'/'\\''/g;

        my ($fh) = IO::File->new ("$nsgmls_command '$quoted_id' 2>&1 | egrep -v '^A.* IMPLIED\$' |");
        die "XML::ESISParser::parse: can't run \`$program' on \`$system_id'\n"
            if (!defined $fh);

        eval { $result = $self->parse_fh ($fh) };
        my $retval = $@;

        close ($fh);

        if ($retval) {
            die "XML::ESISParser::parse: unable to parse \`$system_id'\n$retval";
        }
    }

    return $result;
}

# Returns ($program, $command).  A caller-supplied `NSGMLSCommand' is
# used as given (and $program is undef); otherwise the command is
# assembled from the package defaults for SGML or XML and recorded in
# the parse options as NSGMLS, NSGMLS_FLAGS and NSGMLS_COMMAND.
sub _build_nsgmls_command {
    my ($parse_options) = @_;

    if (defined $parse_options->{NSGMLSCommand}) {
        return (undef, $parse_options->{NSGMLSCommand});
    }

    my ($nsgmls, $nsgmls_flags, $nsgmls_command);
    if (defined $parse_options->{IsSGML} && $parse_options->{IsSGML}) {
        my $declaration = (defined $parse_options->{Declaration})
            ? " " . $parse_options->{Declaration} : "";
        $nsgmls = $parse_options->{NSGMLS} = $NSGMLS_sgml;
        $nsgmls_flags = $parse_options->{NSGMLS_FLAGS} = $NSGMLS_FLAGS_sgml;
        $nsgmls_command = "$nsgmls $nsgmls_flags $declaration";
    } else {
        my $declaration = (defined $parse_options->{Declaration})
            ? $parse_options->{Declaration} : $XML_DECL;
        $nsgmls = $parse_options->{NSGMLS} = $NSGMLS_xml;
        $nsgmls_flags = $parse_options->{NSGMLS_FLAGS} = $NSGMLS_FLAGS_xml;
        $nsgmls_command = "$NSGMLS_ENV_xml $nsgmls $nsgmls_flags $declaration";
    }
    $parse_options->{NSGMLS_COMMAND} = $nsgmls_command;

    return ($nsgmls, $nsgmls_command);
}

# Forks a child that runs nsgmls, lets $feed->($handle) write the
# document to it, and parses the ESIS the child prints.  Returns
# ($result, $error) where $error is the parse_fh() exception, if any.
sub _parse_piped {
    my ($self, $feed, $system_id, $program, $nsgmls_command) = @_;

    my $pid = open (my $esis, "-|");
    die "XML::ESISParser::parse: can't fork to run \`$program' on \`$system_id': $!\n"
        if (!defined $pid);

    if ($pid == 0) {
        # Child: its standard output is the pipe the parent reads.
        # Failures are reported and turned into an exit status here;
        # an exception must not travel up into the caller's code
        # inside the child process.
        my $fed = eval {
            # 20% speed increase if grep swipes implieds (only 8% if
            # we do it in `parse_fh').  XXX use a C routine or patch SP
            open (my $sgml, "| $nsgmls_command 2>&1 | egrep -v '^A.* IMPLIED\$'")
                or die "XML::ESISParser::parse: can't run \`$program' on \`$system_id'\n";
            $feed->($sgml);
            close ($sgml)
                or die "XML::ESISParser::parse: can't run \`$program' on \`$system_id'\n";
            1;
        };
        print STDERR $@ if (!$fed);
        exit ($fed ? 0 : 1);
    }

    my $result;
    eval { $result = $self->parse_fh ($esis) };
    my $retval = $@;

    waitpid ($pid, 0);          # clean up that process
    close ($esis);

    return ($result, $retval);
}

#
# Parse the `ESIS' information coming from `file'
#
# Reads the nsgmls output one line at a time; the first character of a
# line is the ESIS command, the rest its argument.  Events go to the
# handlers in $self->{ParseOptions}, and only to handlers that
# implement the corresponding method.  Returns the document handler's
# end_document() value, or nothing if it has no end_document().
sub parse_fh {
    my ($self, $file) = @_;
    my (@attributes, @properties, $files);

    my $options = $self->{ParseOptions} || { };

    # An object with no methods stands in for a missing handler, so
    # that its events are ignored instead of failing on undef.
    my $no_handler = bless { }, 'XML::ESISParser::NoHandler';
    my ($doc_h, $dtd_h, $err_h)
        = map { defined ($options->{$_}) ? $options->{$_} : $no_handler }
              qw(DocumentHandler DTDHandler ErrorHandler);

    my $nsgmls = $options->{NSGMLS};

    # we cache these most commonly used `can()' calls
    my $can_start_element = $doc_h->can('start_element');
    my $can_end_element = $doc_h->can('end_element');
    my $can_characters = $doc_h->can('characters');
    my $can_record_end = $doc_h->can('record_end');

    # call $method on $handler with $event, if the handler has it
    my $send = sub {
        my ($handler, $method, $event) = @_;
        $handler->$method ($event) if ($handler->can($method));
    };

    # report through the error handler, or die without one
    my $complain = sub {
        my ($message, $fatal_text) = @_;
        if ($err_h->can('error')) {
            $err_h->error ({ Message => $message });
        } else {
            die $fatal_text;
        }
    };

    # emit pending text as character data, or as an internal entity
    # reference when it was read between SDATA markers
    my $flush_text = sub {
        my ($text, $is_sdata) = @_;
        return if ($text eq '');
        if ($is_sdata) {
            $send->($doc_h, 'internal_entity_ref',
                    { Name => $self->{'internal_entities_by_value'}{$text} });
        } else {
            $doc_h->characters({ Data => $text })
                if ($can_characters);
        }
    };

    # declarations that share one shape: a name plus gathered properties
    my %decl_method = ('N' => 'notation_decl',
                       'S' => 'subdoc_entity_decl',
                       'T' => 'external_sgml_entity_decl');

    my $line = 0;

    $doc_h->start_document( { } )
        if ($doc_h->can('start_document'));

    # 30% speed improvement by breaking the encapsulation
    my ($is_filehandle) = (ref ($file) eq "FileHandle"
                           || ref ($file) eq "IO::File");

    while (defined (my $record = ($is_filehandle ? <$file> : $file->getline()))) {
        chomp $record;

        my $op = substr ($record, 0, 1);
        my $rest = (length ($record) > 1) ? substr ($record, 1) : '';

        if ($op eq 'A') {       # attribute
            # Note: the output of `nsgmls' is `grep -v'ed to get rid of
            # IMPLIED attributes, if we do it here we only get an 8%
            # speed boost
            my ($name, $type, $value) = split (/\s/, $rest, 3);
            push (@attributes, $name => $value);
        }
        elsif ($op eq '(') {    # start element
            if ($#attributes >= 0) {
                push (@properties, Attributes => { @attributes });
            }
            $doc_h->start_element ({ Name => $rest, @properties })
                if ($can_start_element);
            @properties = ();
            @attributes = ();
        }
        elsif ($op eq ')') {    # end element
            $doc_h->end_element ({ Name => $rest })
                if ($can_end_element);
        }
        elsif ($op eq 'L') {    # line number
            $line = $rest;
        }
        elsif ($op eq '-') {    # data (including sdata entities)
            # This section is derived from David Megginson's SGMLSpm
            my $sdata_flag = 0;
            my $out = '';
            my $data = $rest;

            # take the text up to the next escape, then act on it
            while ($data =~ s/\A(.*?)\\(\\|n|\||[0-7]{1,3})//s) {
                my ($text, $escape) = ($1, $2);
                $out .= $text;

                if ($escape eq '|') {
                    # beginning or end of SDATA
                    $flush_text->($out, $sdata_flag);
                    $out = '';
                    $sdata_flag = !$sdata_flag;
                } elsif ($escape eq 'n') {
                    # record end
                    $flush_text->($out, $sdata_flag);
                    $out = '';

                    if ($can_record_end) {
                        $doc_h->record_end( { } );
                    } else {
                        $doc_h->characters({ Data => "\n" })
                            if ($can_characters);
                    }
                } elsif ($escape eq '\\') {
                    $out .= '\\';
                } else {
                    $out .= chr(oct($escape));
                }
            }
            $out .= $data;
            $flush_text->($out, $sdata_flag);
        }
        elsif ($op eq 's') {    # sysid
            push (@properties, SystemId => $rest);
        }
        elsif ($op eq 'p') {    # pubid
            push (@properties, PublicId => $rest);
        }
        elsif ($op eq 'f') {    # file
            if (!defined $files) {
                $files = $rest;
            } elsif (!ref $files) {
                $files = [ $files, $rest ];
            } else {
                push (@$files, $rest);
            }
        }
        elsif ($op eq 'E') {    # external entity definition
            $rest =~ /^(\S+) (\S+) (\S+)$/
                or die "XML::ESISParser::parse_fh: bad external entity event data: $rest\n";
            my ($name, $type, $notation) = ($1, $2, $3);
            if (defined $files) {
                push (@properties, GeneratedId => $files);
            }
            $send->($dtd_h, 'external_entity_decl',
                    { Name => $name, Type => $type,
                      Notation => $notation, @properties });
            @properties = ();
            undef $files;
        }
        elsif ($op eq 'I') {    # internal entity definition
            my ($name, $type, $value) = split (/\s/, $rest, 3);
            $self->{'internal_entities_by_value'}{$value} = $name;
            $send->($dtd_h, 'internal_entity_decl',
                    { Name => $name, Type => $type, Value => $value });
        }
        elsif ($op eq '&') {    # external entity reference
            $send->($doc_h, 'external_entity_ref', { Name => $rest });
        }
        elsif ($op eq '?') {    # processing instruction (PI)
            if ($options->{IsSGML}) {
                $send->($doc_h, 'processing_instruction', { Data => $rest });
            } else {
                my ($target, $pi_data) = split (/\s+/, $rest, 2);
                $send->($doc_h, 'processing_instruction',
                        { Target => $target, Data => $pi_data });
            }
        }
        elsif (exists $decl_method{$op}) {
            # notation, subdoc or external SGML text entity definition
            if (defined $files) {
                push (@properties, GeneratedId => $files);
            }
            $send->($dtd_h, $decl_method{$op},
                    { Name => $rest, @properties });
            @properties = ();
            undef $files;
        }
        elsif ($op eq 'D') {    # data attribute
            # FIXME
            my $message = "XML::ESISParser: can't handle data attributes yet\n";
            $complain->($message, "$message");
        }
        elsif ($op eq 'a') {    # link attribute
            # FIXME
            my $message = "XML::ESISParser: can't handle link attributes yet\n";
            $complain->($message, "$message");
        }
        elsif ($op eq '{') {    # subdoc start
            $send->($doc_h, 'start_subdoc', { Name => $rest });
        }
        elsif ($op eq '}') {    # subdoc end
            $send->($doc_h, 'end_subdoc', { Name => $rest });
        }
        elsif ($op eq '#') {    # appinfo
            $send->($doc_h, 'appinfo', { Text => $rest });
        }
        elsif ($op eq 'i') {    # next element is an included subelement
            push (@properties, IncludedSubelement => 1);
        }
        elsif ($op eq 'e') {    # next element is declared empty
            push (@properties, Empty => 1);
        }
        elsif ($op eq 'C') {    # conforming
            $send->($doc_h, 'conforming', { });
        }
        elsif (defined ($nsgmls) && $record =~ /^\Q$nsgmls\E:/) {
            # `nsgmls' error
            $complain->($record, "$record\n");
        }
        else {
            my $message = "XML::ESISParser::parse_fh: ESIS command character \`$op' not recognized when reading line \`$record' around line $line ($.)";
            $complain->($message, "$message");
        }
    }

    if ($doc_h->can('end_document')) {
        return $doc_h->end_document({});
    } else {
        return ();
    }
}

1;

__END__

=head1 NAME

XML::ESISParser - Perl SAX parser using nsgmls
=head1 SYNOPSIS

 use XML::ESISParser;

 $parser = XML::ESISParser->new( [OPTIONS] );
 $result = $parser->parse( [OPTIONS] );

 $result = $parser->parse($string);

=head1 DESCRIPTION

C<XML::ESISParser> is a Perl SAX parser using the `nsgmls' command of
James Clark's SGML Parser (SP), a validating XML and SGML parser.
This man page summarizes the specific options, handlers, and
properties supported by C<XML::ESISParser>; please refer to the Perl
SAX standard in `C<PerlSAX.pod>' for general usage information.

C<XML::ESISParser> defaults to parsing XML and has an option for
parsing SGML.

`C<nsgmls>' source, and binaries for some platforms, is available from
<http://www.jclark.com/>.  `C<nsgmls>' is included in both the SP and
Jade packages.

=head1 METHODS

=over 4

=item new

Creates a new parser object.  Default options for parsing, described
below, are passed as key-value pairs or as a single hash.  Options may
be changed directly in the parser object unless stated otherwise.
Options passed to `C<parse()>' override the default options in the
parser object for the duration of the parse.

=back

=head1 OPTIONS

The following options are supported by C<XML::ESISParser>:

 Handler          default handler to receive events
 DocumentHandler  handler to receive document events
 DTDHandler       handler to receive DTD events
 ErrorHandler     handler to receive error events
 Source           hash containing the input source for parsing
 IsSGML           the document to be parsed is in SGML

If no handlers are provided then all events will be silently ignored.

If a single string argument is passed to the `C<parse()>' method, it
is treated as if a `C<Source>' option was given with a `C<String>'
parameter.

The `C<Source>' hash may contain the following parameters:

 ByteStream       The raw byte stream (file handle) containing the
                  document.
 String           A string containing the document.
 SystemId         The system identifier (URI) of the document.

If more than one of `C<ByteStream>', `C<String>', or `C<SystemId>' is
given, then preference is given first to `C<ByteStream>', then
`C<String>', then `C<SystemId>'.

=head1 HANDLERS

The following handlers and properties are supported by
C<XML::ESISParser>:

=head2 DocumentHandler methods

=over 4

=item start_document

Receive notification of the beginning of a document.

No properties defined.

=item end_document

Receive notification of the end of a document.

No properties defined.

=item start_element

Receive notification of the beginning of an element.

    Name                The element type name.
    Attributes          A hash containing the attributes attached to
                        the element, if any.
    IncludedSubelement  This element is an included subelement.
    Empty               This element is declared empty.

The `C<Attributes>' hash contains only string values.  The `C<Empty>'
flag is not set for an element that merely has no content, it is set
only if the DTD declares it empty.

BETA: Attribute values currently do not expand SData entities into
entity objects, they are still in the system data notation used by
nsgmls (inside `|').  A future version of XML::ESISParser will also
convert other types of attributes into their respective objects,
currently just their notation or entity names are given.

=item end_element

Receive notification of the end of an element.

    Name         The element type name.

=item characters

Receive notification of character data.

    Data         The characters from the document.

=item record_end

Receive notification of a record end sequence.  XML applications
should convert this to a new-line.

=item processing_instruction

Receive notification of a processing instruction.

    Target       The processing instruction target in XML.
    Data         The processing instruction data, if any.

=item internal_entity_ref

Receive notification of a system data (SData) internal entity
reference.

    Name         The name of the internal entity reference.

=item external_entity_ref

Receive notification of an external entity reference.

    Name         The name of the external entity reference.

=item start_subdoc

Receive notification of the start of a sub document.

    Name         The name of the external entity reference.

=item end_subdoc

Receive notification of the end of a sub document.

    Name         The name of the external entity reference.

=item conforming

Receive notification that the document just parsed conforms to its
document type declaration (DTD).

No properties defined.

=back

=head2 DTDHandler methods

=over 4

=item external_entity_decl

Receive notification of an external entity declaration.

    Name         The entity's entity name.
    Type         The entity's type (CDATA, NDATA, etc.)
    SystemId     The entity's system identifier.
    PublicId     The entity's public identifier, if any.
    GeneratedId  Generated system identifiers, if any.

=item internal_entity_decl

Receive notification of an internal entity declaration.

    Name         The entity's entity name.
    Type         The entity's type (CDATA, NDATA, etc.)
    Value        The entity's character value.

=item notation_decl

Receive notification of a notation declaration.

    Name         The notation's name.
    SystemId     The notation's system identifier.
    PublicId     The notation's public identifier, if any.
    GeneratedId  Generated system identifiers, if any.

=item subdoc_entity_decl

Receive notification of a subdocument entity declaration.

    Name         The entity's entity name.
    SystemId     The entity's system identifier.
    PublicId     The entity's public identifier, if any.
    GeneratedId  Generated system identifiers, if any.

=item external_sgml_entity_decl

Receive notification of an external SGML-entity declaration.

    Name         The entity's entity name.
    SystemId     The entity's system identifier.
    PublicId     The entity's public identifier, if any.
    GeneratedId  Generated system identifiers, if any.

=back

=head1 AUTHOR

Ken MacLeod, ken@bitsko.slc.ut.us

=head1 SEE ALSO

perl(1), PerlSAX.pod(3)

 Extensible Markup Language (XML)
 SAX 1.0: The Simple API for XML
 SGML Parser (SP)

=cut
libxml-perl-0.08/lib/XML/Parser/0040755000076400007640000000000007745275112014474 5ustar kenkenlibxml-perl-0.08/lib/XML/Parser/PerlSAX.pm0100644000076400007640000005525507745275111016320 0ustar kenken#
# Copyright (C) 1999 Ken MacLeod
# XML::Parser::PerlSAX is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# $Id: PerlSAX.pm,v 1.7 1999/12/22 21:15:00 kmacleod Exp $
#

use strict;

package XML::Parser::PerlSAX;

use XML::Parser;
use UNIVERSAL;
use vars qw{ $VERSION $name_re };

# will be substituted by make-rel script
$VERSION = "0.08";

# FIXME I doubt this is a correct Perl RE for productions [4] and
# [5] in the XML 1.0 specification, especially considering Unicode chars
$name_re = '[A-Za-z_:][A-Za-z0-9._:-]*';

# Option slots that parse() overwrites on the parser object for the
# duration of a parse so that the expat callbacks can reach the handlers.
my @cached_handler_keys = qw{ DocumentHandler DTDHandler EntityResolver };

# Each row is: XML::Parser handler name, the PerlSAX method the user's
# handler must implement for it to be registered, and the callback in
# this class that translates the expat event.
my @document_events = (
    [ Init       => 'start_document',         '_handle_init' ],
    [ Final      => 'end_document',           '_handle_final' ],
    [ Start      => 'start_element',          '_handle_start' ],
    [ End        => 'end_element',            '_handle_end' ],
    [ Char       => 'characters',             '_handle_char' ],
    [ Proc       => 'processing_instruction', '_handle_proc' ],
    [ Comment    => 'comment',                '_handle_comment' ],
    [ CdataStart => 'start_cdata',            '_handle_cdatastart' ],
    [ CdataEnd   => 'end_cdata',              '_handle_cdataend' ],
);

my @dtd_events = (
    [ Notation => 'notation_decl',        '_handle_notation' ],
    [ Unparsed => 'unparsed_entity_decl', '_handle_unparsed' ],
    [ Element  => 'element_decl',         '_handle_element' ],
    [ Attlist  => 'attlist_decl',         '_handle_attlist' ],
    [ Doctype  => 'doctype_decl',         '_handle_doctype' ],
    [ XMLDecl  => 'xml_decl',             '_handle_xmldecl' ],
);

# Constructor.  Default parse options are given either as a single hash
# reference (which is blessed in place, not copied) or as a list of
# key/value pairs.
sub new {
    my $type = shift;
    my $self = (@_ == 1) ? shift : { @_ };

    return bless $self, $type;
}

# Parse a document and return whatever the document handler's
# `end_document' returned (via XML::Parser's Final handler).
#
# Arguments are a single plain string (the document itself), a single
# hash reference of options, or a list of key/value options.  Options
# given here override the defaults stored in the parser object for this
# parse only.
#
# Dies if the parser is already parsing, if no source is defined, or if
# the underlying parser or a handler dies.  In every case the parser
# object is left ready for another parse, and any handler defaults
# supplied to new() are still in place.
sub parse {
    my $self = shift;

    die "XML::Parser::PerlSAX: parser instance ($self) already parsing\n"
        if (defined $self->{ParseOptions});

    # If there's one arg and it has no ref, it's a string
    my $args;
    if (scalar (@_) == 1 && !ref($_[0])) {
        $args = { Source => { String => shift } };
    } else {
        $args = (scalar (@_) == 1) ? shift : { @_ };
    }

    my $parse_options = { %$self, %$args };

    # ensure that we have at least one source.  This is checked before
    # the instance is marked as busy, otherwise a failed call would
    # leave it reporting "already parsing" forever.
    my $source = $parse_options->{Source};
    if (!defined $source
        || !(defined $source->{String}
             || defined $source->{ByteStream}
             || defined $source->{SystemId})) {
        die "XML::Parser::PerlSAX: no source defined for parse\n";
    }

    # remember handler defaults stored in the object so they can be put
    # back after the per-parse values have been cached over them
    my %saved_handlers;
    foreach my $key (@cached_handler_keys) {
        $saved_handlers{$key} = $self->{$key} if (exists $self->{$key});
    }

    $self->{ParseOptions} = $parse_options;

    my $result;
    my $ok = eval {
        $result = $self->_parse_with_expat($parse_options);
        1;
    };
    my $error = $@;

    # clean up parser instance, whether or not the parse succeeded
    delete $self->{ParseOptions};
    delete $self->{Expat};
    delete $self->{UseEntRefs};
    delete $self->{EntRefs};
    foreach my $key (@cached_handler_keys) {
        if (exists $saved_handlers{$key}) {
            $self->{$key} = $saved_handlers{$key};
        } else {
            delete $self->{$key};
        }
    }

    die $error if (!$ok);

    return $result;
}

# Register callbacks for the events the handlers implement, build the
# XML::Parser instance and run it over the configured source.
sub _parse_with_expat {
    my ($self, $parse_options) = @_;

    # assign default Handler to any undefined handlers
    if (defined $parse_options->{Handler}) {
        foreach my $key (@cached_handler_keys) {
            $parse_options->{$key} = $parse_options->{Handler}
                if (!defined $parse_options->{$key});
        }
    }

    my @handlers;

    if (defined $parse_options->{DocumentHandler}) {
        # cache DocumentHandler in self for callbacks
        my $doc_h = $parse_options->{DocumentHandler};
        $self->{DocumentHandler} = $doc_h;

        push (@handlers, $self->_callbacks_for($doc_h, @document_events));

        # entity references are only visible through the Default
        # handler, and only when expat is told not to expand them
        if (UNIVERSAL::can($doc_h, 'entity_reference')) {
            push (@handlers, Default => sub { $self->_handle_default(@_) } );
            $self->{UseEntRefs} = 1;
        }
    }

    if (defined $parse_options->{DTDHandler}) {
        # cache DTDHandler in self for callbacks
        my $dtd_h = $parse_options->{DTDHandler};
        $self->{DTDHandler} = $dtd_h;

        push (@handlers, $self->_callbacks_for($dtd_h, @dtd_events));

        # entity declarations are also needed internally to answer
        # `entity_reference' events with the full declaration
        push (@handlers, Entity => sub { $self->_handle_entity(@_) } )
            if ($self->{UseEntRefs}
                || UNIVERSAL::can($dtd_h, 'entity_decl'));
    }

    if (defined $parse_options->{EntityResolver}) {
        # cache EntityResolver in self for callbacks
        my $er = $parse_options->{EntityResolver};
        $self->{EntityResolver} = $er;

        push (@handlers, ExternEnt => sub { $self->_handle_extern_ent(@_) } )
            if (UNIVERSAL::can($er, 'resolve_entity'));
    }

    my $source = $parse_options->{Source};

    my @xml_parser_options = ( Handlers => { @handlers } );
    unshift (@xml_parser_options, NoExpand => 1)
        if ($self->{UseEntRefs});
    push (@xml_parser_options, ProtocolEncoding => $source->{Encoding})
        if (defined $source->{Encoding});

    my $parser = XML::Parser->new(@xml_parser_options);

    # preference order: ByteStream, then String, then SystemId
    if (defined $source->{ByteStream}) {
        return $parser->parse($source->{ByteStream});
    } elsif (defined $source->{String}) {
        return $parser->parse($source->{String});
    } elsif (defined $source->{SystemId}) {
        return $parser->parsefile($source->{SystemId});
    }

    return;
}

# Build the `Name => coderef' pairs for every event in @events whose
# PerlSAX method is implemented by $handler.
sub _callbacks_for {
    my ($self, $handler, @events) = @_;

    my @callbacks;
    foreach my $event (@events) {
        my ($expat_name, $sax_method, $callback) = @$event;
        next if (!UNIVERSAL::can($handler, $sax_method));
        push (@callbacks, $expat_name => sub { $self->$callback(@_) });
    }

    return @callbacks;
}

# Return a hash describing the current parse position.  Only meaningful
# while a parse is running (it needs the expat object stored by
# _handle_init).
sub location {
    my $self = shift;

    my $expat = $self->{Expat};

    my @properties = ( ColumnNumber => $expat->current_column,
                       LineNumber => $expat->current_line,
                       BytePosition => $expat->current_byte,
                       Base => $expat->base );

    # Use the source in effect for this parse (options passed to
    # parse() override those in the object).  Plain lookups are used so
    # that nothing is autovivified in the parser object.
    my $options = $self->{ParseOptions} || $self;
    my $source = $options->{Source} || {};

    # FIXME these locations change while parsing external entities
    push (@properties, PublicId => $source->{PublicId})
        if (defined $source->{PublicId});
    push (@properties, SystemId => $source->{SystemId})
        if (defined $source->{SystemId});

    return { @properties };
}

###
### DocumentHandler methods
###

# expat Init: remember the expat object, offer ourselves as the locator
# and announce the start of the document.
sub _handle_init {
    my $self = shift;
    my $expat = shift;

    $self->{Expat} = $expat;

    if ($self->{DocumentHandler}->can('set_document_locator')) {
        $self->{DocumentHandler}->set_document_locator( { Locator => $self } );
    }

    $self->{DocumentHandler}->start_document( { } );
}

# expat Final: drop entity-reference state and return the handler's
# `end_document' result, which becomes the result of parse().
sub _handle_final {
    my $self = shift;

    delete $self->{UseEntRefs};
    delete $self->{EntRefs};

    return $self->{DocumentHandler}->end_document( { } );
}

# expat Start: the remaining arguments are attribute name/value pairs.
sub _handle_start {
    my $self = shift;
    my $expat = shift;
    my $element = shift;

    my @properties;
    if ($self->{ParseOptions}{UseAttributeOrder}) {
        # Capture order and defined() status for attributes
        my $order = [];
        for (my $ii = 0; $ii < $#_; $ii += 2) {
            push @$order, $_[$ii];
        }
        push @properties, 'AttributeOrder', $order;

        # Divide by two because XML::Parser counts both attribute name
        # and value within it's index
        push @properties, 'Defaulted', ($expat->specified_attr() / 2);
    }

    $self->{DocumentHandler}->start_element( { Name => $element,
                                               Attributes => { @_ },
                                               @properties } );
}

sub _handle_end {
    my $self = shift;
    my $expat = shift;
    my $element = shift;

    $self->{DocumentHandler}->end_element( { Name => $element } );
}

sub _handle_char {
    my $self = shift;
    my $expat = shift;
    my $string = shift;

    $self->{DocumentHandler}->characters( { Data => $string } );
}

sub _handle_proc {
    my $self = shift;
    my $expat = shift;
    my $target = shift;
    my $data = shift;

    $self->{DocumentHandler}->processing_instruction( { Target => $target,
                                                        Data => $data } );
}

sub _handle_comment {
    my $self = shift;
    my $expat = shift;
    my $data = shift;

    $self->{DocumentHandler}->comment( { Data => $data } );
}

sub _handle_cdatastart {
    my $self = shift;
    my $expat = shift;

    $self->{DocumentHandler}->start_cdata( { } );
}

sub _handle_cdataend {
    my $self = shift;
    my $expat = shift;

    $self->{DocumentHandler}->end_cdata( { } );
}

# Default receives all characters that aren't handled by some other
# handler, this means a lot of stuff goes through here.  All we're
# looking for are `&NAME;' entity reference sequences
sub _handle_default {
    my $self = shift;
    my $expat = shift;
    my $string = shift;

    if ($string =~ /^&($name_re);$/) {
        # pass the full declaration if one was seen, else just the name
        my $ent_ref = $self->{EntRefs}{$1};
        if (!defined $ent_ref) {
            $ent_ref = { Name => $1 };
        }
        $self->{DocumentHandler}->entity_reference($ent_ref);
    }
}

###
### DTDHandler methods
###

sub _handle_notation {
    my $self = shift;
    my $expat = shift;
    my $notation = shift;
    my $base = shift;
    my $sysid = shift;
    my $pubid = shift;

    my @properties = (Name => $notation);

    push (@properties, Base => $base)
        if (defined $base);
    push (@properties, SystemId => $sysid)
        if (defined $sysid);
    push (@properties, PublicId => $pubid)
        if (defined $pubid);

    $self->{DTDHandler}->notation_decl( { @properties } );
}

sub _handle_unparsed {
    my $self = shift;
    my $expat = shift;
    my $entity = shift;
    my $base = shift;
    my $sysid = shift;
    my $pubid = shift;

    my @properties = (Name => $entity, SystemId => $sysid);

    push (@properties, Base => $base)
        if (defined $base);
    push (@properties, PublicId => $pubid)
        if (defined $pubid);

    $self->{DTDHandler}->unparsed_entity_decl( { @properties } );
}

# expat Entity: record the declaration for later `entity_reference'
# events and forward it if the DTD handler wants `entity_decl'.
sub _handle_entity {
    my $self = shift;
    my $expat = shift;
    my $name = shift;
    my $val = shift;
    my $sysid = shift;
    my $pubid = shift;
    my $ndata = shift;

    my @properties = (Name => $name);

    push (@properties, Value => $val)
        if (defined $val);
    push (@properties, PublicId => $pubid)
        if (defined $pubid);
    push (@properties, SystemId => $sysid)
        if (defined $sysid);
    push (@properties, Notation => $ndata)
        if (defined $ndata);

    my $properties = { @properties };
    if ($self->{UseEntRefs}) {
        $self->{EntRefs}{$name} = $properties;
    }
    if ($self->{DTDHandler}->can('entity_decl')) {
        $self->{DTDHandler}->entity_decl( $properties );
    }
}

sub _handle_element {
    my $self = shift;
    my $expat = shift;
    my $name = shift;
    my $model = shift;

    $self->{DTDHandler}->element_decl( { Name => $name,
                                         Model => $model } );
}

sub _handle_attlist {
    my $self = shift;
    my $expat = shift;
    my $elname = shift;
    my $attname = shift;
    my $type = shift;
    my $default = shift;
    my $fixed = shift;

    $self->{DTDHandler}->attlist_decl( { ElementName => $elname,
                                         AttributeName => $attname,
                                         Type => $type,
                                         Default => $default,
                                         Fixed => $fixed } );
}

sub _handle_doctype {
    my $self = shift;
    my $expat = shift;
    my $name = shift;
    my $sysid = shift;
    my $pubid = shift;
    my $internal = shift;

    my @properties = (Name => $name);

    push (@properties, SystemId => $sysid)
        if (defined $sysid);
    push (@properties, PublicId => $pubid)
        if (defined $pubid);
    push (@properties, Internal => $internal)
        if (defined $internal);

    $self->{DTDHandler}->doctype_decl( { @properties } );
}

sub _handle_xmldecl {
    my $self = shift;
    my $expat = shift;
    my $version = shift;
    my $encoding = shift;
    my $standalone = shift;

    my @properties = (Version => $version);

    push (@properties, Encoding => $encoding)
        if (defined $encoding);
    push (@properties, Standalone => $standalone)
        if (defined $standalone);

    $self->{DTDHandler}->xml_decl( { @properties } );
}

###
### EntityResolver methods
###

# expat ExternEnt: ask the resolver for a replacement source.  Returns a
# byte stream or string for expat to parse, or undef when the resolver
# did not return a hash.
sub _handle_extern_ent {
    my $self = shift;
    my $expat = shift;
    my $base = shift;
    my $sysid = shift;
    my $pubid = shift;

    my @properties = (SystemId => $sysid);

    push (@properties, Base => $base)
        if (defined $base);
    push (@properties, PublicId => $pubid)
        if (defined $pubid);

    my $result = $self->{EntityResolver}->resolve_entity( { @properties } );

    if (UNIVERSAL::isa($result, 'HASH')) {
        if ($result->{ByteStream}) {
            return $result->{ByteStream};
        } elsif ($result->{String}) {
            return $result->{String};
        } elsif ($result->{SystemId}) {
            # FIXME must be able to resolve SystemIds, XML::Parser's
            # default can :-(
            die "PerlSAX: automatic opening of SystemIds from \`resolve_entity' not implemented, contact the author\n";
        } else {
            # FIXME
            die "PerlSAX: invalid source returned from \`resolve_entity'\n";
        }
    }

    return undef;
}

1;

__END__

=head1 NAME

XML::Parser::PerlSAX - Perl SAX parser using XML::Parser

=head1 SYNOPSIS

 use XML::Parser::PerlSAX;

 $parser = XML::Parser::PerlSAX->new( [OPTIONS] );
 $result = $parser->parse( [OPTIONS] );

 $result = $parser->parse($string);

=head1 DESCRIPTION

C<XML::Parser::PerlSAX> is a PerlSAX parser using the XML::Parser
module.  This man page summarizes the specific options, handlers, and
properties supported by C<XML::Parser::PerlSAX>; please refer to the
PerlSAX standard in `C<PerlSAX.pod>' for general usage information.

=head1 METHODS

=over 4

=item new

Creates a new parser object.  Default options for parsing, described
below, are passed as key-value pairs or as a single hash.  Options may
be changed directly in the parser object unless stated otherwise.
Options passed to `C<parse()>' override the default options in the
parser object for the duration of the parse.

=item parse

Parses a document.  Options, described below, are passed as key-value
pairs or as a single hash.  Options passed to `C<parse()>' override
default options in the parser object.

=item location

Returns the location as a hash:

    ColumnNumber  The column number of the parse.
    LineNumber    The line number of the parse.
    BytePosition  The current byte position of the parse.
    PublicId      A string containing the public identifier, or undef
                  if none is available.
    SystemId      A string containing the system identifier, or undef
                  if none is available.
    Base          The current value of the base for resolving relative
                  URIs.

ALPHA WARNING: The `C<SystemId>' and `C<PublicId>' properties returned
are the system and public identifiers of the document passed to
`C<parse()>', not the identifiers of the currently parsing external
entity.  The column, line, and byte positions I<are> of the current
entity being parsed.

=head1 OPTIONS

The following options are supported by C<XML::Parser::PerlSAX>:

 Handler            default handler to receive events
 DocumentHandler    handler to receive document events
 DTDHandler         handler to receive DTD events
 ErrorHandler       handler to receive error events
 EntityResolver     handler to resolve entities
 Locale             locale to provide localisation for errors
 Source             hash containing the input source for parsing
 UseAttributeOrder  set to true to provide AttributeOrder and Defaulted
                    properties in `start_element()'

If no handlers are provided then all events will be silently ignored,
except for `C<fatal_error()>' which will cause a `C<die()>' to be
called after calling `C<end_document()>'.

If a single string argument is passed to the `C<parse()>' method, it
is treated as if a `C<Source>' option was given with a `C<String>'
parameter.

The `C<Source>' hash may contain the following parameters:

 ByteStream       The raw byte stream (file handle) containing the
                  document.
 String           A string containing the document.
 SystemId         The system identifier (URI) of the document.
 PublicId         The public identifier.
 Encoding         A string describing the character encoding.

If more than one of `C<ByteStream>', `C<String>', or `C<SystemId>' is
given, then preference is given first to `C<ByteStream>', then
`C<String>', then `C<SystemId>'.

=head1 HANDLERS

The following handlers and properties are supported by
C<XML::Parser::PerlSAX>:

=head2 DocumentHandler methods

=over 4

=item start_document

Receive notification of the beginning of a document.

No properties defined.

=item end_document

Receive notification of the end of a document.

No properties defined.

=item start_element

Receive notification of the beginning of an element.

    Name          The element type name.
    Attributes    A hash containing the attributes attached to the
                  element, if any.

The `C<Attributes>' hash contains only string values.

If the `C<UseAttributeOrder>' parser option is true, the following
properties are also passed to `C<start_element>':

    AttributeOrder  An array of attribute names in the order they were
                    specified, followed by the defaulted attribute
                    names.
    Defaulted       The index number of the first defaulted attribute
                    in `AttributeOrder'.  If this index is equal to the
                    length of `AttributeOrder', there were no defaulted
                    values.

Note to C<XML::Parser> users: `C<Defaulted>' will be half the value of
C<XML::Parser::Expat>'s `C<specified_attr()>' function because only
attribute names are provided, not their values.

=item end_element

Receive notification of the end of an element.

    Name          The element type name.

=item characters

Receive notification of character data.

    Data          The characters from the XML document.

=item processing_instruction

Receive notification of a processing instruction.

    Target        The processing instruction target.
    Data          The processing instruction data, if any.

=item comment

Receive notification of a comment.

    Data          The comment data, if any.

=item start_cdata

Receive notification of the start of a CDATA section.

No properties defined.

=item end_cdata

Receive notification of the end of a CDATA section.

No properties defined.

=item entity_reference

Receive notification of an internal entity reference.  If this handler
is defined, internal entities will not be expanded and not passed to
the `C<characters()>' handler.  If this handler is not defined,
internal entities will be expanded if possible and passed to the
`C<characters()>' handler.

    Name          The entity reference name
    Value         The entity reference value

=back

=head2 DTDHandler methods

=over 4

=item notation_decl

Receive notification of a notation declaration event.

    Name          The notation name.
    PublicId      The notation's public identifier, if any.
    SystemId      The notation's system identifier, if any.
    Base          The base for resolving a relative URI, if any.

=item unparsed_entity_decl

Receive notification of an unparsed entity declaration event.

    Name          The unparsed entity's name.
    SystemId      The entity's system identifier.
    PublicId      The entity's public identifier, if any.
    Base          The base for resolving a relative URI, if any.

=item entity_decl

Receive notification of an entity declaration event.

    Name          The entity name.
    Value         The entity value, if any.
    PublicId      The entity's public identifier, if any.
    SystemId      The entity's system identifier, if any.
    Notation      The notation declared for this entity, if any.

For internal entities, the `C<Value>' parameter will contain the value
and the `C<PublicId>', `C<SystemId>', and `C<Notation>' will be
undefined.  For external entities, the `C<Value>' parameter will be
undefined, the `C<SystemId>' parameter will have the system id, the
`C<PublicId>' parameter will have the public id if it was provided (it
will be undefined otherwise), the `C<Notation>' parameter will contain
the notation name for unparsed entities.  If this is a parameter entity
declaration, then a '%' will be prefixed to the entity name.

Note that `C<entity_decl()>' and `C<unparsed_entity_decl()>' overlap.
If both methods are implemented by a handler, then this handler will
not be called for unparsed entities.

=item element_decl

Receive notification of an element declaration event.

    Name          The element type name.
    Model         The content model as a string.

=item attlist_decl

Receive notification of an attribute list declaration event.

This handler is called for each attribute in an ATTLIST declaration
found in the internal subset.  So an ATTLIST declaration that has
multiple attributes will generate multiple calls to this handler.

    ElementName    The element type name.
    AttributeName  The attribute name.
    Type           The attribute type.
    Fixed          True if this is a fixed attribute.

The `C<Default>' property is the default value, which will either be
"#REQUIRED", "#IMPLIED" or a quoted string (i.e. the returned string
will begin and end with a quote character).

=item doctype_decl

Receive notification of a DOCTYPE declaration event.

    Name          The document type name.
    SystemId      The document's system identifier.
    PublicId      The document's public identifier, if any.
    Internal      The internal subset as a string, if any.

Internal will contain all whitespace, comments, processing
instructions, and declarations seen in the internal subset.  The
declarations will be there whether or not they have been processed by
another handler (except for unparsed entities processed by the
Unparsed handler).  However, comments and processing instructions will
not appear if they've been processed by their respective handlers.

=item xml_decl

Receive notification of an XML declaration event.

    Version       The version.
    Encoding      The encoding string, if any.
    Standalone    True, false, or undefined if not declared.

=back

=head2 EntityResolver

=over 4

=item resolve_entity

Allow the handler to resolve external entities.

    Name          The notation name.
    SystemId      The notation's system identifier.
    PublicId      The notation's public identifier, if any.
    Base          The base for resolving a relative URI, if any.

`C<resolve_entity()>' should return undef to request that the parser
open a regular URI connection to the system identifier or a hash
describing the new input source.  This hash has the same properties as
the `C<Source>' parameter to `C<parse()>':

  PublicId         The public identifier of the external entity being
                   referenced, or undef if none was supplied.
  SystemId         The system identifier of the external entity being
                   referenced.
  String           String containing XML text
  ByteStream       An open file handle.
  CharacterStream  An open file handle.
  Encoding         The character encoding, if known.

=back =head1 AUTHOR Ken MacLeod, ken@bitsko.slc.ut.us =head1 SEE ALSO perl(1), PerlSAX.pod(3) Extensible Markup Language (XML) SAX 1.0: The Simple API for XML =cut libxml-perl-0.08/lib/Data/0040755000076400007640000000000007745275112013451 5ustar kenkenlibxml-perl-0.08/lib/Data/Grove/0040755000076400007640000000000007745275112014533 5ustar kenkenlibxml-perl-0.08/lib/Data/Grove/Parent.pm0100644000076400007640000002016107745275111016316 0ustar kenken# # Copyright (C) 1998,1999 Ken MacLeod # Data::Grove::Parent is free software; you can redistribute it and/or # modify it under the same terms as Perl itself. # # $Id: Parent.pm,v 1.2 1999/12/22 21:15:00 kmacleod Exp $ # ### ### WARNING ### ### ### This code has a bug in it that renders it useless. In the FETCH ### routines, the new object created should have a reference to the ### the tied object that has $self as the underlying value. As of ### this version, I don't know of a way to get to the tied object. ### # Search for places marked `VALIDATE' to see where validation hooks # may be added in the future. 
use strict;

#--------------------------------------------------------------------------
# Data::Grove::Parent
#--------------------------------------------------------------------------

package Data::Grove::Parent;

use UNIVERSAL;
use Carp;

use vars qw{ $VERSION };

# will be substituted by make-rel script
$VERSION = "0.08";

# new($raw [, $parent])
#
# Returns a hash reference blessed into the class of $raw and tied to
# this package.  The tie object holds `Raw' (the wrapped object) and,
# when given, `Parent'.  If $raw already is a Data::Grove::Parent it is
# returned unchanged.
sub new {
    my $type   = shift;
    my $raw    = shift;
    my $parent = shift;

    if (UNIVERSAL::isa($raw, 'Data::Grove::Parent')) {
        return $raw;
    }

    my @properties = ( Raw => $raw );
    if (defined $parent) {
        push @properties, Parent => $parent;
    }

    my $dummy = bless {}, ref($raw);
    tie %$dummy, $type, @properties;
    return $dummy;
}

# _unwrap($value)
#
# Private helper shared by the STORE-like methods of both tie classes.
# Returns the underlying raw hash/array when $value is one of our tie
# objects, otherwise returns $value unchanged.  It works on a copy, so
# the caller's variable is never modified.
sub _unwrap {
    my $value = shift;

    # VALIDATE
    if (UNIVERSAL::isa($value, 'Data::Grove::Parent')) {
        return $value->{Raw};
    }
    elsif (UNIVERSAL::isa($value, 'Data::Grove::ParentList')) {
        return $value->[0];
    }
    return $value;
}

# The tie object is a plain hash of the properties passed to tie().
sub TIEHASH {
    my $type = shift;

    return bless { @_ }, $type;
}

# Keys that exist on the tie object itself (`Raw', `Parent') are stored
# there; everything else is unwrapped and stored in the raw object.
sub STORE {
    my $self  = shift;
    my $key   = shift;
    my $value = shift;

    if (exists $self->{$key}) {
        $self->{$key} = $value;
    }
    else {
        $self->{Raw}{$key} = _unwrap($value);
    }
}

# Keys of the tie object win over keys of the raw object.  Array
# references fetched from the raw object are wrapped in a
# Data::Grove::ParentList whose parent is this tie object.
sub FETCH {
    my $self = shift;
    my $key  = shift;

    if (exists $self->{$key}) {
        return $self->{$key};
    }

    my $value = $self->{Raw}{$key};
    if (ref($value) eq 'ARRAY') {
        $value = Data::Grove::ParentList->new($value, $self);
    }
    return $value;
}

# Iteration yields the raw object's keys first and then the tie
# object's own keys.  FIRSTKEY only resets the state and delegates to
# NEXTKEY, so an empty raw hash still falls through to the tie object's
# keys instead of ending the iteration immediately.
sub FIRSTKEY {
    my $self = shift;

    $self->{'__each_in_raw'} = 1;
    keys %{ $self->{Raw} };     # void-context keys resets the iterator

    return $self->NEXTKEY;
}

sub NEXTKEY {
    my $self = shift;
    my $raw  = $self->{Raw};

    if ($self->{'__each_in_raw'}) {
        my ($key, $value);
        if (($key, $value) = each %$raw) {
            return $key;
        }

        # Raw keys are exhausted: drop the marker (so it is not
        # reported as a key) and start over on the tie object itself.
        delete $self->{'__each_in_raw'};
        keys %$self;
    }

    my $own_key = each %$self;
    return $own_key;
}

sub EXISTS {
    my $self = shift;
    my $key  = shift;

    return (exists $self->{Raw}{$key}) || (exists $self->{$key});
}

# Deleting `Parent' or `Raw' is refused; other keys are removed from
# whichever hash (tie object or raw object) holds them.
sub DELETE {
    my $self = shift;
    my $key  = shift;

    if (exists $self->{$key}) {
        croak "can't delete \`Parent' or \`Raw' properties\n"
            if ($key eq 'Parent' || $key eq 'Raw');
        delete $self->{$key};
    }
    else {
        delete $self->{'Raw'}{$key};
    }
}

# Empties the raw object; the tie object's own properties are kept.
sub CLEAR {
    my $self = shift;

    %{ $self->{Raw} } = ();
}

#--------------------------------------------------------------------------
# Data::Grove::ParentList
#--------------------------------------------------------------------------

package Data::Grove::ParentList;

use UNIVERSAL;

# new($raw [, $parent])
#
# Returns an array reference tied to this package.  The tie object is
# [ $raw, $parent ]: element 0 is the wrapped array, element 1 the
# parent handed to nodes fetched from the list.
sub new {
    my $type   = shift;
    my $raw    = shift;
    my $parent = shift;

    if (UNIVERSAL::isa($raw, 'Data::Grove::ParentList')) {
        return $raw;
    }

    my $dummy = [];
    tie @$dummy, $type, $raw, $parent;
    return $dummy;
}

sub TIEARRAY {
    my $type = shift;

    return bless [ @_ ], $type;
}

sub FETCHSIZE {
    scalar @{$_[0][0]};
}

sub STORESIZE {
    $#{$_[0][0]} = $_[1]-1;
}

# No-op.  perltie documents EXTEND as an informative call made before
# an array is expected to grow; defining it here keeps this class
# usable without inheriting from Tie::Array.
sub EXTEND {
    return;
}

sub STORE {
    my $self  = shift;
    my $index = shift;
    my $value = shift;

    $self->[0][$index] = Data::Grove::Parent::_unwrap($value);
}

# Defined elements come back wrapped in a Data::Grove::Parent whose
# parent is this list's parent; plain (non-reference) values are first
# put into a { Data => $value } hash.
sub FETCH {
    my $self  = shift;
    my $index = shift;

    my $value = $self->[0][$index];
    if (defined $value) {
        if (ref($value)) {
            return Data::Grove::Parent->new($value, $self->[1]);
        }
        else {
            return Data::Grove::Parent->new({ Data => $value }, $self->[1]);
        }
    }

    return $value;
}

sub CLEAR {
    @{$_[0][0]} = ();
}

sub POP {
    pop(@{$_[0][0]});
}

# PUSH, UNSHIFT and SPLICE unwrap into a fresh list rather than
# assigning through @_, whose elements alias the caller's variables.
sub PUSH {
    my $o = shift;

    my @values = map { Data::Grove::Parent::_unwrap($_) } @_;
    push(@{$o->[0]}, @values);
}

sub SHIFT {
    shift(@{$_[0][0]});
}

sub UNSHIFT {
    my $o = shift;

    my @values = map { Data::Grove::Parent::_unwrap($_) } @_;
    unshift(@{$o->[0]}, @values);
}

# Same argument defaults as the splice builtin: offset 0, a negative
# offset counts from the end, length defaults to the rest of the array.
sub SPLICE {
    my $ob  = shift;
    my $sz  = $ob->FETCHSIZE;
    my $off = @_ ? shift : 0;
    $off += $sz if $off < 0;
    my $len = @_ ? shift : $sz-$off;

    my @values = map { Data::Grove::Parent::_unwrap($_) } @_;
    return splice(@{$ob->[0]}, $off, $len, @values);
}

#--------------------------------------------------------------------------
# Data::Grove
#--------------------------------------------------------------------------

package Data::Grove;

# Follows `Parent' properties upward and returns the first object that
# has none.
sub root {
    my $self = shift;

    return $self
        if !defined $self->{Parent};

    return $self->{Parent}->root(@_);
}

# Returns the list of objects from the root down to and including this
# one.
sub rootpath {
    my $self = shift;

    if (defined $self->{Parent}) {
        return ($self->{Parent}->rootpath, $self);
    }
    else {
        return ($self);
    }
}

# Wraps this object in a Data::Grove::Parent, optionally with $parent.
sub add_magic {
    my $self   = shift;
    my $parent = shift;

    return Data::Grove::Parent->new($self, $parent);
}

1;

__END__

=head1 NAME

Data::Grove::Parent - provide parent properties to Data::Grove objects

=head1 SYNOPSIS

 use Data::Grove::Parent;

 $root = $object->root;
 $rootpath = $object->rootpath;
 $tied = $object->add_magic([ $parent ]);

 $node = Data::Grove::Parent->new($hash [, $parent]);
 $node_list = Data::Grove::ParentList->new($array [, $parent]);

=head1 DESCRIPTION

Data::Grove::Parent is an extension to Data::Grove that adds
`C<Parent>' and `C<Raw>' properties to Data::Grove objects and methods
for returning the root node of a grove, a list of nodes between and
including the root node and the current node, and a method that
creates parented nodes.

Data::Grove::Parent works by creating a Perl ``tied'' object that
contains a parent reference (`C<Parent>') and a reference to the
original Data::Grove object (`C<Raw>').  Tying-magic is used so that
every time you reference the Data::Grove::Parent object it actually
references the underlying raw object.

When you retrieve a list or a property of the Raw object,
Data::Grove::Parent automatically adds magic to the returned list or
node.
This means you only call `add_magic()' once to create the first
Data::Grove::Parent object and then use the grove objects like you
normally would.

The most obvious use of this is so you don't have to call a `C' method
when you want to release a grove or part of a grove; since Data::Grove
and Data::Grove::Parent objects have no cyclic references, Perl can
garbage collect them normally.

A secondary use is to allow you to reuse grove or property set
fragments in multiple trees.  WARNING: Data::Grove currently does not
protect you from creating your B<own> cyclic references!  This could
lead to infinite loops if you don't take care to avoid them.

=head1 METHODS

=over 4

=item $object->root()

=item $object->rootpath()

`C<root()>' returns the root node if `C<$object>' is a
`C<Data::Grove::Parent>' object.  `C<rootpath()>' returns an array of
all the nodes between and including the root node and `C<$object>'.

=item $tied = $object->add_magic([ $parent ])

`C<add_magic()>' returns a C<Data::Grove::Parent> object with
`C<$object>' as its `C<Raw>' object.  If `C<$parent>' is given, that
becomes the tied object's parent object.

=back

=head1 AUTHOR

Ken MacLeod, ken@bitsko.slc.ut.us

=head1 SEE ALSO

perl(1), Data::Grove(3)

=cut
libxml-perl-0.08/lib/Data/Grove/Visitor.pm0100644000076400007640000001300507745275111016523 0ustar kenken
#
# Copyright (C) 1998,1999 Ken MacLeod
# Data::Grove::Visitor is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# $Id: Visitor.pm,v 1.6 2000/03/20 23:06:45 kmacleod Exp $
#

use strict;

use 5.005;

package Data::Grove::Visitor;

use vars qw{ $VERSION };

# will be substituted by make-rel script
$VERSION = "0.08";

# The following methods extend Data::Grove
package Data::Grove;

# accept($visitor, @args)
#
# Calls $visitor->visit_<type_name>($self, @args), where <type_name> is
# the value of the package variable $type_name in the object's own
# class.  Returns the callback's result, or an empty list when the
# class defines no $type_name or the visitor lacks the method.
sub accept {
    my $self    = shift;
    my $visitor = shift;

    my $package = ref($self);

    # Symbolic lookup of ${package}::type_name; avoids compiling a
    # string eval (and clobbering $@) on every visited object.
    my $type_name = do {
        no strict 'refs';
        ${"${package}::type_name"};
    };
    if (!defined $type_name) {
        return ();              # no action
    }

    my $method_name = 'visit_' . $type_name;
    if ($visitor->can($method_name)) {
        return $visitor->$method_name ($self, @_);
    }
    else {
        return ();              # no action
    }
}

# accept_name($visitor, @args)
#
# For objects with a `Name' property, calls
# $visitor->visit_name_<Name>($self, @args), with non-word characters
# in the name replaced by `_'.  Falls back to accept() when the object
# has no name or the visitor has no such method.
#
# The visitor is asked with can() on every call.  Nothing is cached on
# the visited object, so the same grove can be walked by different
# visitors.
sub accept_name {
    my $self = shift;

    if (!defined $self->{Name}) {
        return $self->accept (@_);
    }

    my $visitor = shift;

    my $name = $self->{Name};
    $name =~ s/\W/_/g;
    my $name_method = "visit_name_$name";

    if ($visitor->can($name_method)) {
        return $visitor->$name_method ($self, @_);
    }

    return $self->accept($visitor, @_);
}

# attr_accept($attr, $visitor, @args)
#
# Visits the value of the named attribute.  An array value is visited
# element by element with accept(); any other value is wrapped in a
# characters object and passed to $visitor->visit_characters, if the
# visitor has that method.  Returns an empty list when the object has
# no `Attributes' property.
sub attr_accept {
    my $self    = shift;
    my $attr    = shift;
    my $visitor = shift;

    if (!defined $self->{Attributes}) {
        return ();              # no action
    }

    my $attrs = $self->{Attributes}{$attr};
    if (ref($attrs) eq 'ARRAY') {
        return $self->_children_accept ($attrs, $visitor, @_);
    }

    # Asked on every call rather than cached on the node, for the same
    # reason as in accept_name().
    if (!$visitor->can('visit_characters')) {
        return ();              # no action
    }

    # FIXME should be some other generic than XML::Grove::Characters
    return $visitor->visit_characters (XML::Grove::Characters->new(Data => $attrs), @_);
}

# children_accept($visitor, @args)
#
# Calls accept() on every item of the `Contents' property and returns
# the concatenated results.
sub children_accept {
    my $self = shift;

    if (defined $self->{Contents}) {
        return $self->_children_accept ($self->{Contents}, @_);
    }
    else {
        return ();              # no action
    }
}

# children_accept_name($visitor, @args)
#
# Like children_accept(), but calls accept_name() on every item.
sub children_accept_name {
    my $self = shift;

    if (defined $self->{Contents}) {
        return $self->_children_accept_name ($self->{Contents}, @_);
    }
    else {
        return ();              # no action
    }
}

# Helper: accept() on each element of $array, results concatenated.
# The upper bound is re-read on every pass, so elements appended by a
# callback are visited too.
sub _children_accept {
    my $self    = shift;
    my $array   = shift;
    my $visitor = shift;

    my @return;
    my $ii;
    for ($ii = 0; $ii <= $#$array; $ii ++) {
        push @return, $array->[$ii]->accept ($visitor, @_);
    }

    return @return;
}

# Helper: accept_name() on each element of $array, results
# concatenated.  Same iteration behaviour as _children_accept().
sub _children_accept_name {
    my $self    = shift;
    my $array   = shift;
    my $visitor = shift;

    my @return;
    my $ii;
    for ($ii = 0; $ii <= $#$array; $ii ++) {
        push @return, $array->[$ii]->accept_name ($visitor, @_);
    }

    return @return;
}

1;
__END__ =head1 NAME Data::Grove::Visitor - add visitor/callback methods to Data::Grove objects =head1 SYNOPSIS use Data::Grove::Visitor; @results = $object->accept ($visitor, ...); @results = $object->accept_name ($visitor, ...); @results = $object->children_accept ($visitor, ...); @results = $object->children_accept_name ($visitor, ...); =head1 DESCRIPTION Data::Grove::Visitor adds visitor methods (callbacks) to Data::Grove objects. A ``visitor'' is a class (a package) you write that has methods (subs) corresponding to the objects in the classes being visited. You use the visitor methods by creating an instance of your visitor class, and then calling `C' on the top-most object you want to visit, that object will in turn call your visitor back with `C>', where I is the type of object. There are several forms of `C'. Simply calling `C' calls your package back using the object type of the object you are visiting. Calling `C' on an element object calls you back with `C>' where I is the tag name of the element, on all other objects it's as if you called `C'. All of the forms of `C' return a concatenated list of the result of all `C' methods. `C' calls `C' on each of the children of the element. This is generally used in element callbacks to recurse down into the element's children, you don't need to get the element's contents and call `C' on each item. `C' does the same but calling `C' on each of the children. `C' calls `C' on each of the objects in the named attribute. Refer to the documentation of the classes you are visiting (XML::Grove, etc.) for the type names (`C', `C', etc.) of the objects it implements. =head1 RESERVED NAMES The hash keys `C' and `C' are used to indicate objects with children (for `C') and named objects (for `C'). =head1 NOTES These are random ideas that haven't been implemented yet: =over 4 =item * Several objects fall into subclasses, or you may want to be able to subclass a visited object and still be able to tell the difference. 
In SGML::Grove I had used the package name in the callback (`C')
instead of a generic name (`C').  The idea here would be to try
calling `C>' with the most specific class first, then try
superclasses, and lastly to try the generic.

=back

=head1 AUTHOR

Ken MacLeod, ken@bitsko.slc.ut.us

=head1 SEE ALSO

perl(1), Data::Grove

Extensible Markup Language (XML)

=cut
libxml-perl-0.08/lib/Data/Grove.pm0100644000076400007640000000666307745275111015100 0ustar kenken
#
# Copyright (C) 1999 Ken MacLeod
# Data::Grove is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# $Id: Grove.pm,v 1.6 1999/12/22 21:15:00 kmacleod Exp $
#

###
### For a similar package, see also:
###
###    Graph::Element -- elements for a directed graph
###      Neil Bowers (NIELB)
###

package Data::Grove;

use vars qw{ $VERSION };

# will be substituted by make-rel script
$VERSION = "0.08";

# new( PROPERTIES )
#
# Builds a new object blessed into the invoking class.  A single
# argument is treated as a hash reference (or existing object) and is
# shallow-copied; any other argument list is taken as key/value pairs.
# If the resulting hash has a defined `Raw' entry, the object is
# instead a shallow copy of that raw hash.
sub new {
    my ($type, @properties) = @_;

    my $self;
    if (@properties == 1) {
        $self = { %{ $properties[0] } };
    }
    else {
        $self = { @properties };
    }

    if (defined $self->{Raw}) {
        # clone the raw object
        my $raw = $self->{Raw};
        $self = { %$raw };
    }

    return bless $self, $type;
}

# Generic character-data node; its $type_name is `characters'.
package Data::Grove::Characters;

use vars qw{ @ISA $type_name };

@ISA = qw{Data::Grove};
$type_name = 'characters';

1;

__END__

=head1 NAME

Data::Grove -- support for deeply nested structures

=head1 SYNOPSIS

 use Data::Grove;

 $object = MyPackage->new;

 package MyPackage;
 @ISA = qw{Data::Grove};

=head1 DESCRIPTION

C<Data::Grove> provides support for deeply nested tree or graph
structures.  C<Data::Grove> is intended primarily for Perl module
authors writing modules with many types or classes of objects that
need to be manipulated and extended in a consistent and flexible way.

C<Data::Grove> is best used by creating a core set of ``data'' classes
and then incrementally adding functionality to the core data classes
by using ``extension'' modules.  One reason for this design is so that
the data classes can be swapped out and the extension modules can work
with new data sources.
For example, these other data sources could be disk-based, network-based or built on top of a relational database. Two extension modules that come with C are C and C. C adds a `C' property to grove objects and implements a `C' method to grove objects to return the root node of the tree from anywhere in the tree and a `C' method to return a list of nodes between the root node and ``this'' node. C adds callback methods `C' and `C' that call your handler or receiver module back by object type name or the object's name. C objects do not contain parent references, Perl garbage collection will delete them when no longer referenced and sub-structures can be shared among several structures. C is used to create temporary objects with parent pointers. Properties of data classes are accessed directly using Perl's hash functions (i.e. `C<$object-E{Property}>'). Extension modules may also define properties that they support or use, for example Data::Grove::Parent adds `C' and `C' properties and Visitor depends on `C' and `C' properties. See the module C for an example implementation of C. =head1 METHODS =over 4 =item new( PROPERTIES ) Return a new object blessed into the SubClass, with the given properties. PROPERTIES may either be a list of key/value pairs, a single hash containing key/value pairs, or an existing C object. If an existing C is passed to `C', a shallow copy of that object will be returned. A shallow copy means that you are returned a new object, but all of the objects underneath still refer to the original objects. 
=back =head1 AUTHOR Ken MacLeod, ken@bitsko.slc.ut.us =head1 SEE ALSO perl(1) =cut libxml-perl-0.08/doc/0040755000076400007640000000000007745275112012577 5ustar kenkenlibxml-perl-0.08/doc/CreatingPatActModules.pod0100644000076400007640000000613106754116317017463 0ustar kenken=head1 Creating PatAct Modules This document is targeted towards the module writer creating a new pattern or action module or readers who want to understand what is going on inside a pattern or action module. If you are only interesting in using PatAct modules, please see ``Using PatAct Modules.'' There are two types of modules involved in processing a pattern-action list the pattern module and the action module. Pattern modules are created by users and passed to the `new()' method of action modules, otherwise all pattern module methods are used only by the action module. Action modules are PerlSAX handlers (see PerlSAX.pod in libxml-perl). Action modules are responsible for initializing the pattern module, receiving PerlSAX events, calling the `match()' method in the pattern module for each element, and applying actions for matching elements. The interface the user uses to call the drivers is described in ``Using PatAct Modules''. In general, the pattern-action modules perform their work on an element-by-element basis, but the action modules are called with PerlSAX events for all parse events (characters, processing instructions, etc.). =head1 Pattern Modules Pattern modules have this interface, where PATTERN is the pattern or query implementation: use XML::PatAct::PATTERN; $matcher = XML::PatAct::PATTERN->new(Patterns => $patterns [, OPTIONS]); $matcher->initialize($actor); $index = $matcher->match($element, $names, $nodes); $matcher->finalize(); A pattern module instance is created with the pattern list that will be used or processing as well as any additional options a pattern module may define. 
`$patterns' is the original array reference passed in by the user to the action module, so it is made up of pairs of PATTERN => ACTION. The pattern matcher should ignore the ACTION items. `initialize()' is called before any calls to `match()'. `$actor' is the action module that is calling the pattern module. `initialize()' is normally called from the `start_document()' PerlSAX event. `match()' performs a single matching against the pattern list and returns the index of the matching pattern or undef if no pattern matches. `$element' is the element to match. `$names' and `$nodes' are array references containing the names and nodes (hashes) of this element and all parent elements up to the element where processing started. `finalize()' is called at the end of processing and may be used to release state information. `finalize()' is normally called from the `end_document()' PerlSAX event. Here is a template for creating a pattern module: @include ../lib/XML/PatAct/PatternTempl.pm =head1 Action Modules Action modules are PerlSAX handlers (see PerlSAX.pod in libxml-perl). Action modules are responsible for initializing the pattern module, receiving PerlSAX events, calling the `match()' method in the pattern module for each element, and applying actions for matching elements. Action modules must also maintain arrays of element names and element nodes to be passed to the `match()' method. Here is a template for creating an action module: @include ../lib/XML/PatAct/ActionTempl.pm libxml-perl-0.08/doc/interface-style.pod0100644000076400007640000000517106720064155016375 0ustar kenken=head1 Priorities Larry Wall suggests, ``In the absence of other considerations, I'd encourage you to provide the cleanest interface from the user's standpoint, and let the implementer worry about the details.'' =head1 Naming B All method names use lower-case, `C<_>' seperated names. B All method names match their Java counterparts. 
All options, parameters, and property names use mixed case names, with an initial upper case character. This eliminates a certain amount of potential confusion with reserved words, which, for the most part, are lower case. The following words are abbreviated in method names and parameters: Declaration decl Decl Reference ref Ref Identifier id Id =head1 Object Instantiation and Options For creating new parser or handler objects, the `new' methods accept a list of key-value pairs (C<=E>) or a hash containing the options. The key names are derived from the SAX positional parameter names (`C' and `C' in Parser's `C') or the name of option setting methods (`C', `C', `C', `C', and `C' in Parser). Callers may get and set options directly in the object, for example: $parser = SAX::Parser->new( Source => { ByteStream => $fh }, DocumentHandler => $doc_handler ); $parser->{Locale} = 'el_GR.ISO-8859-7'; There are no set/get methods in the Perl SAX API. =head1 Handler Calls Handler calls all take hashes instead of positional parameters. Key names are derived from SAX positional parameter names. This allows parsers and filters to provide additional parameters if they can or the user requests it. =head1 Extending Handler Interfaces Developers of event-generators can extend the handler interface as they need to. Event-generators that use an extended interface should accept generator options or use `C' to test whether a handler can support their extended interface. For example, a C that wants to receive internal entity events instead of having them resolved and passed in to the `C' method would define a `C' method and/or set a parser option to pass or not pass internal entity events. =head1 Helper Classes Perl SAX avoids helper classes (like SAXException and InputSource) where those classes only hold information and have no behavior. In those cases, simple hashes are used instead. B if these should be implemented anyway for easier portability. 
=head1 Contributors Eduard (Enno) Derksen Ken MacLeod Eric Prud'hommeaux Larry Wall libxml-perl-0.08/doc/modules.xml0100644000076400007640000001571007053054656014772 0ustar kenken Apache-MimeXML Apache-MimeXML XML CGI-Formalware CGI::Formalware XML XML::Parser CGI-XML CGI::XML XML::Parser XML Conversion CGI-XMLForm CGI::XMLForm XML::Parser XML Conversion DBIx-XML_RDB DBIx::XML_RDB XML Conversion libxml-perl Data::Grove XML Data::Grove::Visitor Data::Grove XML::ESISParser PerlSAX XML::Handler::CanonXMLWriter PerlSAX XML Conversion XML::Handler::Sample PerlSAX XML Conversion XML::Handler::Subs PerlSAX XML Conversion XML::Handler::XMLWriter PerlSAX XML Conversion XML::Parser::PerlSAX PerlSAX XML::Parser XML::PatAct::ActionTempl PatAct PatAct Action XML::PatAct::Amsterdam PerlSAX XML Conversion PatAct PatAct Action XML::PatAct::MatchName PatAct PatAct Pattern XML::PatAct::PatternTempl PatAct PatAct Pattern XML::PatAct::ToObjects PatAct PatAct Action XML Conversion XML::Perl2SAX PerlSAX XML::SAX2Perl PerlSAX Frontier-RPC Frontier::RPC2 XML::Parser XML Conversion Frontier::Client Frontier::Daemon XML-DOM XML::DOM XML::Parser XML Objects XML Query XML Conversion XML::DOM::UTF8 XML::DOM XML-Dumper XML::Dumper XML Conversion XML-Edifact XML::Edifact XML Conversion XML-Encoding XML::Encoding XML::Parser XML-Generator XML::Generator XML Writer XML-Grove XML::Grove XML Objects Data::Grove XML::Grove::AsCanonXML XML::Grove XML Writer XML::Grove::AsString XML::Grove XML Conversion XML::Grove::Builder XML::Grove PerlSAX XML::Grove::IDs XML::Grove XML Query XML::Grove::Path XML::Grove XML Query XML::Grove::PerlSAX XML::Grove PerlSAX XML::Grove::Sub XML::Grove XML::Grove::Subst XML::Grove XML-Parser XML::Parser XML XML::ParserDebug XML::Parser XML Conversion XML::Parser::Objects XML::Parser XML Objects XML::Parser::Stream XML::Parser XML Conversion XML::Parser::Subs XML::Parser XML::Parser::Tree XML::Parser XML Objects XML-QL XML::QL XML::Parser XML Query XML-Registry XML::Registry 
XML XML-Writer XML::Writer XML Writer XML-XQL XML::XQL XML::DOM XML Query libxml-perl-0.08/doc/sax-2.0-adv.html0100644000076400007640000010671407373017104015323 0ustar kenken Advanced Features of the Perl SAX 2.0 Binding

Advanced SAX

The classes, methods, and features described below are not commonly used in most applications and can be ignored by most users. If however you find that you are not getting the granularity you expect from Basic SAX, this would be the place to look for more. Advanced SAX isn't advanced in the sense that it is harder, or requires better programming skills. It is simply more complete, and has been separated to keep Basic SAX simple in terms of the number of events one would have to deal with.

SAX Parsers

SAX supports several classes of event handlers: content handlers, declaration handlers, DTD handlers, error handlers, entity resolvers, and other extensions. For each class of events, a separate handler can be used to handle those events. If a handler is not defined for a class of events, then the default handler, Handler, is used. Each of these handlers is described in the sections below. Applications may change an event handler in the middle of the parse and the SAX parser will begin using the new handler immediately.

SAX's basic interface defines methods for parsing system identifiers (URIs), open files, and strings. Behind the scenes, though, SAX uses a Source hash that contains that information, plus encoding, system and public identifiers if available. These are described below under the Source option.

SAX parsers accept all features as options to the parse() methods and on the parser's constructor. Features are described in the next section.

parse(options)
Parses the XML instance identified by the Source option. options can be a list of option, value pairs or a hash. parse() returns the result of calling the end_document() handler.

ContentHandler
Object to receive document content events. The ContentHandler, with additional events defined below, is the class of events described in Basic SAX Handler. If the application does not register a content handler or content event handlers on the default handler, content events reported by the SAX parser will be silently ignored.

DTDHandler
Object to receive basic DTD events. If the application does not register a DTD handler or DTD event handlers on the default handler, DTD events reported by the SAX parser will be silently ignored.

EntityResolver
Object to resolve external entities. If the application does not register an entity resolver or entity events on the default handler, the SAX parser will perform its own default resolution.

ErrorHandler
Object to receive error-message events. If the application does not register an error handler or error event handlers on the default handler, all error events reported by the SAX parser will be silently ignored; however, normal processing may not continue. It is highly recommended that all SAX applications implement an error handler to avoid unexpected bugs.

Source
A hash containing information about the XML instance to be parsed. See Input Sources below. Note that Source cannot be changed during the parse.

Features
A hash containing Feature information, as described below. Features can be set at runtime, but not reliably by writing directly to the Features hash: doing so gives the parser no chance to inspect what you have set, so it cannot react properly to errors or to Features that it does not support. You should use the set_feature() method instead.

Features

Features are as defined in SAX2: Features and Properties, but not of course limited to those. You may add your own Features. Also, Java has an artificial distinction between Features and Properties which is unnecessary. In Perl, both have been merged under the same name.

Features can be passed as options when creating a parser or calling a parse() method. They may also be set using the set_feature() method.

    $parser = AnySAXParser->new(
                                Features => {
                                             'http://xml.org/sax/features/namespaces' => 0,
                                             },
                                );
    $parser->parse(
                   Features => {
                               'http://xml.org/sax/features/namespaces' => 0,
                               },
                   );
    $parser->set_feature('http://xml.org/sax/properties/xml-string', 1);
    $string = $parser->get_feature('http://xml.org/sax/properties/xml-string');

When performing namespace processing, Perl SAX parsers always provide both the raw tag name in Name and the namespace names in NamespaceURI, LocalName, and Prefix. Therefore, the "http://xml.org/sax/features/namespace-prefixes" Feature is ignored.

Also, Features are things that are supposed to be turned on, and thus should normally be off by default, especially if the parser doesn't support turning them off. Due to backwards compatibility problems, the one exception to this rule is the "http://xml.org/sax/features/namespaces" Feature which is on by default and which a number of parsers may not be able to turn off. Thus, a parser claiming to support this Feature (and all SAX2 parsers must support it) may in fact only support turning it on. This is only a minor problem as turning it off basically amounts to returning to SAX1, which can be accomplished by a filter (eg XML::Filter::SAX2toSAX1).

In addition to the Features described in the SAX spec itself, a number of new ones may be defined for Perl. An example of this would be http://xmlns.perl.org/sax/node-factory which when supported by the parser would be settable to a NodeFactory object that would be in charge of creating SAX nodes different from those that are normally received by event handlers. See http://xmlns.perl.org/ (currently in alpha state) for details on how to register Features.

The following methods are used to get and set features:

get_feature(name)
Look up the value of a feature.

The feature name is any fully-qualified URI. It is possible for a SAX parser to recognize a feature name but to be unable to return its value; this is especially true in the case of an adapter for a SAX1 Parser, which has no way of knowing whether the underlying parser is validating, for example.

Some feature values may be available only in specific contexts, such as before, during, or after a parse.

get_feature() returns the value of the feature, which is usually either a boolean or an object, and will throw XML::SAX::Exception::NotRecognized when the SAX parser does not recognize the feature name and XML::SAX::Exception::NotSupported when the SAX parser recognizes the feature name but cannot determine its value at this time.

set_feature(name, value)
Set the state of a feature.

The feature name is any fully-qualified URI. It is possible for a SAX parser to recognize a feature name but to be unable to set its value; this is especially true in the case of an adapter for a SAX1 Parser, which has no way of affecting whether the underlying parser is validating, for example.

Some feature values may be immutable or mutable only in specific contexts, such as before, during, or after a parse.

set_feature() will throw XML::SAX::Exception::NotRecognized when the SAX parser does not recognize the feature name and XML::SAX::Exception::NotSupported when the SAX parser recognizes the feature name but cannot set the requested value.

This method is also the standard mechanism for setting extended handlers, such as "http://xml.org/sax/handlers/DeclHandler".

get_features()
Look up all Features that this parser claims to support.

This method returns a hash of Features which the parser claims to support. The value of the hash is currently unspecified though it may be used later. This method is meant to be inherited so that Features supported by the base parser class (XML::SAX::Base) are declared to be supported by subclasses.

Calling this method is probably only moderately useful to end users. It is mostly meant for use by XML::SAX, so that it can query parsers for Feature support and return an appropriate parser depending on the Features that are required.

Input Sources

Input sources may be provided to parser objects or are returned by entity resolvers. An input source is a hash with these properties:

PublicId
The public identifier of this input source.

The public identifier is always optional: if the application writer includes one, it will be provided as part of the location information.

SystemId
The system identifier (URI) of this input source.

The system identifier is optional if there is a byte stream or a character stream, but it is still useful to provide one, since the application can use it to resolve relative URIs and can include it in error messages and warnings (the parser will attempt to open a connection to the URI only if there is no byte stream or character stream specified).

If the application knows the character encoding of the object pointed to by the system identifier, it can register the encoding using the Encoding property.
ByteStream
The byte stream for this input source.

The SAX parser will ignore this if there is also a character stream specified, but it will use a byte stream in preference to opening a URI connection itself.

If the application knows the character encoding of the byte stream, it should set the Encoding property.
CharacterStream
The character stream for this input source.

If there is a character stream specified, the SAX parser will ignore any byte stream and will not attempt to open a URI connection to the system identifier.

Note: A CharacterStream is a filehandle that does not need any encoding translation done on it. This is implemented as a regular filehandle and only works under Perl 5.7.2 or higher using PerlIO. To get a single character, or number of characters from it, use the perl core read() function. To get a single byte from it (or number of bytes), you can use sysread(). The encoding of the stream should be in the Encoding entry for the Source.

Encoding
The character encoding, if known.

The encoding must be a string acceptable for an XML encoding declaration (see section 4.3.3 of the XML 1.0 recommendation).

This property has no effect when the application provides a character stream.

SAX Handlers

SAX supports several classes of event handlers: content handlers, declaration handlers, DTD handlers, error handlers, entity resolvers, and other extensions. This section defines each of these classes of events.

Content Events

This is the main interface that most SAX applications implement: if the application needs to be informed of basic parsing events, it implements this interface and registers an instance with the SAX parser using the ContentHandler property. The parser uses the instance to report basic document-related events like the start and end of elements and character data.

The order of events in this interface is very important, and mirrors the order of information in the document itself. For example, all of an element's content (character data, processing instructions, and/or subelements) will appear, in order, between the start_element event and the corresponding end_element event.

set_document_locator(locator)
Receive an object for locating the origin of SAX document events.

SAX parsers are strongly encouraged (though not absolutely required) to supply a locator: if a parser does so, it must supply the locator to the application by invoking this method before invoking any of the other methods in the ContentHandler interface.

The locator allows the application to determine the end position of any document-related event, even if the parser is not reporting an error. Typically, the application will use this information for reporting its own errors (such as character content that does not match an application's business rules). The information provided by the locator is probably not sufficient for use with a search engine.

Note that the locator will provide correct information only during the invocation of the events in this interface. The application should not attempt to use it at any other time.

The locator is a hash with these properties:

ColumnNumber The column number of the end of the text where the exception occurred.
LineNumber The line number of the end of the text where the exception occurred.
PublicId The public identifier of the entity where the exception occurred.
SystemId The system identifier of the entity where the exception occurred.

start_prefix_mapping(mapping)
Begin the scope of a prefix-URI Namespace mapping.

The information from this event is not necessary for normal Namespace processing: the SAX XML reader will automatically replace prefixes for element and attribute names when the "http://xml.org/sax/features/namespaces" feature is true (the default).

There are cases, however, when applications need to use prefixes in character data or in attribute values, where they cannot safely be expanded automatically; the start/end_prefix_mapping event supplies the information to the application to expand prefixes in those contexts itself, if necessary.

Note that start/end_prefix_mapping() events are not guaranteed to be properly nested relative to each other: all start_prefix_mapping() events will occur before the corresponding start_element() event, and all end_prefix_mapping() events will occur after the corresponding end_element() event, but their order is not guaranteed.

mapping is a hash with these properties:

Prefix The Namespace prefix being declared.
NamespaceURI The Namespace URI the prefix is mapped to.

end_prefix_mapping(mapping)
End the scope of a prefix-URI mapping.

See start_prefix_mapping() for details. This event will always occur after the corresponding end_element event, but the order of end_prefix_mapping events is not otherwise guaranteed.

mapping is a hash with this property:

Prefix The Namespace prefix that was being mapped.

processing_instruction(pi)
Receive notification of a processing instruction.

The Parser will invoke this method once for each processing instruction found: note that processing instructions may occur before or after the main document element.

A SAX parser should never report an XML declaration (XML 1.0, section 2.8) or a text declaration (XML 1.0, section 4.3.1) using this method.

pi is a hash with these properties:

Target The processing instruction target.
Data The processing instruction data, or null if none was supplied.

skipped_entity(entity)
Receive notification of a skipped entity.

The Parser will invoke this method once for each entity skipped. Non-validating processors may skip entities if they have not seen the declarations (because, for example, the entity was declared in an external DTD subset). All processors may skip external entities, depending on the values of the "http://xml.org/sax/features/external-general-entities" and the "http://xml.org/sax/features/external-parameter-entities" Features.

entity is a hash with these properties:

Name The name of the skipped entity. If it is a parameter entity, the name will begin with '%'.

Declaration Events

This is an optional extension handler for SAX2 to provide information about DTD declarations in an XML document. XML readers are not required to support this handler.

Note that data-related DTD declarations (unparsed entities and notations) are already reported through the DTDHandler interface.

If you are using the declaration handler together with a lexical handler, all of the events will occur between the start_dtd and the end_dtd events.

To set a separate DeclHandler for an XML reader, set the "http://xml.org/sax/handlers/DeclHandler" Feature with the object to receive declaration events. If the reader does not support declaration events, it will throw an XML::SAX::Exception::NotRecognized or an XML::SAX::Exception::NotSupported when you attempt to register the handler. Declaration event handlers on the default handler are automatically recognized and used.

element_decl(element)
Report an element type declaration.

The content model will consist of the string "EMPTY", the string "ANY", or a parenthesised group, optionally followed by an occurrence indicator. The model will be normalized so that all whitespace is removed, and will include the enclosing parentheses.

element is a hash with these properties:

Name The element type name.
Model The content model as a normalized string.

attribute_decl(attribute)
Report an attribute type declaration.

Only the effective (first) declaration for an attribute will be reported. The type will be one of the strings "CDATA", "ID", "IDREF", "IDREFS", "NMTOKEN", "NMTOKENS", "ENTITY", "ENTITIES", or "NOTATION", or a parenthesized token group with the separator "|" and all whitespace removed.

attribute is a hash with these properties:

eName The name of the associated element.
aName The name of the attribute.
Type A string representing the attribute type.
ValueDefault A string representing the attribute default ("#IMPLIED", "#REQUIRED", or "#FIXED") or undef if none of these applies.
Value A string representing the attribute's default value, or null if there is none.

internal_entity_decl(entity)
Report an internal entity declaration.

Only the effective (first) declaration for each entity will be reported.

entity is a hash with these properties:

Name The name of the entity. If it is a parameter entity, the name will begin with '%'.
Value The replacement text of the entity.

external_entity_decl(entity)
Report a parsed external entity declaration.

Only the effective (first) declaration for each entity will be reported.

entity is a hash with these properties:

Name The name of the entity. If it is a parameter entity, the name will begin with '%'.
PublicId The public identifier of the entity, or undef if none was declared.
SystemId The system identifier of the entity.

DTD Events

If a SAX application needs information about notations and unparsed entities, then the application implements this interface. The parser uses the instance to report notation and unparsed entity declarations to the application.

The SAX parser may report these events in any order, regardless of the order in which the notations and unparsed entities were declared; however, all DTD events must be reported after the document handler's start_document() event, and before the first start_element() event.

It is up to the application to store the information for future use (perhaps in a hash table or object tree). If the application encounters attributes of type "NOTATION", "ENTITY", or "ENTITIES", it can use the information that it obtained through this interface to find the entity and/or notation corresponding with the attribute value.

notation_decl(notation)
Receive notification of a notation declaration event.

It is up to the application to record the notation for later reference, if necessary.

If a system identifier is present, and it is a URL, the SAX parser must resolve it fully before passing it to the application.

notation is a hash with these properties:

Name The notation name.
PublicId The public identifier of the notation, or undef if none was declared.
SystemId The system identifier of the notation, or undef if none was declared.

unparsed_entity_decl(entity)
Receive notification of an unparsed entity declaration event.

Note that the notation name corresponds to a notation reported by the notation_decl() event. It is up to the application to record the entity for later reference, if necessary.

If the system identifier is a URL, the parser must resolve it fully before passing it to the application.

entity is a hash with these properties:

Name The unparsed entity's name.
PublicId The public identifier of the entity, or undef if none was declared.
SystemId The system identifier of the entity.
Notation The name of the associated notation.

Entity Resolver

If a SAX application needs to implement customized handling for external entities, it must implement this interface.

The parser will then allow the application to intercept any external entities (including the external DTD subset and external parameter entities, if any) before including them.

Many SAX applications will not need to implement this interface, but it will be especially useful for applications that build XML documents from databases or other specialised input sources, or for applications that use URI types that are either not URLs, or that have schemes unknown to the parser.

resolve_entity(entity)
Allow the application to resolve external entities.

The Parser will call this method before opening any external entity except the top-level document entity (including the external DTD subset, external entities referenced within the DTD, and external entities referenced within the document element): the application may request that the parser resolve the entity itself, that it use an alternative URI, or that it use an entirely different input source.

Application writers can use this method to redirect external system identifiers to secure and/or local URIs, to look up public identifiers in a catalogue, or to read an entity from a database or other input source (including, for example, a dialog box).

If the system identifier is a URL, the SAX parser must resolve it fully before reporting it to the application.

entity is a hash with these properties:

PublicId The public identifier of the entity being referenced, or undef if none was declared.
SystemId The system identifier of the entity being referenced.

Error Events

If a SAX application needs to implement customized error handling, it must implement this interface. The parser will then report all errors and warnings through this interface.

The parser shall use this interface to report errors instead of or in addition to throwing an exception: for errors and warnings the recommended approach is to let the application throw its own exceptions and to not throw them in the parser. For fatal errors, however, it is not uncommon that the parser will throw an exception after having reported the error, as it renders any continuation of parsing impossible.

All error handlers receive a hash, exception, with the properties defined in Exceptions.

warning(exception)
Receive notification of a warning.

SAX parsers will use this method to report conditions that are not errors or fatal errors as defined by the XML 1.0 recommendation. The default behaviour is to take no action.

The SAX parser must continue to provide normal parsing events after invoking this method: it should still be possible for the application to process the document through to the end.

error(exception)
Receive notification of a recoverable error.

This corresponds to the definition of "error" in section 1.2 of the W3C XML 1.0 Recommendation. For example, a validating parser would use this callback to report the violation of a validity constraint. The default behaviour is to take no action.

The SAX parser must continue to provide normal parsing events after invoking this method: it should still be possible for the application to process the document through to the end. If the application cannot do so, then the parser should report a fatal error even if the XML 1.0 recommendation does not require it to do so.

fatal_error(exception)
Receive notification of a non-recoverable error.

This corresponds to the definition of "fatal error" in section 1.2 of the W3C XML 1.0 Recommendation. For example, a parser would use this callback to report the violation of a well-formedness constraint.

The application must assume that the document is unusable after the parser has invoked this method, and should continue (if at all) only for the sake of collecting additional error messages: in fact, SAX parsers are free to stop reporting any other events once this method has been invoked.

Lexical Events

This is an optional extension handler for SAX2 to provide lexical information about an XML document, such as comments and CDATA section boundaries; XML readers are not required to support this handler.

The events in the lexical handler apply to the entire document, not just to the document element, and all lexical handler events must appear between the content handler's start_document() and end_document() events.

To set the LexicalHandler for an XML reader, set the Feature "http://xml.org/sax/handlers/LexicalHandler" on the parser to the object to receive lexical events. If the reader does not support lexical events, it will throw an XML::SAX::Exception::NotRecognized or an XML::SAX::Exception::NotSupported when you attempt to register the handler.

start_dtd(dtd)
Report the start of DTD declarations, if any.

Any declarations are assumed to be in the internal subset unless otherwise indicated by a start_entity event.

Note that the start/end_dtd() events will appear within the start/end_document() events from Content Handler and before the first start_element() event.

dtd is a hash with these properties:

Name The document type name.
PublicId The declared public identifier for the external DTD subset, or undef if none was declared.
SystemId The declared system identifier for the external DTD subset, or undef if none was declared.

end_dtd(dtd)
Report the end of DTD declarations.

No properties are defined for this event (dtd is empty).

start_entity(entity)
Report the beginning of an entity in content.

NOTE: entity references in attribute values -- and the start and end of the document entity -- are never reported.

The start and end of the external DTD subset are reported using the pseudo-name "[dtd]". All other events must be properly nested within start/end entity events.

Note that skipped entities will be reported through the skipped_entity() event, which is part of the ContentHandler interface.

entity is a hash with these properties:

Name The name of the entity. If it is a parameter entity, the name will begin with '%'.

end_entity(entity)
Report the end of an entity.

entity is a hash with these properties:

Name The name of the entity that is ending.

start_cdata(cdata)
Report the start of a CDATA section.

The contents of the CDATA section will be reported through the regular characters event.

No properties are defined for this event (cdata is empty).

end_cdata(cdata)
Report the end of a CDATA section.

No properties are defined for this event (cdata is empty).

comment(comment)
Report an XML comment anywhere in the document.

This callback will be used for comments inside or outside the document element, including comments in the external DTD subset (if read).

comment is a hash with these properties:

Data The comment characters.

SAX Filters

An XML filter is like an XML event generator, except that it obtains its events from another XML event generator rather than a primary source like an XML document or database. Filters can modify a stream of events as they pass on to the final application.

Parent
The parent reader.

This Feature allows the application to link the filter to a parent event generator (which may be another filter).

See the XML::SAX::Base module for more on filters. It is meant to be used as a base class for filters and drivers, and makes them much easier to implement.

Java Compatibility

The Perl SAX 2.0 binding differs from the Java binding in these ways:
  • Takes parameters to new(), to parse(), and to be set directly in the object, instead of requiring set/get calls (see below).
  • Allows a default Handler parameter to be used for all handlers.
  • No base classes are enforced. Instead, parsers dynamically check the handlers for what methods they support. Note however that using XML::SAX::Base as your base class for Drivers and Filters will make your code a lot simpler, less error prone, and probably much more correct with regard to this spec. Only reimplement that functionality if you really need to.
  • The Attribute, InputSource, and SAXException (XML::SAX::Exception) classes are only described as hashes (see below).
  • Handlers are passed a hash (Node) containing properties as an argument instead of positional arguments.
  • parse() methods return the value returned by calling the end_document() handler.
  • Method names have been converted to lower-case with underscores. Parameters are all mixed case with initial upper-case.

If compatibility is a problem for you, consider writing a Filter that converts from this style to the one you want. It is likely that such a Filter will be available from CPAN in the not-too-distant future.

libxml-perl-0.08/doc/UsingPatActModules.pod0100644000076400007640000000631506754116317017020 0ustar kenken=head1 Using PatAct Modules This document is targeted towards people who want to write scripts or modules that use pattern and action modules. If you want to create a new pattern or action module, please see ``Creating PatAct Modules.'' You would want to use pattern/action modules if you want to apply a complex set of patterns or queries against an XML instance and perform actions associated with those patterns or queries. To be able to use pattern/action modules you will need a pattern-matching module that supports the format of the pattern or query language you can use and an action module that will perform the types of actions you need to perform. Available pattern-matching modules are: XML::PatAct:: ::MatchName Simple element name, element hierarchy matching Available action modules are: XML::PatAct:: ::ToObjects Convert XML instances into Perl objects ::Amsterdam Simplistic style-sheet using before/after strings Using pattern/action modules involves loading the modules, creating a pattern/action list, creating instances of the pattern and matching modules, and then starting a parse using the matching module as a handler: use XML::Parser::PerlSAX; use XML::PatAct::MatchName; use XML::PatAct::ToObjects; my $patterns = [ 'schema' => [ qw{ -holder } ], 'table' => [ qw{ -make Schema::Table } ], 'name' => [ qw{ -field Name -as-string } ], ]; my $matcher = XML::PatAct::MatchName->new( Patterns => $patterns ); my $handler = XML::PatAct::ToObjects->new( Patterns => $patterns, Matcher => $matcher); my $parser = XML::Parser::PerlSAX->new( Handler => $handler ); my $schema = $parser->parse(Source => { SystemId => $ARGV[0] } ); The example above use the MatchName and ToObjects pattern and action modules. The pattern list contains pairs of patterns and actions in the format specified by MatchName and ToObjects, other modules will use other formats. 
The patterns that MatchName supports are a simple element name or a hierarchy of element names. The actions that ToObjects supports describe how to create Perl objects from the XML instances. The $matcher object is an instance of XML::PatAct::MatchName. $matcher is created and associated with the pattern/action list that will be matched against. The $handler object is an instance of XML::PatAct::ToObjects. $handler is created and associated with the pattern/action list to be matched against as well as the pattern matching instance $matcher. $handler is a PerlSAX event handler. XML::Parser::PerlSAX is used as the source of XML events. Other PerlSAX event generators include XML::Grove::PerlSAX and XML::ESISParser. $parser is created with the $handler object as its Handler. The `parse()' method of $parser is called to run the handler (the matching object) to produce the output from XML::PatAct::ToObjects, which is a Perl object converted from XML, $schema. The above example is an abbreviated version. A complete example of usage of the MatchName and ToObjects modules, including source XML, is in the documentation for the XML::PatAct::ToObjects module. The script and source XML are also in the examples directory. libxml-perl-0.08/doc/PerlSAX.pod0100644000076400007640000005602006756066006014562 0ustar kenken=head1 SAX for Perl =head2 What is SAX? SAX (Simple API for XML) is a common parser interface for XML parsers. It allows application writers to write applications that use XML parsers, but are independent of which parser is actually used. This document describes a version of SAX used by Perl modules. The original version of SAX, for Java, is described at . There are two basic interfaces in the Perl version of SAX, the parser interface and the handler interface. The parser interface creates new parser instances, initiates parsing, and provides additional information to handlers on request. The handler interface is used to receive parse events from the parser.
=head2 Deviations from the Java version =over 4 =item * Takes parameters to `C<new()>' instead of using `set*' calls. =item * Allows a default Handler parameter to be used for all handlers. =item * No base classes are implemented. Instead, parsers dynamically check the handlers for what methods they support. =item * The AttributeList, InputSource, and SAXException classes have been replaced by anonymous hashes. =item * Handlers are passed a hash containing properties as an argument in place of positional arguments. =item * `C<parse()>' returns the value returned by calling the `C<end_document()>' handler. =item * Method names have been converted to lower-case with underscores. Parameters are all mixed case with initial upper-case. =back =head1 Parser Interface SAX parsers are reusable but not re-entrant: the application may reuse a parser object (possibly with a different input source) once the first parse has completed successfully, but it may not invoke the `C<parse()>' methods recursively within a parse. Parser objects contain the following options. A new or different handler option may be provided in the middle of a parse, and the SAX parser must begin using the new handler immediately. The `C<Locale>' option must not be changed in the middle of a parse. If an application does not provide a handler for a particular set of events, those events will be silently ignored unless otherwise stated. If an `C<EntityResolver>' is not provided, the parser will resolve system identifiers and open connections to entities itself. Handler default handler to receive events DocumentHandler handler to receive document events DTDHandler handler to receive DTD events ErrorHandler handler to receive error events EntityResolver handler to resolve entities Locale locale to provide localisation for errors If no handlers are provided then all events will be silently ignored, except for `C<fatal_error()>' which will cause a `C<die()>' to be called after calling `C<end_document()>'. All handler methods are called with a single hash argument containing the parameters for that method.
`C' methods can be called with a hash or a list of key-value pairs containing the parameters. All SAX parsers must implement this basic interface: it allows applications to provide handlers for different types of events and to initiate a parse from a URI, a byte stream, or a character stream. =over 4 =item new( I ) Creates a Parser that will be used to parse XML sources. Any parameters passed to `C' will be used for subsequent parses. I may be a list of key, value pairs or a hash. =item parse( I ) Parse an XML document. The application can use this method to instruct the SAX parser to begin parsing an XML document from any valid input source (a character stream, a byte stream, or a URI). I may be a list of key, value pairs or a hash. I passed to `C' override options given when the parser instance was created with `C'. Applications may not invoke this method while a parse is in progress (they should create a new Parser instead for each additional XML document). Once a parse is complete, an application may reuse the same Parser object, possibly with a different input source. `C' returns the result of calling the handler method `C'. A `C' parameter must have been provided to either the `C' or `C' methods. The `C' parameter is a hash containing the following parameters: =over 4 =item PublicId The public identifier for this input source. The public identifier is always optional: if the application writer includes one, it will be provided as part of the location information. =item SystemId The system identifier for this input source. The system identifier is optional if there is a byte stream, a character stream, or a string, but it is still useful to provide one, since the application can use it to resolve relative URIs and can include it in error messages and warnings (the parser will attempt to open a connection to the URI only if there is no byte stream or character stream specified). 
If the application knows the character encoding of the object pointed to by the system identifier, it can provide the encoding using the `C' parameter. If the system ID is a URL, it must be fully resolved. =item String A scalar value containing XML text to be parsed. The SAX parser will ignore this if there is also a byte or character stream, but it will use a string in preference to opening a URI connection. =item ByteStream The byte stream (file handle) for this input source. The SAX parser will ignore this if there is also a character stream specified, but it will use a byte stream in preference to opening a URI connection itself or using `C'. If the application knows the character encoding of the byte stream, it should set it with the `C' parameter. =item CharacterStream FOR FUTURE USE ONLY -- Perl does not currently support any character streams, only use the `C', `C', or `C' parameters. The character stream (file handle) for this input source. If there is a character stream specified, the SAX parser will ignore any byte stream and will not attempt to open a URI connection to the system identifier. =item Encoding The character encoding, if known. The encoding must be a string acceptable for an XML encoding declaration (see section 4.3.3 of the XML 1.0 recommendation). This parameter has no effect when the application provides a character stream. =back =back =head2 Locator Interface for associating a SAX event with a document location. If a SAX parser provides location information to the SAX application, it does so by implementing the following methods and then calling the `C' handler method. The handler can use the object to obtain the location of any other document handler event in the XML source document. Note that the results returned by the object will be valid only during the scope of each document handler method: the application will receive unpredictable results if it attempts to use the locator at any other time. 
SAX parsers are not required to supply a locator, but they are very strongly encouraged to do so. =over 4 =item location() Return the location information for the current event. Returns a hash containing the following parameters: ColumnNumber The column number, or undef if none is available. LineNumber The line number, or undef if none is available. PublicId A string containing the public identifier, or undef if none is available. SystemId A string containing the system identifier, or undef if none is available. =back =head1 Handler Interfaces SAX handler methods are grouped into four interfaces: the document handler for receiving normal document events, the DTD handler for receiving notation and unparsed entity events, the error handler for receiving errors and warnings, and the entity resolver for redirecting external system identifiers. The application may choose to implement each interface in one package or in separate packages, as long as the objects provided as parameters to the parser provide the matching interface. Parsers may implement additional methods in each of these categories; refer to the parser documentation for further information. All handlers are called with a single hash argument containing the parameters for that handler. Application writers who do not want to implement the entire interface can leave those methods undefined. Events whose handler methods are undefined will be ignored unless otherwise stated. =head2 DocumentHandler This is the main interface that most SAX applications implement: if the application needs to be informed of basic parsing events, it implements this interface and provides an instance to the SAX parser using the `C<DocumentHandler>' parameter. The parser uses the instance to report basic document-related events like the start and end of elements and character data. The order of events in this interface is very important, and mirrors the order of information in the document itself.
For example, all of an element's content (character data, processing instructions, and/or subelements) will appear, in order, between the `C' event and the corresponding `C' event. The application can find the location of any event using the Locator interface supplied by the Parser through the `C' method. =over 4 =item set_document_locator( { Locator => $locator } ) Receive an object for locating the origin of SAX document events. SAX parsers are strongly encouraged (though not absolutely required) to supply a locator: if it does so, it must supply the locator to the application by invoking this method before invoking any of the other methods in the DocumentHandler interface. The locator allows the application to determine the end position of any document-related event, even if the parser is not reporting an error. Typically, the application will use this information for reporting its own errors (such as character content that does not match an application's business rules). The information returned by the locator is probably not sufficient for use with a search engine. Note that the locator will return correct information only during the invocation of the events in this interface. The application should not attempt to use it at any other time. Parameters: Locator An object that can return the location of any SAX document event. =item start_document( { } ) Receive notification of the beginning of a document. The SAX parser will invoke this method only once, before any other methods in this interface or in DTDHandler. =item end_document( { } ) Receive notification of the end of a document, no parameters are passed for the end of a document. The SAX parser will invoke this method only once, and it will be the last method invoked during the parse. The parser shall not invoke this method until it has either abandoned parsing (because of an unrecoverable error) or reached the end of input. The value returned by calling `C' will be the value returned by `C'. 
=item start_element( { Name => $name, Attributes => $attributes } ) Receive notification of the beginning of an element. The Parser will invoke this method at the beginning of every element in the XML document; there will be a corresponding `C' event for every `C' event (even when the element is empty). All of the element's content will be reported, in order, before the corresponding `C' event. If the element name has a namespace prefix, the prefix will still be attached. Note that the attribute list provided will contain only attributes with explicit values (specified or defaulted): #IMPLIED attributes will be omitted. Parameters: Name The element type name. Attributes The attributes attached to the element, if any. =item end_element( { Name => $name } ) Receive notification of the end of an element. The SAX parser will invoke this method at the end of every element in the XML document; there will be a corresponding `C' event for every `C' event (even when the element is empty). If the element name has a namespace prefix, the prefix will still be attached to the name. Parameters: Name The element type name. =item characters( { Data => $characters } ) Receive notification of character data. The Parser will call this method to report each chunk of character data. SAX parsers may return all contiguous character data in a single chunk, or they may split it into several chunks; however, all of the characters in any single event must come from the same external entity, so that the Locator provides useful information. Note that some parsers will report whitespace using the `C' method rather than this one (validating parsers must do so). Parameters: Data The characters from the XML document. =item ignorable_whitespace( { Data => $whitespace } ) Receive notification of ignorable whitespace in element content. 
Validating Parsers must use this method to report each chunk of ignorable whitespace (see the W3C XML 1.0 recommendation, section 2.10): non-validating parsers may also use this method if they are capable of parsing and using content models. SAX parsers may return all contiguous whitespace in a single chunk, or they may split it into several chunks; however, all of the characters in any single event must come from the same external entity, so that the Locator provides useful information. The application must not attempt to read from the array outside of the specified range. Data The characters from the XML document. =item processing_instruction ( { Target => $target, Data => $data } ) Receive notification of a processing instruction. The Parser will invoke this method once for each processing instruction found: note that processing instructions may occur before or after the main document element. A SAX parser should never report an XML declaration (XML 1.0, section 2.8) or a text declaration (XML 1.0, section 4.3.1) using this method. Parameters: Target The processing instruction target. Data The processing instruction data, if any. =back =head2 ErrorHandler Basic interface for SAX error handlers. If a SAX application needs to implement customized error handling, it must implement this interface and then provide an instance to the SAX parser using the parser's `C' parameter. The parser will then report all errors and warnings through this interface. The parser shall use this interface instead of throwing an exception: it is up to the application whether to throw an exception for different types of errors and warnings. Note, however, that there is no requirement that the parser continue to provide useful information after a call to `C' (in other words, a SAX driver class could catch an exception and report a fatalError). All error handlers receive the following I. The `C', `C', `C', and `C' are provided only if the parser has that information available. 
Message The error or warning message, or undef to use the message from the `C' parameter. PublicId The public identifier of the entity that generated the error or warning. SystemId The system identifier of the entity that generated the error or warning. LineNumber The line number of the end of the text that caused the error or warning. ColumnNumber The column number of the end of the text that caused the error or warning. EvalError The error value returned from a lower level interface. Application writers who do not want to implement the entire interface can leave those methods undefined. If not defined, calls to the `C' and `C' handlers will be ignored and processing will be terminated (going straight to `C') after the call to `C'. =over 4 =item warning( { I } ) Receive notification of a warning. SAX parsers will use this method to report conditions that are not errors or fatal errors as defined by the XML 1.0 recommendation. The default behaviour is to take no action. The SAX parser must continue to provide normal parsing events after invoking this method: it should still be possible for the application to process the document through to the end. =item error( { I } ) Receive notification of a recoverable error. This corresponds to the definition of "error" in section 1.2 of the W3C XML 1.0 Recommendation. For example, a validating parser would use this callback to report the violation of a validity constraint. The default behaviour is to take no action. The SAX parser must continue to provide normal parsing events after invoking this method: it should still be possible for the application to process the document through to the end. If the application cannot do so, then the parser should report a fatal error even if the XML 1.0 recommendation does not require it to do so. =item fatal_error( { I } ) Receive notification of a non-recoverable error. This corresponds to the definition of "fatal error" in section 1.2 of the W3C XML 1.0 Recommendation. 
For example, a parser would use this callback to report the violation of a well-formedness constraint. The application must assume that the document is unusable after the parser has invoked this method, and should continue (if at all) only for the sake of collecting addition error messages: in fact, SAX parsers are free to stop reporting any other events once this method has been invoked. =back =head2 DTDHandler Receive notification of basic DTD-related events. If a SAX application needs information about notations and unparsed entities, then the application implements this interface and provide an instance to the SAX parser using the parser's `C' parameter. The parser uses the instance to report notation and unparsed entity declarations to the application. The SAX parser may report these events in any order, regardless of the order in which the notations and unparsed entities were declared; however, all DTD events must be reported after the document handler's `C' event, and before the first `C' event. It is up to the application to store the information for future use (perhaps in a hash table or object tree). If the application encounters attributes of type "NOTATION", "ENTITY", or "ENTITIES", it can use the information that it obtained through this interface to find the entity and/or notation corresponding with the attribute value. Application writers who do not want to implement the entire interface can leave those methods undefined. Events whose handler methods are undefined will be ignored. =over 4 =item notation_decl( { I } ) Receive notification of a notation declaration event. It is up to the application to record the notation for later reference, if necessary. If a system identifier is present, and it is a URL, the SAX parser must resolve it fully before passing it to the application. I: Name The notation name. PublicId The notation's public identifier, or undef if none was given. SystemId The notation's system identifier, or undef if none was given. 
=item unparsed_entity_decl( { I } ) Receive notification of an unparsed entity declaration event. Note that the notation name corresponds to a notation reported by the `C' event. It is up to the application to record the entity for later reference, if necessary. If the system identifier is a URL, the parser must resolve it fully before passing it to the application. I: Name The unparsed entity's name. PublicId The entity's public identifier, or undef if none was given. SystemId The entity's system identifier (it must always have one). NotationName The name of the associated notation. =back =head2 EntityResolver Basic interface for resolving entities. If a SAX application needs to implement customized handling for external entities, it must implement this interface and provide an instance with the SAX parser using the parser's `C' parameter. The parser will then allow the application to intercept any external entities (including the external DTD subset and external parameter entities, if any) before including them. Many SAX applications will not need to implement this interface, but it will be especially useful for applications that build XML documents from databases or other specialised input sources, or for applications that use URI types other than URLs. The application can also use this interface to redirect system identifiers to local URIs or to look up replacements in a catalog (possibly by using the public identifier). =over 4 =item resolve_entity( { PublicId => $public_id, SystemId => $system_id } ) Allow the application to resolve external entities. The Parser will call this method before opening any external entity except the top-level document entity (including the external DTD subset, external entities referenced within the DTD, and external entities referenced within the document element): the application may request that the parser resolve the entity itself, that it use an alternative URI, or that it use an entirely different input source. 
Application writers can use this method to redirect external system identifiers to secure and/or local URIs, to look up public identifiers in a catalogue, or to read an entity from a database or other input source (including, for example, a dialog box). If the system identifier is a URL, the SAX parser must resolve it fully before reporting it to the application. Parameters: PublicId The public identifier of the external entity being referenced, or undef if none was supplied. SystemId The system identifier of the external entity being referenced. `C' returns undef to request that the parser open a regular URI connection to the system identifier or returns a hash containing the same parameters as the `C' parameter to Parser's `C' method, summarized here: PublicId The public identifier of the external entity being referenced, or undef if none was supplied. SystemId The system identifier of the external entity being referenced. String String containing XML text ByteStream An open file handle. CharacterStream An open file handle. Encoding The character encoding, if known. See Parser's `C' method for complete details on how these parameters interact. =back =head1 Contributors SAX was developed collaboratively by the members of the XML-DEV mailing list. Please see the ``SAX History and Contributors'' page for the people who did the real work behind SAX. Much of the content of this document was copied from the SAX 1.0 Java Implementation documentation. The SAX for Python specification was helpful in creating this specification. Thanks to the following people who contributed to Perl SAX. Eduard (Enno) Derksen Ken MacLeod Eric Prud'hommeaux Larry Wall libxml-perl-0.08/doc/mirror.sh0100644000076400007640000000314707054574600014444 0ustar kenken#! /bin/sh # # NAME # mirror -- update web page with a libxml-perl release # # SYNOPSIS usage="mirror RELEASE DESTDIR" # # DESCRIPTION # `mirror' creates a web mirror using a libxml-perl release tar # file. 
# # `mirror' pulls files from the tar file to create the web page. # `mirror' searches HTML files for the string @VERSION@ and # replaces it with RELEASE. `mirror' searches for all *.pm and # *.pod files and converts them to HTML. It also copies a few # hardcoded files. # # `mirror' installs the web pages in DESTDIR. # # CAUTION: `mirror' removes the contents of DESTDIR before # copying files to it. # # AUTHOR # Ken MacLeod # # $Id: mirror.sh,v 1.2 2000/02/22 21:02:56 kmacleod Exp $ # PWD_CMD="/bin/pwd" SED="sed" TR="/usr/bin/tr" if [ $# != 2 ]; then echo "usage: $usage" exit 1 fi RELEASE="$1" DESTDIR="$2" set -e set -x rm -rf $DESTDIR mkdir -p $DESTDIR cp libxml-perl-${RELEASE}.tar.gz $DESTDIR cd $DESTDIR tar xzvf libxml-perl-${RELEASE}.tar.gz for ii in libxml-perl-${RELEASE}/doc/*.html; do $SED <$ii >`basename $ii` \ -e "s/@VERSION@/$RELEASE/g" done for ii in `cd libxml-perl-${RELEASE}/doc; echo *.pod`; do pod2html libxml-perl-${RELEASE}/doc/$ii >`basename $ii .pod`.html done for ii in `cd libxml-perl-${RELEASE}/lib; echo */*.pm */*/*.pm`; do dstfile=`echo $ii | sed -e 's|/|::|g'` pod2html libxml-perl-${RELEASE}/lib/$ii >`basename $dstfile .pm`.html done mv libxml-perl-${RELEASE}/README libxml-perl-${RELEASE}.readme mv libxml-perl-${RELEASE}/doc/modules.xml . rm -rf libxml-perl-${RELEASE} pod2html-dircache pod2html-itemcache libxml-perl-0.08/doc/sax-2.0.html0100644000076400007640000002735207423065107014555 0ustar kenken Perl SAX 2.0 Binding

Perl SAX 2.0 Binding

SAX (Simple API for XML) is a common parser interface for XML parsers. It allows application writers to write applications that use XML parsers, but are independent of which parser is actually used.

This document describes the version of SAX used by Perl modules. The original version of SAX 2.0, for Java, is described at http://sax.sourceforge.net/.

There are two basic interfaces in the Perl version of SAX, the parser interface and the handler interface. The parser interface creates new parser instances, starts parsing, and provides additional information to handlers on request. The handler interface is used to receive parse events from the parser. This pattern is also commonly called "Producer and Consumer" or "Generator and Sink". Note that the parser doesn't have to be an XML parser, all it needs to do is provide a stream of events to the handler as if it were parsing XML. But the actual data from which the events are generated can be anything, a Perl object, a CSV file, a database table...

SAX is typically used like this:

    # Create the object that will receive the parse events.
    my $handler = MyHandler->new();
    # Give the handler to the parser through the Handler option.
    my $parser = AnySAXParser->new( Handler => $handler );
    # Parse the instance identified by $uri; events are sent to $handler.
    $parser->parse($uri);

Handlers are typically written like this:

    package MyHandler;

    # Construct a handler: an empty hash blessed into the invoking class.
    sub new {
        my ($class) = @_;
        my $self = {};
        return bless $self, $class;
    }

    # Called when an element opens; prints the element's Name property.
    sub start_element {
        my $self    = shift;
        my $element = shift;

        print "Starting element $element->{Name}\n";
    }

    # Called when an element closes; prints the element's Name property.
    sub end_element {
        my $self    = shift;
        my $element = shift;

        print "Ending element $element->{Name}\n";
    }

    # Called for each chunk of character data; prints its Data property.
    sub characters {
        my $self       = shift;
        my $characters = shift;

        print "characters: $characters->{Data}\n";
    }

    # A package file must end with a true value.
    1;

Basic SAX Parser

These methods and options are the most commonly used with SAX parsers and event generators.

Applications may not invoke a parse() method again while a parse is in progress (they should create a new SAX parser instead for each nested XML document). Once a parse is complete, an application may reuse the same parser object, possibly with a different input source.

During the parse, the parser will provide information about the XML document through the registered event handlers. Note that an event whose handler hasn't been registered (i.e., one with no corresponding method in the handler's class) will not be reported. This allows one to receive only the events one is interested in.

parse(uri [, options])
Parses the XML instance identified by uri (a system identifier). options can be a list of option, value pairs or a hash. Options include Handler, features and properties, and advanced SAX parser options. parse() returns the result of calling the end_document() handler. The options supported by parse() may vary slightly if what is being "parsed" isn't XML.

parse_file(stream [, options])
Parses the XML instance in the already opened stream, an IO::Handle or similar. options are the same as for parse(). parse_file() returns the result of calling the end_document() handler.

parse_string(string [, options])
Parses the XML instance in string. options are the same as for parse(). parse_string() returns the result of calling the end_document() handler.

Handler
The default handler object to receive all events from the parser. Applications may change Handler in the middle of the parse and the SAX parser will begin using the new handler immediately. The Advanced SAX document lists a number of more specialized handlers that can be used should you wish to dispatch different types of events to different objects.

Basic SAX Handler

These methods are the most commonly used by SAX handlers.

start_document(document)
Receive notification of the beginning of a document.

The SAX parser will invoke this method only once, before any other methods (except for set_document_locator() in advanced SAX handlers).

No properties are defined for this event (document is empty).

end_document(document)
Receive notification of the end of a document.

The SAX parser will invoke this method only once, and it will be the last method invoked during the parse. The parser shall not invoke this method until it has either abandoned parsing (because of an unrecoverable error) or reached the end of input.

No properties are defined for this event (document is empty).

The return value of end_document() is returned by the parser's parse() methods.

start_element(element)
Receive notification of the start of an element.

The Parser will invoke this method at the beginning of every element in the XML document; there will be a corresponding end_element() event for every start_element() event (even when the element is empty). All of the element's content will be reported, in order, before the corresponding end_element() event.

element is a hash with these properties:
Name The element type name (including prefix).
Attributes The attributes attached to the element, if any.
If namespace processing is turned on (which is the default), these properties are also available:
NamespaceURI The namespace of this element.
Prefix The namespace prefix used on this element.
LocalName The local name of this element.
Attributes is a hash keyed by JClark namespace notation. That is, the keys are of the form "{NamespaceURI}LocalName". If the attribute has no NamespaceURI, then it is simply "{}LocalName". Each attribute is a hash with these properties:
Name The attribute name (including prefix).
Value The normalized value of the attribute.
NamespaceURI The namespace of this attribute.
Prefix The namespace prefix used on this attribute.
LocalName The local name of this attribute.

end_element(element)
Receive notification of the end of an element.

The SAX parser will invoke this method at the end of every element in the XML document; there will be a corresponding start_element() event for every end_element() event (even when the element is empty).

element is a hash with these properties:
Name The element type name (including prefix).
If namespace processing is turned on (which is the default), these properties are also available:
NamespaceURI The namespace of this element.
Prefix The namespace prefix used on this element.
LocalName The local name of this element.

characters(characters)
Receive notification of character data.

The Parser will call this method to report each chunk of character data. SAX parsers may return all contiguous character data in a single chunk, or they may split it into several chunks (however, all of the characters in any single event must come from the same external entity so that the Locator provides useful information).

characters is a hash with this property:

Data The characters from the XML document.

ignorable_whitespace(characters)
Receive notification of ignorable whitespace in element content.

Validating Parsers must use this method to report each chunk of ignorable whitespace (see the W3C XML 1.0 recommendation, section 2.10): non-validating parsers may also use this method if they are capable of parsing and using content models.

SAX parsers may return all contiguous whitespace in a single chunk, or they may split it into several chunks; however, all of the characters in any single event must come from the same external entity, so that the Locator provides useful information.

characters is a hash with this property:

Data The whitespace characters from the XML document.

Exceptions

Conformant XML parsers are required to abort processing when well-formedness or validation errors occur. In Perl, SAX parsers use die() to signal these errors. To catch these errors and prevent them from killing your program, use eval{}:

    # Run the parse inside eval{} so that a die() from the parser is trapped.
    eval { $parser->parse($uri) };
    # $@ is set when the parse died; inspect it right away.
    if ($@) {
        # handle error
    }

Exceptions can also be thrown when setting features or properties on the SAX parser (see advanced SAX below).

Exception values ($@) in SAX are hashes blessed into the package that defines their type, and have the following properties:

Message A detail message for this exception.
Exception The embedded exception, or undef if there is none.
If the exception is raised due to parse errors, these properties are also available:
ColumnNumber The column number of the end of the text where the exception occurred.
LineNumber The line number of the end of the text where the exception occurred.
PublicId The public identifier of the entity where the exception occurred.
SystemId The system identifier of the entity where the exception occurred.


Advanced SAX

libxml-perl-0.08/doc/UsingPerlSAX.pod0100644000076400007640000000473406715130346015567 0ustar kenken=head1 Using PerlSAX Working with PerlSAX involves using two classes (packages), a PerlSAX parser that generates parsing events and a class that you write that will receive those parsing events, the ``handler''. This guide will use the XML::Parser::PerlSAX parser that uses Clark Cooper's XML::Parser module. The handler class implements the PerlSAX handler methods that you are interested in. The following example, MyHandler.pm, prints a message every time an element starts or ends: package MyHandler; sub new { my ($type) = @_; return bless {}, $type; } sub start_element { my ($self, $element) = @_; print "Start element: $element->{Name}\n"; } sub end_element { my ($self, $element) = @_; print "End element: $element->{Name}\n"; } 1; To use your handler you will need to have a script, myhandler.pl, that loads and creates your handler and the parser, and then calls the parser to parse the XML instance and send events to your handler: use XML::Parser::PerlSAX; use MyHandler; my $my_handler = MyHandler->new; my $parser = XML::Parser::PerlSAX->new( Handler => $my_handler ); foreach my $instance (@ARGV) { $parser->parse(Source => { SystemId => $instance }); } Given this XML instance, myhandler.xml:
Using PerlSAX Working with PerlSAX ...
Running myhandler.pl like this: perl myhandler.pl myhandler.xml will produce this output: Start element: article Start element: title End element: title Start element: paragraph End element: paragraph End element: article =head2 For More Information PerlSAX.pod describes the PerlSAX interface. Each parser module describes it's individual capabilities. XML::Parser::PerlSAX is the most commonly used PerlSAX implementation. The files described in this doc are in the `examples' directory. A more complete implementation of the very simple handler above is in the module XML::Handler::Sample. Other, more complex handlers are in the XML::Handler directory as well. Another hands-on doc for PerlSAX is the XML-Parser-and-PerlSAX.pod. This doc describes the difference between and the purpose of PerlSAX with respect to XML::Parser. This document was inspired by and uses the code examples from David Megginson's ``Quick Start for SAX Application Writers.'' libxml-perl-0.08/doc/index.html0100644000076400007640000003421007070670426014567 0ustar kenken libxml-perl
libxml-perl
Current version is @VERSION@    
libxml-perl is a collection of Perl modules, scripts, and documents for working with XML in Perl. libxml-perl software works in combination with XML::Parser, PerlSAX, XML::DOM, XML::Grove, and others.

Questions about how to use this library should be directed to the comp.lang.perl.modules USENET Newsgroup. Bug reports and suggestions for improvements can be sent to the <perl-xml@activestate.com> mailing list. This mailing list is also the place for general discussions and development of the libxml-perl package.

To join the Perl-XML mailing list, send an email message to ListManager@ActiveState.com with the following text in the body:

    Subscribe Perl-XML

Source
libxml-perl source is available on CPAN in the XML module directory. This link goes through the CPAN redirector so if the site gives you any problems, just click it again and you will be redirected to a different site.

libxml-perl source is also available here.

The libxml-perl-@VERSION@ README

Modules
The following modules are part of libxml-perl. Below they are marked with their release status:

    STABLE: has been in use for a while with few or no outstanding bugs
    BETA: interfaces are stable but there may still be bugs
    ALPHA: interfaces are changing, there may be lots of bugs, and there may not be docs available yet
XML::Parser::PerlSAX
    BETA
XML::Parser::PerlSAX is a PerlSAX parser using XML::Parser (which uses James Clark's Expat XML Parser).

XML::Handler::XMLWriter
    BETA
A PerlSAX handler for writing readable XML (in contrast to Canonical XML, for example). XMLWriter is also subclassable and supports calling start and end methods by element-names (subclassed from XML::Handler::Subs). XMLWriter is similar to XML::Parser's Stream style.

XML::Handler::Subs
    BETA
A PerlSAX handler base class that calls start and end methods by element-names. Subs is similar to XML::Parser's Subs style.

XML::Handler::CanonXMLWriter
    BETA
XML::Handler::CanonXMLWriter is a PerlSAX handler that outputs in Canonical XML.

XML::Handler::Sample
    BETA
XML::Handler::Sample is a PerlSAX handler that simply prints out the event names as they are parsed by a PerlSAX parser. It can be used for debugging or as a template for building new handlers. XML::Handler::Sample contains handlers for all known parser events.

XML::ESISParser
    BETA
XML::ESISParser is a validating PerlSAX parser using James Clark's `nsgmls' SGML/XML Parser. ESISParser supports both XML and SGML document instances. Unless you need validation, you should probably be using XML::Parser::PerlSAX or XML::Parser.

XML::ESISParser with XML::Grove obsolete the SGML::SPGroveBuilder and SGML::Grove modules.

Data::Grove, Data::Grove::Parent, Data::Grove::Visitor
    BETA
Data::Grove and its helpers provide a base class for deeply nested or directed graph structures. Used by XML::Grove (and others soon).

XML::SAX2Perl, XML::Perl2SAX
    ALPHA
SAX2Perl and Perl2SAX are SAX Parser<->DocumentHandler filters. These modules translate parse events between the Java/CORBA style SAX methods and PerlSAX style methods.

The following modules will very likely be renamed in the next release. XML::PatAct::MatchName
    ALPHA
MatchName is a pattern matching module that can be used with PatAct action modules. MatchName uses simple element names or element name lists to match names to actions.

XML::PatAct::ToObjects
    ALPHA
ToObjects is a PatAct action module. ToObjects can be used to create application-ready Perl objects from XML instances.

XML::PatAct::Amsterdam
    ALPHA
Amsterdam is a PatAct action module. Amsterdam can be used to apply a very simple form of style-sheet to an XML instance by using ``before'' and ``after'' strings that are output before and after the contents of elements.

XML::PatAct::PatternTempl, XML::PatAct::ActionTempl
    BETA
PatternTempl and ActionTempl are template files that pattern/action module writers can copy to create new modules. See Creating PatAct Modules for more information.

Documents
PerlSAX
This document defines a Perl binding to SAX 1.0. PerlSAX-based parser modules implement and possibly extend the interface described in PerlSAX.

Using PerlSAX
UsingPerlSAX is a brief introduction to PerlSAX using the XML::Parser::PerlSAX module.

Using PatAct Modules
Describes how to use pattern/action modules to transform XML instances.

Creating PatAct Modules
A document for module writers who are writing new pattern/action modules.

modules.xml
modules.xml contains a listing of all Perl XML packages and their public modules categorized by several topics.

News
libxml-perl-0.07
February 22, 2000
-doc/index.html: libxml-perl site index
-doc/mirror.sh: creates a libxml-perl mirror site
-Fixes:
-all modules: release script didn't insert version numbers in Perl modules. Reported by Enno Derksen
-doc/modules.xml: well-formedness errors. Reported by KangChan Lee

libxml-perl-0.06
February 4, 2000
-all modules: add $VERSION. Suggested by Michael Koehne
-XML::Parser::PerlSAX: add UseAttributeOrder option and AttributeOrder and Defaulted properties to start_element() handler. Suggested by Enno Derksen
-XML::Parser::PerlSAX: add start_cdata, end_cdata, and entity_reference events
-XML::PatAct::Amsterdam: added Output and AsString options, added support for replacing attributes
-Data::Grove: add a Data::Grove::Characters class to act as a default grove object for containing characters.
-Fixes:
-XML::PatAct::ToObjects: removed leftover debugging statement
-XML::ESISParser: report record end as characters if no record_end() handler
-XML::Parser::PerlSAX: For attribute list declarations, now correctly calls the attlist_decl() method and passes the ElementName property, it used to call entity_decl() passing EntityName. Reported by Enno Derksen and Colin Muller

libxml-perl-0.05
August 16, 1999
-Major update to PerlSAX specification
-Added an introduction
-Added a ``Deviations from the Java version'' section
-Re-added the `set_document_locator()' handler method
-Added arguments to method synopses
-Attributed most of the content to the SAX 1.0 JavaDoc
-Minor typos
-XML::Handler::XMLWriter: a new PerlSAX handler for writing readable XML (in contrast to Canonical XML)
-XML::Handler::Subs: a new PerlSAX handler base class for calling user-defined subs
-XML::PatAct::ToObjects: add CopyAttributes option, add -grove-contents action
-All PatAct modules can now take parameters as either a list of key, value pairs or a hash
-Fixes:
-XML::ESISParser wasn't testing handlers for what methods they support
-XML::Parser::PerlSAX wasn't forwarding XML::Parser Element events

libxml-perl-0.04
August 11, 1999
-Added pattern/action modules for name matching, converting to objects, and applying simple styles -- XML::PatAct::MatchName, XML::PatAct::ToObjects, and XML::PatAct::Amsterdam.
-Added ``Using PatAct Modules'' and ``Creating PatAct Modules'' docs.
-XML::Parser::PerlSAX and XML::ESISParser were not passing a hash for `start_document()' and `end_document()' per spec.

libxml-perl-0.03
May 26, 1999
-added XML::Handler::CanonXMLWriter and test

libxml-perl-0.02
May 24, 1999
-renamed package from `libxml' to `libxml-perl'
-added doc/modules.xml
-added doc/UsingPerlSAX.pod and example files
-moved PerlSAX.pod and interface-style.pod to `doc/'
-renamed Data::Grove::Tied to Data::Grove::Parent

Contributors
The following have shared their code, documents, comments, and/or suggestions for libxml-perl:

Clark Cooper
Eduard (Enno) Derksen
Michael Koehne
KangChan Lee
Ken MacLeod
Colin Muller
Eric Prud'hommeaux
Larry Wall
libxml-perl-0.08/Changes0100644000076400007640000001114207745254162013323 0ustar kenkenRevision history for Perl extension libxml Backwards incompatible changes are marked with a `*'. ToDo - XML::ESISParser: include Robert Braddock's update for OpenSP, in email 25Jul - XML::Parser::PerlSAX doesn't pass ParseParamEnt to XML::Parser, inspired by a request by Paul Mahoney - switch Data::Grove::Visitor to use UNIVERSAL::can instead of $self->{'has'}, suggested by Mike Richardson - no modules are yet supporting SAX2 - XML::Parser::PerlSAX doesn't implement ErrorHandler, it should at least call fatal_error() if XML::Parser dies; reported by Craig N. Caroon 0.08 Tue Oct 21 10:54:18 CDT 2003 - added Perl SAX 2.0 Binding - XML::ESISParser: add -E0 to nsgmls options so that nsgmls doesn't quit after 200 errors. Add more detail to command character error message. Suggested by Charles Thayer . - fixes - Data::Grove::Visitor: children_accept_name was not returning any data in some cases; reported by Laurent CAPRANI - XML::SAX2Perl: typo in startElement; reported by Mark A. Hershberger - t/stream.t Test 11 fails due to 8-bit characters on Perl 5.6, first reported by Ed Arnold 0.07 Tue Feb 22 14:24:52 CST 2000 - doc/index.html: libxml-perl site index - doc/mirror.sh: creates a libxml-perl mirror site - fixes - all modules: release script didn't insert version numbers in Perl modules. Reported by Enno Derksen - doc/modules.xml: well-formedness errors. Reported by KangChan Lee 0.06 Wed Dec 22 15:14:39 CST 1999 - all modules: add $VERSION. Suggested by Michael Koehne - XML::Parser::PerlSAX: add UseAttributeOrder option and AttributeOrder and Defaulted properties to start_element() handler. 
Suggested by Enno Derksen - XML::Parser::PerlSAX: add start_cdata, end_cdata, and entity_reference events - XML::PatAct::Amsterdam: added Output and AsString options, added support for replacing attributes - Data::Grove: add a Data::Grove::Characters class to act as a default grove object for containing characters. - fixes - XML::PatAct::ToObjects: removed leftover debugging statement - XML::ESISParser: report record end as characters if no record_end() handler - XML::Parser::PerlSAX: For attribute list declarations, now correctly calls the attlist_decl() method and passes the ElementName property, it used to call entity_decl() passing EntityName. Reported by Enno Derksen and Colin Muller 0.05 Mon Aug 16 11:02:32 CDT 1999 - Major update to PerlSAX.pod - added an introduction - added a ``Deviations from the Java version'' section * re-added the `set_document_locator()' handler method - added arguments to method synopses - attributed most of the content to the SAX 1.0 JavaDoc - minor typos - XML::Handler::XMLWriter: a new PerlSAX handler for writing readable XML (in contrast to Canonical XML) - XML::Handler::Subs: a new PerlSAX handler base class for calling user-defined subs - XML::Handler::Sample: this is a template for creating PerlSAX handlers, it is now in the Public Domain - XML::PatAct::ToObjects: add CopyAttributes option, add -grove-contents option - all PatAct modules can now take parameters as either a list of key, value pairs or a hash - fixes - XML::ESISParser wasn't testing handlers for what methods they support - XML::Parser::PerlSAX wasn't capturing XML::Parser Element events 0.04 Wed Aug 11 10:03:00 CDT 1999 - README: updated with PatAct modules - added Creating PatAct Modules and Using PatAct Modules docs - added XML::PatAct::ActionTempl, XML::PatAct::Amsterdam, XML::PatAct::MatchName, XML::PatAct::PatternTempl, XML::PatAct::ToObjects - added schema.pl and schema.xml examples - added schema.t test - fixes - XML::Parser::PerlSAX and XML::ESISParser 
were not passing a hash for start_document() or end_document() per spec - t/canon_xml_writer.t, t/xp_sax.t: added CVS ID 0.03 Wed May 26 19:49:46 CDT 1999 - added XML::Handler::CanonXMLWriter and test 0.02 Mon May 24 18:02:00 CDT 1999 - renamed package from `libxml' to `libxml-perl' - added doc/modules.xml - added doc/UsingPerlSAX.pod and example files - moved PerlSAX.pod and interface-style.pod to `doc/' - renamed Data::Grove::Tied to Data::Grove::Parent 0.01 Fri May 7 14:59:07 CDT 1999 - original version libxml-perl-0.08/MANIFEST0100644000076400007640000000164307745275111013163 0ustar kenkenChangeLog Changes MANIFEST Makefile.PL README libxml-perl.spec libxml-perl-0.08.spec doc/CreatingPatActModules.pod doc/PerlSAX.pod doc/UsingPatActModules.pod doc/UsingPerlSAX.pod doc/index.html doc/interface-style.pod doc/mirror.sh doc/modules.xml doc/sax-2.0.html doc/sax-2.0-adv.html lib/Data/Grove.pm lib/Data/Grove/Parent.pm lib/Data/Grove/Visitor.pm lib/XML/ESISParser.pm lib/XML/Perl2SAX.pm lib/XML/SAX2Perl.pm lib/XML/Handler/CanonXMLWriter.pm lib/XML/Handler/Sample.pm lib/XML/Handler/Subs.pm lib/XML/Handler/XMLWriter.pm lib/XML/Parser/PerlSAX.pm lib/XML/PatAct/ActionTempl.pm lib/XML/PatAct/Amsterdam.pm lib/XML/PatAct/MatchName.pm lib/XML/PatAct/PatternTempl.pm lib/XML/PatAct/ToObjects.pm examples/MyHandler.pm examples/esis-test.pl examples/myhandler.pl examples/myhandler.xml examples/perlsax-test.pl examples/schema.pl examples/schema.xml t/amsterdam.t t/canon_xml_writer.t t/schema.t t/stream.t t/subs.t t/xp_sax.t libxml-perl-0.08/examples/0040755000076400007640000000000007745275112013650 5ustar kenkenlibxml-perl-0.08/examples/schema.xml0100644000076400007640000000062506754116426015633 0ustar kenken MyTableA short summaryA long description that may contain a subset of HTML MyColumn1 A short summary A long description 42
libxml-perl-0.08/examples/MyHandler.pm0100644000076400007640000000052307745275111016065 0ustar kenken# This is the example module in doc/UsingPerlSAX.pod package MyHandler; sub new { my ($type) = @_; return bless {}, $type; } sub start_element { my ($self, $element) = @_; print "Start element: $element->{Name}\n"; } sub end_element { my ($self, $element) = @_; print "End element: $element->{Name}\n"; } 1; libxml-perl-0.08/examples/myhandler.xml0100644000076400007640000000017106715130346016344 0ustar kenken
Using PerlSAX Working with PerlSAX ...
libxml-perl-0.08/examples/myhandler.pl0100644000076400007640000000043606715130346016163 0ustar kenken# This is the example script in doc/UsingPerlSAX.pod use XML::Parser::PerlSAX; use MyHandler; my $my_handler = MyHandler->new; my $parser = XML::Parser::PerlSAX->new( Handler => $my_handler ); foreach my $instance (@ARGV) { $parser->parse(Source => { SystemId => $instance }); } libxml-perl-0.08/examples/schema.pl0100644000076400007640000000262706754116426015452 0ustar kenken# This template file is in the Public Domain. # You may do anything you want with this file. # # $Id: schema.pl,v 1.1 1999/08/10 21:43:50 kmacleod Exp $ # # This is the example script in the XML::PatAct::ToObjects module doc, # it also uses XML::PatAct::MatchName and is an example of using PatAct # modules. use XML::Parser::PerlSAX; use XML::PatAct::MatchName; use XML::PatAct::ToObjects; my $patterns = [ 'schema' => [ qw{ -holder } ], 'table' => [ qw{ -make Schema::Table } ], 'name' => [ qw{ -field Name -as-string } ], 'summary' => [ qw{ -field Summary -as-string } ], 'description' => [ qw{ -field Description -grove } ], 'column' => [ qw{ -make Schema::Column -push-field Columns } ], 'unique' => [ qw{ -field Unique -value 1 } ], 'non-null' => [ qw{ -field NonNull -value 1 } ], 'default' => [ qw{ -field Default -as-string } ], ]; my $matcher = XML::PatAct::MatchName->new( Patterns => $patterns ); my $handler = XML::PatAct::ToObjects->new( Patterns => $patterns, Matcher => $matcher); my $parser = XML::Parser::PerlSAX->new( Handler => $handler ); $schema = $parser->parse(Source => { SystemId => $ARGV[0] } ); require 'dumpvar.pl'; dumpvar('main', 'schema'); libxml-perl-0.08/examples/perlsax-test.pl0100644000076400007640000000041606714406533016634 0ustar kenkenuse XML::Parser::PerlSAX; use XML::Handler::Sample; if ($#ARGV != 0) { die "usage: esis-test FILE\n"; } $file = shift @ARGV; $my_handler = XML::Handler::Sample->new; XML::Parser::PerlSAX->new->parse(Source => { SystemId => $file }, Handler => 
$my_handler); libxml-perl-0.08/examples/esis-test.pl0100644000076400007640000000057106714400506016115 0ustar kenkenuse XML::ESISParser; use XML::Handler::Sample; if ($ARGV[0] eq '--sgml') { push (@additional_args, IsSGML => 1); shift @ARGV; } if ($#ARGV != 0) { die "usage: esis-test FILE\n"; } $file = shift @ARGV; $my_handler = XML::Handler::Sample->new; XML::ESISParser->new->parse(Source => { SystemId => $file }, Handler => $my_handler, @additional_args); libxml-perl-0.08/t/0040755000076400007640000000000007745275112012275 5ustar kenkenlibxml-perl-0.08/t/schema.t0100644000076400007640000000612306754116317013721 0ustar kenken# Hey Emacs, this is -*- perl -*- ! # # Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl test.pl' # # $Id: schema.t,v 1.1 1999/08/10 21:42:39 kmacleod Exp $ # ######################### We start with some black magic to print on failure. # Change 1..1 below to 1..last_test_to_print . # (It may become useful if the test is moved to ./t subdirectory.) 
BEGIN { $| = 1; print "1..2\n"; } END {print "not ok 1\n" unless $loaded;} use XML::Parser::PerlSAX; use XML::PatAct::MatchName; use XML::PatAct::ToObjects; $loaded = 1; print "ok 1\n"; my $patterns = [ 'schema' => [ qw{ -holder } ], 'table' => [ qw{ -make Schema::Table } ], 'name' => [ qw{ -field Name -as-string } ], 'summary' => [ qw{ -field Summary -as-string } ], 'description' => [ qw{ -field Description -as-string } ], 'column' => [ qw{ -make Schema::Column -push-field Columns } ], 'unique' => [ qw{ -field Unique -value 1 } ], 'non-null' => [ qw{ -field NonNull -value 1 } ], 'default' => [ qw{ -field Default -as-string } ], ]; my $matcher = XML::PatAct::MatchName->new( Patterns => $patterns ); my $handler = XML::PatAct::ToObjects->new( Patterns => $patterns, Matcher => $matcher); my $parser = XML::Parser::PerlSAX->new( Handler => $handler ); $schema = $parser->parse(Source => { String => <<'EOF' } ); MyTableA short summaryA long description that may contain a subset of HTML MyColumn1 A short summary A long description 42
EOF $not_ok = 0; $not_ok |= (!defined($schema)) || (ref($schema->[0]) ne 'Schema::Table'); $not_ok |= (!defined($schema->[0]{Name})) || ($schema->[0]{Name} ne 'MyTable'); $not_ok |= (!defined($schema->[0]{Summary})) || ($schema->[0]{Summary} ne 'A short summary'); $not_ok |= (!defined($schema->[0]{Description})); $not_ok |= (!defined($schema->[0]{Columns})) || (ref($schema->[0]{Columns}[0]) ne 'Schema::Column'); $not_ok |= (!defined($schema->[0]{Columns}[0]{Name})) || ($schema->[0]{Columns}[0]{Name} ne 'MyColumn1'); $not_ok |= (!defined($schema->[0]{Columns}[0]{Summary})) || ($schema->[0]{Columns}[0]{Summary} ne 'A short summary'); $not_ok |= !defined($schema->[0]{Columns}[0]{Description}); $not_ok |= (!defined($schema->[0]{Columns}[0]{Unique})) || ($schema->[0]{Columns}[0]{Unique} != 1); $not_ok |= (!defined($schema->[0]{Columns}[0]{NonNull})) || ($schema->[0]{Columns}[0]{NonNull} != 1); $not_ok |= (!defined($schema->[0]{Columns}[0]{Default})) || ($schema->[0]{Columns}[0]{Default} != 42); print $not_ok ? "not ok 2\n" : "ok 2\n"; libxml-perl-0.08/t/stream.t0100644000076400007640000000515207745254162013757 0ustar kenken# Hey Emacs, this is -*- perl -*- ! # # Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl test.pl' # # $Id: stream.t,v 1.2 2003/10/21 16:01:54 kmacleod Exp $ # ######################### We start with some black magic to print on failure. # Change 1..1 below to 1..last_test_to_print . # (It may become useful if the test is moved to ./t subdirectory.) 
BEGIN { $| = 1; print "1..11\n"; } END {print "not ok 1\n" unless $loaded;} use XML::Parser::PerlSAX; use XML::Handler::XMLWriter; $loaded = 1; print "ok 1\n"; my $subs = MySubs->new( AsString => 1 ); my $parser = XML::Parser::PerlSAX->new( Handler => $subs ); $string = $parser->parse(Source => { Encoding => 'ISO-8859-1', String => <<"EOF;" } ); ]> First line in foo Fran is &fran; and Zoe is &zoe; 1st line in bar 2nd line in bar 3rd line in bar This, '\240', would be a bad character in UTF-8. EOF; foreach $test (2..10) { print $subs->{Tests}[$test] ? "ok $test\n" : "not ok $test\n" ; } $expected = <<"EOF;"; First line in foo Fran is fran-def and Zoe is zoe.ent 1st line in bar 2nd line in bar 3rd line in bar This, '\240', would be a bad character in UTF-8. EOF; print (($string eq $expected) ? "ok 11\n" : "not ok 11\n"); package MySubs; use vars qw{ @ISA }; BEGIN { @ISA = qw{ XML::Handler::XMLWriter }; }; sub s_zap { my ($self, $element) = @_; $self->{Tests}[2] = 1; # we got here $self->{Tests}[3] = 1 if $element->{Name} eq 'zap'; $self->{Tests}[4] = 1 if $element->{Name} eq $self->{Names}[-1]; $self->{Tests}[5] = 1 if $element == $self->{Nodes}[-1]; $self->{Tests}[6] = 1 if $#{$self->{Names}} == 1; $self->{Tests}[7] = 1 if $#{$self->{Nodes}} == 1; $element->{Attributes}{'fubar'} = 1; $self->print_start_element($element); } sub e_zap { my ($self, $element) = @_; $self->{Tests}[8] = 1; # we got here $self->{Tests}[9] = 1 if $self->in_element('zap'); $self->{Tests}[10] = 1 if $self->within_element('zap') == 1; $self->print_end_element($element); } libxml-perl-0.08/t/xp_sax.t0100644000076400007640000001051106766050424013756 0ustar kenken# Hey Emacs, this is -*- perl -*- ! # # Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl test.pl' # # $Id: xp_sax.t,v 1.4 1999/09/10 00:30:12 kmacleod Exp $ # ######################### We start with some black magic to print on failure. 
# Change 1..1 below to 1..last_test_to_print . # (It may become useful if the test is moved to ./t subdirectory.) BEGIN { $| = 1; print "1..15\n"; } END {print "not ok 1\n" unless $loaded;} use XML::Parser::PerlSAX; $loaded = 1; print "ok 1\n"; ######################### End of black magic. # Insert your test code below (better if it prints "ok 13" # (correspondingly "not ok 13") depending on the success of chunk 13 # of the test code): # Test Plan: # # * done; standard loading test # * not done; parse a document with data for all events # * not done; check all properties returned from events # * not done; check location # # The following is copied from XML::Parser by Clark Cooper # open(ZOE, '>zoe.ent'); print ZOE "'cute'"; close(ZOE); # XML string for tests my $xmlstring =<<"End_of_XML;"; ]> First line in foo Fran is &fran; and Zoe is &zoe; 1st line in bar 2nd line in bar 3rd line in bar This, '\240', would be a bad character in UTF-8. End_of_XML; # Handlers my @tests; my $pos =''; my $parser = XML::Parser::PerlSAX->new; if ($parser) { print "ok 2\n"; } else { print "not ok 2\n"; exit; } # Tests 4..15 eval { $parser->parse( Source => { String => $xmlstring, Encoding => 'ISO-8859-1' }, Handler => TestHandler->new( Tests => \@tests ) ); }; warn $@ if $@; if ($@) { print "Parse error:\n$@"; } else { $tests[3] ++; } unlink('zoe.ent') if (-f 'zoe.ent'); $xmlstring = <<'EOF;'; ]> &anEntRef; EOF; eval { $parser->parse( Source => { String => $xmlstring }, Handler => NoEntRefsHandler->new( Tests => \@tests ) ); }; warn $@ if $@; eval { $parser->parse( Source => { String => $xmlstring }, Handler => EntRefsHandler->new( Tests => \@tests ) ); }; warn $@ if $@; for (3 .. 
15) { print "not " unless $tests[$_]; print "ok $_\n"; } exit; package TestHandler; sub new { my $type = shift; return bless { @_ }, $type; } sub characters { my $self = shift; $self->{Tests}[4] ++; } sub start_element { my $self = shift; $self->{Tests}[5] ++; } sub end_element { my $self = shift; $self->{Tests}[6] ++; } sub processing_instruction { my $self = shift; $self->{Tests}[7] ++; } sub notation_decl { my $self = shift; $self->{Tests}[8] ++; } sub unparsed_entity_decl { my $self = shift; $self->{Tests}[9] ++; } sub start_cdata { my $self = shift; $self->{Tests}[12] ++; } sub end_cdata { my $self = shift; $self->{Tests}[13] ++; } sub resolve_entity { my $self = shift; my $entity = shift; if ($entity->{SystemId} eq 'fran-def') { $self->{Tests}[10] ++; return { String => 'pretty' }; } elsif ($entity->{SystemId} eq 'zoe.ent') { $self->{Tests}[11] ++; local(*FOO); open(FOO, $entity->{SystemId}) or die "Couldn't open $entity->{SystemId}"; return { ByteStream => *FOO }; } } package NoEntRefsHandler; sub new { my $type = shift; return bless { @_ }, $type; } sub characters { my $self = shift; my $characters = shift; if ($characters->{Data} eq 'The Ent Ref') { $self->{Tests}[14] ++; } } package EntRefsHandler; sub new { my $type = shift; return bless { @_ }, $type; } sub characters { my $self = shift; my $characters = shift; if ($characters->{Data} eq 'The Ent Ref') { die "shouldn't have made it here"; } } sub entity_reference { my $self = shift; my $ent_ref = shift; if (($ent_ref->{Name} eq 'anEntRef') && ($ent_ref->{Value} eq 'The Ent Ref')) { $self->{Tests}[15] ++; } } libxml-perl-0.08/t/subs.t0100644000076400007640000000302706756033163013434 0ustar kenken# Hey Emacs, this is -*- perl -*- ! # # Before `make install' is performed this script should be runnable with # `make test'. 
After `make install' it should work as `perl test.pl' # # $Id: subs.t,v 1.1 1999/08/16 16:04:03 kmacleod Exp $ # ######################### We start with some black magic to print on failure. # Change 1..1 below to 1..last_test_to_print . # (It may become useful if the test is moved to ./t subdirectory.) BEGIN { $| = 1; print "1..10\n"; } END {print "not ok 1\n" unless $loaded;} use XML::Parser::PerlSAX; use XML::Handler::Subs; $loaded = 1; print "ok 1\n"; my $subs = MySubs->new( ); my $parser = XML::Parser::PerlSAX->new( Handler => $subs ); $parser->parse(Source => { String => <<'EOF' } ); EOF foreach $test (2..10) { print $subs->{Tests}[$test] ? "ok $test\n" : "not ok $test\n" ; } package MySubs; use vars qw{ @ISA }; BEGIN { @ISA = qw{ XML::Handler::Subs }; }; sub s_foo__it { my ($self, $element) = @_; $self->{Tests}[2] = 1; # we got here $self->{Tests}[3] = 1 if $element->{Name} eq 'foo:-it'; $self->{Tests}[4] = 1 if $element->{Name} eq $self->{Names}[-1]; $self->{Tests}[5] = 1 if $element == $self->{Nodes}[-1]; $self->{Tests}[6] = 1 if $#{$self->{Names}} == 0; $self->{Tests}[7] = 1 if $#{$self->{Nodes}} == 0; } sub e_foo__it { my ($self, $element) = @_; $self->{Tests}[8] = 1; # we got here $self->{Tests}[9] = 1 if $self->in_element('foo:-it'); $self->{Tests}[10] = 1 if $self->within_element('foo:-it') == 1; } libxml-perl-0.08/t/canon_xml_writer.t0100644000076400007640000000746706754116317016047 0ustar kenken# Hey Emacs, this is -*- perl -*- ! # # Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl test.pl' # # $Id: canon_xml_writer.t,v 1.2 1999/08/10 21:42:39 kmacleod Exp $ # ######################### We start with some black magic to print on failure. # Change 1..1 below to 1..last_test_to_print . # (It may become useful if the test is moved to ./t subdirectory.) 
BEGIN { $| = 1; print "1..5\n"; } END {print "not ok 1\n" unless $loaded;} use XML::Parser::PerlSAX; use XML::Handler::CanonXMLWriter; $loaded = 1; print "ok 1\n"; ######################### End of black magic. # Insert your test code below (better if it prints "ok 13" # (correspondingly "not ok 13") depending on the success of chunk 13 # of the test code): my $parser = XML::Parser::PerlSAX->new; my $writer = XML::Handler::CanonXMLWriter->new; if ($writer) { print "ok 2\n"; } else { print "not ok 2\n"; exit; } # # The following XML is copied from XML::Parser by Clark Cooper # # XML string for tests my $xmlstring =<<"End_of_XML;"; ]> First line in foo 1st line in bar 2nd line in bar 3rd line in bar End_of_XML; ### ### plain test ### $expected_result = <<'End_of_XML;'; First line in foo 1st line in bar 2nd line in bar 3rd line in bar End_of_XML; $expected_result =~ s/\n$//s; $canon_xml = $parser->parse( Source => { String => $xmlstring }, Handler => $writer ); if ($canon_xml eq $expected_result) { print "ok 3\n"; } else { warn "---- expected result ----\n"; warn "$expected_result\n"; warn "---- actual result ----\n"; warn "$canon_xml\n"; print "not ok 3\n"; } ### ### Test PrintComments option ### $expected_result = <<'End_of_XML;'; First line in foo 1st line in bar 2nd line in bar 3rd line in bar End_of_XML; $expected_result =~ s/\n$//s; $writer->{PrintComments} = 1; $canon_xml = $parser->parse( Source => { String => $xmlstring }, Handler => $writer ); if ($canon_xml eq $expected_result) { print "ok 4\n"; } else { warn "---- expected result ----\n"; warn "$expected_result\n"; warn "---- actual result ----\n"; warn "$canon_xml\n"; print "not ok 4\n"; } undef $writer->{PrintComments}; ### ### Test James Clark's XML test suite ### $xml_test = (defined $ENV{XMLTEST}) ? 
$ENV{XMLTEST} : "$ENV{HOME}/xmltest"; # allow test to skip if directory does not exist and MUST_TEST isn't set if (!-d $xml_test && !defined($ENV{MUST_TEST})) { print "ok 5\n"; exit; } $tested_file = 0; foreach $file (glob("$xml_test/valid/sa/*.xml")) { $tested_file = 1; $canon_xml = $parser->parse( Source => { SystemId => $file }, Handler => $writer ); # add the `out' dir to get the corresponding canon xml ($out_file = $file) =~ s|/([^/]+)$|/out/$1|; open (CANON, $out_file) or die "$out_file: $!\n"; $expected_result = join('', ); close (CANON); if ($canon_xml ne $expected_result) { warn "---- expected result for $file ----\n"; warn "$expected_result\n"; warn "---- actual result ----\n"; warn "$canon_xml\n"; $not_ok = 1; } } if (!$tested_file || $not_ok) { print "not ok 5\n"; } else { print "ok 5\n"; } libxml-perl-0.08/t/amsterdam.t0100644000076400007640000000237406762020221014425 0ustar kenken# Hey Emacs, this is -*- perl -*- ! # # Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl test.pl' # # $Id: amsterdam.t,v 1.1 1999/08/28 17:46:57 kmacleod Exp $ # ######################### We start with some black magic to print on failure. # Change 1..1 below to 1..last_test_to_print . # (It may become useful if the test is moved to ./t subdirectory.) 
BEGIN { $| = 1; print "1..2\n"; } END {print "not ok 1\n" unless $loaded;} use XML::Parser::PerlSAX; use XML::PatAct::MatchName; use XML::PatAct::Amsterdam; $loaded = 1; print "ok 1\n"; $patterns = [ 'outer' => { Before => "Outer-before, '[attr]'", After => "Outer-after\n" }, 'inner' => { Before => "Inner" }, ]; my $matcher = XML::PatAct::MatchName->new( Patterns => $patterns ); my $handler = XML::PatAct::Amsterdam->new( Patterns => $patterns, Matcher => $matcher, AsString => 1 ); my $parser = XML::Parser::PerlSAX->new( Handler => $handler ); $string = $parser->parse(Source => { String => <<'EOF;' } ); EOF; $expected = <<"EOF;"; Outer-before, 'an attr' Inner Outer-after EOF; print (($string eq $expected) ? "ok 2\n" : "not ok 2\n"); libxml-perl-0.08/ChangeLog0100644000076400007640000001741007745254161013605 0ustar kenken2003-10-21 Ken MacLeod * t/stream.t: fixed test 11 for Perl >= 5.6 2001-07-23 Ken MacLeod * lib/XML/SAX2Perl.pm (startElement): typo; reported by mhershb@mcdermott.com (Mark A. 
Hershberger) 2000-03-30 Ken MacLeod * doc/index.html (Contributors): added Clark Cooper * MANIFEST (doc/sax-2.0.html, doc/sax-2.0-adv.html): added 2000-03-20 Ken MacLeod * lib/Data/Grove/Visitor.pm (_children_accept_name): add return @return; reported by Laurent CAPRANI 2000-03-07 Ken MacLeod * doc/sax-2.0.html, doc/sax-2.0-adv.html: added 2000-03-02 Ken MacLeod * lib/XML/ESISParser.pm: add -E0 to NSGMLS_FLAGS to not limit the number of errors reported; suggested by Charles Thayer (parse_fh): report line and line number on command character errors; also suggested by Charles 2000-02-22 Ken MacLeod * doc/index.html (Contributors): added Michael Koehne, KangChan Lee, and Colin Muller * doc/mirror.sh, doc/index.html: added 2000-02-17 Ken MacLeod * doc/modules.xml: fixed several well-formedness errors; reported by KangChan Lee 1999-12-22 Ken MacLeod * lib/Data/Grove.pm, lib/Data/Grove/Parent.pm, lib/Data/Grove/Visitor.pm, lib/XML/Handler/XMLWriter.pm, lib/XML/Handler/CanonXMLWriter.pm, lib/XML/Handler/Subs.pm, lib/XML/SAX2Perl.pm, lib/XML/Perl2SAX.pm, lib/XML/ESISParser.pm, lib/XML/Parser/PerlSAX.pm, lib/XML/PatAct/Amsterdam.pm, lib/XML/PatAct/MatchName.pm, lib/XML/PatAct/ToObjects.pm: added $VERSION * lib/XML/Parser/PerlSAX.pm (_handle_start): support UseAttributeOrder option (_handle_attlist): Changed EntityName to ElementName (re 9/28 entry) 1999-09-28 Ken MacLeod * lib/XML/Parser/PerlSAX.pm (_handle_attlist): typo: was calling entity_decl 1999-09-09 Ken MacLeod * lib/XML/Parser/PerlSAX.pm: add start_cdata, end_cdata, and entity_reference events 1999-08-28 Ken MacLeod * lib/XML/PatAct/Amsterdam.pm: added Output and AsString options, added support for attribute replacement * t/amsterdam.t: added 1999-08-18 Ken MacLeod * lib/Data/Grove.pm: added Data::Grove::Characters * lib/XML/ESISParser.pm (parse_fh): report newline as characters if no record_end() handler * lib/XML/PatAct/ToObjects.pm (_parse_action): removed debugging statement 1999-08-16 Ken MacLeod * README: 
updated * doc/modules.xml (libxml-perl): updated * doc/PerlSAX.pod (Parameters): missing '>' * release 0.05 * lib/XML/Parser/PerlSAX.pm (_handle_init): call set_document_locator * lib/XML/PatAct/ActionTempl.pm, lib/XML/PatAct/Amsterdam.pm, lib/XML/PatAct/MatchName.pm, lib/XML/PatAct/PatternTempl.pm (new): Accept both key, value pairs and hash options * lib/XML/PatAct/ToObjects.pm (new): * lib/XML/Handler/Subs.pm: added * t/subs.t: added * t/stream.t: added 1999-08-15 Ken MacLeod * lib/XML/Handler/XMLWriter.pm: added * lib/XML/Handler/Sample.pm: Placed in public domain 1999-08-14 Ken MacLeod * doc/PerlSAX.pod: added an introduction, a ``Deviations from the Java version'' section, added `set_document_locator()' handler method * lib/XML/PatAct/ToObjects.pm: add CopyAttributes option, add -grove-contents action 1999-08-12 Ken MacLeod * lib/XML/ESISParser.pm (parse_fh): dynamically test event handler existance * lib/XML/Parser/PerlSAX.pm (parse): wasn't capturing XML::Parser Element events 1999-08-10 Ken MacLeod * README, doc/modules.xml: updated with PatAct modules * lib/XML/PatAct/ActionTempl.pm, lib/XML/PatAct/Amsterdam.pm, lib/XML/PatAct/MatchName.pm, lib/XML/PatAct/PatternTempl.pm, lib/XML/PatAct/ToObjects.pm: added * t/xp_sax.t, t/canon_xml_writer.t: added CVS ID * t/schema.t: added * examples/schema.xml, examples/schema.pl: added * doc/UsingPatActModules.pod, doc/CreatingPatActModules.pod: added * lib/XML/Parser/PerlSAX.pm (_handle_extern_ent): change "Perl SAX" to "PerlSAX" in doc 1999-08-09 Ken MacLeod * lib/XML/ESISParser.pm (parse_fh): was not passing an empty hash * lib/XML/Parser/PerlSAX.pm (_handle_init, _handle_final): was not passing an empty hash 1999-05-26 Ken MacLeod * lib/XML/Handler/CanonXMLWriter.pm, t/canon_xml_writer.t: added 1999-05-23 Ken MacLeod * lib/Data/Grove/Tied.pm: renamed to Parent.pm * README (DOCUMENTS): added renamed libxml to libxml-perl * libxml.spec: renamed libxml-perl.spec 1999-05-17 Ken MacLeod * libxml.spec: files in `doc/' go 
into top-dir of /usr/doc/$PKG * PerlSAX.pod: moved to doc/PerlSAX.pod 1999-05-09 Ken MacLeod * doc/modules.xml: added 1999-05-08 Ken MacLeod * doc/UsingPerlSAX.pod, examples/MyHandler.pm, examples/myhandler.pl, examples/myhandler.xml: added 1999-05-07 Ken MacLeod * lib/XML/ESISParser.pm, lib/Data/Grove.pm, lib/XML/Handler/Sample.pm: added POD 1999-05-06 Ken MacLeod * lib/Data/Grove/Visitor.pm: remove XML::Grove extensions and make generic * lib/XML/Parser/SAXPerl.pm: renamed PerlSAX.pm * lib/XML/Handler/Sample.pm: added * examples/perlsax-test.pl: added * examples/esis-test.pl: updated for new XML::ESISParser, moved handler (Receiver) to XML::Handler::Sample, added command line option for SGML 1999-04-30 Ken MacLeod * Makefile.PL: added PREREQ_PM for XML::Parser 1999-04-15 Ken MacLeod * lib/Data/Grove/Visitor.pm (accept): change XML:: to Data:: * lib/Data/Grove.pm (new): %{ shift } was being read as %shift 1999-02-18 Ken MacLeod * lib/Data/Grove/Visitor.pm: was XML::Grove::Visitor * lib/Data/Grove/Tied.pm: was XML::Grove::Node * lib/Data/Grove.pm: created from XML::Grove 1999-02-15 Ken MacLeod * lib/XML/Parser/SAXPerl.pm (parse): add comments * lib/XML/ESISParser.pm: major changes for support of both XML and SGML, and ongoing Perl SAX updates * SAX.pod (end_document): noted that the return value of end_document() is the return value of parse() * README: added reference to FAQ, added module statuses, more cleary described ESISParser, require Perl 5.005 1999-02-13 Ken MacLeod * lib/XML/ESISParser.pm: start move to Perl SAX 1999-02-12 Ken MacLeod * lib/XML/SAX2Perl.pm, lib/XML/Perl2SAX.pm, lib/XML/ESISParser.pm: update to new Perl SAX * lib/XML/Parser/SAXPerl.pm (new): allow hash or key/value pairs 1999-02-12 Ken MacLeod * interface-style.pod: note still undecided items * lib/XML/Parser/SAXPerl.pm: fixes shown by xp_sax.t * t/xp_sax.t: added * lib/XML/Parser/SAXPerl.pm: added pod many changes for Perl SAX and XML::Parser::Expat 1999-02-11 Ken MacLeod * SAX.pod: 
suggestions from Eric Prud'hommeaux and Enno Derksen * interface-style.pod: suggestions from Larry Wall 1999-02-01 Ken MacLeod * MANIFEST: updated * lib/XML/Parser/SAXPerl.pm: modified more towards Perl SAX * SAX.pod: added 1999-01-31 Ken MacLeod * interface-style.pod: added 1998-12-10 Ken MacLeod * lib/XML/Parser/SAXPerl.pm: added 1998-12-08 Ken MacLeod * MANIFEST: added libxml-perl-0.08/libxml-perl.spec0100644000076400007640000000441306756034003015125 0ustar kenkenSummary: Collection of Perl modules for working with XML Name: libxml-perl Version: @VERSION@ Release: 1 Source: http://www.perl.com/CPAN/modules/by-module/XML/libxml-perl-@VERSION@.tar.gz Copyright: Artistic or GPL Group: Applications/Publishing/XML URL: http://www.perl.com/ Packager: ken@bitsko.slc.ut.us (Ken MacLeod) BuildRoot: /tmp/libxml-perl # # $Id: libxml-perl.spec,v 1.4 1999/08/16 16:10:43 kmacleod Exp $ # %description libxml-perl is a collection of Perl modules for working with XML. %prep %setup perl Makefile.PL INSTALLDIRS=perl %build make %install make PREFIX="${RPM_ROOT_DIR}/usr" pure_install DOCDIR="${RPM_ROOT_DIR}/usr/doc/libxml-perl-@VERSION@-1" mkdir -p "$DOCDIR/examples" for ii in PerlSAX.pod UsingPerlSAX.pod interface-style.pod modules.xml; do cp doc/$ii "$DOCDIR/$ii" chmod 644 "$DOCDIR/$ii" done for ii in README Changes examples/*; do cp $ii "$DOCDIR/$ii" chmod 644 "$DOCDIR/$ii" done %files /usr/doc/libxml-perl-@VERSION@-1 /usr/lib/perl5/Data/Grove.pm /usr/lib/perl5/Data/Grove/Parent.pm /usr/lib/perl5/Data/Grove/Visitor.pm /usr/lib/perl5/XML/ESISParser.pm /usr/lib/perl5/XML/Handler/CanonXMLWriter.pm /usr/lib/perl5/XML/Handler/Sample.pm /usr/lib/perl5/XML/Handler/Subs.pm /usr/lib/perl5/XML/Handler/XMLWriter.pm /usr/lib/perl5/XML/SAX2Perl.pm /usr/lib/perl5/XML/Perl2SAX.pm /usr/lib/perl5/XML/Parser/PerlSAX.pm /usr/lib/perl5/XML/PatAct/ActionTempl.pm /usr/lib/perl5/XML/PatAct/Amsterdam.pm /usr/lib/perl5/XML/PatAct/MatchName.pm /usr/lib/perl5/XML/PatAct/PatternTempl.pm 
/usr/lib/perl5/XML/PatAct/ToObjects.pm /usr/lib/perl5/man/man3/Data::Grove.3 /usr/lib/perl5/man/man3/Data::Grove::Parent.3 /usr/lib/perl5/man/man3/Data::Grove::Visitor.3 /usr/lib/perl5/man/man3/XML::Handler::CanonXMLWriter.3 /usr/lib/perl5/man/man3/XML::Handler::Sample.3 /usr/lib/perl5/man/man3/XML::Handler::Subs.3 /usr/lib/perl5/man/man3/XML::Handler::XMLWriter.3 /usr/lib/perl5/man/man3/XML::ESISParser.3 /usr/lib/perl5/man/man3/XML::SAX2Perl.3 /usr/lib/perl5/man/man3/XML::Perl2SAX.3 /usr/lib/perl5/man/man3/XML::Parser::PerlSAX.3 /usr/lib/perl5/man/man3/XML::PatAct::ActionTempl.3 /usr/lib/perl5/man/man3/XML::PatAct::Amsterdam.3 /usr/lib/perl5/man/man3/XML::PatAct::MatchName.3 /usr/lib/perl5/man/man3/XML::PatAct::PatternTempl.3 /usr/lib/perl5/man/man3/XML::PatAct::ToObjects.3 libxml-perl-0.08/libxml-perl-0.08.spec0100644000076400007640000000436707745275111015526 0ustar kenkenSummary: Collection of Perl modules for working with XML Name: libxml-perl Version: 0.08 Release: 1 Source: http://www.perl.com/CPAN/modules/by-module/XML/libxml-perl-0.08.tar.gz Copyright: Artistic or GPL Group: Applications/Publishing/XML URL: http://www.perl.com/ Packager: ken@bitsko.slc.ut.us (Ken MacLeod) BuildRoot: /tmp/libxml-perl # # $Id: libxml-perl.spec,v 1.4 1999/08/16 16:10:43 kmacleod Exp $ # %description libxml-perl is a collection of Perl modules for working with XML. 
%prep %setup perl Makefile.PL INSTALLDIRS=perl %build make %install make PREFIX="${RPM_ROOT_DIR}/usr" pure_install DOCDIR="${RPM_ROOT_DIR}/usr/doc/libxml-perl-0.08-1" mkdir -p "$DOCDIR/examples" for ii in PerlSAX.pod UsingPerlSAX.pod interface-style.pod modules.xml; do cp doc/$ii "$DOCDIR/$ii" chmod 644 "$DOCDIR/$ii" done for ii in README Changes examples/*; do cp $ii "$DOCDIR/$ii" chmod 644 "$DOCDIR/$ii" done %files /usr/doc/libxml-perl-0.08-1 /usr/lib/perl5/Data/Grove.pm /usr/lib/perl5/Data/Grove/Parent.pm /usr/lib/perl5/Data/Grove/Visitor.pm /usr/lib/perl5/XML/ESISParser.pm /usr/lib/perl5/XML/Handler/CanonXMLWriter.pm /usr/lib/perl5/XML/Handler/Sample.pm /usr/lib/perl5/XML/Handler/Subs.pm /usr/lib/perl5/XML/Handler/XMLWriter.pm /usr/lib/perl5/XML/SAX2Perl.pm /usr/lib/perl5/XML/Perl2SAX.pm /usr/lib/perl5/XML/Parser/PerlSAX.pm /usr/lib/perl5/XML/PatAct/ActionTempl.pm /usr/lib/perl5/XML/PatAct/Amsterdam.pm /usr/lib/perl5/XML/PatAct/MatchName.pm /usr/lib/perl5/XML/PatAct/PatternTempl.pm /usr/lib/perl5/XML/PatAct/ToObjects.pm /usr/lib/perl5/man/man3/Data::Grove.3 /usr/lib/perl5/man/man3/Data::Grove::Parent.3 /usr/lib/perl5/man/man3/Data::Grove::Visitor.3 /usr/lib/perl5/man/man3/XML::Handler::CanonXMLWriter.3 /usr/lib/perl5/man/man3/XML::Handler::Sample.3 /usr/lib/perl5/man/man3/XML::Handler::Subs.3 /usr/lib/perl5/man/man3/XML::Handler::XMLWriter.3 /usr/lib/perl5/man/man3/XML::ESISParser.3 /usr/lib/perl5/man/man3/XML::SAX2Perl.3 /usr/lib/perl5/man/man3/XML::Perl2SAX.3 /usr/lib/perl5/man/man3/XML::Parser::PerlSAX.3 /usr/lib/perl5/man/man3/XML::PatAct::ActionTempl.3 /usr/lib/perl5/man/man3/XML::PatAct::Amsterdam.3 /usr/lib/perl5/man/man3/XML::PatAct::MatchName.3 /usr/lib/perl5/man/man3/XML::PatAct::PatternTempl.3 /usr/lib/perl5/man/man3/XML::PatAct::ToObjects.3 libxml-perl-0.08/Makefile.PL0100644000076400007640000000107107745275111013777 0ustar kenken# # Copyright (C) 1998 Ken MacLeod # This library is free software; you can redistribute it and/or modify # it under the 
same terms as Perl itself. # # $Id: Makefile.PL,v 1.3 1999/05/24 23:25:02 kmacleod Exp $ # use ExtUtils::MakeMaker; $VERSION = '0.08'; # See lib/ExtUtils/MakeMaker.pm for details of how to influence # the contents of the Makefile that is written. WriteMakefile( 'NAME' => 'libxml-perl', 'VERSION_FROM' => 'Makefile.PL', 'PREREQ_PM' => { 'XML::Parser' => '2.19' }, dist => {'COMPRESS' => 'gzip', 'SUFFIX' => '.gz'}, ); libxml-perl-0.08/README0100644000076400007640000001443507745254162012720 0ustar kenken$Id: README,v 1.10 2003/10/21 16:01:54 kmacleod Exp $ libxml-perl Collection of Perl modules for working with XML. INTRODUCTION libxml-perl is a collection of smaller Perl modules, scripts, and documents for working with XML in Perl. libxml-perl software works in combination with XML::Parser, PerlSAX, XML::DOM, XML::Grove and others. See the file Changes for user-visible changes and ChangeLog for detailed changes. See the `examples' directory for examples. POD style documentation is included in all non-alpha modules and scripts. You should also be able to use the 'perldoc' utility to extract documentation from the module files directly. HTML formatted docs are available at the libxml-perl home page . Newer versions of this module can be found on CPAN at . To join the Perl-XML mailing list, send an email message to ListManager@ActiveState.com with the following text in the body: Subscribe Perl-XML View the Perl XML FAQ at . Copyright (C) 1998 Ken MacLeod and others This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 
MODULES The following modules are marked with their release status: STABLE -- has been in use for a while with few or no outstanding bugs BETA -- interfaces are stable but there may still be bugs ALPHA -- interfaces are changing, there may be lots of bugs, and there may not be docs available yet XML::Parser::PerlSAX STABLE XML::Parser::PerlSAX is a PerlSAX parser using XML::Parser (which uses James Clark's Expat XML Parser). XML::Handler::Sample STABLE XML::Handler::Sample is a PerlSAX handler that simply prints out the event names as they are parsed by a PerlSAX parser. It can be used for debugging or as a template for building new handlers. XML::Handler::Sample contains handlers for all known parser events. XML::ESISParser STABLE XML::ESISParser is a validating PerlSAX parser using James Clark's `nsgmls' SGML/XML Parser. ESISParser supports both XML and SGML document instances. Unless you need validation, you should probably be using XML::Parser::PerlSAX or XML::Parser. XML::ESISParser, together with XML::Grove, obsoletes the SGML::SPGroveBuilder and SGML::Grove modules. XML::Handler::XMLWriter STABLE A PerlSAX handler for writing readable XML (in contrast to Canonical XML, for example). XMLWriter is also subclassable and supports calling start and end methods by element name (subclassed from XML::Handler::Subs). XMLWriter is similar to XML::Parser's Stream style. XML::Handler::Subs STABLE A PerlSAX handler base class that calls start and end methods by element name. Subs is similar to XML::Parser's Subs style. XML::Handler::CanonXMLWriter STABLE A PerlSAX handler that outputs in Canonical XML. This module is generally only used for debugging. Data::Grove STABLE Data::Grove::Parent STABLE Data::Grove::Visitor STABLE Data::Grove and its helpers provide a base class for deeply nested or directed graph structures. Used by XML::Grove (and others soon). XML::SAX2Perl ALPHA XML::Perl2SAX ALPHA SAX2Perl and Perl2SAX are SAX Parser<->DocumentHandler filters.
These modules translate parse events between the Java/CORBA-style SAX methods and PerlSAX-style methods. XML::PatAct::MatchName ALPHA MatchName is a pattern-matching module that can be used with PatAct action modules. MatchName uses simple element names or element name lists to match names to actions. XML::PatAct::ToObjects ALPHA ToObjects is a PatAct action module. ToObjects can be used to create application-ready Perl objects from XML instances. XML::PatAct::Amsterdam ALPHA Amsterdam is a PatAct action module. Amsterdam can be used to apply a very simple form of style-sheet to an XML instance by using ``before'' and ``after'' strings that are output before and after the contents of elements. XML::PatAct::PatternTempl BETA XML::PatAct::ActionTempl BETA PatternTempl and ActionTempl are template files that pattern/action module writers can copy to create new modules. See CreatingPatActModules for more information. DOCUMENTS sax-2.0.html, sax-2.0-adv.html PerlSAX 2.0 bindings. Maintained by Robin Berjon and the XML-Perl mailing list. PerlSAX This document defines a Perl binding to SAX 1.0. PerlSAX-based parser modules implement and possibly extend the interface described in PerlSAX. UsingPerlSAX A brief introduction to PerlSAX using the XML::Parser::PerlSAX module. UsingPatActModules Describes how to use pattern/action modules to transform XML instances. CreatingPatActModules A document for module writers who are writing new pattern/action modules. modules.xml modules.xml contains a listing of all Perl XML packages and their public modules categorized by several topics. INSTALLATION In order to use this package you will need Perl version 5.005 or later. Several other modules may also be required to use some modules in libxml-perl, including XML::Parser, XML::DOM, and XML::Grove. These are all available in the XML module directory on CPAN.
You install libxml-perl, as you would install any Perl module library, by running these commands: perl Makefile.PL make make test make install If you want to install a private copy of libxml-perl in your home directory, then you should try to produce the initial Makefile with something like this command: perl Makefile.PL PREFIX=~/perl