XML-SAX-Machines-0.46/0000755000175000001440000000000012204442306014014 5ustar perigrinusersXML-SAX-Machines-0.46/CHANGES0000644000175000001440000000605412204442306015014 0ustar perigrinusersCHANGES file for XML-SAX-Machines 0.46 2013-08-19 12:16:00 America/New_York 0.45 2013-08-19 11:30:07 America/New_York 0.44 2013-08-18 11:51:46 America/New_York 0.43 2013-03-11 00:37:03 America/New_York - update to use Dist::Zilla and stop using Module::Install 0.42 Jun 11, 2009 - remove psudo hashes (RT #1523) - Update packaging to use Module::Install 0.41 Tue Jan 20 11:57:39 EST 2004 - XML::SAX::Pipeline SYNOPSIS doc fixed (reported by bplatz a t agere dt com ) - XML::SAX::Machine now implements the Handler option to new properly. (reported by bplatz a t agere dt com ) 0.40 - Add several state getter methods to XML::Filter::Merger to make subclassing easier. 0.39 Fri Sep 6 11:44:23 EDT 2002 - Get end_document to return the next handler's end_document() result when not in manifold mode. 0.38 Fri Sep 6 10:05:32 EDT 2002 - Require XML::SAX::Writer > 0.41. - Allow merger to be subclassed - Remove need to call start_/end_manifold_document() if you're just merging secondary documents that occur inline. 0.37 Thu Sep 5 17:35:02 EDT 2002 - Make XML::Filter::Merger handle nested documents correctly and tweak XML::Filter::DocSplitter to agree. 0.36 Tue Jul 23 14:00:47 EDT 2002 - Correct t/xsw_version.t to test for 0.41 instead of 0.39 0.35 Mon Jul 22 13:10:23 EDT 2002 - Remove a dependency on hash order that perl5.8.0's new hash algorithm exposes. THIS AFFECTS ONLY THE TEST SUITE unless you are checking for $@ =~ /.../ in order to catch/filter cycle dependancy checks. - Makefile.PL warn()s if XML::SAX::Writer 0.40 is detected. It's too broken. There's also a new test suite t/00xsw_version.t to barn again and fail. 0.34 Thu Mar 21 09:55:46 EST 2002 - Get Merger.pm to properly deal with events before and after the root element events. Reported by ASSAD Arnaud 0.33 Tue Mar 19 10:59:42 EST 2002 - Port back to perl5.00503 (reported by Kip Hampton - no named regexp character classes ([...[:alpha:]...]) - can't supress "used only once, possible typo" warnings 0.32 Wed Mar 6 10:09:55 EST 2002 - fix eval { return } thinko in X::S::Machine::parse(), reported by Rick Frankel . - test for $class->can( "new" ) before doing eval "require $class" awkward code highlighted by christian.glahn@uibk.ac.at - Added diagnositc errors for undef or '' filter names - Added t/10bad_machine.t - Prevent unresolvable filters from causing later, additional, confusing error messages. - Add test to make sure an "empty derived class" can be specced by name (this worked ok before and now; just added a test). 0.311 - Require at least XML::SAX::Base 1.02, removed X::S::B::set_handler workaround. - Require XML::SAX 0.05, not 0.3 - Added two more examples to MANIFEST 0.31 Tue Jan 22 12:27:55 EST 2002 - ByRecords now counts stack depth accurately. Tested with patched XML::Filter::LibXSLT. XML-SAX-Machines-0.46/lib/0000755000175000001440000000000012204442306014562 5ustar perigrinusersXML-SAX-Machines-0.46/lib/XML/0000755000175000001440000000000012204442306015222 5ustar perigrinusersXML-SAX-Machines-0.46/lib/XML/Filter/0000755000175000001440000000000012204442306016447 5ustar perigrinusersXML-SAX-Machines-0.46/lib/XML/Filter/DocSplitter.pm0000644000175000001440000001645412204442306021253 0ustar perigrinuserspackage XML::Filter::DocSplitter; { $XML::Filter::DocSplitter::VERSION = '0.46'; } # ABSTRACT: Multipass processing of documents use XML::SAX::Base; @ISA = qw( XML::SAX::Base ); @EXPORT_OK = qw( DocSplitter ); use strict; use Carp; use XML::SAX::EventMethodMaker qw( sax_event_names compile_missing_methods ); ## Inherited. sub set_aggregator { my $self = shift; $self->{Aggregator} = shift; $self->{AggregatorPassThrough} = XML::SAX::Base->new() unless $self->{AggregatorPassThrough}; $self->{AggregatorPassThrough}->set_handler( $self->{Aggregator} ); } sub get_aggregator { my $self = shift; return $self->{Aggregator}; } sub set_split_path { my $self = shift; my $pat = $self->{SplitPoint} = shift; $pat = "/$pat" unless substr( $pat, 0, 1 ) eq "/"; $pat = quotemeta $pat; $pat =~ s{\\\*}{[^/]*}g; ## Hmmm, * will match nodes with 0 length names "" $pat =~ s{\\/\\/}{/.*/}g; $pat =~ s{^\\/}{^}g; $self->{SplitPathRe} = qr/$pat(?!\n)\Z/; return undef; } sub get_split_path { my $self = shift; return $self->{SplitPoint}; } sub _check_stack { my $self = shift; my $stack = join "/", @{$self->{Stack}}; $stack =~ $self->{SplitPathRe}; } sub start_document { my $self = shift; my $aggie = $self->get_aggregator; $aggie->start_manifold_document( @_ ) if $aggie && $aggie->can( "start_manifold_document" ); $aggie->set_include_all_roots( 1 ) if $aggie && $aggie->can( "set_include_all_roots" ); $aggie->start_document( @_ ); $self->{Stack} = []; $self->{Splitting} = 0; $self->set_split_path( "/*/*" ) unless defined $self->get_split_path; ## don't pass on the start_document, we'll do that once for each ## record. return undef; } sub start_element { my $self = shift; my ( $elt ) = @_; push @{$self->{Stack}}, $elt->{Name}; if ( ! $self->{Splitting} && $self->_check_stack ) { ++$self->{Splitting}; $self->SUPER::start_document( {} ); } elsif ( $self->{Splitting} ) { ++$self->{Splitting}; } if ( $self->{Splitting} ) { return $self->SUPER::start_element( @_ ); } $self->{AggregatorPassThrough}->start_element( @_ ) if $self->{AggregatorPassThrough}; return undef; } sub end_element { my $self = shift; my ( $elt ) = @_; pop @{$self->{Stack}}; my $r ; if ( $self->{Splitting} ) { $r = $self->SUPER::end_element( @_ ) } else { $r = $self->{AggregatorPassThrough}->end_element( @_ ) if $self->{AggregatorPassThrough}; } if ( $self->{Splitting} && ! --$self->{Splitting} ) { ## ignore the result code, we'll get it in end_document. $self->SUPER::end_document( {} ); } return $r; } sub end_document { my $self = shift; my $aggie = $self->get_aggregator; my $r; if ( $aggie ) { $r = $aggie->end_document( @_ ); $r = $aggie->end_manifold_document( @_ ) if $aggie->can( "end_manifold_document" ); } return $r; } compile_missing_methods __PACKAGE__, <<'TPL_END', sax_event_names ; sub { my $self = shift; return $self->SUPER::( @_ ) if $self->{Splitting}; $self->{AggregatorPassThrough}->( @_ ) if $self->{AggregatorPassThrough}; } TPL_END 1; __END__ =pod =head1 NAME XML::Filter::DocSplitter - Multipass processing of documents =head1 VERSION version 0.46 =head1 SYNOPSIS ## See XML::SAX::???? for an easier way to use this filter. use XML::SAX::Machines qw( Machine ) ; my $m = Machine( [ Intake => "XML::Filter::DocSplitter" => qw( Filter ) ], [ Filter => "My::Filter" => qw( Merger ) ], [ Merger => "XML::Filter::Merger" => qw( Output ) ], [ Output => \*STDOUT ], ); ## Let the distributor coordinate with the merger ## XML::SAX::Manifold does this for you. $m->Intake->set_aggregator( $m->Merger ); $m->parse_file( "foo" ); =head1 DESCRIPTION XML::Filter::DocSplitter is a SAX filter that allows you to apply a filter to repeated sections of a document. It splits a document up at a predefined elements in to multiple documents and the filter is run on each document. The result can be left as a stream of separate documents or combined back in to a single document using a filter like L. By default, the input document is split in all children of the root element. By that reckoning, this document has three sub-documents in it: .... .... .... When using without an aggregator, all events up to the first record are lost; with an aggregator, they are passed directly in to the aggregator as the "first" document. All elements between the records (the "\n " text nodes, in this case) are also passed directly to the merger (these will arrive between the end_document and start_document calls for each of the records), as are all events from the last record until the end of the input document. This means that the first document, as seen by the merger, is incomplete; it's missing it's end_element, which is passed later. The approach of passing events from the input document right on through to the merger differs from the way L works. This class is derived from L, see that for details. =head1 NAME XML::Filter::DocSplitter - Multipass processing of documents =head1 METHODS =over =item new my $d = XML::Filter::DocSplitter->new( Handler => $h, Aggregator => $a, ## optional ); =item set_aggregator $h->set_aggregator( $a ); Sets the SAX filter that will stitch the resulting subdocuments back together. Set to C to prevent such stitchery. The aggregator should support the C, C, and C methods as described in L. =item get_aggregator my $a = $h->get_aggregator; Gets the SAX filter that will stitch the resulting subdocuments back together. =item set_split_path $h->set_split_path( "/a/b/c" ); Sets the pattern to use when splitting the document. Patterns are a tiny little subset of the XPath language: Pattern Description ======= =========== /*/* splits the document on children of the root elt (default) //record splits each elt in to a document /*/record splits each child of the root elt /a/b/c/d splits each of the elts in to a document =item get_split_path my $a = $h->get_split_path; =head1 LIMITATIONS Can only feed a single aggregator at the moment :). I can fix this with a bit of effort. =head1 AUTHOR Barrie Slaymaker =head1 COPYRIGHT Copyright 2000, Barrie Slaymaker, All Rights Reserved. You may use this module under the terms of the Artistic, GPL, or the BSD licenses. =head1 AUTHORS =over 4 =item * Barry Slaymaker =item * Chris Prather =back =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2013 by Barry Slaymaker. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut XML-SAX-Machines-0.46/lib/XML/Filter/Distributor.pm0000644000175000001440000001635112204442306021325 0ustar perigrinuserspackage XML::Filter::Distributor; { $XML::Filter::Distributor::VERSION = '0.46'; } # ABSTRACT: Multipass processing of documents use XML::SAX::Base; @ISA = qw( XML::SAX::Base ); @EXPORT_OK = qw( Distributor ); use strict; use Carp; use XML::SAX::EventMethodMaker qw( sax_event_names missing_methods compile_methods ); sub new { my $proto = shift; my $class = ref $proto || $proto; my $self = bless {}, $class; $self->{Channels} = []; for ( @_ ) { push @{$self->{Channels}}, $_; } return $self; } sub set_handlers { my $self = shift; @{$self->{Channels}} = map { { Handler => $_ } } @_; } sub set_handler { shift()->set_handlers( @_ ); } sub _buffer { my $self = shift; push @{$self->{BUFFER}}, [ @_ ]; } sub set_aggregator { my $self = shift; $self->{Aggregator} = shift; } sub get_aggregator { my $self = shift; return $self->{Aggregator}; } sub _change_channels { my $self = shift; my ( $desired_channel ) = @_; $desired_channel = $self->{CurChannelNum} + 1 unless defined $desired_channel; $desired_channel = undef if $desired_channel < 0 || $desired_channel > $#{$self->{Channels}}; ## Mess with XML::SAX::Base's internals a bit (ugh). ## TODO: Get less messy when the X::S::B in CVS makes it in to the ## real world. $self->{Methods} = {}; $self->{Handler} = undef; if ( defined $desired_channel ) { $self->{CurChannel} = $self->{Channels}->[$desired_channel]; $self->{$_} = $self->{CurChannel}->{$_} for keys %{$self->{CurChannel}}; } $self->{CurChannelNum} = $desired_channel; return $desired_channel; } sub _replay { my $self = shift; my $r; for ( @{$self->{BUFFER}} ) { my $event = shift @$_; ## This is ugly, must be a faster way, too tired to think of one. my $meth = "SUPER::$event"; $self->$meth( @$_ ); unshift @$_, $event; } return $r; } sub start_document { my $self = shift; @{$self->{BUFFER}} = (); $self->_buffer( "start_document", @_ ); $self->_change_channels( 0 ); my $aggie = $self->get_aggregator; $aggie->start_manifold_document( @_ ) if $aggie && $aggie->can( "start_manifold_document" ); return $self->SUPER::start_document( @_ ); } sub end_document { my $self = shift; $self->_buffer( "end_document", @_ ); $self->SUPER::end_document( @_ ); $self->_replay while $self->_change_channels; @{$self->{BUFFER}} = (); my $aggie = $self->get_aggregator; return $aggie->end_manifold_document( @_ ) if $aggie && $aggie->can( "end_manifold_document" ); return ; } compile_methods __PACKAGE__, <<'TPL_END', missing_methods __PACKAGE__, sax_event_names ; sub { my $self = shift; $self->_buffer( "", @_ ); return $self->SUPER::( @_ ); } TPL_END 1; __END__ =pod =head1 NAME XML::Filter::Distributor - Multipass processing of documents =head1 VERSION version 0.46 =head1 SYNOPSIS ## See XML::SAX::Manifold for an easier way to use this filter. use XML::SAX::Machines qw( Machine ) ; ## See the wondrous ASCII ART below for help visualizing this ## XML::SAX::Manifold makes this a lot easier. my $m = Machine( [ Intake => "XML::Filter::Distributor" => qw( V TOC Body ) ], [ V => "My::Validator" ], [ TOC => "My::TOCExtractor" => qw( Merger ) ], [ Body => "My::BodyMasseuse" => qw( Merger ) ], [ Merger => "XML::Filter::Merger" => qw( Output ) ], [ Output => \*STDOUT ], ); ## Let the distributor coordinate with the merger. ## XML::SAX::Manifold does this for you. $m->Intake->set_aggregator( $m->Merger ); $m->parse_file( "foo" ); =head1 DESCRIPTION XML::Filter::Distributor is a SAX filter that allows "multipass" processing of a document by sending the document through several channels of SAX processors one channel at a time. A channel may be a single SAX processor or a machine like a pipeline (see L). This can be used to send the source document through one entire processing chain before beginning the next one, for instance if the first channel is a validator or linter that throws exceptions on error. It can also be used to run the document through multiple processing chains and glue all of the chains' output documents back together with something like XML::Filter::Merger. The SYNOPSIS does both. This differs from L in that the channels are prioritized and each channel receives all events for a document before the next channel receives any events. XML::Filter::Distributor buffers all events while feeding them to the highest priority channel (C<$processor1> in the synopsis), and replays them for each lower priority channel one at a time. The event flow for the example in the SYNOPSIS would look like, with the numbers next to the connection arrow indicating when the document's events flow along that arrow. +-------------+ +->| Validator | 1/ +-------------+ / 1 +-------+ 2 +--------------+ 2 +--------+ upstream ----| Dist. |---->| TOCExtractor |--*-->| Merger |-> STDOUT +-------+ +--------------+ / +--------+ \3 /3 \ +--------------+ +->| BodyMasseuse | +--------------+ | Here's the timing of the event flows: 1: upstream -> Dist -> Validator 2: Dist -> TOCExtractorc -> Merger -> STDOUT 3: Dist -> BodyMassseuse -> Merger -> STDOUT When the document arrives from upstream, the events all arrive during time period 1 and are buffered and also passed through processor 1. After all events have been received (as indicated by an C event from upstream), all events are then played back through processor 2, and then through processor 3. =head1 NAME XML::Filter::Distributor - Multipass processing of documents =head1 METHODS =over =item new my $d = XML::Filter::Distributor->new( { Handler => $h1 }, { Handler => $h2 }, ... ); A channel may be any SAX machine, frequently they are pipelines. =item set_handlers $p->set_handlers( $handler1, $handler2 ); Provided for compatability with other SAX processors, use set_handlers instead. =item set_handler Provided for compatability with other SAX processors, use set_handlers instead. =head1 LIMITATIONS Can only feed a single aggregator at the moment :). I can fix this with a bit of effort. =head1 AUTHOR Barrie Slaymaker =head1 COPYRIGHT Copyright 2000, Barrie Slaymaker, All Rights Reserved. You may use this module under the terms of the Artistic, GPL, or the BSD licenses. =head1 AUTHORS =over 4 =item * Barry Slaymaker =item * Chris Prather =back =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2013 by Barry Slaymaker. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut XML-SAX-Machines-0.46/lib/XML/Filter/Tee.pm0000644000175000001440000001377212204442306017534 0ustar perigrinuserspackage XML::Filter::Tee; { $XML::Filter::Tee::VERSION = '0.46'; } # ABSTRACT: Send SAX events to multiple processor, with switching use strict; use Carp; use XML::SAX::Base; use XML::SAX::EventMethodMaker qw( compile_methods sax_event_names ); sub new { my $proto = shift; my $self = bless {}, ref $proto || $proto; $self->{DisabledHandlers} = []; $self->set_handlers( @_ ); return $self; } sub set_handlers { my $self = shift; $self->{Handlers} = [ map XML::SAX::Base->new( ref $_ eq "HASH" ? %$_ : { Handler => $_ } ), @_ ]; } sub disable_handlers { my $self = shift; croak "Can only disable one handler" if @_ > 1; my ( $which ) = @_; my $hs = $self->{Handlers}; if ( ref $which ) { for my $i ( 0..$#$hs ) { next unless $hs->[$i]; if ( $hs->[$i] == $which ) { $self->{DisabledHandlers}->[$i] = $hs->[$i]; $hs->[$i] = undef; } } } elsif ( $which =~ /^\d+(?!\n)$/ ) { $self->{DisabledHandlers}->[$which] = $hs; $self->{Handlers}->[$which] = undef; } else { for my $i ( 0..$#$hs ) { next unless $hs->[$i]; if ( $hs->[$i]->{Name} eq $which ) { $self->{DisabledHandlers}->[$i] = $hs->[$i]; $hs->[$i] = undef; } } } } sub enable_handlers { my $self = shift; croak "Can only enable one handler" if @_ > 1; my ( $which ) = @_; my $hs = $self->{Handlers}; if ( ref $which ) { for my $i ( 0..$#$hs ) { next unless $hs->[$i]; if ( $hs->[$i] == $which ) { $hs->[$i] = $self->{DisabledHandlers}->[$i]; $self->{DisabledHandlers}->[$i] = undef; } } } elsif ( $which =~ /^\d+(?!\n)$/ ) { $hs->[$which] = $self->{DisabledHandlers}->[$which]; $self->{DisabledHandlers}->[$which] = undef; } else { for my $i ( 0..$#$hs ) { next unless $hs->[$i]; if ( $hs->[$i]->{Name} eq $which ) { $hs->[$i] = $self->{DisabledHandlers}->[$i]; $self->{DisabledHandlers}->[$i] = undef; } } } } compile_methods( __PACKAGE__, <<'FOO', sax_event_names ); sub { my $self = shift; for ( @{$self->{Handlers}} ) { next unless defined $_; $_->( @_ ); } } FOO 1; __END__ =pod =head1 NAME XML::Filter::Tee - Send SAX events to multiple processor, with switching =head1 VERSION version 0.46 =head1 SYNOPSIS my $t = XML::Filter::Tee->new( { Handler => $h0 }, { Handler => $h1 }, { Handler => $h2 }, ... ); ## Altering the handlers list: $t->set_handlers( $h0, $h1, $h2, $h3 ); ## Controlling flow to a handler by number and by reference: $t->disable_handler( 0 ); $t->enable_handler( 0 ); $t->disable_handler( $h0 ); $t->enable_handler( $h0 ); ## Use in a SAX machine (though see L and ## L for a more convenient way to build a machine ## like this): my $m = Machine( [ Intake => "XML::Filter::Tee" => qw( A B ) ], [ A => ">>log.xml" ], [ B => \*OUTPUT ], ); =head1 DESCRIPTION XML::Filter::Tee is a SAX filter that passes each event it receives on to a list of downstream handlers. It's like L in that the events are not buffered; each event is sent first to the tap, and then to the branch (this is different from L, which buffers the events). Unlike L, it allows it's list of handlers to be reconfigured (via L) and it allows control over which handlers are allowed to receive events. These features are designed to make XML::Filter::Tee instances more useful with SAX machines, but they to add some overhead relative to XML::Filter::SAXT. The events are not copied, since they may be data structures that are difficult or impossibly to copy properly, like parts of a C-based DOM implementation. This means that the handlers must not alter the events or later handlers will see the alterations. =head1 NAME XML::Filter::Tee - Send SAX events to multiple processor, with switching =head1 METHODS =over =item new my $t = XML::Filter::Tee->new( { Handler => $h0 }, { Handler => $h1 }, { Handler => $h2 }, ... ); =item set_handlers $t->set_handlers( $h0, $h1, $h2 ); $t->set_handlers( { Handler => $h0, }, { Handler => $h1, }, ); Replaces the current list of handlers with new ones. Can also name handlers to make enabling/disabling them by name easier: $m->set_handlers( { Handler => $validator, Name => "Validator", }, { Handler => $outputer, }, ); $m->disable_handler( "Validator" ); =item disable_handler $t->disable_handler( 0 ); ## By location $t->disable_handler( "Validator" ); ## By name $t->disable_handler( $h0 ); ## By reference Stops sending events to the indicated handler. =item enable_handler $t->enable_handler( 0 ); ## By location $t->enable_handler( "Validator" ); ## By name $t->enable_handler( $h0 ); ## By reference Stops sending events to the indicated handler. =back =head1 AUTHOR Barrie Slaymaker =head1 COPYRIGHT Copyright 2002, Barrie Slaymaker, All Rights Reserved You may use this module under the terms of the Artistic, GNU Public, or BSD licenses, as you choose. =head1 AUTHORS =over 4 =item * Barry Slaymaker =item * Chris Prather =back =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2013 by Barry Slaymaker. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut XML-SAX-Machines-0.46/lib/XML/Filter/Merger.pm0000644000175000001440000003476012204442306020240 0ustar perigrinuserspackage XML::Filter::Merger; { $XML::Filter::Merger::VERSION = '0.46'; } # ABSTRACT: Assemble multiple SAX streams in to one document use base qw( XML::SAX::Base ); use strict; use Carp; use XML::SAX::EventMethodMaker qw( sax_event_names compile_missing_methods ); sub _logging() { 0 }; sub new { my $self = shift->SUPER::new( @_ ); $self->reset; return $self; } sub reset { my $self = shift; $self->{DocumentDepth} = 0; $self->{DocumentCount} = 0; $self->{TailEvents} = undef; $self->{ManifoldDocumentStarted} = 0; $self->{Cutting} = 0; $self->{Depth} = 0; $self->{RootEltSeen} = 0; $self->{AutoReset} = 0; } sub start_manifold_document { my $self = shift; $self->reset; $self->{ManifoldDocumentStarted} = 1; ## A little fudging here until XML::SAX::Base gets a new release $self->{Methods} = {}; } sub _log { my $self = shift; warn "MERGER: ", $self->{DocumentCount}, " ", "| " x $self->{DocumentDepth}, ". " x $self->{Depth}, @_, "\n"; } sub _cutting { my $self = shift; # if ( @_ ) { # my $v = shift; #warn "MERGER: CUTTING ", $v ? "SET!!" : "CLEARED!!", "\n" # if ( $v && ! $self->{Cutting} ) || ( ! $v && $self->{Cutting} ); # $self->{Cutting} = $v; # } my $v = shift; $v = 1 if ! defined $v && ( $self->{DocumentCount} > 1 || $self->{DocumentDepth} > 1 ) && ! $self->{Depth}; $self->_log( $v ? () : "NOT ", "CUTTING ", do { my $c = (caller(1))[3]; $c =~ s/.*:://; $c }, " (doccount=$self->{DocumentCount}", " docdepth=$self->{DocumentDepth}", " depth=$self->{Depth})" ) if _logging; return $v; return $self->{Cutting}; } sub _saving { my $self = shift; return $self->{ManifoldDocumentStarted} && $self->{DocumentCount} == 1 && $self->{DocumentDepth} == 1 && $self->{RootEltSeen}; } sub _push { my $self = shift; $self->_log( "SAVING ", $_[0] ) if _logging; push @{$self->{TailEvents}}, [ @_ ]; return undef; } sub start_document { my $self = shift; $self->reset if $self->{AutoReset}; push @{$self->{DepthStack}}, $self->{Depth}; ++$self->{DocumentCount} unless $self->{DocumentDepth}; ++$self->{DocumentDepth}; $self->{Depth} = 0; $self->SUPER::start_document( @_ ) unless $self->_cutting; } sub end_document { my $self = shift; my $r; unless ( $self->_cutting ) { if ( $self->_saving ) { $self->_push( "end_document", @_ ); } else { $r = $self->SUPER::end_document( @_ ); } } --$self->{DocumentDepth}; $self->{Depth} = pop @{$self->{DepthStack}}; return $r; } sub start_element { my $self = shift ; my $r; $r = $self->SUPER::start_element( @_ ) unless $self->_cutting( $self->{IncludeAllRoots} ? 0 : () ); ++$self->{Depth}; return $r; } sub end_element { my $self = shift ; --$self->{Depth}; $self->{RootEltSeen} ||= $self->{DocumentDepth} == 1 && $self->{Depth} == 0; return undef if $self->_cutting( $self->{IncludeAllRoots} ? 0 : () ); return $self->_saving ? $self->_push( "end_element", @_ ) : $self->SUPER::end_element( @_ ); } compile_missing_methods __PACKAGE__, <<'TEMPLATE_END', sax_event_names; sub { my $self = shift; return undef if $self->_cutting; return $self->_saving ? $self->_push( "", @_ ) : $self->SUPER::( @_ ); } TEMPLATE_END sub in_master_document { my $self = shift; return $self->{DocumentCount} == 1 && $self->{DocumentDepth} <= 1; } sub document_depth { shift->{DocumentDepth} - 1; } sub element_depth { shift->{Depth} - 1; } sub top_level_document_number { shift->{DocumentCount} - 1; } sub end_manifold_document { my $self = shift; my $r; if ( $self->{TailEvents} ) { for ( @{$self->{TailEvents}} ) { my $sub_name = shift @$_; $self->_log( "PLAYING BACK $sub_name" ) if _logging; $sub_name = "SUPER::$sub_name"; $r = $self->$sub_name( @$_ ); } } $self->{ManifoldDocumentStarted} = 0; $self->{AutoReset} = 1; return $r; } sub set_include_all_roots { my $self = shift; $self->{IncludeAllRoots} = shift; } 1; __END__ =pod =head1 NAME XML::Filter::Merger - Assemble multiple SAX streams in to one document =head1 VERSION version 0.46 =head1 SYNOPSIS ## See XML::SAX::Manifold and XML::SAX::ByRecord for easy ways ## to use this processor. my $w = XML::SAX::Writer->new( Output => \*STDOUT ); my $h = XML::Filter::Merger->new( Handler => $w ); my $p = XML::SAX::ParserFactory->parser( Handler => $h ); ## To insert second and later docs in to the first doc: $h->start_manifold_document( {} ); $p->parse_file( $_ ) for @ARGV; $h->end_manifold_document( {} ); ## To insert multiple docs inline (especially useful if ## a subclass does the inline parse): $h->start_document( {} ); $h->start_element( { ... } ); .... $h->start_element( { Name => "foo", ... } ); $p->parse_uri( $uri ); ## Body of $uri inserted in ... $h->end_element( { Name => "foo", ... } ); ... =head1 DESCRIPTION Combines several documents in to one "manifold" document. This can be done in two ways, both of which start by parsing a master document in to which (the guts of) secondary documents will be inserted. =head2 Inlining Secondary Documents The most SAX-like way is to simply pause the parsing of the master document between the two events where you want to insert a secondard document and parse the complete secondard document right then and there so it's events are inserted in the pipeline at the right spot. XML::Filter::Merger only passes the content of the secondary document's root element: my $h = XML::Filter::Merger->new( Handler => $w ); $h->start_document( {} ); $h->start_element( { Name => "foo1" } ); $p->parse_string( "" ); $h->end_element( { Name => "foo1" } ); $h->end_document( {} ); results in C<$w> seeing a document like C<< >>. This technique is especially useful when subclassing XML::Filter::Merger to implement XInclude-like behavior. Here's a useless example that inserts some content after each C event: package Subclass; use vars qw( @ISA ); @ISA = qw( XML::Filter::Merger ); sub characters { my $self = shift; return $self->SUPER::characters( @_ ) ## ** unless $self->in_master_document; ## ** my $r = $self->SUPER::characters( @_ ); $self->set_include_all_roots( 1 ); XML::SAX::PurePerl->new( Handler => $self )->parse_string( "" ); return $r; } ## **: It is often important to use the recursion guard shown here ## to protect the decision making logic that should only be run on ## the events in the master document from being run on events in the ## subdocument. Of course, if you want to apply the logic ## recursively, just leave the guard code out (and, yes, in this ## example, th guard code is phrased in a slightly redundant fashion, ## but we want to make the idiom clear). Feeding this filter C<< >> results in C<< >>. We've called C to get the secondary document's root element included. =head2 Inserting Manifold Documents A more involved way suitable to handling consecutive documents it to use the two non-SAX events--C and C--that are called before the first document to be combined and after the last one, respectively. The first document to be started after the C is the master document and is emitted as-is except that it will contain the contents of all of the other documents just before the root C tag. For example: $h->start_manifold_document( {} ); $p->parse_string( "" ); $p->parse_string( "" ); $h->end_manifold_document( {} ); results in C<< >>. =head2 The details In case the above was a bit vague, here are the rules this filter lives by. For the master document: =over =item * Events before the root C are forwarded as received. Because of the rules for secondary documents, any secondary documents sent to the filter in the midst of a master document will be inserted inline as their events are received. =item * All remaining events, from the root C are buffered until the end_manifold_document() received, and are then forwarded on. =back For secondary documents: =over =item * All events before the root C are discarded. There is no way to recover these (though we can add an option for most non-DTD events, I believe). =item * The root C is discarded by default, or forwarded if C has been used to set a true value. =item * All events up to, but not including, the root C are forwarded as received. =item * The root C is discarded or forwarded if the matching C was. =item * All remaining events until and including the C are forwarded and processing. =item * Secondary documents may contain other secondary documents. =item * Secondary documents need not be well formed. The must, however, be well balanced. =back This requires very little buffering and is "most natural" with the limitations: =over =item * All of each secondary document's events must all be received between two consecutive events of it's master document. This is because most master document events are not buffered and this filter cannot tell from which upstream source a document came. =item * If the master document should happen to have some egregiously large amount of whitespace, commentary, or illegal events after the root element, buffer memory could be huge. This should be exceedingly rare, even non-existent in the real world. =item * If any documents are not well balanced, the result won't be. =item * =back =head1 NAME XML::Filter::Merger - Assemble multiple SAX streams in to one document =head1 METHODS =over =item new my $d = XML::Filter::Merger->new( \%options ); =item reset Clears the filter after an accident. Useful when reusing the filter. new() and start_manifold_document() both call this. =item start_manifold_document This must be called before the master document's C if you want XML::Filter::Merger to insert documents that will be sent after the master document. It does not need to be called if you are going to insert secondary documents by sending their events in the midst of processing the master document. It is passed an empty ({}) data structure. =head1 Additional Methods These are provided to make it easy for subclasses to find out roughly where they are in the document structure. Generally, these should be called after calling SUPER::start_...() and before calling SUPER::end_...() to be accurate. =over =item in_master_document Returns TRUE if the current event is in the first top level document. =item document_depth Gets how many nested documents surround the current document. 0 means that you are in a top level document. In manifold mode, This may or may not be a secondary document: secondary documents may also follow the primary document, in which case they have a document depth of 0. =item element_depth Gets how many nested elements surround the current element in the current input document. Does not count elements from documents surrounding this document. =item top_level_document_number Returns the number of the top level document in a manifold document. This is 0 for the first top level document, which is always the master document. =item end_manifold_document This must be called after the last document's end_document is called. It is passed an empty ({}) data structure which is passed on to the next processor's end_document() call. This call also causes the end_element() for the root element to be passed on. =item set_include_all_roots $h->set_include_all_roots( 1 ); Setting this option causes the merger to include all root element nodes, not just the first document's. This means that later documents are treated as subdocuments of the output document, rather than as envelopes carrying subdocuments. Given two documents received are: Doc1: Doc1: Doc3: then with this option cleared (the default), the result looks like: . This is useful when processing document oriented XML and each upstream filter channel gets a complete copy of the document. This is the case with the machine L and the splitting filter L. With this option set, the result looks like: This is useful when processing record oriented XML, where the first document only contains the preamble and postamble for the records and not all of the records. This is the case with the machine L and the splitting filter L. The two splitter filters mentioned set this feature appropriately. =back =head1 LIMITATIONS The events before and after a secondary document's root element events are discarded. It is conceivable that characters, PIs and commentary outside the root element might need to be kept. This may be added as an option. The DocumentLocators are not properly managed: they should be saved and restored around each each secondary document. Does not yet buffer all events after the first document's root end_element event. If these bite you, contact me. =head1 AUTHOR Barrie Slaymaker =head1 COPYRIGHT Copyright 2002, Barrie Slaymaker, All Rights Reserved. You may use this module under the terms of the Artistic, GNU Public, or BSD licenses, you choice. =head1 AUTHORS =over 4 =item * Barry Slaymaker =item * Chris Prather =back =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2013 by Barry Slaymaker. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut XML-SAX-Machines-0.46/lib/XML/SAX/0000755000175000001440000000000012204442306015655 5ustar perigrinusersXML-SAX-Machines-0.46/lib/XML/SAX/Machines.pm0000644000175000001440000002354112204442306017747 0ustar perigrinuserspackage XML::SAX::Machines; { $XML::SAX::Machines::VERSION = '0.46'; } # ABSTRACT: manage collections of SAX processors use strict; use Carp; use Exporter; use vars qw( $debug @ISA @EXPORT_OK %EXPORT_TAGS ); ## TODO: Load this mapping from the config file, or generalize ## this. my %machines = ( ByRecord => "XML::SAX::ByRecord", Machine => "XML::SAX::Machine", Manifold => "XML::SAX::Manifold", Pipeline => "XML::SAX::Pipeline", Tap => "XML::SAX::Tap", ); @ISA = qw( Exporter ); @EXPORT_OK = keys %machines; %EXPORT_TAGS = ( "all" => \@EXPORT_OK ); ## Note: we don't put a constructor function in each package for two reasons. ## The first is that I want to generalize this mechanism in to a ## Class::CtorShortcut. The second, more marginal reason is that the ## easiest way to do that ## would be to make each of the machines be @ISA( Exporter ) and I don't ## want to add to to machines' @ISA lists for speed reasons, since ## below we manually search @ISA hierarchies for config settings. sub import { my $self = $_[0]; for ( @_[1..$#_] ) { for ( substr( $_, 0, 1 ) eq ":" ? @{$EXPORT_TAGS{substr $_, 1}} : $_ ) { croak "Unknown SAX machine: '$_'" unless exists $machines{$_}; carp "Loading SAX machine '$_'" if $debug; eval "use $machines{$_}; sub $_ { $machines{$_}->new( \@_ ) }; 1;" or die $@; } } goto &Exporter::import; } sub _read_config { delete $INC{"XML/SAX/Machines/ConfigDefaults.pm"}; delete $INC{"XML/SAX/Machines/SiteConfig.pm"}; eval "require XML::SAX::Machines::ConfigDefaults;"; eval "require XML::SAX::Machines::SiteConfig;"; my $xsm = "XML::SAX::Machines"; for ( qw( LegalProcessorClassOptions ProcessorClassOptions ) ) { no strict "refs"; ## I don't like creating these just to default them, but perls ## 5.005003 and older (at least) emit a "used only once, possible ## type" warngings that local $^W = 0 doesn't silence. ${__PACKAGE__."::ConfigDefaults::$_"} ||= {}; ${__PACKAGE__."::SiteConfig::$_"} ||= {}; ${__PACKAGE__."::Config::$_"} = { %{ ${__PACKAGE__."::ConfigDefaults::$_"} }, %{ ${__PACKAGE__."::SiteConfig::$_" } }, }; } ## Now check the config. my @errors; for my $class ( keys %$XML::SAX::Machines::Config::ProcessorClassOptions ) { push( @errors, "Illegal ProcessorClassOptions option name in $class: '$_'\n" ) for grep( ! exists $XML::SAX::Machines::Config::LegalProcessorClassOptions->{$_}, keys %{$XML::SAX::Machines::Config::ProcessorClassOptions->{$class}} ) ; } die @errors, " check XML::SAX::Machines::SiteConfig", " (or perhaps XML::SAX::Machines::ConfigDefaults)\n", " Legal names are: ", join( ", ", map "'$_'", keys %$XML::SAX::Machines::Config::LegalProcessorClassOptions ) if @errors; } _read_config; sub _config_as_string { require Data::Dumper; local $Data::Dumper::Indent = 1; local $Data::Dumper::QuoteKeys = 1; Data::Dumper->Dump( [ $XML::SAX::Machines::Config::ProcessorClassOptions ], [ 'Processors' ] ); } ## TODO: Move the config file accessors to a Config package. #=head2 Config File accessors # #Right now config files are read only. # #=cut # #=over # #=item processor_class_option # # if ( XML::SAX::Machines->processor_class_option # $class, "ConstructWithHashedOptions" # ) { # .... # } ## #Sees if an option is set for a processor class or the first class in it's #ISA hierarchy for which the option is defined. Caches results for speed. #The cache is cleared if the config file is re-read. # #$class may also be an object. # #Yes this is a wordy API; it shouldn't be needed too often :). # #=cut # sub processor_class_option { my $self = shift; my ( $class, $option ) = @_; croak "Can't set processor class options yet" if @_ > 2; Carp::cluck "Unknown ProcessorClassOptions option '$option'.\n", " Expected options are: ", join( ", ", map "'$_'", sort keys %$XML::SAX::Machines::Config::ExpectedProcessorClassOptions ), "\n", " Perhaps a call to XML::SAX::Machine->expected_processor_class_options( '$option' ) would help?" unless $XML::SAX::Machines::Config::ExpectedProcessorClassOptions->{$option}; $class = ref $class || $class; return $XML::SAX::Machines::Config::ProcessorClassOptions->{$class}->{$option} if exists $XML::SAX::Machines::Config::ProcessorClassOptions->{$class} && exists $XML::SAX::Machines::Config::ProcessorClassOptions->{$class}->{$option} && defined $XML::SAX::Machines::Config::ProcessorClassOptions->{$class}->{$option}; ## Hmm, gotta traipse through @ISA. my $isa = do { no strict "refs"; eval "require $class;" unless @{"${class}::ISA"}; \@{"${class}::ISA"}; }; my $value; for ( @$isa ) { next if $_ eq "Exporter" || $_ eq "DynaLoader" ; $value = $self->processor_class_option( $_, $option ); last if defined $value; } return undef unless $value; ## Cache the result. $XML::SAX::Machines::Config::ProcessorClassOptions->{$class}->{$option} = $value; return $value; } #=item expected_processor_class_options # # XML::SAX::Machine->expected_processor_class_options( MyOption ); # #This is used to inform XML::SAX::Machines that there's an option your #module expects to be able to retrieve. It does *not* check the options #in the config file, it checks options requests so as to catch typoes in #code. # #Yes this is a wordy API; it shouldn't be needed too often :). # #=cut sub expected_processor_class_options { my $self = shift; $XML::SAX::Machines::Config::ExpectedProcessorClassOptions->{$_} = 1 for @_; } #=back # #=cut 1; __END__ =pod =head1 NAME XML::SAX::Machines - manage collections of SAX processors =head1 VERSION version 0.46 =head1 SYNOPSIS use XML::SAX::Machines qw( :all ); my $m = Pipeline( "My::Filter1", ## My::Filter1 autoloaded in Pipeline() "My::Filter2", ## My::Filter2 " " " \*STDOUT, ## XML::SAX::Writer also loaded ); $m->parse_uri( $uri ); ## A parser is autoloaded via ## XML::SAX::ParserFactory if ## My::Filter1 isn't a parser. ## To import only individual machines: use XML::SAX::Machines qw( Manifold ); ## Here's a multi-pass machine that reads one document, runs ## it through 5 filtering channels (one channel at a time) and ## reassembles it in to a single document. my $m = Manifold( "My::TableOfContentsExtractor", "My::AbstractExtractor", "My::BodyFitler", "My::EndNotesFilter", "My::IndexFilter", ); $m->parse_string( $doc ); =head1 DESCRIPTION SAX machines are a way to gather and manage SAX processors without going nuts. Or at least without going completely nuts. Individual machines can also be like SAX processors; they don't need to parse or write anything: my $w = XML::SAX::Writer->new( Output => \*STDOUT ); my $m = Pipeline( "My::Filter1", "My::Filter2", { Handler => $w } ); my $p = XML::SAX::ParserFactory->new( handler => $p ); More documentation to come; see L, L, and L for now. Here are the machines this module knows about: ByRecord Record oriented processing of documents. L Machine Generic "directed graph of SAX processors" machines. L Manifold Multipass document processing L Pipeline A linear sequence of SAX processors L Tap An insertable pass through that examines the events without altering them using SAX processors. L =head2 Config file As mentioned in L, you might occasionally need to edit the config file to tell XML::SAX::Machine how to handle a particular SAX processor (SAX processors use a wide variety of API conventions). The config file is a the Perl module XML::SAX::Machines::SiteConfig, which contains a Perl data structure like: package XML::SAX::Machines::SiteConfig; $ProcessorClassOptions = { "XML::Filter::Tee" => { ConstructWithHashedOptions => 1, }, }; So far $Processors is the only available configuration structure. It contains a list of SAX processors with known special needs. Also, so far the only special need is the ConstructWithHashes option which tells XML::SAX::Machine to construct such classes like: XML::Filter::Tee->new( { Handler => $h } ); instead of XML::Filter::Tee->new( Handler => $h ); B If you modify anything, modify only XML::SAX::Machines::SiteConfig.pm. Don't alter XML::SAX::Machines::ConfigDefaults.pm or you will lose your changes when you upgrade. TODO: Allow per-app and per-machine overrides of options. When needed. =head1 NAME XML::SAX::Machines - manage collections of SAX processors =head1 AUTHORS Barrie Slaymaker =head1 LICENCE Copyright 2002-2009 by Barrie Slaymaker. This software is free. It is licensed under the same terms as Perl itself. =head1 AUTHORS =over 4 =item * Barry Slaymaker =item * Chris Prather =back =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2013 by Barry Slaymaker. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut XML-SAX-Machines-0.46/lib/XML/SAX/Tap.pm0000644000175000001440000000735012204442306016744 0ustar perigrinuserspackage XML::SAX::Tap; { $XML::SAX::Tap::VERSION = '0.46'; } # ABSTRACT: Tap a pipeline of SAX processors use base qw( XML::SAX::Machine ); use strict; use Carp; sub new { my $proto = shift; my $options = @_ && ref $_[-1] eq "HASH" ? pop : {}; my $stage_number = 0; my @machine_spec = ( [ "Intake", "XML::Filter::Tee" ], map( [ "Stage_" . $stage_number++, $_ ], @_ ), ); push @{$machine_spec[$_]}, "Stage_" . $_ for 0..$#machine_spec-1 ; ## Pushing this last means that the Exhaust will get ## events after Stage_0 push @{$machine_spec[0]}, "Exhaust"; return $proto->SUPER::new( @machine_spec, $options ); } 1; __END__ =pod =head1 NAME XML::SAX::Tap - Tap a pipeline of SAX processors =head1 VERSION version 0.46 =head1 SYNOPSIS use XML::SAX::Machines qw( Pipeline Tap ) ; my $m = Pipeline( "UpstreamFilter", Tap( "My::Reformatter", \*STDERR ), "DownstreamFilter", ); my $m = Pipeline( "UpstreamFilter", Tap( "| xmllint --format -" ), "DownstreamFilter", ); =head1 DESCRIPTION XML::SAX::Tap is a SAX machine that passes each event it receives on to a brach handler and then on down to it's main handler. This allows debugging output, logging output, validators, and other processors (and machines, of course) to be placed in a pipeline. This differs from L, L and L in that a tap is also a pipeline; it contains the processoring that handles the tap. It's like L in that the events are not buffered; each event is sent first to the tap, and then to the branch (this is different from XML::SAX::Dispatcher, which buffers the events). It's like XML::SAX::Pipeline in that it contains a series of processors in a pipeline; these comprise the "tapping" processors: +----------------------------------------------+ | Tap instance | | | | Intake | | +-----+ +---------+ +---------+ | upstream --+->| Tee |--->| Stage_0 |--...-->| Stage_N | | | +-----+ +---------+ +---------+ | | \ | | \ Exhaust | | +----------------------------------+--> downstream | | +----------------------------------------------+ The events are not copied, since they may be data structures that are difficult or impossibly to copy properly, like parts of a C-based DOM implementation. Events go to the tap first so that you can validate events using a tap that throws exceptions and they will be acted on before the tap's handler sees them. This machine has no C port (see L for details about C and C ports). =head1 NAME XML::SAX::Tap - Tap a pipeline of SAX processors =head1 METHODS =over =item new my $tap = XML::SAX::Tap->new( @tap_processors, \%options ); =back =head1 AUTHOR Barrie Slaymaker =head1 COPYRIGHT Copyright 2002, Barrie Slaymaker, All Rights Reserved You may use this module under the terms of the Artistic, GNU Public, or BSD licenses, as you choose. =head1 AUTHORS =over 4 =item * Barry Slaymaker =item * Chris Prather =back =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2013 by Barry Slaymaker. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut XML-SAX-Machines-0.46/lib/XML/SAX/Machines/0000755000175000001440000000000012204442306017404 5ustar perigrinusersXML-SAX-Machines-0.46/lib/XML/SAX/Machines/ConfigHelper.pm0000644000175000001440000000474012204442306022314 0ustar perigrinuserspackage XML::SAX::Machines::ConfigHelper; { $XML::SAX::Machines::ConfigHelper::VERSION = '0.46'; } # ABSTRACT: lib/XML/SAX/Machines/ConfigHelper.pm use strict; ## See the Makefile target "install_site_config" for where this is called. sub _write_site_config_if_absent { my ( $install_site_lib ) = @_; eval "use Data::Dumper; 1" or die $@; if ( eval "require XML::SAX::Machines::SiteConfig; 1" ) { warn qq[***\n*** Not overwriting $INC{"XML/SAX/Machines/SiteConfig.pm"}\n***\n]; return; } require File::Spec; my $dest = File::Spec->catfile( $install_site_lib, "XML", "SAX", "Machines", "SiteConfig.pm" ); open OUT, ">$dest" or die "$!: $dest"; warn "*** Writing $dest\n"; print OUT <<'SITE_CONFIG_END'; package XML::SAX::Machines::SiteConfig; # # Which options are legal in ProcessorClassOptions. This is provided here # so you can extend the options if need be. It's also a handy quick # reference. The master defaults are in DefaultConfig.pm. # $LegalProcessorClassOptions = { # ConstructWithOptionsHashes => "Use Foo->new( { Handler => $h } ) instead of Foo->new( Handler => $h )", }; # # SAX Processor specific configs. # # Per-processor options # ===================== # # ConstructWithOptionsHashes (boolean) # # tells XML::SAX::Machine to construct the processor like: # # Foo->new( # { Handler => $h }, # ); # # instead of # # Foo->new( Handler => $h ); # $ProcessorClassOptions = { # "XML::Filter::MyFilter" => { # ConstructWithOptionsHashes => 1, # }, }; 1; SITE_CONFIG_END } 1; __END__ =pod =head1 NAME XML::SAX::Machines::ConfigHelper - lib/XML/SAX/Machines/ConfigHelper.pm =head1 VERSION version 0.46 =head1 SYNOPSIS NONE: for internal use only. =head1 DESCRIPTION Some operations, like creating or writing XML::SAX::Machine::MyConfig.pm are rarely needed, and take a few modules not normally needed by XML::SAX::Machines. So this module contains all that and prevents bloating "normal" processes. Read the source to see what I mean. =head1 NAME XML::SAX::Machine::ConfigHelper - rarely needed config routines. =head1 AUTHORS =over 4 =item * Barry Slaymaker =item * Chris Prather =back =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2013 by Barry Slaymaker. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut XML-SAX-Machines-0.46/lib/XML/SAX/Machines/ConfigDefaults.pm0000644000175000001440000000306412204442306022642 0ustar perigrinusers## ## DO NOT EDIT THIS FILE!!! ## ## CHANGES WILL BE LOST ON YOUR NEXT UPGRADE. ## ## EDIT XML::SAX::Machines::SiteConfig.pm INSTEAD!!! ## ## PLEASE. ## ## ## This file contains the "factory" settings for XML::SAX::Machines. ## ## Site specific settings should go in XML::SAX::Machines::SiteConfig package XML::SAX::Machines::ConfigDefaults; { $XML::SAX::Machines::ConfigDefaults::VERSION = '0.46'; } $LegalProcessorClassOptions = { ConstructWithHashedOptions => q{Use Foo->new( { Handler => $h } ) instead of Foo->new( Handler => $h )}, }, $ProcessorClassOptions = { "XML::Filter::SAXT" => { ConstructWithHashedOptions => 1, }, "XML::Filter::Distributor" => { ConstructWithHashedOptions => 1, }, "XML::Filter::Tee" => { ConstructWithHashedOptions => 1, }, "XML::SAX::Machine" => { ConstructWithHashedOptions => 1, }, } ## ## DO NOT EDIT THIS FILE!!! ## ## CHANGES WILL BE LOST ON YOUR NEXT UPGRADE. ## ## EDIT XML::SAX::Machines::SiteConfig.pm INSTEAD!!! ## ## PLEASE. ## __END__ =pod =head1 NAME XML::SAX::Machines::ConfigDefaults =head1 VERSION version 0.46 =head1 AUTHORS =over 4 =item * Barry Slaymaker =item * Chris Prather =back =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2013 by Barry Slaymaker. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut XML-SAX-Machines-0.46/lib/XML/SAX/ByRecord.pm0000644000175000001440000001307012204442306017725 0ustar perigrinuserspackage XML::SAX::ByRecord; { $XML::SAX::ByRecord::VERSION = '0.46'; } # ABSTRACT: Record oriented processing of (data) documents use base qw( XML::SAX::Machine ); use strict; use Carp; sub new { my $proto = shift; my $class = ref $proto || $proto; my @options_hash_if_present = @_ && ref $_[-1] eq "HASH" ? pop : () ; my $stage_num = 0; my @machine_spec = ( [ Intake => "XML::Filter::DocSplitter" ], map( [ "Stage_" . $stage_num++ => $_ ], @_ ), [ Merger => "XML::Filter::Merger" => qw( Exhaust ) ], ); push @{$machine_spec[$_]}, "Stage_" . $_ for 0..$#machine_spec-2 ; push @{$machine_spec[-2]}, "Merger" if @machine_spec; my $self = $proto->SUPER::new( @machine_spec, @options_hash_if_present ); my $distributor = $self->find_part( 0 ); $distributor->set_aggregator( $self->find_part( -1 ) ) if $distributor->can( "set_aggregator" ); return $self; } 1; __END__ =pod =head1 NAME XML::SAX::ByRecord - Record oriented processing of (data) documents =head1 VERSION version 0.46 =head1 SYNOPSIS use XML::SAX::Machines qw( ByRecord ) ; my $m = ByRecord( "My::RecordFilter1", "My::RecordFilter2", ... { Handler => $h, ## optional } ); $m->parse_uri( "foo.xml" ); =head1 DESCRIPTION XML::SAX::ByRecord is a SAX machine that treats a document as a series of records. Everything before and after the records is emitted as-is while the records are excerpted in to little mini-documents and run one at a time through the filter pipeline contained in ByRecord. The output is a document that has the same exact things before, after, and between the records that the input document did, but which has run each record through a filter. So if a document has 10 records in it, the per-record filter pipeline will see 10 sets of ( start_document, body of record, end_document ) events. An example is below. This has several use cases: =over =item * Big, record oriented documents Big documents can be treated a record at a time with various DOM oriented processors like L. =item * Streaming XML Small sections of an XML stream can be run through a document processor without holding up the stream. =item * Record oriented style sheets / processors Sometimes it's just plain easier to write a style sheet or SAX filter that applies to a single record at at time, rather than having to run through a series of records. =back =head2 Topology Here's how the innards look: +-----------------------------------------------------------+ | An XML:SAX::ByRecord | | Intake | | +----------+ +---------+ +--------+ Exhaust | --+-->| Splitter |--->| Stage_1 |-->...-->| Merger |----------+-----> | +----------+ +---------+ +--------+ | | \ ^ | | \ | | | +---------->---------------+ | | Events not in any records | | | +-----------------------------------------------------------+ The C is an L by default, and the C is an L by default. The line that bypasses the "Stage_1 ..." filter pipeline is used for all events that do not occur in a record. All events that occur in a record pass through the filter pipeline. =head2 Example Here's a quick little filter to uppercase text content: package My::Filter::Uc; use vars qw( @ISA ); @ISA = qw( XML::SAX::Base ); use XML::SAX::Base; sub characters { my $self = shift; my ( $data ) = @_; $data->{Data} = uc $data->{Data}; $self->SUPER::characters( @_ ); } And here's a little machine that uses it: $m = Pipeline( ByRecord( "My::Filter::Uc" ), \$out, ); When fed a document like: a b c d e f g the output looks like: a B c C e D g and the My::Filter::Uc got three sets of events like: start_document start_element: characters: 'b' end_element: end_document start_document start_element: characters: 'd' end_element: end_document start_document start_element: characters: 'f' end_element: end_document =head1 NAME XML::SAX::ByRecord - Record oriented processing of (data) documents =head1 METHODS =over =item new my $d = XML::SAX::ByRecord->new( @channels, \%options ); Longhand for calling the ByRecord function exported by XML::SAX::Machines. =back =head1 CREDIT Proposed by Matt Sergeant, with advise by Kip Hampton and Robin Berjon. =head1 Writing an aggregator. To be written. Pretty much just that C and C need to be provided. See L and it's source code for a starter. =head1 AUTHORS =over 4 =item * Barry Slaymaker =item * Chris Prather =back =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2013 by Barry Slaymaker. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut XML-SAX-Machines-0.46/lib/XML/SAX/EventMethodMaker.pm0000644000175000001440000002360512204442306021423 0ustar perigrinuserspackage XML::SAX::EventMethodMaker; { $XML::SAX::EventMethodMaker::VERSION = '0.46'; } # ABSTRACT: SAX event names, creation of methods from templates @ISA = qw( Exporter ); @EXPORT_OK = qw( sax_event_names missing_methods compile_methods compile_missing_methods ); %EXPORT_TAGS = ( all => \@EXPORT_OK ); use strict ; ## First, a table in easy to maintain format :) ## ## Key to flags field: ## = SAX versions supported in. We'll need to make this more ## powerful (support ranges) if we get in to subversions. my %event_flags = ( Han => "Handler", DTD => "DTDHandler", Cnt => "ContentHandler", Doc => "DocumentHandler", Dec => "DeclHandler", Err => "ErrorHandler", Ent => "EntityResolver", Lex => "LexicalHandler", ); my %parse_method_flags = ( Parse => "ParseMethods", ); my %event_table = qw( start_document Han;1Doc;Cnt end_document Han;1Doc;Cnt start_element Han;1Doc;Cnt end_element Han;1Doc;Cnt characters Han;1Doc;Cnt start_prefix_mapping Han;----;Cnt end_prefix_mapping Han;----;Cnt processing_instruction Han;1Doc;Cnt ignorable_whitespace Han;1Doc;Cnt skipped_entity Han;----;Cnt set_document_locator Han;1Doc;Cnt notation_decl Han;----;----;DTD unparsed_entity_decl Han;----;----;DTD element_decl Han;----;----;----;----;Dec attribute_decl Han;----;----;----;----;Dec internal_entity_decl Han;----;----;----;----;Dec external_entity_decl Han;----;----;----;----;Dec comment Han;----;----;----;Lex start_dtd Han;----;----;----;Lex end_dtd Han;----;----;----;Lex start_cdata Han;----;----;----;Lex end_cdata Han;----;----;----;Lex start_entity Han;----;----;----;Lex end_entity Han;----;----;----;Lex warning Han;----;----;----;----;----;Err error Han;----;----;----;----;----;Err fatal_error Han;----;----;----;----;----;Err resolve_entity Han;----;----;----;----;----;----;Ent xml_decl 1Han;----;----;1DTD attlist_decl 1Han;----;----;1DTD doctype_decl 1Han;----;----;1DTD entity_decl 1Han;----;----;1DTD entity_reference 1Han;1Doc ); my %parse_methods_table = qw( parse 1:2Parse parse_file 2Parse parse_string 2Parse parse_uri 2Parse ); use Carp; ## Now, tear that apart so it's queryable my %events_db; for my $event ( keys %event_table, keys %parse_methods_table ) { my $flags = exists $event_table{$event} ? $event_table{$event} : $parse_methods_table{$event}; for ( split /[;-]+/, $flags ) { my ( $versions, $type ) = /^([\d:]*)(.*)/ or die "Couldn't parse '$_'"; my @versions = split /\D+/, $versions; die "Unknown flag '$_'" unless exists $event_flags{$type} || exists $parse_method_flags{$type}; @versions = ( 1, 2 ) unless @versions; $type = exists $event_flags{$type} ? $event_flags{$type} : $parse_method_flags{$type}; push @{$events_db{$type}}, $event; for my $version ( @versions ) { push @{$events_db{"$version,$type"}}, $event; $events_db{$version}->{$event} = undef unless $type eq "ParseMethods"; } } } #use Data::Dumper; local $Data::Dumper::Indent=1; warn Dumper( \%events_db ); my %legal_query_terms = map { ( $_ => undef ); } ( 1, 2, values %event_flags, values %parse_method_flags ); sub sax_event_names { ## This should be really common return keys %event_table unless @_; { my @baduns = grep ! exists $legal_query_terms{$_}, @_; croak "Illegal sax_event_name query term(s): ", join ", ", map "'$_'", @baduns if @baduns; } my @versions; my @types; while (@_) { $_[0] =~ /^\d+$/ ? push @versions, shift : push @types, shift; } ## These might be relatively common as well. return keys %{$events_db{$versions[0]}} if @versions == 1 && ! @types; return @{$events_db{$types[0]}} if ! @versions && @types == 1; @versions = (1,2) unless @versions; @types = values %event_flags unless @types; my @keys = map { my $version = $_; map { my $type = $_; "$version,$type"; } @types } @versions ; return keys %{{ map { map { ( $_ => undef ); } @{$events_db{$_}} } @keys }}; } sub missing_methods { my $where = shift; $where = ref $where || $where; no strict 'refs'; return grep ! exists ${"${where}::"}{$_}, @_; } sub compile_methods { my ( $where, $template ) = ( shift, shift ); $where = ref $where || $where; my @code; for ( @_ ) { push @code, $template; $code[-1] =~ s/|/$_/g; } eval join "", "package $where;", @code, "1" or die $@; } sub compile_missing_methods { my ( $where, $template ) = ( shift, shift ); compile_methods $where, $template, missing_methods $where, @_; } 1; __END__ =pod =head1 NAME XML::SAX::EventMethodMaker - SAX event names, creation of methods from templates =head1 VERSION version 0.46 =head1 SYNOPSIS use XML::SAX::EventMethodMaker qw( sax_event_names missing_methods compile_methods ); ## Getting event names by handler type and SAX version my @events = sax_event_names; my @dtd_events = sax_event_names "DTDHandler"; my @sax1_events = sax_event_names 1; my @sax1_dtd_events = sax_event_names 1, "DTDHandler"; ## Figuring out what events a class or object does not provide my @missing = missing_methods $class, @events ; ## Creating all SAX event methods compile_methods $class, <<'TEMPLATE_END', sax_event_names; sub { my $self = shift; ... do something ... ## Pass the event up to the base class $self->SUPER::( @_ ); } TEMPLATE_END ## Creating some methods compile_methods $class, <<'TEMPLATE_END', @method_names; ... TEMPLATE_END ## Creating only missing event handlers compile_missing_methods $class, <<'TEMPLATE_END'; ... TEMPLATE_END =head1 DESCRIPTION In building SAX machines, it is often handle to build a set of event handlers from a common template. This helper library (or class) provides the database of handler names, queryable by type, and =head1 NAME XML::SAX::EventMethodMaker - SAX event names, creation of methods from templates =head1 Functions =over =item sax_event_names my @names = sax_event_names @query_terms; Takes a list of query terms and returns all matching events. Query terms may be: - a SAX version number: 1 or 2 (no floating point or ranges) - Handler - DTDHandler - ContentHandler - DocumentHandler - DeclHandler - ErrorHandler - EntityResolver - LexicalHandler In addition to normal SAX events, there are also "parse" events: - ParseMethods Unrecognized query terms cause exceptions. If no query terms are provided, then all event names from all versions are returned except for parse methods (parse, parse_uri, ...). If any version numbers are supplied, then only events from those version numbers are returned. No support for noninteger version numbers is provided, nor for ranges. So far, only two SAX versions exist in Perl, 1 and 2. If any handler types are provided, then only events of those types are returned. Handler types are case insensitive. In other words, all returned events must match both a version number and a handler type. No support for boolean logic is provided. =item missing_methods my @missing = missing_methods __PACKAGE__, @event_names; my @missing = missing_methods $object, @event_names; This subroutine looks to see if the object or class has declared event handler methods for the named events. Any events that haven't been declared are returned. It is sufficient to use subroutine prototypes to prevent shimming AUTOLOADed (or otherwise lazily compiled) methods: sub start_document ; =item compile_methods compile_methods __PACKAGE__, $template, @method_names; compile_methods $object, $template, @method_names; Compiles the given template for each given event name, substituting the event name for the string or in the template. There is no difference between these two tags, they are provided to only to let you make your templates more readable to you. =item compile_missing_methods compile_missing_methods __PACKAGE__, $template, @method_names; compile_missing_methods $objects, $template, @method_names; Shorthand for calls like compile_methods __PACKAGE__, $template, missing_methods __PACKAGE__, @method_names; =back =head1 Due Credit The database of handlers by type was developed by Kip Hampton, modified by Robin Berjon, and pilfered and corrupted by me. =head1 LICENSE Database Copyright 2002, Barrie Slaymaker, Kip Hampton, Robin Berjon Code Copyright 2002, Barrie Slaymaker You may use this under the terms of the Artistic, GNU Public, or BSD licences, as you see fit. =head1 AUTHORS =over 4 =item * Barry Slaymaker =item * Chris Prather =back =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2013 by Barry Slaymaker. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut XML-SAX-Machines-0.46/lib/XML/SAX/Manifold.pm0000644000175000001440000001102612204442306017744 0ustar perigrinuserspackage XML::SAX::Manifold; { $XML::SAX::Manifold::VERSION = '0.46'; } # ABSTRACT: Multipass processing of documents use base qw( XML::SAX::Machine ); use strict; use Carp; sub new { my $proto = shift; my $class = ref $proto || $proto; my @options_hash_if_present = @_ && ref $_[-1] eq "HASH" ? pop : () ; my $channel_num = 0; my $self = $proto->SUPER::new( [ Intake => "XML::Filter::Distributor", (1..$#_+1) ], map( [ "Channel_" . $channel_num++ => $_ => qw( Merger ) ], @_ ), [ Merger => "XML::Filter::Merger" => qw( Exhaust ) ], @options_hash_if_present ); my $distributor = $self->find_part( 0 ); $distributor->set_aggregator( $self->find_part( -1 ) ) if $distributor->can( "set_aggregator" ); return $self; } 1; __END__ =pod =head1 NAME XML::SAX::Manifold - Multipass processing of documents =head1 VERSION version 0.46 =head1 SYNOPSIS use XML::SAX::Machines qw( Manifold ) ; my $m = Manifold( $channel0, $channel1, $channel2, { Handler => $h, ## optional } ); =head1 DESCRIPTION XML::SAX::Manifold is a SAX machine that allows "multipass" processing of a document by sending the document through several channels of SAX processors one channel at a time. A channel may be a single SAX processor or a pipeline (see L). The results of each channel are aggregated by a SAX filter that supports the C event, C by default. See the section on writing an aggregator and L. This differs from L in that the channels are prioritized and each channel receives all events for a document before the next channel receives any events. XML::SAX::Manifold buffers all events while feeding them to the highest priority channel (C<$processor1> in the synopsis), and replays them for each lower priority channel one at a time. The event flow for the example in the SYNOPSIS would look like the following, with the numbers next to the connection arrow indicating when the document's events flow along that arrow. +--------------------------------------------------------+ | An XML::SAX::Manifold instance | | | | +-----------+ | | +->| Channel_0 |-+ | | 1/ +-----------+ \1 | | Intake / \ | 1 | +------+ 2 +-----------+ 2 \ +--------+ Exhaust | --+->| Dist |---->| Channel_1 |-----*-->| Merger |---------+--> $h | +------+ +-----------+ / +--------+ | | \3 3/ | | \ +-----------+ / | | +->| Channel_2 |-+ | | +-----------+ | +--------------------------------------------------------+ Here's the timing of the event flows: 1: upstream -> Dist. -> Channel_0 -> Merger -> downstream 2: Dist. -> Channel_1 -> Merger -> downstream 3: Dist. -> Channel_2 -> Merger -> downstream When the document arrives from upstream, the events all arrive during time period 1 and are buffered and also passed through Channel_0 and Channel_0's output is sent to the Merger. After all events have been received (as indicated by an C event from upstream), all events are then played back through Channel_1 and then through Channel_2 (which also output to the Merger). It's the merger's job to assemble the three documents it receives in to one document; see L for details. =head1 NAME XML::SAX::Manifold - Multipass processing of documents =head1 METHODS =over =item new my $d = XML::SAX::Manifold->new( @channels, \%options ); Longhand for calling the Manifold function exported by XML::SAX::Machines. =back =head1 Writing an aggregator. To be written. Pretty much just that C and C need to be provided. See L and it's source code for a starter. =head1 AUTHORS =over 4 =item * Barry Slaymaker =item * Chris Prather =back =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2013 by Barry Slaymaker. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut XML-SAX-Machines-0.46/lib/XML/SAX/Pipeline.pm0000644000175000001440000001160112204442306017757 0ustar perigrinuserspackage XML::SAX::Pipeline; { $XML::SAX::Pipeline::VERSION = '0.46'; } # ABSTRACT: Manage a linear pipeline of SAX processors use base qw( XML::SAX::Machine ); use strict; use Carp; sub new { my $proto = shift; my $options = @_ && ref $_[-1] eq "HASH" ? pop : {}; my $stage_number = 0; my @machine_spec = map [ "Stage_" . $stage_number++, $_ ], @_; push @{$machine_spec[$_]}, $_ + 1 for 0..$#machine_spec-1 ; $machine_spec[0]->[0] = "Intake" if @machine_spec; push @{$machine_spec[-1]}, "Exhaust" if @machine_spec; return $proto->SUPER::new( @machine_spec, $options ); } 1; __END__ =pod =head1 NAME XML::SAX::Pipeline - Manage a linear pipeline of SAX processors =head1 VERSION version 0.46 =head1 SYNOPSIS use XML::SAX::Machines qw( Pipeline ); ## Most common way use XML::Fitler::Foo; my $m = Pipeline( XML::Filter::Foo->new, ## Create it manually "XML::Filter::Bar", ## Or let Pipeline load & create it "XML::Filter::Baz", { ## Normal options Handler => $h, } ); ## To choose the default parser automatically if XML::Filter::Foo ## does not implement a parse_file method, just pretend the Pipeline ## is a parser: $m->parse_file( "blah" ); ## To feed the pipeline from an upstream processor, treat it like ## any other SAX filter: my $p = Some::SAX::Generator->new( Handler => $m ); ## To read a file or the output from a subprocess: my $m = Pipeline( "outfile.txt" ); my $m = Pipeline( ..., "| xmllint --format -" ); =head1 DESCRIPTION An XML::SAX::Pipeline is a linear sequence SAX processors. Events passed to the pipeline are received by the C end of the pipeline and the last filter to process events in the pipeline passes the events out the C to the filter set as the pipeline's handler: +-----------------------------------------------------------+ | An XML:SAX::Pipeline | | Intake | | +---------+ +---------+ +---------+ Exhaust | --+-->| Stage_0 |--->| Stage_1 |-->...-->| Stage_N |----------+-----> | +---------+ +---------+ +---------+ | +-----------------------------------------------------------+ As with all SAX machines, a pipeline can also create an ad hoc parser (using L) if you ask it to parse something and the first SAX processer in the pipeline can't handle a parse request: +-------------------------------------------------------+ | An XML:SAX::Pipeline | | Intake | | +--------+ +---------+ +---------+ Exhaust | | | Parser |-->| Stage_0 |-->...-->| Stage_N |----------+-----> | +--------+ +---------+ +---------+ | +-------------------------------------------------------+ or if you specify an input file like so: my $m = Pipeline(qw( output_file.xml )); And, thanks to Perl's magic open (see L), you can read and write from processes: my $m = Pipeline( "gen_xml.pl |", "XML::Filter::Bar", "XML::Filter::Baz", "| consume_xml.pl", ); This can be used with an L to place a handy debugging tap in a pipeline (or other machine): my $m = Pipeline( "output_file.xml", ); =head1 NAME XML::SAX::Pipeline - Manage a linear pipeline of SAX processors =head1 METHODS See L for most of the methods. =over =item new my $pipeline = XML::SAX::Pipeline->new( @processors, \%options ); Creates a pipeline and links all of the given processors together. Longhand for Pipeline(). =back =head1 AUTHOR Barrie Slaymaker =head1 COPYRIGHT Copyright 2002, Barrie Slaymaker, All Rights Reserved. You may use this module under the terms of the Artistic, GNU Public, or BSD licenses, your choice. =head1 AUTHORS =over 4 =item * Barry Slaymaker =item * Chris Prather =back =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2013 by Barry Slaymaker. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut XML-SAX-Machines-0.46/lib/XML/SAX/Machine.pm0000644000175000001440000012742412204442306017571 0ustar perigrinuserspackage XML::SAX::Machine; { $XML::SAX::Machine::VERSION = '0.46'; } # ABSTRACT: Manage a collection of SAX processors use strict; use constant has_named_regexp_character_classes => $] > 5.006000; use Carp; use UNIVERSAL; use XML::SAX::EventMethodMaker qw( :all ); use XML::SAX::Machines; ## Tell the config stuff what options we'll be requesting, so we ## don't get typoes in this code. Very annoying, but I mispelt it ## so often, that adding one statement like this seemed like a low ## pain solution, since testing options like this can be long and ## bothersome. XML::SAX::Machines->expected_processor_class_options(qw( ConstructWithHashedOptions )); sub new { my $proto = shift; my $class = ref $proto || $proto; my @options_if_any = @_ && ref $_[-1] eq "HASH" ? %{pop()} : (); my $self = bless { @options_if_any }, $class; $self->{Parts} = []; $self->{PartsByName} = {}; ## Mapping of names to parts $self->_compile_specs( @_ ); ## Set this last in case any specs have handler "Exhaust" $self->set_handler( $self->{Handler} ) if $self->{Handler}; return $self; } sub _find_part_rec { my $self = shift; my ( $id ) = @_; if ( ref $id ) { return exists $self->{PartsByProcessor}->{$id} && $self->{PartsByProcessor}->{$id}; } if ( $id =~ /^[+-]?\d+(?!\n)$/ ) { return undef if $id > $#{$self->{Parts}} || $id < - ( $#{$self->{Parts}} + 1 ); return $self->{Parts}->[$id]; } return $self->{PartsByName}->{$id} if exists $self->{PartsByName}->{$id}; return undef; } sub find_part { my $self = shift; my ( $spec ) = @_; return $self->{Handler} if $spec eq "Exhaust"; my $part_rec; if ( 0 <= index $spec, "/" ) { ## Take the sloooow road... require File::Spec::Unix; croak "find_part() path not absolute: '$spec'" unless File::Spec::Unix->file_name_is_absolute( $spec ); ## Cannonical-ize it, do /foo/../ => / conversion $spec = File::Spec::Unix->canonpath( $spec ); 1 while $spec =~ s{/[^/]+/\.\.(/|(?!\n\Z))}{$1}; my @names = File::Spec::Unix->splitdir( $spec ); pop @names while @names && ! length $names[-1]; shift @names while @names && ! length $names[0]; croak "invalid find_part() specification: '$spec'" unless File::Spec::Unix->file_name_is_absolute( $spec ); my @audit_trail; my $proc = $self; for ( @names ) { push @audit_trail, $_; $part_rec = $proc->_find_part_rec( $_ ); unless ( $part_rec ) { croak "find_path() could not find '", join( "/", "", @audit_trail ), "' in ", ref $self; } $proc = $part_rec->{Processor}; } } else { $part_rec = $self->_find_part_rec( $spec ); } croak "find_path() could not find '$spec' in ", ref $self unless $part_rec; my $proc = $part_rec->{Processor}; ## Be paranoid here, just in case we have a bug somewhere. I prefer ## getting reasonable bug reports... confess "find_path() found an undefined Processor reference as part '$_[0]' in ", ref $self unless defined $proc; confess "find_path() found '$proc' instead of a Processor reference as part '$_[0]' in ", ref $self unless ref $proc; confess "find_path() found a ", ref $proc, " reference instead of a Processor reference in part '$_[0]' in ", ref $self unless index( "SCALAR|ARRAY|HASH|Regexp|REF|CODE", ref $proc ) <= 0; return $proc; } use vars qw( $AUTOLOAD ); sub DESTROY {} ## Prevent AUTOLOADing of this. my $alpha_first_char = has_named_regexp_character_classes ? "^[[:alpha:]]" : "^[a-zA-Z]"; sub AUTOLOAD { my $self = shift; $AUTOLOAD =~ s/.*://; my $fc = substr $AUTOLOAD, 0, 1; ## TODO: Find out how Perl determines "alphaness" and use that. croak ref $self, " does not provide method $AUTOLOAD" unless $fc eq uc $fc && $AUTOLOAD =~ /$alpha_first_char/o; my $found = $self->find_part( $AUTOLOAD ); return $found; } sub parts { my $self = shift; croak "Can't set parts for a '", ref( $self ), "'" if @_; confess "undef Parts" unless defined $self->{Parts}; return map $_->{Processor}, @{$self->{Parts}}; } ## TODO: Detect deep recursion in _all_part_recs(). In fact, detect deep ## recursion when building the machine. sub _all_part_recs { my $self = shift; croak "Can't pass parms to ", ref( $self ), "->_all_part_recs" if @_; confess "undef Parts" unless defined $self->{Parts}; my $proc; return map { $proc = $_->{Processor}; UNIVERSAL::can( $proc, "all_parts" ) ? ( $_, $proc->_all_part_recs ) : $_; } @{$self->{Parts}}; } sub all_parts { my $self = shift; croak "Can't pass parms to ", ref( $self ), "->_all_parts" if @_; confess "undef Parts" unless defined $self->{Parts}; return map $_->{Processor}, $self->_all_part_recs; } #=item add_parts # # $m->add_parts( { Foo => $foo, Bar => $bar } ); # #On linear machines: # # $m->add_parts( @parts ); # #Adds one or more parts to the machine. Does not connect them, you need #to do that manually (we need to add a $m->connect_parts() style API). # #=cut # #sub add_parts { # my $self = shift; #confess "TODO"; #} #=item remove_parts # # $m->remove_parts( qw( Foo Bar ) ); # #Slower, but possible: # # $m->remove_parts( $m->Foo, $m->Bar ); # #On linear machines: # # $m->remove_parts( 1, 3 ); # #Removes one or more parts from the machine. Does not connect them #except on linear machines. Attempts to disconnect any parts that #point to them, and that they point to. This attempt will fail for any #part that does not provide a handler() or handlers() method. # #This is breadth-first recursive, like C<$m->find_part( $id )> is. This #will remove *all* parts with the given names from a complex #machine (this does not apply to index numbers). # #Returns a list of the removed parts. # #If a name is not found, it is ignored. # #=cut # #sub remove_parts { # my $self = shift; # # my %names; # my @found; # # for my $doomed ( @_ ) { # unless ( ref $doomed ) { # $names{$doomed} = undef; # if ( my $f = delete $self->{Parts}->{$doomed} ) { # push @found, $f; # } # else { # for my $c ( $self->parts ) { # if ( $c->can( "remove_parts" ) # && ( my @f = $c->remove_parts( $doomed ) ) # ) { # push @found, @f; # } # } # } # } # else { # ## It's a reference. Do this the slow, painful way. # for my $name ( keys %{$self->{Parts}} ) { # if ( $doomed == $self->{Parts}->{$name} ) { # $names{$name} = undef; # push @found, delete $self->{Parts}->{$name}; # } # } # # for my $c ( $self->parts ) { # if ( $c->can( "remove_parts" ) # && ( my @f = $c->remove_parts( $doomed ) ) # ) { # push @found, @f; # } # } # } # } # # for my $c ( sort keys %{$self->{Connections}} ) { # if ( exists $names{$self->{Connections}->{$c}} ) { ###TODO: Unhook the processors if possible # delete $self->{Connections}->{$c}; # } # if ( exists $names{$c} ) { ###TODO: Unhook the processors if possible # delete $self->{Connections}->{$c}; # } # } # # return @found; #} sub set_handler { my $self = shift; my ( $handler, $type ) = reverse @_; $type ||= "Handler"; for my $part_rec ( @{$self->{Parts}} ) { my $proc = $part_rec->{Processor}; my $hs = $part_rec->{Handlers}; if ( grep ref $_ ? $_ == $self->{$type} : $_ eq "Exhaust", @$hs ) { if ( @$hs == 1 && $proc->can( "set_handler" ) ) { $proc->set_handler( $type ne "Handler" ? $type : (), $handler ); next; } unless ( $proc->can( "set_handlers" ) ) { croak ref $proc, @$hs == 1 ? " has no set_handler or set_handlers method" : " has no set_handlers method" } $proc->set_handlers( map { my $h; my $t; if ( ref $_ ) { $h = $_; $t = "Handler"; } elsif ( $_ eq "Exhaust" ) { $h = $handler; $t = $type; } else { ( $h, $t ) = reverse split /=>/, $_; $h = $self->find_part( $h ); $t = $type; croak "Can't locate part $_ to be a handler for ", $part_rec->string_description unless $h; } { $type => $h } } @$hs ); } } $self->{$type} = $handler; } my $warned_about_missing_sax_tracer; sub trace_parts { my $self = shift; unless ( eval "require Devel::TraceSAX; 1" ) { warn $@ unless $warned_about_missing_sax_tracer++; return; } for ( @_ ? map $self->_find_part_rec( $_ ), @_ : @{$self->{Parts}} ) { Devel::TraceSAX::trace_SAX( $_->{Processor}, $_->string_description ); } ## some parts are created lazily, let's trace those, too $self->{TraceAdHocParts} ||= 1 unless @_; } sub trace_all_parts { my $self = shift; croak "Can't pass parms to trace_all_parts" if @_; unless ( eval "require Devel::TraceSAX; 1" ) { warn $@ unless $warned_about_missing_sax_tracer++; return; } for ( @{$self->{Parts}} ) { Devel::TraceSAX::trace_SAX( $_->{Processor}, $_->string_description ); $_->{Processor}->trace_all_parts if $_->{Processor}->can( "trace_all_parts" ); } ## some parts are created lazily, let's trace those, too $self->{TraceAdHocParts} = 1; } sub untracify_parts { my $self = shift; for ( @_ ? map $self->find_part( $_ ), @_ : $self->parts ) { XML::SAX::TraceViaISA::remove_tracing_subclass( $_ ); } } compile_methods __PACKAGE__, <<'EOCODE', sax_event_names "ParseMethods" ; sub { my $self = shift; my $h = $self->find_part( "Intake" ); croak "SAX machine 'Intake' undefined" unless $h; if ( $h->can( "" ) ) { my ( $ok, @result ) = eval { ( 1, wantarray ? $h->( @_ ) : scalar $h->( @_ ) ); }; ## Not sure how/where causes me to need this next line, but ## in perl5.6.1 it seems necessary. return wantarray ? @result : $result[0] if $ok; die $@ unless $@ =~ /No .*routine defined/; undef $@; if ( $h->isa( "XML::SAX::Base" ) ) { ## Due to a bug in old versions of X::S::B, we need to reset ## this so that it will pass events on. ## TODO: when newer X::S::B's are common, jack up the ## version in Makefile.PL's PREREQ_PM :). delete $h->{ParseOptions}; } } require XML::SAX::ParserFactory; $self->{Parser} = XML::SAX::ParserFactory->parser( Handler => $h ); Devel::TraceSAX::trace_SAX( $self->{Parser}, "Ad hoc parser (" . ref( $self->{Parser} ) . ")" ) if $self->{TraceAdHocParts}; return $self->{Parser}->(@_); } EOCODE compile_methods __PACKAGE__, <<'EOCODE', sax_event_names ; sub { my $self = shift; my $h = $self->find_part( "Intake" ); croak "SAX machine 'Intake' undefined" unless $h; return $h->( @_ ) if $h->can( "" ); } EOCODE my %basic_types = ( ARRAY => undef, CODE => undef, GLOB => undef, HASH => undef, REF => undef, ## Never seen this one, but it's listed in perlfunc Regexp => undef, SCALAR => undef, ); sub _resolve_spec { my $self = shift; my ( $spec ) = @_; croak "undef passed instead of a filter to ", ref( $self ), "->new()" unless defined $spec; croak "Empty filter name ('') passed to ", ref( $self ), "->new()" unless length $spec; my $type = ref $spec; if ( $type eq "SCALAR" ## TODO: || $type eq "ARRAY" <== need XML::SAX::Writer to supt this. || $type eq "GLOB" || UNIVERSAL::isa( $spec, "IO::Handle" ) || ( ! $type && $spec =~ /^\s*([>|]|\+>)/ ) ) { ## Cheat until XML::SAX::Writer cat grok it if ( ! $type ) { use Symbol; my $fh = gensym; open $fh, $spec or croak "$! opening '$spec'" ; $spec = $fh; } require XML::SAX::Writer; $spec = XML::SAX::Writer->new( Output => $spec ); } elsif ( !$type ) { if ( $spec !~ /^\s*<|\|\s*(?!\n)$/ ) { ## Doesn't look like the caller wants to slurp a file ## Let's require it now to catch errors early, then ## new() it later after all requires are done. ## delaying the new()s might help us from doing things ## like blowing away output files and then finding ## errors, for instance. croak $@ unless $spec->can( "new" ) || eval "require $spec"; } } else { croak "'$type' not supported in a SAX machine specification\n" if exists $basic_types{$type}; } return $spec; } my $is_name_like = has_named_regexp_character_classes ? '^[[:alpha:]]\w*(?!\n)$' : '^[a-zA-Z]\w*(?!\n)$'; sub _valid_name($) { my ( $prospect ) = @_; return 0 unless defined $prospect && length $prospect; my $fc = substr $prospect, 0, 1; ## I wonder how close to valid Perl method names this is? ( $fc eq uc $fc && $prospect =~ /$is_name_like/o ) ? 1 : 0; } sub _push_spec { my $self = shift; my ( $name, $spec, @handlers ) = ref $_[0] ? ( undef, @_ ) ## Implictly unnamed: [ $obj, ... ] : @_; ## Named or explicitly unnamed: [ $name, ...] my $part = XML::SAX::Machine::Part->new( Name => $name, Handlers => \@handlers, ); # if ( grep $_ eq "Exhaust", @handlers ) { # $self->{OverusedNames}->{Exhaust} ||= undef # if exists $self->{PartsByName}->{Exhaust}; # # $self->{PartsByName}->{Exhaust} = $self->{Parts}->[-1]; # # @handlers = grep $_ ne "Exhaust", @handlers; # } ## NOTE: This may ## still return a non-reference, which is the type of processor ## wanted here. We construct those lazily below; see the docs ## about order of construction. my $proc = $self->_resolve_spec( $spec ); $part->{Processor} = $proc; croak "SAX machine BUG: couldn't resolve spec '$spec'" unless defined $proc; push @{$self->{Parts}}, $part; $part->{Number} = $#{$self->{Parts}}; if ( defined $name ) { $self->{OverusedNames}->{$name} ||= undef if exists $self->{PartsByName}->{$name}; $self->{IllegalNames}->{$name} ||= undef unless _valid_name $name && $name ne "Exhaust"; $self->{PartsByName}->{$name} = $self->{Parts}->[-1]; } ## This HASH is used to detect cycles even if the user uses ## preconstructed references instead of named parts. $self->{PartsByProcessor}->{$proc} = $part if ref $proc; } sub _names_err_msgs { my ( $s, @names ) = @_ ; @names = map ref $_ eq "HASH" ? keys %$_ : $_, @names; return () unless @names; @names = keys %{ { map { ( $_ => undef ) } @names } }; if ( @names == 1 ) { $s =~ s/%[A-Z]+//g; } else { $s =~ s/%([A-Z]+)/\L$1/g; } return $s . join ", ", map "'$_'", sort @names ; } sub _build_part { my $self = shift; my ( $part ) = @_; my $part_num = $part->{Number}; return if $self->{BuiltParts}->[$part_num]; confess "SAX machine BUG: cycle found too late" if $self->{SeenParts}->[$part_num]; ++$self->{SeenParts}->[$part_num]; ## We retun a list of all cycles that have been discovered but ## not yet completed. We don't return cycles that have been ## completely discovered; those are placed in DetectedCycles. my @open_cycles; eval { ## This eval is to make sure we decrement SeenParts so that ## we don't encounter spurious cycle found too late exceptions. ## Build any handlers, detect cycles my @handler_procs; ## I decided not to autolink one handler to the next in order to keep ## from causing hard to diagnose errors when unintended machines are ## passed in. The special purpose machines, like Pipeline, have ## that logic built in. ## ## Link any part with no handlers to the next part. ## push @{$part->{Handlers}}, $part->{Number} + 1 ## if ! @{$part->{Handlers}} && $part->{Number} < $#{$self->{Parts}}; for my $handler_spec ( @{$part->{Handlers}} ) { my $handler; if ( ref $handler_spec ) { ## The caller specified a handler with a real reference, so ## we don't need to build it, but we do need to do ## cycle detection. _build_part won't build it in this case ## but it will link it and do cycle detection. $handler = $self->{PartsByProcessor}->{$handler_spec} if exists $self->{PartsByProcessor}->{$handler_spec}; if ( ! defined $handler ) { ## It's a processor not in this machine. Hope the ## caller knows what it's doing. push @handler_procs, $handler_spec; next; } } else { $handler = $self->_find_part_rec( $handler_spec ); ## all handler specs were checked earlier, so "survive" this ## failure and let the queued error message tell the user ## about it. next unless defined $handler; } if ( $self->{SeenParts}->[$handler->{Number}] ) { ## Oop, a cycle, and we don't want to recurse or we'll ## recurse forever. push @open_cycles, $part eq $handler ? [ $handler ] : [ $part, $handler ]; next; } my @nested_cycles = $self->_build_part( $handler ); my $handler_proc = $handler->{Processor}; confess "SAX machine BUG: found a part with no processor: ", $handler->string_description unless defined $handler_proc; confess "SAX machine BUG: found a unbuilt '", $handler->{Processor}, "' processor: ", $handler->string_description unless ref $handler_proc; push @handler_procs, $handler_proc; for my $nested_cycle ( @nested_cycles ) { if ( $nested_cycle->[-1] == $part ) { ## the returned cycle "ended" with our part, so ## we have a complete description of the cycle, log it ## and move on. push @{$self->{DetectedCycles}}, $nested_cycle; } else { ## This part is part of this cycle but not it's "beginning" push @open_cycles, [ $part, $nested_cycle ]; } } } ## Create this processor if need be, otherwise just set the handlers. my $proc = $part->{Processor}; confess "SAX machine BUG: undefined processor for ", $part->string_description unless defined $proc; unless ( ref $proc ) { ## TODO: Figure a way to specify the type of handler, probably ## using a DTDHandler=>Name syntax, not sure. Perhaps ## using a hash would be best. if ( $proc =~ /^\s*<|\|\s*(?!\n)$/ ) { ## Looks like the caller wants to slurp a file ## We open it ourselves to get all of Perl's magical ## "open" goodness. TODO: also check for a URL scheme ## and handle that :). ## TODO: Move this in to a/the parse method so it can ## be repeated. require Symbol; my $fh = Symbol::gensym; open $fh, $proc or croak "$! opening '$proc'"; require XML::SAX::ParserFactory; require IO::Handle; $proc = XML::SAX::ParserFactory->parser( Source => { ByteStream => $fh, }, map { ( Handler => $_ ), } @handler_procs ); } elsif ( XML::SAX::Machines->processor_class_option( $proc, "ConstructWithHashedOptions" ) ) { ## This is designed to build options in a format compatible ## with SAXT style constructors when multiple handlers are ## defined. $proc = $proc->new( map { { Handler => $_ }, ## Hashes } @handler_procs ## 0 or more of 'em ); } else { ## More common Foo->new( Handler => $h ); croak "$proc->new doesn't allow multiple handlers.\nSet ConstructWithOptionsHashes => 1 in XML::SAX::Machines::ConfigDefaults if need be" if @handler_procs > 1; $proc = $proc->new( map { ( Handler => $_ ), ## A plain list } @handler_procs ## with 0 or 1 elts ); } $self->{PartsByProcessor}->{$proc} = $part; } elsif ( @handler_procs ) { if ( $proc->can( "set_handlers" ) ) { $proc->set_handlers( @handler_procs ); } elsif ( $proc->can( "set_handler" ) ) { if ( @handler_procs == 1 ) { $proc->set_handler( @handler_procs ); } else { die "SAX machine part ", $part->string_description, " can only take one handler at a time\n"; } } else { die "SAX machine part ", $part->string_description, " does not provide a set_handler() or set_handlers() method\n" } } $part->{Processor} = $proc; }; --$self->{SeenParts}->[$part->{Number}]; $self->{BuiltParts}->[$part_num] = 1; if ( $@ ) { chomp $@; $@ .= "\n ...while building " . $part->string_description . "\n"; die $@; } return @open_cycles; } sub _compile_specs { my $self = shift; my @errors; ## Init the permanent structures $self->{Parts} = []; $self->{PartsByName} = {}; $self->{PartsByProcessor} = {}; ## And some temporary structures. $self->{IllegalNames} = {}; $self->{OverusedNames} = {}; ## Scan the specs and figure out the connectivity, names and load ## any requirements, etc. for my $spec ( @_ ) { eval { $self->_push_spec( ref $spec eq "ARRAY" ? @$spec : ( undef, $spec ) ); }; ## This could be ugly if $@ contains a stack trace, but it'll have ## to do. if ( $@ ) { chomp $@; push @errors, $@; } } push @errors, ( _names_err_msgs( "illegal SAX machine part name%S ", $self->{IllegalNames} ), _names_err_msgs( "undefined SAX machine part%S specified as handler%S ", grep defined && ! $self->_find_part_rec( $_ ), grep ! ref && $_ ne "Exhaust", map @{$_->{Handlers}}, @{$self->{Parts}} ), _names_err_msgs( "multiple SAX machine parts named ", $self->{OverusedNames} ) ); ## Free some memory and make object dumps smaller delete $self->{IllegalNames}; delete $self->{OverusedNames}; ## If we made it this far, all classes have been loaded and all ## non-processor refs have been converted in to processors. ## Now ## we need to build and that were specified by type name and do ## them in reverse order so we can pass the ## Handler option(s) in. ## If multiple handlers are defined, then ## we assume that the constructor takes a SAXT like parameter list. ## TODO: figure out how to allow DocumentHandler, etc. Perhaps allow ## HASH refs in ARRAY syntax decls. ## Some temporaries $self->{BuiltParts} = []; $self->{SeenParts} = []; $self->{DetectedCycles} = []; ## _build_part is recursive and builds any downstream handlers ## needed to build a part. for ( @{$self->{Parts}} ) { eval { push @{$self->{DetectedCycles}}, $self->_build_part( $_ ); }; if ( $@ ) { chomp $@; push @errors, $@; } } # $self->{PartsByName}->{Intake} ||= $self->{Parts}->[0]; # $self->{PartsByName}->{Exhaust} ||= $self->{Parts}->[-1]; if ( @{$self->{DetectedCycles}} ) { ## Remove duplicate (cycles are found once for each processor in ## the cycle. my %unique_cycles; for my $cycle ( @{$self->{DetectedCycles}} ) { my $start = 0; for ( 1..$#$cycle ) { $start = $_ if $cycle->[$_]->{Number} < $cycle->[$start]->{Number}; } my $key = join( ",", map $_->{Number}, @{$cycle}[$start..($#$cycle),0..($start-1)] ); $unique_cycles{$key} ||= $cycle; } push @errors, map { "Cycle detected in SAX machine: " . join( "->", map $_->string_description, $_->[-1], @$_ ); } map $unique_cycles{$_}, sort keys %unique_cycles; } delete $self->{SeenParts}; delete $self->{BuiltParts}; delete $self->{DetectedCycles}; croak join "\n", @errors if @errors; } sub _SAX2_attrs { my %a = @_; return { map { defined $a{$_} ? ( $_ => { LocalName => $_, Name => $_, Value => $a{$_}, } ) : () ; } keys %a }; } my %ids; sub _idify($) { $ids{$_[0]} = keys %ids unless exists $ids{$_[0]}; return $ids{$_[0]}; } sub pointer_elt { my $self = shift; my ( $elt_type, $h_spec, $options ) = @_; my $part_rec; $h_spec = $self->{Handler} if $h_spec eq "Exhaust" && defined $self->{Handler}; ## Look locally first in case the name is not ## unique among parts in RootMachine. $part_rec = $self->_find_part_rec( $h_spec ) if ! $part_rec; ## Don't look for indexes in RootMachine $part_rec = $options->{RootMachine}->_find_part_rec( $h_spec ) if ! $part_rec && defined $options->{RootMachine} && $h_spec != /^-?\d+$/ ; my %attrs; if ( $part_rec ) { %attrs = ( name => $part_rec->{Name} || $h_spec, "handler-id" => _idify $part_rec->{Processor}, ); } else { if ( ref $h_spec ) { %attrs = ( type => ref $h_spec, "handler-id" => _idify $h_spec, ); } else { %attrs = ( name => $h_spec, ); } } return { Name => $elt_type, LocalName => $elt_type, Attributes => _SAX2_attrs( %attrs ), }; } sub generate_part_descriptions { my $self = shift; my ( $options ) = @_; my $h = $options->{Handler}; croak "No Handler passed" unless $h; for my $part_rec ( @{$self->{Parts}} ) { my $proc = $part_rec->{Processor}; if ( $proc->can( "generate_description" ) ) { $proc->generate_description( { %$options, Name => $part_rec->{Name}, Description => $part_rec->string_description, } ); } else { my $part_elt = { LocalName => "part", Name => "part", Attributes => _SAX2_attrs( id => _idify $proc, type => ref $part_rec, name => $part_rec->{Name}, description => $part_rec->string_description, ), }; $h->start_element( $part_elt ); for my $h_spec ( @{$part_rec->{Handlers}} ) { my $handler_elt = $self->pointer_elt( "handler", $h_spec ); $h->start_element( $handler_elt ); $h->end_element( $handler_elt ); } $h->end_element( $part_elt ); } } } sub generate_description { my $self = shift; my $options = @_ == 1 ? ref $_[0] eq "HASH" ? { %{$_[0]} } : { Handler => ref $_[0] ? $_[0] : $self->_resolve_spec( $_[0] ) } : { @_ }; my $h = $options->{Handler}; croak "No Handler passed" unless $h; unless ( $options->{Depth} ) { %ids = (); $options->{RootMachine} = $self; $h->start_document({}); } ++$options->{Depth}; my $root_elt = { LocalName => "sax-machine", Name => "sax-machine", Attributes => _SAX2_attrs( id => _idify $self, type => ref $self, name => $options->{Name}, description => $options->{Description}, ), }; $h->start_element( $root_elt ); ## Listing the handler first so it doesn't look like a part's ## handler (which it kinda does if it's hanging out *after* a ## tag :). Also makes following the links by hand a tad easier. if ( defined $self->{Handler} ) { my $handler_elt = $self->pointer_elt( "handler", $self->{Handler} ); $handler_elt->{Attributes}->{name} = { Name => "name", LocalName => "name", Value => "Exhaust" } unless exists $handler_elt->{Attributes}->{Name}; $h->start_element( $handler_elt ); $h->end_element( $handler_elt ); } for ( sort keys %{$self->{PartsByName}} ) { if ( $self->{PartsByName}->{$_}->{Name} ne $_ ) { warn $self->{PartsByName}->{$_}->{Name}, " : ", $_; my $handler_elt = $self->pointer_elt( "alias", $_ ); %{$handler_elt->{Attributes}} = ( %{$handler_elt->{Attributes}}, %{_SAX2_attrs( alias => $_ )}, ); $h->start_element( $handler_elt ); $h->end_element( $handler_elt ); } } $self->generate_part_descriptions( $options ); $h->end_element( $root_elt ); --$options->{Depth}; $h->end_document({}) unless $options->{Depth}; } ## ## This is a private class, only this class should use it directly. ## package XML::SAX::Machine::Part; { $XML::SAX::Machine::Part::VERSION = '0.46'; } use fields ( 'Name', ## The caller-given name of the part 'Number', ## Where it sits in the parts list. 'Processor', ## The actual SAX processor 'Handlers', ## The handlers the caller specified ); sub new { my $proto = shift; my $class = ref $proto || $proto; my $self = bless {}, $class; my %options = @_ ; $self->{$_} = $options{$_} for keys %options; return $self; } sub string_description { my $self = shift; return join( "", $self->{Name} ? $self->{Name} : ( "#", $self->{Number} ), " (", $self->{Processor} ? ( ref $self->{Processor} || $self->{Processor} ) : "", ")" ); } 1; __END__ =pod =head1 NAME XML::SAX::Machine - Manage a collection of SAX processors =head1 VERSION version 0.46 =head1 SYNOPSIS ## Note: See XML::SAX::Pipeline and XML::SAX::Machines first, ## this is the gory, detailed interface. use My::SAX::Machines qw( Machine ); use My::SAX::Filter2; use My::SAX::Filter3; my $filter3 = My::SAX::Filter3->new; ## A simple pipeline. My::SAX::Filter1 will be autoloaded. my $m = Machine( # # Name => Class/object => handler(s) # [ Intake => "My::SAX::Filter1" => "B" ], [ B => My::SAX::Filter2->new() => "C" ], [ C => $filter3 => "D" ], [ D => \*STDOUT ], ); ## A parser will be created unless My::SAX::Filter1 can parse_file $m->parse_file( "foo.revml" ); my $m = Machine( [ Intake => "My::SAX::Filter1" => qw( Tee ) ], [ Tee => "XML::Filter::SAXT" => qw( Foo Bar ) ], [ Foo => "My::SAX::Filter2" => qw( Out1 ) ], [ Out1 => \$log ], [ Bar => "My::SAX::Filter3" => qw( Exhaust ) ], ); =head1 DESCRIPTION B: This API is alpha!!! It I be changing. A generic SAX machine (an instance of XML::SAX::Machine) is a container of SAX processors (referred to as "parts") connected in arbitrary ways. Each parameter to C (or Cnew()>) represents one top level part of the machine. Each part has a name, a processor, and one or more handlers (usually specified by name, as shown in the SYNOPSIS). Since SAX machines may be passed in as single top level parts, you can also create nested, complex machines ($filter3 in the SYNOPSIS could be a Pipeline, for example). A SAX machines can act as a normal SAX processors by connecting them to other SAX processors: my $w = My::Writer->new(); my $m = Machine( ...., { Handler => $w } ); my $g = My::Parser->new( Handler => $w ); =head2 Part Names Although it's not required, each part in a machine can be named. This is useful for retrieving and manipulating the parts (see L, for instance), and for debugging, since debugging output (see L and L) includes the names. Part names must be valid Perl subroutine names, beginning with an uppercase character. This is to allow convenience part accessors methods like $c = $m->NameOfAFilter; to work without ever colliding with the name of a method (all method names are completely lower case). Only filters named like this can be accessed using the magical accessor functions. =head2 Reserved Names: Intake and Exhaust The names c and C are reserved. C refers to the first part in the processing chain. This is not necessarily the first part in the constructor list, just the first part to receive external events. C refers to the output of the machine; no part may be named C, and any parts with a handler named C will deliver their output to the machine's handler. Normally, only one part should deliver it's output to the Exhaust port. Calling $m->set_handler() alters the Exhaust port, assuming any processors pointing to the C provide a C method like L's. C and C are usually assigned automatically by single-purpose machines like L and L. =head2 SAX Processor Support The XML::SAX::Machine class is very agnostic about what SAX processors it supports; about the only constraint is that it must be a blessed reference (of any type) that does not happen to be a Perl IO::Handle (which are assumed to be input or output filehandles). The major constraint placed on SAX processors is that they must provide either a C or C method (depending on how many handlers a processor can feed) to allow the SAX::Machine to disconnect and reconnect them. Luckily, this is true of almost any processor derived from XML::SAX::Base. Unfortunately, many SAX older (SAX1) processors do not meet this requirement; they assume that SAX processors will only ever be connected together using their constructors. =head2 Connections SAX machines allow you to connect the parts however you like; each part is given a name and a list of named handlers to feed. The number of handlers a part is allowed depends on the part; most filters only allow once downstream handler, but filters like L and L are meant to feed multiple handlers. Parts may not be connected in loops ("cycles" in graph theory terms). The machines specified by: [ A => "Foo" => "A" ], ## Illegal! and [ A => "Foo" => "B" ], ## Illegal! [ B => "Foo" => "A" ], . Configuring a machine this way would cause events to flow in an infinite loop, and/or cause the first processor in the cycle to start receiving events from the end of the cycle before the input document was complete. Besides, it's not a very useful topology :). SAX machines detect loops at construction time. =head1 NAME XML::SAX::Machine - Manage a collection of SAX processors =head1 API =head2 Public Methods These methods are meant to be used by users of SAX machines. =over =item new() my $m = $self->new( @machine_spec, \%options ); Creates $self using %options, and compiles the machine spec. This is the longhand form of C. =item find_part Gets a part contained by this machine by name, number or object reference: $c = $m->find_part( $name ); $c = $m->find_part( $number ); $c = $m->find_part( $obj ); ## useful only to see if $obj is in $m If a machine contains other machines, parts of the contained machines may be accessed by name using unix directory syntax: $c = $m->find_part( "/Intake/Foo/Bar" ); (all paths must be absolute). Parts may also be accessed by number using array indexing: $c = $m->find_part(0); ## Returns first part or undef if none $c = $m->find_part(-1); ## Returns last part or undef if none $c = $m->find_part( "Foo/0/1/-1" ); There is no way to guarantee that a part's position number means anything, since parts can be reconnected after their position numbers are assigned, so using a part name is recommended. Throws an exception if the part is not found, so doing things like $m->find_part( "Foo" )->bar() garner informative messages when "Foo" is not found. If you want to test a result code, do something like my $p = eval { $m->find_part }; unless ( $p ) { ...handle lookup failure... } =item parts for ( $m->parts ) { ... } Gets an arbitrarily ordered list of top level parts in this machine. This is all of the parts directly contained by this machine and none of the parts that may be inside them. So if a machine contains an L as one of it's parts, the pipeline will be returned but not the parts inside the pipeline. =item all_parts for ( $m->all_parts ) { ... } Gets all parts in this machine, not just top level ones. This includes any machines contained by this machine and their parts. =item set_handler $m->set_handler( $handler ); $m->set_handler( DTDHandler => $handler ); Sets the machine's handler and sets the handlers for all parts that have C specified as their handlers. Requires that any such parts provide a C or (if the part has multiple handlers) a C method. NOTE: handler types other than "Handler" are only supported if they are supported by whatever parts point at the C. If the handler type is C, then the appropriate method is called as: $part->set_handler( $handler ); $part->set_handlers( $handler0, $handler1, ... ); If the type is some other handler type, these are called as: $part->set_handler( $type => $handler ); $part->set_handlers( { $type0 => $handler0 }, ... ); =item trace_parts $m->trace_parts; ## trace all top-level parts $m->trace_parts( @ids ); ## trace the indicated parts Uses Devel::TraceSAX to enable tracing of all events received by the parts of this machine. Does not enable tracing of parts contained in machines in this machine; for that, see trace_all_parts. =item trace_all_parts $m->trace_all_parts; ## trace all parts Uses Devel::TraceSAX to trace all events received by the parts of this machine. =item untracify_parts $m->untracify_parts( @ids ); Converts the indicated parts to SAX processors with tracing enabled. This may not work with processors that use AUTOLOAD. =back =head1 Events and parse routines XML::SAX::Machine provides all SAX1 and SAX2 events and delgates them to the processor indicated by $m->find_part( "Intake" ). This adds some overhead, so if you are concerned about overhead, you might want to direct SAX events directly to the Intake instead of to the machine. It also provides parse...() routines so it can whip up a parser if need be. This means: parse(), parse_uri(), parse_string(), and parse_file() (see XML::SAX::EventMethodMaker for details). There is no way to pass methods directly to the parser unless you know that the Intake is a parser and call it directly. This is not so important for parsing, because the overhead it takes to delegate is minor compared to the effort needed to parse an XML document. =head2 Internal and Helper Methods These methods are meant to be used/overridden by subclasses. =over =item _compile_specs my @comp = $self->_compile_specs( @_ ); Runs through a list of module names, output specifiers, etc., and builds the machine. $scalar --> "$scalar"->new $ARRAY_ref --> pipeline @$ARRAY_ref $SCALAR_ref --> XML::SAX::Writer->new( Output => $SCALAR_ref ) $GLOB_ref --> XML::SAX::Writer->new( Output => $GLOB_ref ) =item generate_description $m->generate_description( $h ); $m->generate_description( Handler => $h ); $m->generate_description( Pipeline ... ); Generates a series of SAX events to the handler of your choice. See L on CPAN for a way of visualizing machine innards. =back =head1 TODO =over =item * Separate initialization from construction time; there should be somthing like a $m->connect( ....machine_spec... ) that new() calls to allow you to delay parts speficication and reconfigure existing machines. =item * Allow an XML doc to be passed in as a machine spec. =back =head1 LIMITATIONS =over =back =head1 AUTHOR Barrie Slaymaker =head1 LICENSE Artistic or GPL, any version. =head1 AUTHORS =over 4 =item * Barry Slaymaker =item * Chris Prather =back =head1 COPYRIGHT AND LICENSE This software is copyright (c) 2013 by Barry Slaymaker. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. =cut XML-SAX-Machines-0.46/README0000644000175000001440000000046412204442306014700 0ustar perigrinusers This archive contains the distribution XML-SAX-Machines, version 0.46: manage collections of SAX processors This software is copyright (c) 2013 by Barry Slaymaker. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. XML-SAX-Machines-0.46/dist.ini0000644000175000001440000000066212204442306015464 0ustar perigrinusersname = XML-SAX-Machines author = Barry Slaymaker author = Chris Prather license = Perl_5 copyright_holder = Barry Slaymaker copyright_year = 2013 [@Basic] [PkgVersion] [NextRelease] filename = CHANGES [Prereqs::FromCPANfile] [PruneFiles] filenames = Makefile.PL match = ^nytprof.* match = ^perl5 match = ^cpan.* [Test::Compile] [PodWeaver] [@Git] changelog = CHANGES [Git::NextVersion] [GithubMeta] XML-SAX-Machines-0.46/MANIFEST0000644000175000001440000000155512204442306015153 0ustar perigrinusersCHANGES LICENSE MANIFEST MANIFEST.SKIP META.yml Makefile.PL README README.too dist.ini examples/append_to_xml_file examples/output_to_process examples/xml_records_thru_libxslt examples/xml_records_to_data_structures lib/XML/Filter/Distributor.pm lib/XML/Filter/DocSplitter.pm lib/XML/Filter/Merger.pm lib/XML/Filter/Tee.pm lib/XML/SAX/ByRecord.pm lib/XML/SAX/EventMethodMaker.pm lib/XML/SAX/Machine.pm lib/XML/SAX/Machines.pm lib/XML/SAX/Machines/ConfigDefaults.pm lib/XML/SAX/Machines/ConfigHelper.pm lib/XML/SAX/Manifold.pm lib/XML/SAX/Pipeline.pm lib/XML/SAX/Tap.pm t/00-compile.t t/00config.t t/00eventmethodmaker.t t/00machines.t t/00valid_name.t t/00xsw_version.t t/01compile_spec.t t/02find_part.t t/09machine.t t/09merger.t t/10bad_machine.t t/10distributor.t t/10docsplitter.t t/10pipeline.t t/10tap.t t/11byrecord.t t/11manifold.t t/20saxt.t t/lib/XML/Filter/SAXT.pm XML-SAX-Machines-0.46/t/0000755000175000001440000000000012204442306014257 5ustar perigrinusersXML-SAX-Machines-0.46/t/09machine.t0000644000175000001440000000401712204442306016223 0ustar perigrinusersuse strict; use Test; use XML::SAX::Machines qw( Machine ); my $m; my $out; ## Make a filter that is *not* in a file that can be require()ed. @Foo::Filter::ISA = qw( XML::SAX::Base ); my @tests = ( ( map { my $m = $_; sub { $out = ""; ok $m->isa( "XML::SAX::Machine" ); }, sub { $m->start_document; ok 1, 1, "start_document"}, sub { $m->start_element( { Name => "foo" } );ok 1, 1, "start_elt foo" }, sub { $m->start_element( { Name => "bar" } );ok 1, 1, "start_elt bar" }, sub { $m->end_element( { Name => "bar" } );ok 1, 1, "end_elt bar" }, sub { $m->end_element( { Name => "foo" } );ok 1, 1, "end_elt foo" }, sub { $m->end_document; ok 1, 1, "end_document" }, sub { $out =~ m{} ? ok 1 : ok $out, "something like " ; }, sub { $out = ""; ok $m->parse_string( "" ); }, sub { $out =~ m{} ? ok 1 : ok $out, "something like " ; }, } ( Machine( [ Intake => "XML::SAX::Base", 1 ], [ undef() => "XML::SAX::Base", 2 ], \$out ), Machine( [ Intake => XML::SAX::Base->new(), 1 ], [ undef() => XML::SAX::Base->new(), 2 ], \$out ), Machine( [ Intake => "Foo::Filter", 1 ], [ undef() => XML::SAX::Base->new(), 2 ], \$out ), Machine( [ Intake => "XML::SAX::Base", "W" ], [ "W" => XML::SAX::Writer->new( Output => \$out ) ], ), Machine( [ Intake => "XML::SAX::Base", "Exhaust" ], { Handler => XML::SAX::Writer->new( Output => \$out ) }, ), ) ), ); plan tests => scalar @tests; $_->() for @tests; XML-SAX-Machines-0.46/t/lib/0000755000175000001440000000000012204442306015025 5ustar perigrinusersXML-SAX-Machines-0.46/t/lib/XML/0000755000175000001440000000000012204442306015465 5ustar perigrinusersXML-SAX-Machines-0.46/t/lib/XML/Filter/0000755000175000001440000000000012204442306016712 5ustar perigrinusersXML-SAX-Machines-0.46/t/lib/XML/Filter/SAXT.pm0000644000175000001440000001040412204442306020026 0ustar perigrinusers# # To do: # - later: ErrorHandler, Locale? package XML::Filter::SAXT; use strict; use vars qw( %SAX_HANDLERS ); %SAX_HANDLERS = ( DocumentHandler => [ "start_document", "end_document", "start_element", "end_element", "characters", "processing_instruction", "comment", "start_cdata", "end_cdata", "entity_reference", "set_document_locator" # !! passes {Locator=>$perlsax} ], DTDHandler => [ "notation_decl", "unparsed_entity_decl", "entity_decl", "element_decl", "attlist_decl", "doctype_decl", "xml_decl" ], EntityResolver => [ "resolve_entity" ]); # # Usage: # # $saxt = new XML::Filter::SAXT ( { Handler => $out0 }, # { DocumentHandler => $out1 }, # { DTDHandler => $out3, # Handler => $out4 # } # ); # # $perlsax = new XML::Parser::PerlSAX ( Handler => $saxt ); # $perlsax->parse ( [OPTIONS] ); # sub new { my ($class, @out) = @_; my $self = bless { Out => \@out }, $class; for (my $i = 0; $i < @out; $i++) { for my $handler (keys %SAX_HANDLERS) { my $callbacks = $SAX_HANDLERS{$handler}; my $h = ($self->{Out}->[$i]->{$handler} ||= $self->{Out}->[$i]->{Handler}); next unless defined $h; for my $cb (@$callbacks) { if (UNIVERSAL::can ($h, $cb)) { $self->{$cb} .= "\$out->[$i]->{$handler}->$cb (\@_);\n"; } } } } for my $handler (keys %SAX_HANDLERS) { my $callbacks = $SAX_HANDLERS{$handler}; for my $cb (@$callbacks) { my $code = $self->{$cb}; if (defined $code) { $self->{$cb} = eval "sub { my \$out = shift->{Out}; $code }"; } else { $self->{$cb} = \&noop; } } } return $self; } sub noop { # does nothing } for my $cb (map { @{ $_ } } values %SAX_HANDLERS) { eval "sub $cb { shift->{$cb}->(\@_); }"; } 1; # package return code __END__ =head1 NAME XML::Filter::SAXT - Replicates SAX events to several SAX event handlers =head1 SYNOPSIS $saxt = new XML::Filter::SAXT ( { Handler => $out1 }, { DocumentHandler => $out2 }, { DTDHandler => $out3, Handler => $out4 } ); $perlsax = new XML::Parser::PerlSAX ( Handler => $saxt ); $perlsax->parse ( [OPTIONS] ); =head1 DESCRIPTION SAXT is like the Unix 'tee' command in that it multiplexes the input stream to several output streams. In this case, the input stream is a PerlSAX event producer (like XML::Parser::PerlSAX) and the output streams are PerlSAX handlers or filters. The SAXT constructor takes a list of hash references. Each hash specifies an output handler. The hash keys can be: DocumentHandler, DTDHandler, EntityResolver or Handler, where Handler is a combination of the previous three and acts as the default handler. E.g. if DocumentHandler is not specified, it will try to use Handler. =head2 EXAMPLE In this example we use L to parse an XML file and to invoke the PerlSAX callbacks of our SAXT object. The SAXT object then forwards the callbacks to L, which will 'die' if it encounters an error, and to L, which will store the XML in an L. use XML::Parser::PerlSAX; use XML::Filter::SAXT; use XML::Handler::BuildDOM; use XML::Checker; my $checker = new XML::Checker; my $builder = new XML::Handler::BuildDOM (KeepCDATA => 1); my $tee = new XML::Filter::SAXT ( { Handler => $checker }, { Handler => $builder } ); my $parser = new XML::Parser::PerlSAX (Handler => $tee); eval { # This is how you set the error handler for XML::Checker local $XML::Checker::FAIL = \&my_fail; my $dom_document = $parser->parsefile ("file.xml"); ... your code here ... }; if ($@) { # Either XML::Parser::PerlSAX threw an exception (bad XML) # or XML::Checker found an error and my_fail died. ... your error handling code here ... } # XML::Checker error handler sub my_fail { my $code = shift; die XML::Checker::error_string ($code, @_) if $code < 200; # warnings and info messages are >= 200 } =head1 CAVEATS This is still alpha software. Package names and interfaces are subject to change. =head1 AUTHOR Send bug reports, hints, tips, suggestions to Enno Derksen at >. XML-SAX-Machines-0.46/t/20saxt.t0000644000175000001440000000203212204442306015562 0ustar perigrinusersuse strict; use Test; use XML::SAX::Machines qw( Machine ); use lib qw( t/lib ); my $m; my $out1; my $out2; my $out3; my @tests = ( sub { $m = Machine( [ Intake => "XML::Filter::SAXT" => ( 1, 2, 3 ) ], [ undef, "XML::SAX::Base" => 4 ], [ undef, "XML::SAX::Base" => 5 ], [ undef, "XML::SAX::Base" => 6 ], \$out1, \$out2, \$out3, ), ok $m->isa( "XML::SAX::Machine" ); }, sub { $out1 = ""; $out2 = ""; $out3 = ""; ok $m->parse_string( "" ); }, sub { $out1 =~ m{} ? ok 1 : ok $out1, "out1: something like " ; }, sub { $out2 =~ m{} ? ok 1 : ok $out2, "out2: something like " ; }, sub { $out3 =~ m{} ? ok 1 : ok $out3, "out3: something like " ; }, ); plan tests => scalar @tests; $_->() for @tests; XML-SAX-Machines-0.46/t/11byrecord.t0000644000175000001440000000410412204442306016416 0ustar perigrinusers#!/usr/local/bin/perl -w use strict; use Test; use XML::SAX::Machines qw( Pipeline ByRecord ); package My::Id::Adder; ## Identical to code in t/10docsplitter.t use vars qw( @ISA ); @ISA = qw( XML::SAX::Base ); use XML::SAX::Base; my $id; sub start_element { my $self = shift; my ( $elt ) = @_; $elt->{Attributes}->{id} = { Name => "id", LocalName => "id", Value => ++$id, }; $self->SUPER::start_element( @_ ); } sub characters { my $self = shift; my ( $data ) = @_; $data->{Data} = uc $data->{Data}; $self->SUPER::characters( @_ ); } ## This is the example from XML::SAX::ByRecord POD. package My::Filter::Uc; use vars qw( @ISA ); @ISA = qw( XML::SAX::Base ); use XML::SAX::Base; sub characters { my $self = shift; my ( $data ) = @_; $data->{Data} = uc $data->{Data}; $self->SUPER::characters( @_ ); } package main; my $m; my $out; my @tests = ( sub { $out = ""; $m = Pipeline( ByRecord( "My::Id::Adder" ), \$out, ); ok $m->isa( "XML::SAX::Machine" ); }, sub { $out = ""; $m->parse_string( "abcdefg" ); ok 1; }, sub { $out =~ m{aBcDeFg} ? ok 1 : ok qq{this outout $out}, qq{something like aBcDeFg} ; }, sub { $out = ""; $m = Pipeline( ByRecord( "My::Filter::Uc" ), \$out, ); $m->parse_string( "abcdefg" ); $out =~ m{aBcDeFg} ? ok 1 : ok qq{this outout $out}, qq{something like aBcDeFg} ; }, ); plan tests => scalar @tests; $_->() for @tests; XML-SAX-Machines-0.46/t/09merger.t0000644000175000001440000000667412204442306016113 0ustar perigrinusersuse strict; use Test; use XML::SAX::PurePerl; use XML::Filter::Merger; use XML::SAX::Writer; use UNIVERSAL; my $p; my $h; my $w; my $out; my @tests = ( sub { $w = XML::SAX::Writer->new( Output => \$out ); $h = XML::Filter::Merger->new( Handler => $w ); $p = XML::SAX::PurePerl->new( Handler => $h ); ok UNIVERSAL::isa( $h, "XML::Filter::Merger" ); }, ## ## sequential docs in default (non-IncludeAllRoots) mode ## sub { $out = ""; $h->start_manifold_document( {} ); $p->parse_string( "" ); $p->parse_string( "" ); $h->end_manifold_document( {} ); $out =~ m{} ? ok 1 : ok $out, "something like " ; }, ## ## sequential, IncludeAllRoots mode ## sub { $out = ""; $h->set_include_all_roots( 1 ); $h->start_manifold_document( {} ); $p->parse_string( "" ); $p->parse_string( "" ); $h->end_manifold_document( {} ); $out =~ m{} ? ok 1 : ok qq{This output $out}, qq{something like } ; }, ## ## Nested documents ## sub { $out = ""; $h->set_include_all_roots( 0 ); $h->reset; $h->start_document( {} ); $h->start_element( { Name => "foo1" } ); $p->parse_string( "" ); $h->end_element( { Name => "foo1" } ); $h->end_document( {} ); $out =~ m{} ? ok 1 : ok qq{This output $out}, qq{something like } ; }, sub { $out = ""; $h->set_include_all_roots( 1 ); $h->reset; $h->start_document( {} ); $h->start_element( { Name => "foo1" } ); $p->parse_string( "" ); $h->end_element( { Name => "foo1" } ); $h->end_document( {} ); $out =~ m{} ? ok 1 : ok qq{This output $out}, qq{something like } ; }, ## ## Sequential and Nested documents, a deviant corner condition ## sub { $out = ""; $h->set_include_all_roots( 0 ); $h->start_manifold_document( {} ); $h->start_document( {} ); $h->start_element( { Name => "foo1" } ); $p->parse_string( "" ); $h->end_element( { Name => "foo1" } ); $h->end_document( {} ); $p->parse_string( "" ); $h->end_manifold_document( {} ); $out =~ m{} ? ok 1 : ok qq{This output $out}, qq{something like } ; }, ## ## Subclassing ## sub { my $s = do { package Subclass; use vars qw( @ISA ); @ISA = qw( XML::Filter::Merger ); sub characters { my $self = shift; my $r = $self->SUPER::characters( @_ ); $self->set_include_all_roots( 1 ); XML::SAX::PurePerl->new( Handler => $self )->parse_string( "" ); return $r; } __PACKAGE__ ; }->new( Handler => $w ); $p->set_handler( $s ); $p->parse_string( " " ); $out =~ m{ } ? ok 1 : ok qq{This output $out}, qq{something like } ; }, ); plan tests => scalar @tests; $_->() for @tests; XML-SAX-Machines-0.46/t/00xsw_version.t0000644000175000001440000000065512204442306017200 0ustar perigrinusersuse Test; use XML::SAX::Writer; my $v_ok = ! ( defined $XML::SAX::Writer::VERSION && $XML::SAX::Writer::VERSION == 0.41 ); unless ( $v_ok ) { warn < 1; ok $v_ok; XML-SAX-Machines-0.46/t/00config.t0000644000175000001440000000121712204442306016052 0ustar perigrinusersuse strict; use Test; use XML::SAX::Machines; use XML::SAX::Machine; ## Need to register the option names my @tests = ( sub { ok( XML::SAX::Machines->processor_class_option( "XML::SAX::Machine", "ConstructWithHashedOptions" ) ? 1 : 0, 1, "XML::SAX::Machine's ConstructWithHashedOptions", ); }, sub { ok( XML::SAX::Machines->processor_class_option( "XML::SAX::Pipeline", "ConstructWithHashedOptions" ) ? 1 : 0, 1, "XML::SAX::Machine's ConstructWithHashedOptions", ); }, ); plan tests => scalar @tests ; $_->() for @tests; XML-SAX-Machines-0.46/t/11manifold.t0000644000175000001440000000146412204442306016404 0ustar perigrinusers#!/usr/local/bin/perl -w use strict; use Test; use XML::SAX::Machines qw( Pipeline Manifold ); my $m; my $out; my @tests = ( sub { $out = ""; $m = Pipeline( Manifold( "XML::SAX::Base", "XML::SAX::Base", ), \$out, ); ok $m->isa( "XML::SAX::Machine" ); # $m->generate_description( Pipeline( "|xmllint --format -" ) ); warn "\n"; }, sub { $out = ""; # $m->trace_all_parts; # Devel::TraceSAX::trace_SAX( $m, "Pipeline" ); $m->parse_string( "" ); ok 1; }, sub { $out =~ m{} ? ok 1 : ok $out, "something like " ; }, ); plan tests => scalar @tests; $_->() for @tests; XML-SAX-Machines-0.46/t/00valid_name.t0000644000175000001440000000055512204442306016710 0ustar perigrinusersuse strict; use Test; use XML::SAX::Machine; my @tests = ( ["A", 1], ["Foo_1", 1], ["Foo::Bar", 0], ["Foo-Bar", 0], ["1Foo", 0], ["Foo\n", 0], ["Foo ", 0], [" Foo", 0], ["Foo,", 0], ["foo,", 0], ["1", 0], ); plan tests => scalar @tests; for (@tests) { ok XML::SAX::Machine::_valid_name $_->[0], $_->[1], $_->[0]; } XML-SAX-Machines-0.46/t/10docsplitter.t0000644000175000001440000000302512204442306017141 0ustar perigrinusersuse strict; use Test; use XML::SAX::Machines qw( Machine ); package My::Id::Adder; ## Identical to code in t/11byrecord.t use vars qw( @ISA ); @ISA = qw( XML::SAX::Base ); use XML::SAX::Base; my $id; sub start_element { my $self = shift; my ( $elt ) = @_; $elt->{Attributes}->{id} = { Name => "id", LocalName => "id", Value => ++$id, }; $self->SUPER::start_element( @_ ); } sub characters { my $self = shift; my ( $data ) = @_; $data->{Data} = uc $data->{Data}; $self->SUPER::characters( @_ ); } package main; my $m; my $out; my @tests = ( sub { $out = ""; $m = Machine( [ Intake => "XML::Filter::DocSplitter" => qw( A ) ], [ A => "My::Id::Adder" => qw( Merger ) ], [ Merger => "XML::Filter::Merger" => qw( Output ) ], [ Output => \$out ], ); $m->Intake->set_aggregator( $m->Merger ); ok $m->isa( "XML::SAX::Machine" ); }, sub { $out = ""; ok $m->parse_string( "abcdefg" ); }, sub { $out =~ s/^<\?.*?\?>//; $out =~ m{aBcDeFg} ? ok 1 : ok qq{this output $out}, qq{something like abcdefg} ; }, ); plan tests => scalar @tests; $_->() for @tests; XML-SAX-Machines-0.46/t/00eventmethodmaker.t0000644000175000001440000000405512204442306020152 0ustar perigrinusersuse strict; use Test; use XML::SAX::EventMethodMaker qw( :all ); my @sax_event_names_tests = ( ## These tests extracted manually from Robin's paper at #' ## http://robin.menilmontant.com/perl/xml/sax-chart.html ## Thanks to Kip and Robin. [ [qw( )], 33 ], [ [qw( 1 )], 33 ], [ [qw( 2 )], 28 ], [ [qw( 1 2 )], 33 ], [ [qw( Handler )], 33 ], [ [qw( Handler 1 )], 33 ], [ [qw( Handler 2 )], 28 ], [ [qw( Handler 1 2 )], 33 ], [ [qw( DTDHandler )], 6 ], [ [qw( DTDHandler 1 )], 6 ], [ [qw( DTDHandler 2 )], 2 ], [ [qw( LexicalHandler )], 7 ], [ [qw( DocumentHandler )], 9 ], [ [qw( DeclHandler )], 4 ], [ [qw( ErrorHandler )], 3 ], [ [qw( DocumentHandler 1 )], 9 ], [ [qw( DocumentHandler 2 )], 0 ], [ [qw( DocumentHandler 1 2 )], 9 ], ## These are my own madness, cribbed from XML::SAX::Base source code. [ [qw( ParseMethods )], 4 ], [ [qw( ParseMethods 1 )], 1 ], [ [qw( ParseMethods 2 )], 4 ], [ [qw( ParseMethods 1 2 )], 4 ], [ [qw( Handler ParseMethods )], 37 ], [ [qw( Handler ParseMethods 1 )], 34 ], [ [qw( Handler ParseMethods 2 )], 32 ], [ [qw( Handler ParseMethods 1 2 )], 37 ], ); my @missing_methods_tests = ( [ "Foo1", 33 ], [ "Test", 29 ], ); sub Test::start_document; sub Test::end_document; sub Test::start_element; sub Test::end_element; plan( tests => @sax_event_names_tests + @missing_methods_tests + 33 + 4 ); for (@sax_event_names_tests) { ok scalar sax_event_names( @{$_->[0]} ), $_->[1], join ",", @{$_->[0]}; } for (@missing_methods_tests) { ok scalar missing_methods( $_->[0], sax_event_names ), $_->[1], join ",", $_->[0]; } compile_methods __PACKAGE__, "sub {}", sax_event_names ; compile_methods __PACKAGE__, "sub {}", sax_event_names "ParseMethods" ; for ( sax_event_names "Handler", "ParseMethods" ) { ok __PACKAGE__->can( $_ ) ? 1 : 0, 1, $_; } XML-SAX-Machines-0.46/t/01compile_spec.t0000644000175000001440000000660312204442306017254 0ustar perigrinusersuse strict; ## This tests the parsing of sax machine specs, but does not test the resulting ## machines. It is mostly a check of the syntax handling. use Test; use Data::Dumper; use XML::SAX::Machines qw( Machine ); my $precon1 = bless { A => 1 }, "Preconstructed1" ; my $precon2 = bless { A => 2 }, "Preconstructed2" ; my $precon3 = bless { A => 3 }, "Preconstructed3" ; sub Preconstructed1::set_handler {} sub Preconstructed2::set_handler {} sub Preconstructed3::set_handler {} my @tests = ( ## Some simple non-errors [ "XML::SAX::Machine", 1 ], ## Already loaded. [ "XML::SAX::Manifold", 1 ], ## Need to load this. [ $precon1, 1 ], ## Preconstructed. [ \*STDOUT, 1 ], ## A writer [ [ A => $precon1], 1 ], ## ARRAY [ [ undef() => $precon1], 1 ], ## explicitly unnamed, via ARRAY [ [ $precon1 ], 1 ], ## implicitly unnamed, via ARRAY ## Explicit linking [ [ A => "XML::SAX::Machine" => "B" ], [ B => "XML::SAX::Machine" => "C" ], [ C => "XML::SAX::Machine" ], 3, ], ## Explicit linking by name [ [ undef() => "XML::SAX::Machine" => 1 ], [ undef() => "XML::SAX::Machine" => 2 ], [ undef() => "XML::SAX::Machine" ], 3, ], ## Explicit linking by reference to other parts in the machine [ [ undef() => $precon1 => $precon2 ], [ undef() => $precon2 => $precon3 ], [ undef() => $precon3 ], 3, ], ## Explicit linking by reference to parts not in the machine [ [ undef() => $precon1 => $precon2 ], 1, ], ## Errors. [ "My::Filter", qr{ My\WFilter.pm} ], [ qr/^/, qr{Regexp} ], [ {}, {}, qr{HASH} ], [ sub {}, qr{CODE} ], [ [ "42illegal" => $precon1 ], qr{'42illegal'} ], [ [ A => $precon1, "UndefName" ], qr{'UndefName'} ], [ [ A => $precon1, 99999 ], qr{'99999'} ], [ [ DupName => $precon1 ], [ DupName => $precon2 ], qr{'DupName'} ], [ [ DupName => $precon1 => "UndefName1" ], [ DupName => $precon2 => "UndefName2" ], qr{(('DupName'|'UndefName1'|'UndefName2').*){3}}s ], [ [ Cyclical => $precon1 => "Cyclical" ], qr{Cyclical.*Cyclical}, ], [ [ Cyclical1 => $precon1 => "Cyclical2" ], [ Cyclical2 => $precon1 => "Cyclical1" ], qr{Cyclical1.*Cyclical2.*Cyclical1}, ], [ [ Cyclical1 => $precon1 => "Cyclical2" ], [ Cyclical2 => $precon1 => "Cyclical1", "Cyclical1" ], qr{Cyclical1.*Cyclical2.*Cyclical1}, ], [ [ Cyclical1 => $precon1 => qw( Cyclical2a Cyclical2b ) ], [ Cyclical2a => $precon2 => "Cyclical1" ], [ Cyclical2b => $precon3 => "Cyclical1" ], qr{Cyclical1.*Cyclical2a.*Cyclical1(?s:.*)Cyclical1.*Cyclical2b.*Cyclical1}, ], ## Now mess with prebuild handlers in various ways. ); plan tests => scalar @tests; sub c { eval { scalar XML::SAX::Machine->new( @_ )->parts } || $@ } for (@tests) { my $expected = pop @$_; my $got = c @$_; my $desc = [ @$_ ]; $desc = [ map ref $_ ? "$_" : $_, @$desc ] if $] < 5.006001; $desc = Dumper $desc; if ( ref $expected ) { ## Older Test.pms do not know about qr// for expected values. $got =~ $expected ? ok 1 : ok $got, $expected, $desc; } else { ok $got, $expected, $desc; } } XML-SAX-Machines-0.46/t/00machines.t0000644000175000001440000000052312204442306016373 0ustar perigrinusersuse strict; use Test; use XML::SAX::Machines qw( :all ); my @tests = ( sub { ok Pipeline()->isa( "XML::SAX::Pipeline" ); }, sub { ok Manifold()->isa( "XML::SAX::Manifold" ); }, sub { ok Machine() ->isa( "XML::SAX::Machine" ); }, sub { ok Tap() ->isa( "XML::SAX::Tap" ); }, ); plan tests => scalar @tests; $_->() for @tests; XML-SAX-Machines-0.46/t/10tap.t0000644000175000001440000000144412204442306015374 0ustar perigrinusersuse strict; use Test; use XML::SAX::Machines qw( Machine Tap ); my $m; my $tap_out; my $main_out; my @tests = ( sub { $m = Machine( [ Intake => Tap( "XML::SAX::Base", \$tap_out ) => qw( B ) ], [ B => "XML::SAX::Base" => qw( C ) ], [ C => \$main_out ], ); ok $m->isa( "XML::SAX::Machine" ); }, sub { $m->parse_string( "" ); ok 1; }, sub { $tap_out =~ m{} ? ok 1 : ok $tap_out, "something like ", "tap_out" ; }, sub { $main_out =~ m{} ? ok 1 : ok $main_out, "something like ", "main_out" ; }, ); plan tests => scalar @tests; $_->() for @tests; XML-SAX-Machines-0.46/t/02find_part.t0000644000175000001440000000217712204442306016563 0ustar perigrinusers#!/usr/local/bin perl -w use strict; use Test; use XML::SAX::Machines qw( Machine ); use XML::SAX::Base; use UNIVERSAL; my $m; my $p1 = XML::SAX::Base->new; my $p2 = XML::SAX::Base->new; my @tests = ( sub { $m = Machine( [ Intake => $p1 ], [ B => Machine( [ "BA", Machine( [ "BAA", $p2 ] ) ] ) ], ); ok UNIVERSAL::isa( $m, "XML::SAX::Machine" ); }, sub { my $p = $m->find_part( "Intake" ); ok "$p", "$p1", "Intake"; }, sub { my $p = $m->find_part( "/Intake" ); ok "$p", "$p1", "/Intake"; }, sub { my $p = $m->find_part( "/Intake/" ); ok "$p", "$p1", "/Intake/"; }, sub { my $p = $m->find_part( "//../////.//Intake///.///" ); ok "$p", "$p1", "//../////.//Intake///.///"; }, sub { my $p = $m->find_part( "B" ); ok ref $p, "XML::SAX::Machine", "B"; }, sub { my $p = $m->find_part( "/B" ); ok ref $p, "XML::SAX::Machine", "/B"; }, sub { my $p = $m->find_part( "/B/BA" ); ok ref $p, "XML::SAX::Machine", "/B/BA"; }, sub { my $p = $m->find_part( "/B/BA/BAA" ); ok "$p", "$p2", "/B/BA/BAA"; }, ); plan tests => scalar @tests; $_->() for @tests; XML-SAX-Machines-0.46/t/10pipeline.t0000644000175000001440000000323612204442306016416 0ustar perigrinusersuse strict; use Test; use XML::SAX::Machines qw( Pipeline ); my $m; my $out; my @tests = ( ( map { my $m = $_; sub { $out = ""; ok $m->isa( "XML::SAX::Machine" ); }, sub { $m->start_document; ok 1, 1, "start_document"}, sub { $m->start_element( { Name => "foo" } );ok 1, 1, "start_elt foo" }, sub { $m->start_element( { Name => "bar" } );ok 1, 1, "start_elt bar" }, sub { $m->end_element( { Name => "bar" } );ok 1, 1, "end_elt bar" }, sub { $m->end_element( { Name => "foo" } );ok 1, 1, "end_elt foo" }, sub { $m->end_document; ok 1, 1, "end_document" }, sub { $out =~ m{} ? ok 1 : ok $out, "something like " ; }, sub { $out = ""; ok $m->parse_string( "" ); }, sub { $out =~ m{} ? ok 1 : ok $out, "something like " ; }, } ( Pipeline( \$out ), Pipeline( "XML::SAX::Base", \$out ), Pipeline( "XML::SAX::Base", "XML::SAX::Base", \$out ), Pipeline( "XML::SAX::Base", XML::SAX::Writer->new( Output => \$out ), ), Pipeline( "XML::SAX::Base", { Handler => XML::SAX::Writer->new( Output => \$out ), }, ), ) ), ); plan tests => scalar @tests; $_->() for @tests; XML-SAX-Machines-0.46/t/10distributor.t0000644000175000001440000000155512204442306017165 0ustar perigrinusersuse strict; use Test; use XML::SAX::Machines qw( Machine ); my $m; my $out; my @tests = ( sub { $out = ""; $m = Machine( [ Intake => "XML::Filter::Distributor" => qw( A B ) ], [ A => "XML::SAX::Base" => qw( Merger ) ], [ B => "XML::SAX::Base" => qw( Merger ) ], [ Merger => "XML::Filter::Merger" => qw( Output ) ], [ Output => \$out ], ); $m->Intake->set_aggregator( $m->Merger ); ok $m->isa( "XML::SAX::Machine" ); #use XML::Handler::Machine2GraphViz; #open FOO, ">foo.png" or die $!; #print FOO machine2graphviz( $m )->as_png; #close FOO; }, sub { $out = ""; ok $m->parse_string( "" ); }, sub { $out =~ m{} ? ok 1 : ok $out, "something like " ; }, ); plan tests => scalar @tests; $_->() for @tests; XML-SAX-Machines-0.46/t/10bad_machine.t0000644000175000001440000000105212204442306017015 0ustar perigrinusersuse strict; use Test; use XML::SAX::Machines qw( Machine ); my $m; my $out; my @tests = ( sub { eval { Machine( [ A => undef ] ) }; ok $@ =~ /undef/ ? "undef passed exception" : $@, "undef passed exception"; }, sub { eval { Machine( [ A => '' ] ) }; ok $@ =~ /empty/i ? "empty string exception" : $@, "empty string exception"; }, sub { eval { Machine( [ A => "BlarneyFilter" ] ) }; ok $@ =~ /BlarneyFilter/ ? "missing filter exception" : $@, "missing filter exception"; }, ); plan tests => scalar @tests; $_->() for @tests; XML-SAX-Machines-0.46/t/00-compile.t0000644000175000001440000000156312204442306016316 0ustar perigrinusersuse strict; use warnings; # This test was generated via Dist::Zilla::Plugin::Test::Compile 2.018 use Test::More 0.88; use Capture::Tiny qw{ capture }; my @module_files = qw( XML/Filter/Distributor.pm XML/Filter/DocSplitter.pm XML/Filter/Merger.pm XML/Filter/Tee.pm XML/SAX/ByRecord.pm XML/SAX/EventMethodMaker.pm XML/SAX/Machine.pm XML/SAX/Machines.pm XML/SAX/Machines/ConfigDefaults.pm XML/SAX/Machines/ConfigHelper.pm XML/SAX/Manifold.pm XML/SAX/Pipeline.pm XML/SAX/Tap.pm ); my @scripts = qw( ); # no fake home requested my @warnings; for my $lib (@module_files) { my ($stdout, $stderr, $exit) = capture { system($^X, '-Mblib', '-e', qq{require q[$lib]}); }; is($?, 0, "$lib loaded ok"); warn $stderr if $stderr; push @warnings, $stderr if $stderr; } is(scalar(@warnings), 0, 'no warnings found') if $ENV{AUTHOR_TESTING}; done_testing; XML-SAX-Machines-0.46/MANIFEST.SKIP0000644000175000001440000000035512204442306015715 0ustar perigrinusers^\.git ^_build ^Build$ ^blib ~$ \.bak$ CVS \.svn \.DS_Store cover_db \..*\.sw.?$ ^Makefile$ ^pm_to_blib$ ^MakeMaker-\d ^blibdirs$ \.old$ ^#.*#$ ^\.# ^TODO$ ^PLANS$ ^doc/ ^benchmarks ^\._.*$ ^t\/600_todo_tests\/$ \.shipit local/* perl5/* XML-SAX-Machines-0.46/LICENSE0000644000175000001440000004366412204442306015036 0ustar perigrinusersThis software is copyright (c) 2013 by Barry Slaymaker. This is free software; you can redistribute it and/or modify it under the same terms as the Perl 5 programming language system itself. Terms of the Perl programming language system itself a) the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version, or b) the "Artistic License" --- The GNU General Public License, Version 1, February 1989 --- This software is Copyright (c) 2013 by Barry Slaymaker. This is free software, licensed under: The GNU General Public License, Version 1, February 1989 GNU GENERAL PUBLIC LICENSE Version 1, February 1989 Copyright (C) 1989 Free Software Foundation, Inc. 51 Franklin St, Suite 500, Boston, MA 02110-1335 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The license agreements of most software companies try to keep users at the mercy of those companies. By contrast, our General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. The General Public License applies to the Free Software Foundation's software and to any other program whose authors commit to using it. You can use it for your programs, too. When we speak of free software, we are referring to freedom, not price. Specifically, the General Public License is designed to make sure that you have the freedom to give away or sell copies of free software, that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of a such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must tell them their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any work containing the Program or a portion of it, either verbatim or with modifications. Each licensee is addressed as "you". 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this General Public License and to the absence of any warranty; and give any other recipients of the Program a copy of this General Public License along with the Program. You may charge a fee for the physical act of transferring a copy. 2. You may modify your copy or copies of the Program or any portion of it, and copy and distribute such modifications under the terms of Paragraph 1 above, provided that you also do the following: a) cause the modified files to carry prominent notices stating that you changed the files and the date of any change; and b) cause the whole of any work that you distribute or publish, that in whole or in part contains the Program or any part thereof, either with or without modifications, to be licensed at no charge to all third parties under the terms of this General Public License (except that you may choose to grant warranty protection to some or all third parties, at your option). c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the simplest and most usual way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this General Public License. d) You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. Mere aggregation of another independent work with the Program (or its derivative) on a volume of a storage or distribution medium does not bring the other work under the scope of these terms. 3. You may copy and distribute the Program (or a portion or derivative of it, under Paragraph 2) in object code or executable form under the terms of Paragraphs 1 and 2 above provided that you also do one of the following: a) accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Paragraphs 1 and 2 above; or, b) accompany it with a written offer, valid for at least three years, to give any third party free (except for a nominal charge for the cost of distribution) a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Paragraphs 1 and 2 above; or, c) accompany it with the information you received as to where the corresponding source code may be obtained. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form alone.) Source code for a work means the preferred form of the work for making modifications to it. For an executable file, complete source code means all the source code for all modules it contains; but, as a special exception, it need not include source code for modules which are standard libraries that accompany the operating system on which the executable file runs, or for standard header files or definitions files that accompany that operating system. 4. You may not copy, modify, sublicense, distribute or transfer the Program except as expressly provided under this General Public License. Any attempt otherwise to copy, modify, sublicense, distribute or transfer the Program is void, and will automatically terminate your rights to use the Program under this License. However, parties who have received copies, or rights to use copies, from you under this General Public License will not have their licenses terminated so long as such parties remain in full compliance. 5. By copying, distributing or modifying the Program (or any work based on the Program) you indicate your acceptance of this license to do so, and all its terms and conditions. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. 7. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of the license which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the license, you may choose any version ever published by the Free Software Foundation. 8. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS Appendix: How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to humanity, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) 19yy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) 19xx name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (a program to direct compilers to make passes at assemblers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice That's all there is to it! --- The Artistic License 1.0 --- This software is Copyright (c) 2013 by Barry Slaymaker. This is free software, licensed under: The Artistic License 1.0 The Artistic License Preamble The intent of this document is to state the conditions under which a Package may be copied, such that the Copyright Holder maintains some semblance of artistic control over the development of the package, while giving the users of the package the right to use and distribute the Package in a more-or-less customary fashion, plus the right to make reasonable modifications. Definitions: - "Package" refers to the collection of files distributed by the Copyright Holder, and derivatives of that collection of files created through textual modification. - "Standard Version" refers to such a Package if it has not been modified, or has been modified in accordance with the wishes of the Copyright Holder. - "Copyright Holder" is whoever is named in the copyright or copyrights for the package. - "You" is you, if you're thinking about copying or distributing this Package. - "Reasonable copying fee" is whatever you can justify on the basis of media cost, duplication charges, time of people involved, and so on. (You will not be required to justify it to the Copyright Holder, but only to the computing community at large as a market that must bear the fee.) - "Freely Available" means that no fee is charged for the item itself, though there may be fees involved in handling the item. It also means that recipients of the item may redistribute it under the same conditions they received it. 1. You may make and give away verbatim copies of the source form of the Standard Version of this Package without restriction, provided that you duplicate all of the original copyright notices and associated disclaimers. 2. You may apply bug fixes, portability fixes and other modifications derived from the Public Domain or from the Copyright Holder. A Package modified in such a way shall still be considered the Standard Version. 3. You may otherwise modify your copy of this Package in any way, provided that you insert a prominent notice in each changed file stating how and when you changed that file, and provided that you do at least ONE of the following: a) place your modifications in the Public Domain or otherwise make them Freely Available, such as by posting said modifications to Usenet or an equivalent medium, or placing the modifications on a major archive site such as ftp.uu.net, or by allowing the Copyright Holder to include your modifications in the Standard Version of the Package. b) use the modified Package only within your corporation or organization. c) rename any non-standard executables so the names do not conflict with standard executables, which must also be provided, and provide a separate manual page for each non-standard executable that clearly documents how it differs from the Standard Version. d) make other distribution arrangements with the Copyright Holder. 4. You may distribute the programs of this Package in object code or executable form, provided that you do at least ONE of the following: a) distribute a Standard Version of the executables and library files, together with instructions (in the manual page or equivalent) on where to get the Standard Version. b) accompany the distribution with the machine-readable source of the Package with your modifications. c) accompany any non-standard executables with their corresponding Standard Version executables, giving the non-standard executables non-standard names, and clearly documenting the differences in manual pages (or equivalent), together with instructions on where to get the Standard Version. d) make other distribution arrangements with the Copyright Holder. 5. You may charge a reasonable copying fee for any distribution of this Package. You may charge any fee you choose for support of this Package. You may not charge a fee for this Package itself. However, you may distribute this Package in aggregate with other (possibly commercial) programs as part of a larger (possibly commercial) software distribution provided that you do not advertise this Package as a product of your own. 6. The scripts and library files supplied as input to or produced as output from the programs of this Package do not automatically fall under the copyright of this Package, but belong to whomever generated them, and may be sold commercially, and may be aggregated with this Package. 7. C or perl subroutines supplied by you and linked into this Package shall not be considered part of this Package. 8. The name of the Copyright Holder may not be used to endorse or promote products derived from this software without specific prior written permission. 9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. The End XML-SAX-Machines-0.46/META.yml0000644000175000001440000000131312204442306015263 0ustar perigrinusers--- abstract: 'manage collections of SAX processors' author: - 'Barry Slaymaker' - 'Chris Prather ' build_requires: Capture::Tiny: 0 Test::More: 0.88 blib: 0 configure_requires: ExtUtils::MakeMaker: 6.30 dynamic_config: 0 generated_by: 'Dist::Zilla version 4.300035, CPAN::Meta::Converter version 2.131560' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 name: XML-SAX-Machines requires: Capture::Tiny: 0 XML::SAX: 0 XML::SAX::Base: 0 XML::SAX::Expat: 0 XML::SAX::Writer: 0 resources: homepage: https://github.com/perigrin/xml-sax-machines repository: https://github.com/perigrin/xml-sax-machines.git version: 0.46 XML-SAX-Machines-0.46/Makefile.PL0000644000175000001440000000304212204442306015765 0ustar perigrinusers use strict; use warnings; use ExtUtils::MakeMaker 6.30; my %WriteMakefileArgs = ( "ABSTRACT" => "manage collections of SAX processors", "AUTHOR" => "Barry Slaymaker, Chris Prather ", "BUILD_REQUIRES" => {}, "CONFIGURE_REQUIRES" => { "ExtUtils::MakeMaker" => "6.30" }, "DISTNAME" => "XML-SAX-Machines", "EXE_FILES" => [], "LICENSE" => "perl", "NAME" => "XML::SAX::Machines", "PREREQ_PM" => { "Capture::Tiny" => 0, "XML::SAX" => 0, "XML::SAX::Base" => 0, "XML::SAX::Expat" => 0, "XML::SAX::Writer" => 0 }, "TEST_REQUIRES" => { "Capture::Tiny" => 0, "Test::More" => "0.88", "blib" => 0 }, "VERSION" => "0.46", "test" => { "TESTS" => "t/*.t" } ); unless ( eval { ExtUtils::MakeMaker->VERSION(6.63_03) } ) { my $tr = delete $WriteMakefileArgs{TEST_REQUIRES}; my $br = $WriteMakefileArgs{BUILD_REQUIRES}; for my $mod ( keys %$tr ) { if ( exists $br->{$mod} ) { $br->{$mod} = $tr->{$mod} if $tr->{$mod} > $br->{$mod}; } else { $br->{$mod} = $tr->{$mod}; } } } unless ( eval { ExtUtils::MakeMaker->VERSION(6.56) } ) { my $br = delete $WriteMakefileArgs{BUILD_REQUIRES}; my $pp = $WriteMakefileArgs{PREREQ_PM}; for my $mod ( keys %$br ) { if ( exists $pp->{$mod} ) { $pp->{$mod} = $br->{$mod} if $br->{$mod} > $pp->{$mod}; } else { $pp->{$mod} = $br->{$mod}; } } } delete $WriteMakefileArgs{CONFIGURE_REQUIRES} unless eval { ExtUtils::MakeMaker->VERSION(6.52) }; WriteMakefile(%WriteMakefileArgs); XML-SAX-Machines-0.46/README.too0000644000175000001440000000616212204442306015501 0ustar perigrinusersREADME for XML-SAX-Machines XML::SAX::Machines is a collection of APIs that allow complex SAX machines to be constructed without a huge amount of extra typing. This distribution contains three kinds of modules: machines, helpers, and filters. Here's how they are laid out: - XML::SAX::* contains machines and helpers. - XML::SAX::Machines lets you import the "classic" constructor functions like Tap(), Pipeline(), Manifold(), and ByRecord(). - Each machine type has a class that implements it, like XML::SAX::Tap, XML::SAX::Pipeline, etc. - There is currently only one available helper, XML::SAX::EventMethodMaker, which is most useful for building a collection of methods to handle different events in the same way, without having to know all of their names. It is also useful as a reference for all of the SAX events by looking at the source code, which contains simple tables of what events occur for what kind of handler (compiled by Robin Berjon). - XML::Filter::* contains filters that are used by ByRecord and Manifold machines to handle SAX events (machines don't handle SAX events, they delegate to the generators/filters/handlers they contain). - XML::Filter::DocSplitter - Splits one doc in to multiple documents, optionally coordinating with an aggregator like XML::Filter::Merger to reassemble them. ByRecord uses this. - XML::Filter::Distributor - buffers a document and reemits it to each handler in turn. Used by Manifold. - XML::Filter::Tee - a dynamically reconfigurable tee fitting. Does not buffer. Used by Tap. Morally equivalent to XML::Filter::SAXT but more flexible. - XML::Filter::Merger - collects multiple documents and merges them, inserting all secondary documents in to one master document. Used by both ByRecord and Manifold. All of the XML::Filter::* classes are useful outside of the machines that use them. For instance, XML::Filter::DocSplitter has been used (not by me) in a Pipeline to split a huge record oriented file in to individual files containing single records (using a custom class derived from XML::SAX::Writer). XML::Filter::Merger is useful as a general way to implement style processing when XInclude is not a good fit. See the examples/ directory for, well, examples (and feel free to write up creative examples, eventually I'd like to compile a cookbook). To give a more concrete idea of how SAX machines are typically used, here's how to build a pipeline of SAX processors: use XML::SAX::Machines qw( Pipeline ); use My::SAX::Filter2; my $p = Pipeline( "My::SAX::Filter1", My::SAX::Filter2->new( ... ), \$output ); $p->parse_uri( $ARGV[0] ); That loads (if need be) XML::SAX::Writer and calls it's new() function with an Output => \$output option, calls the passed-in instance of XML::SAX::Filter2 and calls its set_handler() method to point it to the XML::SAX::Writer that was just created, and then loads (if need be) My::SAX::Filter1 and calls it's new() function with a Handler => option pointing to the XML::SAX::Filter2 instance. XML-SAX-Machines-0.46/examples/0000755000175000001440000000000012204442306015632 5ustar perigrinusersXML-SAX-Machines-0.46/examples/output_to_process0000644000175000001440000000020712204442306021354 0ustar perigrinusersuse XML::SAX::Machines qw( Pipeline ); Pipeline( "<$ARGV[0]", ## Put some filter here :) "| xmllint --format -" )->parse; XML-SAX-Machines-0.46/examples/append_to_xml_file0000644000175000001440000000135012204442306021404 0ustar perigrinuserspackage My::Appender; use XML::Filter::Merger; @ISA = qw( XML::Filter::Merger ); use strict; sub end_element { my $self = shift; if ( $self->in_master_document && ! $self->element_depth ) { XML::SAX::ParserFactory->parser( Handler => $self )->parse_string( <<'END_DOC' ); END_DOC } $self->SUPER::end_element( @_ ); } package main; use XML::SAX::Machines qw( Pipeline ); Pipeline( My::Appender => \*STDOUT )->parse_string( <<'END_DOC' ); END_DOC XML-SAX-Machines-0.46/examples/xml_records_to_data_structures0000644000175000001440000000057712204442306024105 0ustar perigrinusersuse XML::Handler::2Simple; use XML::SAX::Machines qw( ByRecord ); use IO::Handle; ## Older XML::LibXML versions need this use Data::Dumper; $Data::Dumper::Indent = 1; $Data::Dumper::Terse = 1; $Data::Dumper::Quotekeys = 1; ByRecord( XML::Handler::2Simple->new( DataHandler => sub { warn Dumper( $_[1] ); }, ) )->parse_file( \*STDIN ); XML-SAX-Machines-0.46/examples/xml_records_thru_libxslt0000644000175000001440000000140512204442306022701 0ustar perigrinusersuse XML::SAX::Machines 0.31; use XML::SAX::Machines qw( Pipeline ByRecord Tap ); use XML::Filter::XSLT; my $f = XML::Filter::XSLT->new( Source => { ByteStream => \*DATA } ); Pipeline( ByRecord( $f ), \*STDOUT )->parse_uri( $ARGV[0] ); ## "in-place upgrades" until some new releases hit CPAN ;) use IO::Handle; ## XML::LibXML needs this to read from filehandles... sub XML::Filter::XSLT::LibXSLT::set_handler { my $self = shift; $self->{Handler} = shift; $self->{Parser}->set_handler( $self->{Handler} ) if $self->{Parser}; } __END__