XML-Twig-3.52/0000755000175000017500000000000013015347632013261 5ustar mrodrigumrodriguXML-Twig-3.52/t/0000755000175000017500000000000013015347632013524 5ustar mrodrigumrodriguXML-Twig-3.52/t/xmlxpath_08name.t0000755000175000017500000000105212732215763016733 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 5); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @nodes; @nodes = $t->findnodes( '//*[name() = "BBB"]'); ok(@nodes, 5); @nodes = $t->findnodes( '//*[starts-with(name(), "B")]'); ok(@nodes, 7); @nodes = $t->findnodes( '//*[contains(name(), "C")]'); ok(@nodes, 3); exit 0; __DATA__ XML-Twig-3.52/t/xmlxpath_additional.t0000755000175000017500000001417312732215763017763 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use strict; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; BEGIN { if( eval( 'require XML::Twig::XPath')) { import XML::Twig::XPath; } elsif( $@ =~ m{^cannot use XML::XPath or XML::XPathEngine}) { print "1..1\nok 1\n"; warn no_xpath_engine(); exit; } else { die $@; } } print "1..75\n"; use XML::Twig::XPath; ok(1, "loading"); { my $t= XML::Twig::XPath->new->parse( '5'); is( $t->to_number->as_string, 5, "to_number"); } { my $t= XML::Twig::XPath->new->parse( '

5

'); is( $t->first_elt( 'p')->to_number->as_string, 5, "element to_number"); is( $t->getValue, 5, '$t->getValue'); } { my $t= XML::Twig::XPath->new->parse( '

foo

'); is( $t->first_elt( 'p')->to_number->as_string, "NaN", "to_number (NaN)"); } { my $t= XML::Twig::XPath->new->parse( '

p1

toto

:p2

e1:e2 e3:e4
'); is( $t->findnodes_as_string( '//p'), '

p1

:p2

', "findnodes_as_string"); is( $t->root->findnodes_as_string( '//p'), '

p1

:p2

', "findnodes_as_string"); is( $t->root->findnodes_as_string( 'p'), '

p1

:p2

', "findnodes_as_string (from root)"); if( defined( $XML::XPathEngine::VERSION) || ( $XML::XPath::VERSION && (($XML::XPath::VERSION eq '1.13.1') || $XML::XPath::VERSION >= 1.13) ) ) { ok( $t->root->exists( '//p'), "exists //p (on root)"); ok( $t->exists( '//p'), "exists //p (on root)"); } else { skip( 2, "your version of XML::XPath has a bug in the 'exists' method, you cannot use it with XML::Twig::XPath"); } my $p= $t->first_elt( 'p'); ok( $p->matches( 'p'), "\$p->matches( 'p')"); ok( $t->matches( '//p', $p), "\$p->matches( 'p') (from the twig)"); my $p2_set= $t->root->find( 'p[text()= ":p2"]'); is( $p2_set->size, 1, "find 1 node"); is( $p2_set->to_literal, ':p2', 'p2 text'); my $s_set= $t->find( '//s[e/text()= ":e2"]'); is( $s_set->size, 1, "find 1 s node (nodeset)"); my @s= $s_set->get_nodelist; is( scalar @s, 1, "find 1 s node nodelist"); my $s= shift @s; is( $s->getValue, 'e1:e2', 's text'); } { my $t= XML::Twig::XPath->new( pi => 'process', comments => 'process') ->parse( '

text

foobar
'); nok( $t->isElementNode, '$t isElementNode'); nok( $t->isAttributeNode, '$t isAttributeNode'); nok( $t->isTextNode, '$t isTextNode'); nok( $t->isProcessingInstructionNode, '$t isProcessingInstructionNode'); nok( $t->isPINode, '$t isPINode'); nok( $t->isCommentNode, '$t isCommentNode'); nok( $t->isNamespaceNode, '$t isNamespaceNode'); ok( $t->getAttributes, '$t->getAttributes'); my $root= $t->root; ok( $root->isElementNode, '$root isElementNode'); nok( $root->isAttributeNode, '$root isAttributeNode'); nok( $root->isTextNode, '$root isTextNode'); nok( $root->isProcessingInstructionNode, '$root isProcessingInstructionNode'); nok( $root->isPINode, '$root isPINode'); nok( $root->isCommentNode, '$root isCommentNode'); nok( $root->isNamespaceNode, '$root isNamespaceNode'); my $p= $t->first_elt( 'p'); ok( $p->isElementNode, '$p isElementNode'); nok( $p->isAttributeNode, '$p isAttributeNode'); nok( $p->isTextNode, '$p isTextNode'); nok( $p->isProcessingInstructionNode, '$p isProcessingInstructionNode'); nok( $p->isPINode, '$p isPINode'); nok( $p->isCommentNode, '$p isCommentNode'); nok( $p->isNamespaceNode, '$p isNamespaceNode'); my @att= $p->getAttributes; my $att= shift @att; is( $att->getName, 'att', '$att->getName'); is( $att->getValue, 'foo', '$att->getValue'); is( $att->toString, 'att="foo"', '$p attribute'); nok( $att->isElementNode, '$att isElementNode'); ok( $att->isAttributeNode, '$att isAttributeNode'); nok( $att->isTextNode, '$att isTextNode'); nok( $att->isProcessingInstructionNode, '$att isProcessingInstructionNode'); nok( $att->isPINode, '$att isPINode'); nok( $att->isCommentNode, '$att isCommentNode'); nok( $att->isNamespaceNode, '$att isNamespaceNode'); my $comment=$t->first_elt( '#COMMENT'); nok( $comment->isElementNode, '$comment isElementNode'); nok( $comment->isAttributeNode, '$comment isAttributeNode'); nok( $comment->isTextNode, '$comment isTextNode'); nok( $comment->isProcessingInstructionNode, '$comment isProcessingInstructionNode'); nok( $comment->isPINode, '$comment isPINode'); ok( $comment->isCommentNode, '$comment isCommentNode'); nok( $comment->isNamespaceNode, '$comment isNamespaceNode'); my $pi=$t->first_elt( '#PI'); nok( $pi->isElementNode, '$pi isElementNode'); nok( $pi->isAttributeNode, '$pi isAttributeNode'); nok( $pi->isTextNode, '$pi isTextNode'); ok( $pi->isProcessingInstructionNode, '$pi isProcessingInstructionNode'); ok( $pi->isPINode, '$pi isPINode'); nok( $pi->isCommentNode, '$pi isCommentNode'); nok( $pi->isNamespaceNode, '$pi isNamespaceNode'); is( $t->findvalue( '//foo:bar'), 'foobar', '//foo:bar'); is( $t->findvalue( '//*[@foo:att]'), 'foobar', '//*[@foo:att'); is( $t->findvalue( '//*[@foo:att=1]'), 'foobar', '//*[@foo:att=1]'); is( $t->findvalue( '//*[@foo:att=2]'), '', '//*[@foo:att=2]'); my $twig= ($root->findnodes( '..'))[0]; ok( UNIVERSAL::isa( $twig, 'XML::Twig'), 'findnodes returning the document node (' . ref( $twig) . ')'); my $back_to_root= ($root->findnodes( '../*'))[0]; is( $back_to_root->gi, 'doc' , 'findnodes returning the root through the document node'); } # test namespace methods { my $ns= XML::Twig::XPath::Namespace->new( foo => "uri"); ok( $ns->isNamespaceNode, '$ns isNamespaceNode'); is( $ns->getPrefix, 'foo', 'getPrefix'); is( $ns->getExpanded, 'uri', 'getExpanded'); is( $ns->getValue, 'uri', 'getValue'); is( $ns->getData, 'uri', 'getData'); } # check that set_text works also with XML::Twig::XPath { my $elt= XML::Twig::XPath::Elt->new( p => "foo"); $elt->set_text( "bar"); ok( 1, "set_text using XML::Twig::XPath"); } exit 0; XML-Twig-3.52/t/xmlxpath_04pos.t0000755000175000017500000000063312732215763016614 0ustar mrodrigumrodriguuse FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 4); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my $first = $t->findvalue( '/AAA/BBB[1]/@id'); ok($first, "first"); my $last = $t->findvalue( '/AAA/BBB[last()]/@id'); ok($last, "last"); exit 0; __DATA__ XML-Twig-3.52/t/test_new_features_3_16.t0000755000175000017500000001576512732215763020214 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; use XML::Twig; my $TMAX=85; my $DEBUG=0; print "1..$TMAX\n"; # state information are now attached to each twig # default/fixed attribute values are now filled when the "load_DTD" option is used my $dtd_file= 'test_default_att.dtd'; my $dtd=<<'DTD'; DTD my $doc = q{}; my $filled_doc = q{} .q{} .q{} .q{} .q{} .q{}; { open( FHDTD, ">$dtd_file") or die "cannot open dtd file '$dtd': $!"; print FHDTD $dtd; close FHDTD; my $doc_with_external_dtd= qq{$doc}; my $result= XML::Twig->new( error_context => 1, load_DTD => 1) ->parse( $doc_with_external_dtd) ->root->sprint; is( $result => $filled_doc, 'filling attribute default values with EXTERNAL DTD'); unlink $dtd_file; } { my $doc_with_internal_dtd= qq{$doc}; my $result= XML::Twig->new( error_context => 1, load_DTD => 1) ->parse( $doc_with_internal_dtd) ->root->sprint; is( $result => $filled_doc, 'filling attribute default values with INTERNAL DTD'); } # test the first_descendant method { my $t= XML::Twig->new->parse( ''); is( $t->root->first_child->first_descendant( 'a')->tag, 'a', 'first_descendant succeeds'); nok( $t->root->first_child->first_descendant( 'b'), 'first_descendant fails (match outside of the subtree)'); } # test the index option and method { my $doc=q{t1t2}; my $t= XML::Twig->new( index => [ 't', 'none' ])->parse( $doc); is( $t->index( 't', 0)->text, 't1', 'index'); is( $t->index( 't', 1)->text, 't2', 'index'); is_undef( $t->index( 't', 2), 'index'); is( $t->index( 't', -1)->text, 't2', 'index'); my $index= $t->index( 't'); is( $index->[0]->text, 't1', 'index'); is( $index->[ 1]->text, 't2', 'index'); is_undef( $index->[ 2], 'index'); is( $index->[-1]->text, 't2', 'index'); } { my $doc=q{t1t2}; my $t= XML::Twig->new( index => { target => 't' })->parse( $doc); is( $t->index( 'target', 0)->text, 't1', 'index'); is( $t->index( 'target', 1)->text, 't2', 'index'); is_undef( $t->index( 'target', 2), 'index'); is( $t->index( 'target', -1)->text, 't2', 'index'); my $index= $t->index( 'target'); is( $index->[0]->text, 't1', 'index'); is( $index->[ 1]->text, 't2', 'index'); is_undef( $index->[ 2], 'index'); is( $index->[-1]->text, 't2', 'index'); } # test the remove_cdata option { my $doc = q{]]>}; my $escaped_doc= q{<tag&>}; my $t= XML::Twig->new( remove_cdata => 1)->parse( $doc); is( $t->sprint, $escaped_doc, 'remove_cdata on'); $t= XML::Twig->new( remove_cdata => 0)->parse( $doc); is( $t->sprint, $doc, 'remove_cdata off'); } # test the create_accessors method if( $] < 5.006) { skip( 11 => "create_accessors not tested with perl < 5.006"); } else { my $doc= ''; my $t= XML::Twig->new->parse( $doc); $t->create_accessors( qw(att1 att2)); my $root= $t->root; is( $root->att1, 1, 'attribute getter'); $root->att1( 2); is( $root->att1, 2, 'attribute setter'); eval '$root->att1=3'; # eval'ed to keep 5.005 from barfing is( $root->att1, 3, 'attribute as lvalue'); eval '$root->att1++'; # eval'ed to keep 5.005 from barfing is( $root->att1, 4, 'attribute as lvalue (++)'); is( $root->att1, $root->att( 'att1'), 'check with regular att method'); eval { $^W=0; $root->att3; $^W=1; }; matches( $@, q{^Can't locate object method "att3" via package "XML::Twig::Elt" }, 'unknow accessor'); is( $root->att2, undef, 'get non-existent att'); $root->att2( 'bar'); is( $root->att2, "bar", 'get non-existent att'); is( $t->sprint, '', 'final output'); eval { $t->create_accessors( 'tag'); }; matches( $@, q{^attempt to redefine existing method tag using att_accessors }, 'duplicate accessor'); $@=''; eval { XML::Twig->create_accessors( 'att2'); }; is( $@, '', 'redefining existing accessor'); } { # test embedded comments/pis foreach my $doc ( q{text }, q{textmore}, q{textmore}, q{textmoremore2}, q{moremore2}, q{}, q{tatatoto}, q{tata <toto <}, q{textmore & even moremore2}, q{text }, q{ more more2 }, q{ more more2}, ) { my $t= XML::Twig->new->parse( $doc); is( $t->sprint, $doc, "comment within pcdata ($doc)"); my $t2= XML::Twig->new( keep_encoding => 1)->parse( $doc); is( $t2->sprint, $doc, "comment within pcdata in keep encoding mode($doc)"); my $doc_pi= $doc; $doc_pi=~ s{}{?>}g; my $t3= XML::Twig->new->parse( $doc_pi); is( $t3->sprint, $doc_pi, "pi within pcdata ($doc_pi)"); my $t4= XML::Twig->new( keep_encoding => 1)->parse( $doc_pi); is( $t4->sprint, $doc_pi, "pi within pcdata in keep encoding mode($doc_pi)"); } } { # test processing of embedded comments/pis my $doc= q{foobarfoobar}; my $t= XML::Twig->new->parse( $doc); my @elt= $t->findnodes( '//elt[string()="foobar"]'); is( scalar( @elt), 2, 'searching on text with embedded comments'); foreach my $elt (@elt) { $elt->set_text( 'toto'); } is( $t->sprint, q{totototo}, "set_text"); my $t2= XML::Twig->new( keep_encoding => 1)->parse( $doc); @elt= $t2->findnodes( '//elt[string()="foobar"]'); is( scalar( @elt), 2, 'searching on text with embedded comments'); foreach my $elt (@elt) { $elt->set_text( 'toto'); } is( $t2->sprint, q{totototo}, "set_text"); } XML-Twig-3.52/t/xmlxpath_12axisdescendant.t0000755000175000017500000000115512732215763021007 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 6); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @nodes; @nodes = $t->findnodes( '/descendant::*'); ok(@nodes, 11); @nodes = $t->findnodes( '/AAA/BBB/descendant::*'); ok(@nodes, 4); @nodes = $t->findnodes( '//CCC/descendant::*'); ok(@nodes, 6); @nodes = $t->findnodes( '//CCC/descendant::DDD'); ok(@nodes, 3); exit 0; __DATA__ XML-Twig-3.52/t/test2_2.res0000644000175000017500000000232113015347616015521 0ustar mrodrigumrodrigu ]>
S1 I1S1 I2S1 TitleS1 P1S2 P2Note P1S1 para 3
S2 introS2 TitleS2 P1S2 P2S2 P3
Annex TitleAnnex P1Annex P2
XML-Twig-3.52/t/test_memory.t0000755000175000017500000001070413015053270016255 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; $|=1; my $DEBUG=0; use XML::Twig; # only display warnings, test is too unreliable (especially under Devel::Cover) to trust my $mem_size= mem_size(); unless( $mem_size) { print "1..1\nok 1\n"; warn "skipping: memory size not available\n";; exit; } if( !XML::Twig::_weakrefs()) { print "1..1\nok 1\n"; warn "skipping: weaken not available\n";; exit; } my $long_test= $ARGV[0] && $ARGV[0] eq '-L'; my $conf= $long_test ? { iter => 10, p => 1000 } : { iter => 5, p => 500 } ; $conf->{normal}= $conf->{p} * $conf->{iter}; $conf->{normal_html}= $conf->{normal} * 2; my $TMAX=6; print "1..$TMAX\n"; my $warn=0; my $paras= join '', map { qq{

lorem ipsus whatever (clever latin stuff) no $_

}} 1..$conf->{p}; my $test_nb=1; foreach my $wr (0..1) { # first pass if with weakrefs, second without my $wrm=''; if( $wr) { XML::Twig::_set_weakrefs( 0); $wrm= " (no weak references, disregard unless running with a _really_ old perl, like pre 5.8)"; } { my $xml= qq{$paras}; XML::Twig->new->parse( $xml); my $before= mem_size(); for (1..$conf->{iter}) { my $t= XML::Twig->new->parse( $xml); if( $wr) { really_clear( $t) } } my $after= mem_size(); if( $after - $before > $conf->{normal}) { warn "test $test_nb: possible memory leak parsing xml ($after > $before)$wrm"; $warn++; } elsif( $long_test) { warn "$before => $after\n"; } ok(1, "testing memory leaks for xml parsing$wrm"); $test_nb++; } { if( XML::Twig::_use( 'HTML::TreeBuilder', 3.13)) { my $html= qq{with HTB$paras}; XML::Twig->new->parse_html( $html); my $before= mem_size(); for (1..$conf->{iter}) { XML::Twig->new->parse_html( $html); } my $after= mem_size(); if( $after - $before > $conf->{normal_html}) { warn "test $test_nb: possible memory leak parsing html ($after > $before)$wrm"; $warn++; } elsif( $long_test) { warn "$before => $after\n"; } ok(1, "testing memory leaks for html parsing$wrm"); } else { skip( 1, "need HTML::TreeBuilder 3.13+"); } $test_nb++; } { if( XML::Twig::_use( 'HTML::Tidy')) { my $html= qq{with tidy$paras}; XML::Twig->new( use_tidy => 1)->parse_html( $html); my $before= mem_size(); for (1..$conf->{iter}) { XML::Twig->new( use_tidy => 1)->parse_html( $html); } my $after= mem_size(); if( $after - $before > $conf->{normal_html}) { warn "test $test_nb: possible memory leak parsing html ($after > $before)$wrm"; $warn++; } elsif( $long_test) { warn "$before => $after\n"; } ok(1, "testing memory leaks for html parsing using HTML::Tidy$wrm"); } else { skip( 1, "need HTML::Tidy"); } $test_nb++; } } if( $warn) { warn "\nnote that memory leaks can happen even if the module itself doesn't leak, if running", "\ntests under Devel::Cover for example. So do not panic if you get a warning here.\n"; } sub mem_size { open( STATUS, "/proc/$$/status") or return; my( $size)= map { m{^VmSize:\s+(\d+\s+\w+)} } ; $size=~ s{ kB}{}; #warn "data size found: $size\n"; return $size; } sub really_clear { my( $t)= shift; my $elt= $t->root->DESTROY; delete $t->{twig_dtd}; delete $t->{twig_doctype}; delete $t->{twig_xmldecl}; delete $t->{twig_root}; delete $t->{twig_parser}; return; local $SIG{__WARN__} = sub {}; while( $elt) { my $nelt= nelt( $elt); $elt->del_id( $t); foreach ( qw(gi att empty former)) { undef $elt->{$_}; delete $elt->{$_}; } $elt->delete; $elt= $nelt; } $t->dispose; } sub nelt { my( $elt)= @_; if( $elt->_first_child) { return deepest_child( $elt); } if( $elt->_next_sibling) { return deepest_child( $elt->_next_sibling); } return $elt->parent; } sub deepest_child { my( $elt)= @_; while( $elt->_first_child) { $elt= $elt->_first_child; } return $elt; } XML-Twig-3.52/t/test_ignore_elts.t0000755000175000017500000000446112732215763017276 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use XML::Twig; $|=1; my $TMAX=1; # do not forget to update! print "1..$TMAX\n"; my $doc= read_data(); my $t= XML::Twig->new( ignore_elts => { ignore => 1 }, keep_spaces => 1, ); my $result_file= "test_ignore_elt.res1"; open( RESULT, ">$result_file") or die "cannot create $result_file: $!"; select RESULT; $t->parse( $doc); $t->print; select STDOUT; close RESULT; check_result( $result_file, 1); exit 0; # Not yet implemented # test 2 $doc= read_data(); $t= XML::Twig->new( ignore_elts => { ignore => 'print' }, twig_handlers => { elt => sub { $_->print; } }, keep_spaces => 1, ); $result_file= "test_ignore_elt.res2"; open( RESULT, ">$result_file") or die "cannot create $result_file: $!"; select RESULT; $t->parse( $doc); $t->print; select STDOUT; close RESULT; check_result( $result_file, 2); sub read_data { local $/="\n\n"; my $data= ; $data=~ s{^\s*#.*\n}{}m; # get rid of comments $data=~ s{\s*$}{}s; # remove trailing spaces (and \n) $data=~ s{(^|\n)\s*(\n|$)}{}g; # remove empty lines return $data; }; sub check_result { my( $result_file, $test_no)= @_; # now check result my $expected_result= read_data(); my $result= read_result( $result_file); if( $result eq $expected_result) { print "ok $test_no\n"; } else { print "not ok $test_no\n"; print STDERR "\ntest $test_no:\n", "expected: \n$expected_result\n", "real: \n$result\n"; } } sub read_result { my $file= shift; local $/="\n"; open( RESULT, "<$file") or die "cannot read $file: $!"; my @result= grep {m/\S/} ; close RESULT; unlink $file; return join '', @result; } __DATA__ # doc 1 text text # expected result 1 #doc 2 text text # expected result 2 text text XML-Twig-3.52/t/xmlxpath_03star.t0000755000175000017500000000105012732215763016755 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 5); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @nodes; @nodes = $t->findnodes( '/AAA/CCC/DDD/*'); ok(@nodes, 4); @nodes = $t->findnodes( '/*/*/*/BBB'); ok(@nodes, 5); @nodes = $t->findnodes( '//*'); ok(@nodes, 17); exit 0; __DATA__ XML-Twig-3.52/t/test_with_lwp_not_wf.xml0000644000175000017500000000003612732215763020521 0ustar mrodrigumrodrigu text XML-Twig-3.52/t/xmlxpath_test_with_handlers.t0000755000175000017500000000354712732215763021550 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 7); use XML::Twig::XPath; $|=1; my $doc= ' elt 1 elt 2 elt 3 2 3 2 3 2 3 in_elt6-1 in_elt7-1 in_elt7-2 <:elt id=":elt">yep, that is a valid name ' ; my $t= XML::Twig::XPath->new( twig_handlers => { elt5 => sub { my @res1= $_->findnodes( './elt3/elt4[@att_int="3"] | elt3'); ok( ids( @res1), "elt3-1 - elt4-2 - elt3-2 - elt4-4"); # 1 ok( $_->field( 'elt7[@id="elt7-2"]'), "in_elt7-2"); # 2 ok( $_->findvalue( 'elt7[@id="elt7-2"]'), "in_elt7-2"); # 3 ok( $_->findvalue( 'elt7[preceding-sibling::*[1][self::elt6]]'), "in_elt7-1"); # 4 ok( $_->findvalue( 'elt7[preceding-sibling::elt6]'), "in_elt7-1in_elt7-2"); # 5 ok( $_->findvalue( "elt7"), "in_elt7-1in_elt7-2"); # 6 }, }, ); $t->parse( $doc); ok( ids( $t->findnodes( '//elt3/elt4[@att_int="3"] | //elt3') ), "elt3-1 - elt4-2 - elt3-2 - elt4-4"); # 7 exit 0; sub ids { return join( " - ", map { $_->id } @_); } XML-Twig-3.52/t/test_even_more_coverage.t0000755000175000017500000000061712732215763020615 0ustar mrodrigumrodrigu#!/usr/bin/perl -w # test designed to improve coverage of the module use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; #$|=1; my $DEBUG=0; use XML::Twig; my $TMAX=1; print "1..$TMAX\n"; { my $t= XML::Twig->new( parse_start_tag => sub { return 'a'; })->parse( 'c'); is( $t->sprint, '
c', "dummy parse_start_tag"); } exit 0; XML-Twig-3.52/t/pod_coverage.t0000755000175000017500000000061512732215763016357 0ustar mrodrigumrodrigu#!/usr/bin/perl use strict; use warnings; if( ! $ENV{TEST_AUTHOR} ) { print "1..1\nok 1\n"; warn "Author test. Set \$ENV{TEST_AUTHOR} to a true value to run.\n"; exit; } eval "use Test::Pod::Coverage 1.00 tests => 1"; if( $@) { print "1..1\nok 1\n"; warn "Test::Pod::Coverage 1.00 required for testing POD coverage"; exit; } pod_coverage_ok( "XML::Twig", { trustme => [ 'isa' ] }); XML-Twig-3.52/t/test_xml_split.t0000755000175000017500000001360412732215763016776 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; use Config; my $devnull = File::Spec->devnull; my $DEBUG=0; my $extra_flags= $Devel::Cover::VERSION ? '-MDevel::Cover -Ilib' : '-Ilib'; # be cautious: run this only on systems I have tested it on my %os_ok=( linux => 1, solaris => 1, darwin => 1, MSWin32 => 1); if( !$os_ok{$^O}) { print "1..1\nok 1\n"; warn "skipping, test runs only on some OSs\n"; exit; } if( $] < 5.006) { print "1..1\nok 1\n"; warn "skipping, xml_merge runs only on perl 5.6 and later\n"; exit; } print "1..59\n"; my $perl= $Config{perlpath}; if ($^O ne 'VMS') { $perl .= $Config{_exe} unless $perl =~ m/$Config{_exe}$/i; } $perl.= " $extra_flags"; my $xml_split = File::Spec->catfile( "tools", "xml_split", "xml_split"); my $xml_merge = File::Spec->catfile( "tools", "xml_merge", "xml_merge"); my $xml_pp = File::Spec->catfile( "tools", "xml_pp", "xml_pp"); sys_ok( "$perl -c $xml_split", "xml_split compilation"); sys_ok( "$perl -c $xml_merge", "xml_merge compilation"); my $test_dir = File::Spec->catfile( "t", "test_xml_split"); my $test_file = File::Spec->catfile( "t", "test_xml_split.xml"); my $base_nb; # global, managed by test_split_merge test_split_merge( $test_file, "", "" ); test_split_merge( $test_file, "-i", "-i" ); test_split_merge( $test_file, "-c elt1", "" ); test_split_merge( $test_file, "-i -c elt1", "-i" ); test_split_merge( $test_file, "-c elt2", "" ); test_split_merge( $test_file, "-i -c elt2", "-i" ); test_split_merge( $test_file, "-s 1K", "" ); test_split_merge( $test_file, "-i -s 1K", "-i" ); test_split_merge( $test_file, "-l 1", "" ); test_split_merge( $test_file, "-i -l 1", "-i" ); test_split_merge( $test_file, "-g 5", "" ); test_split_merge( $test_file, "-i -g 5", "-i" ); $test_file=File::Spec->catfile( "t", "test_xml_split_entities.xml"); test_split_merge( $test_file, "", "" ); test_split_merge( $test_file, "-g 2", "" ); test_split_merge( $test_file, "-l 1", "" ); $test_file=File::Spec->catfile( "t", "test_xml_split_w_decl.xml"); test_split_merge( $test_file, "", "" ); test_split_merge( $test_file, "-c elt1", "" ); test_split_merge( $test_file, "-g 2", "" ); test_split_merge( $test_file, "-l 1", "" ); test_split_merge( $test_file, "-s 1K", "" ); test_split_merge( $test_file, "-g 2 -l 2", "" ); if( _use( 'IO::CaptureOutput')) { test_error( $xml_split => "-h", 'xml_split '); test_error( $xml_merge => "-h", 'xml_merge '); test_out( $xml_split => "-V", 'xml_split '); test_out( $xml_merge => "-V", 'xml_merge '); if( `pod2text -h` && $^O !~ m{^MS}) { test_out( $xml_split => "-m", 'NAME\s*xml_split '); test_out( $xml_merge => "-m", 'NAME\s*xml_merge '); test_out( $xml_pp => "-h", 'NAME\s*xml_pp '); } else { skip( 3, "pod2text not found in the path, cannot use -m oprion for xml_split and xml_merge"); } test_error( $xml_split => "-c foo -s 1K", 'cannot use -c and -s at the same time'); test_error( $xml_split => "-g 100 -s 1K", 'cannot use -g and -s at the same time'); test_error( $xml_split => "-g 100 -c fo", 'cannot use -g and -c at the same time'); test_error( $xml_split => "-s 1Kc", 'invalid size'); test_error( $xml_pp => "-s --style", 'usage:'); test_error( $xml_pp => "-i --in_place", 'usage:'); test_error( $xml_pp => "-e utf8 --encoding utf8", 'usage:'); test_error( $xml_pp => "-l --load", 'usage:'); } else { skip( 15, 'need IO::CaptureOutput to test tool options'); } sub test_error { my( $command, $options, $expected)= @_; my( $stdout, $stderr, $success, $exit_code) = IO::CaptureOutput::capture_exec( "$perl $command $options test_xml_split.xml"); matches( $stderr, qr/$expected/, "$command $options"); } sub test_out { my( $command, $options, $expected)= @_; my( $stdout, $stderr, $success, $exit_code) = IO::CaptureOutput::capture_exec( "$perl $command $options test_xml_split.xml"); matches( $stdout, qr/^$expected/, "$command $options"); } sub test_split_merge { my( $file, $split_opts, $merge_opts)= @_; $split_opts ||= ''; $merge_opts ||= ''; $base_nb++; my $verbifdebug = $DEBUG ? '-v' : ''; my $expected_base= File::Spec->catfile( "$test_dir", "test_xml_split_expected-$base_nb"); my $base= File::Spec->catfile( "$test_dir", "test_xml_split-$base_nb"); systemq( "$perl $xml_split $verbifdebug -b $base $split_opts $file"); ok( same_files( $expected_base, $base), "xml_split $split_opts $test_file"); my $merged= "$base.xml"; system "$perl $xml_merge $verbifdebug -o $merged $merge_opts $base-00.xml"; system "$perl $xml_pp -i $merged"; ok( same_file( $merged, $file), "xml_merge $merge_opts $test_file ($merged $base-00.xml"); unlink( glob( "$base*")) unless( $DEBUG); } sub same_files { my( $expected_base, $base)= @_; my $nb="00"; while( -f "$base-$nb.xml") { my( $real, $expected)= ( "$base-$nb.xml", "$expected_base-$nb.xml"); if( ! -z $expected) { _use( 'File::Copy'); copy( $real, $expected); } unless( same_file( $expected, $real)) { warn " $expected and $real are different"; if( $DEBUG) { warn `diff $expected, $real`; } return 0; } $nb++; } return 1; } sub same_file { my( $file1, $file2)= @_; my $eq= slurp_mod( $file1) eq slurp_mod( $file2); if( $DEBUG && ! $eq) { system "diff $file1 $file2\n"; } return $eq; } # slurp and remove spaces and _expected from the file sub slurp_mod { my( $file)= @_; local undef $/; open( FHSLURP, "<$file") or return "$file not found:$!"; my $content=; $content=~ s{\s}{}g; $content=~ s{_expected}{}g; return $content; } sub systemq { if( !$DEBUG) { system "$_[0] 1>$devnull 2>$devnull"; } else { warn "$_[0]\n"; system $_[0]; } } XML-Twig-3.52/t/test_new_features_3_22.xml0000644000175000017500000000001412732215763020520 0ustar mrodrigumrodrigu XML-Twig-3.52/t/test_3_35.t0000755000175000017500000000463712732215763015442 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; $|=1; my $DEBUG=0; use XML::Twig; my $TMAX=10; print "1..$TMAX\n"; # escape_gt option { is( XML::Twig->parse( '')->root->insert_new_elt( '#COMMENT' => '- -- -')->twig->sprint, '', 'comment escaping'); } { my $t= XML::Twig->parse( 'foobarbazfoobarbaz2foobar2'); $t->root->cut_descendants( 'e[@a="c"]'); is( $t->sprint, 'bazfoobarbaz2', 'cut_descendants'); } { my $t=XML::Twig->new( pretty_print => 'none')->parse( ''); is( $t->root->_pretty_print, 0, '_pretty_print'); $t->set_pretty_print( 'indented'); is( $t->root->_pretty_print, 3, '_pretty_print'); } # additional tests to increase coverage { is( XML::Twig->parse( no_expand => 1, q{]>&foo;})->root->sprint, "&foo;\n", 'external entities with no_expand'); } { my $doc= q{fi4}; open( my $fh, '>', 'tmp_file'); my $t= XML::Twig->new( twig_handlers => { e => sub { $_->flush( $fh); }, g => sub { is( $_[0]->elt_id( 'i4')->text, 'fi4', 'elt_id, id exists'); nok( $_[0]->elt_id( 'i3'), 'elt_id, id flushed'); }, } ) ->parse( $doc); } { my $xpath=''; XML::Twig->parse( map_xmlns => { "http://foo.com" => 'bar' }, twig_handlers => { "bar:e" => sub { $xpath= $_[0]->path( $_->gi);}, }, q{} ); is( $xpath, '/bar:d/bar:e'); XML::Twig->parse( map_xmlns => { "http://foo.com" => 'bar' }, twig_handlers => { "bar:e" => sub { $xpath= $_[0]->path( $_->local_name);}, }, q{} ); is( $xpath, '/bar:d/bar:e'); } { my $t=XML::Twig->parse( pretty_print => 'none', ''); $t->first_elt( 'e3')->replace( $t->first_elt( 'e1')); is( $t->sprint, '', 'replace called on an element that has not been cut yet'); } 1; XML-Twig-3.52/t/test_changes.t0000755000175000017500000000037312732215763016372 0ustar mrodrigumrodrigu#!/usr/bin/perl use Test::More; eval 'use Test::CPAN::Changes'; plan skip_all => 'Test::CPAN::Changes required for this test' if $@; plan skip_all => 'Author test. Set $ENV{TEST_AUTHOR} to a true value to run.' if ! $ENV{TEST_AUTHOR}; changes_ok(); XML-Twig-3.52/t/test_xml_split_w_decl.xml0000644000175000017500000000057612732215763020651 0ustar mrodrigumrodrigu elt1 content 1 elt1 content 2 elt1 content 3 elt1 content 4 elt1 content 5 elt1 content 6 elt1 content 7 elt1 content 8 elt1 content 9 XML-Twig-3.52/t/xmlxpath_17axisfollowing.t0000755000175000017500000000130312732215763020677 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 4); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @nodes; @nodes = $t->findnodes( '/AAA/XXX/following::*'); ok(@nodes, 2); @nodes = $t->findnodes( '//ZZZ/following::*'); ok(@nodes, 12); exit 0; __DATA__ XML-Twig-3.52/t/test_unique_xpath.t0000755000175000017500000000372112732215763017474 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; use XML::Twig; print "1..65\n"; my $t= XML::Twig->new->parse( \*DATA); foreach my $c ($t->descendants( 'c')) { is( $c->xpath, $c->text, "xpath"); is( $t->findvalue( $c->text), $c->text, "findvalue (>0)"); } foreach my $d ($t->descendants( 'd')) { is( $t->findvalue( $d->text), $d->text, "findvalue (<0)"); } foreach( 1..4) { is( $_, $t->root->first_child( "[$_]")->att( 'pos'), "first_child[$_]"); is( 5-$_, $t->root->first_child( "[-$_]")->att( 'pos'), "first_child[-$_]"); is( $_, $t->root->first_child( "b[$_]")->att( 'pos'), "first_child b[$_]"); is( 5-$_, $t->root->first_child( "b[-$_]")->att( 'pos'), "first_child b[-$_]"); } my $e= $t->get_xpath( '/a/b[-1]/e', 0); foreach( 1..4) { is( $_, $e->first_child( "f[$_]")->att( 'fpos'), "first_child f[$_]"); is( 5-$_, $e->first_child( "f[-$_]")->att( 'fpos'), "first_child f[-$_]"); is( $_, $e->first_child( "g[$_]")->att( 'gpos'), "first_child g[$_]"); is( 5-$_, $e->first_child( "g[-$_]")->att( 'gpos'), "first_child g[-$_]"); } foreach( 1..8) { is( $_, $e->first_child( "[$_]")->att( 'pos'), "first_child [$_]"); is( 9-$_, $e->first_child( "[-$_]")->att( 'pos'), "first_child [-$_]"); } exit 0; __DATA__ /a/b[1]/c[1] /a/b[1]/c[2] /a/b[-4]/d[-2] /a/b[-4]/d[-1] /a/b[2]/c[1] /a/b[-3]/d[-2] /a/b[-3]/d[-1] tata /a/b[2]/c[2] /a/b[3]/c titi /a/b[4]/c /a/b[4]/d[-1] tutu XML-Twig-3.52/t/xmlxpath_07count.t0000755000175000017500000000114312732215763017143 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 7); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @nodes; @nodes = $t->findnodes( '//*[count(BBB) = 2]'); ok($nodes[0]->getName, "DDD"); @nodes = $t->findnodes( '//*[count(*) = 2]'); ok(@nodes, 2); @nodes = $t->findnodes( '//*[count(*) = 3]'); ok(@nodes, 2); ok($nodes[0]->getName, "AAA"); ok($nodes[1]->getName, "CCC"); exit 0; __DATA__ XML-Twig-3.52/t/test2_1.xml0000644000175000017500000000257312732215763015542 0ustar mrodrigumrodrigu ]>
S1 I1 S1 I2 S1 Title S1 P1 S2 P2 Note P1 S1 para 3
S2 intro S2 Title S2 P1 S2 P2 S2 P3
Annex Title Annex P1 Annex P2
XML-Twig-3.52/t/test_errors.t0000755000175000017500000003725412732215763016306 0ustar mrodrigumrodrigu#!/usr/bin/perl -w # test error conditions use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use Config; use tools; #$|=1; use XML::Twig; my $TMAX=121; print "1..$TMAX\n"; my $error_file= File::Spec->catfile('t','test_errors.errors'); my( $q, $q2) = ( ($^O eq "MSWin32") || ($^O eq 'VMS') ) ? ('"', "'") : ("'", '"'); { # test insufficient version of XML::Parser (not that easy, it is already too late here) my $need_version= 2.23; use Config; my $perl= used_perl(); my $version= $need_version - 0.01; unlink $error_file if -f $error_file; if ($^O eq 'VMS') { system( qq{$perl $q-Mblib$q -e$q use vmsish qw(hushed);use XML::Parser; BEGIN { \$XML::Parser::VERSION=$version}; use XML::Twig $q 2> $error_file}); } else { system( qq{$perl $q-Iblib/lib$q -e$q use XML::Parser; BEGIN { \$XML::Parser::VERSION=$version}; use XML::Twig $q 2> $error_file}); } ok( -f $error_file, "error generated for low version of XML::Parser"); matches( slurp_error( $error_file), "need at least XML::Parser version ", "error message for low version of XML::Parser"); $version= $need_version; unlink $error_file if -f $error_file; system( qq{$perl $q-Mblib$q -e$q use XML::Parser; BEGIN { \$XML::Parser::VERSION=$version}; use XML::Twig $q 2> $error_file}); ok( ! -f $error_file || slurp_error( $error_file)!~ "need at least XML::Parser version", "no error generated for proper version of XML::Parser" ); $version= $need_version + 0.01; unlink $error_file if -f $error_file; system( qq{$^X -e$q use XML::Parser; BEGIN { \$XML::Parser::VERSION=$version}; use XML::Twig$q 2> $error_file}); ok( ! -f $error_file || slurp_error( $error_file)!~ "need at least XML::Parser version", "no error generated for high version of XML::Parser" ); unlink $error_file if -f $error_file; } my $warning; my $init_warn= $SIG{__WARN__}; { $SIG{__WARN__}= sub { $warning= join '', @_; }; XML::Twig->new( dummy => 1); $SIG{__WARN__}= $init_warn; matches( $warning, "invalid option Dummy", "invalid option"); } { eval { XML::Twig::_slurp( $error_file) }; matches( $@, "cannot open '\Q$error_file\E'", "_slurp inexisting file"); } { eval {XML::Twig->new->parse( '')->root->first_child( 'du,')}; matches( $@, "wrong navigation condition", "invalid navigation expression"); } { eval {XML::Twig->new->parse( '')->root->first_child( '@val=~/[/')}; matches( $@, "wrong navigation condition", "invalid navigation expression"); } { eval {XML::Twig->new( twig_print_outside_roots => 1)}; matches( $@, "cannot use twig_print_outside_roots without twig_roots", "invalid option"); } { eval {XML::Twig->new( keep_spaces => 1, discard_spaces => 1 )}; matches( $@, "cannot use both keep_spaces and discard_spaces", "invalid option combination keep_spaces and discard_spaces"); eval {XML::Twig->new( keep_spaces => 1, discard_all_spaces => 1 )}; matches( $@, "cannot use both keep_spaces and discard_all_spaces", "invalid option combination keep_spaces and discard_all_spaces"); eval {XML::Twig->new( keep_spaces => 1, keep_spaces_in => ['p'])}; matches( $@, "cannot use both keep_spaces and keep_spaces_in", "invalid option combination keep_spaces and keep_spaces_in"); eval {XML::Twig->new( discard_spaces => 1, discard_all_spaces => 1)}; matches( $@, "cannot use both discard_spaces and discard_all_spaces", "invalid option combination discard_spaces and discard_all_spaces"); eval {XML::Twig->new( discard_spaces => 1, keep_spaces_in => ['p'])}; matches( $@, "cannot use both discard_spaces and keep_spaces_in", "invalid option combination discard_spaces and keep_spaces_in"); eval {XML::Twig->new( keep_spaces_in => [ 'doc' ], discard_spaces_in => ['p'])}; matches( $@, "cannot use both keep_spaces_in and discard_spaces_in", "invalid option combination keep_spaces_in and discard_spaces_in"); eval {XML::Twig->new( discard_spaces => 1, discard_spaces_in => ['p'])}; matches( $@, "cannot use both discard_spaces and discard_spaces_in", "invalid option combination discard_spaces and discard_spaces_in"); eval {XML::Twig->new( keep_spaces_in => [ 'doc' ], discard_all_spaces => 1)}; matches( $@, "cannot use both keep_spaces_in and discard_all_spaces", "invalid option combination keep_spaces_in and discard_all_spaces"); eval {XML::Twig->new( discard_all_spaces => 1, discard_spaces_in => ['p'])}; matches( $@, "cannot use both discard_all_spaces and discard_spaces_in", "invalid option combination discard_all_spaces and discard_spaces_in"); eval {XML::Twig->new( comments => 'wrong') }; matches( $@, "wrong value for comments argument: 'wrong'", "invalid option value for comment"); eval {XML::Twig->new( pi => 'wrong') }; matches( $@, "wrong value for pi argument: 'wrong'", "invalid option value for pi"); } { my $t=XML::Twig->new->parse( '

p1

p 2

'); my $elt= $t->root; eval { $elt->sort_children( sub { }, type => 'wrong'); }; matches( $@, "wrong sort type 'wrong', should be either 'alpha' or 'numeric'", "sort type"); } { foreach my $wrong_path ( 'wrong path', 'wrong##path', '1', '1tag', '///tag', 'tag/') { eval {XML::Twig->new( twig_handlers => { $wrong_path => sub {}});}; matches( $@, "unrecognized expression in handler: '$wrong_path'", "wrong handler ($wrong_path)"); } eval {XML::Twig->new( input_filter => 'dummy')}; matches( $@, "invalid input filter:", "input filter"); eval {XML::Twig->new( input_filter => {})}; matches( $@, "invalid input filter:", "input filter"); } { foreach my $bad_tag ( 'toto', '<1toto', 'new( twig_handlers => { sax => sub { $_[0]->toSAX1 } }); eval {$t->parse( '')}; matches( $@, "cannot use toSAX1 while parsing", "toSAX1 during parsing"); } { my $t= XML::Twig->new( twig_handlers => { sax => sub { $_[0]->toSAX2 } }); eval {$t->parse( '')}; matches( $@, "cannot use toSAX2 while parsing", "toSAX2 during parsing"); } { my $t= XML::Twig->new->parse( ''); foreach my $bad_cond ( 'foo bar', 'foo:bar:baz', '.', '..', '...', '**', 'con[@to:ta:ti]') { eval { $t->root->first_child( qq{$bad_cond})}; matches( $@, "wrong navigation condition '\Q$bad_cond\E'", "bad navigation condition '$bad_cond'"); } } { my $t= XML::Twig->new->parse( ''); eval { XML::Twig->parse( twig_handlers => { q{foo[@a="$sd"]} => sub { } }, ""); }; matches( $@, "^wrong handler condition", 'perl syntax in attribute value'); } { my $t= XML::Twig->new->parse( ''); eval { $t->root->set_field( '*[2]'); }; matches( $@, "can't create a field name from", 'set_field'); } { my $t= XML::Twig->new( twig_handlers => { erase => sub { $_->parent->erase } }); eval { $t->parse( '

toto

'); }; matches( $@, "trying to erase an element before it has been completely parsed", 'erase current element'); } { my $t= XML::Twig->new->parse( ''); my $e= $t->first_elt( 'erase')->cut; eval { $e->erase }; matches( $@, "can only erase an element with no parent if it has a single child", 'erase cut element'); $e->paste( $t->root); eval { $e->paste( first_child => $t->root); }; matches( $@, "cannot paste an element that belongs to a tree", 'paste uncut element'); $e->cut; eval { $e->paste( $t->root => 'first_child' ); }; matches( $@, "wrong argument order in paste, should be", 'paste uncut element'); eval { $e->paste( first_child => {} ); }; matches( $@, "wrong target type in paste: 'HASH', should be XML::Twig::Elt", 'paste with wrong ref'); eval { $e->paste( 'first_child' ); }; matches( $@, "missing target in paste", 'paste with no target'); eval { $e->paste( 'first_child', 1 ); }; matches( $@, 'wrong target type in paste \(not a reference\)', 'paste with no ref'); eval { $e->paste( 'first_child', bless( {}, 'foo') ); }; matches( $@, "wrong target type in paste: 'foo'", 'paste with wrong object type'); eval { $e->paste( wrong => $t->root ); }; matches( $@, "tried to paste in wrong position 'wrong'", 'paste in wrong position'); eval { $e->paste( before => $t->root); }; matches( $@, "cannot paste before root", 'paste before root'); eval { $e->paste( after => $t->root); }; matches( $@, "cannot paste after root", 'paste after root'); eval { $e->paste_before( $t->root); }; matches( $@, "cannot paste before root", 'paste before root'); eval { $e->paste_after( $t->root); }; matches( $@, "cannot paste after root", 'paste after root'); } { my $t= XML::Twig->new->parse( '

text1

text2

'); my $p1= $t->root->first_child( 'p'); my $p2= $t->root->first_child( 'p[2]'); eval { $p1->merge_text( 'toto'); } ; matches( $@, "invalid merge: can only merge 2 elements", 'merge elt and string'); eval { $p1->merge_text( $p2); } ; matches( $@, "invalid merge: can only merge 2 text elements", 'merge non text elts'); $p1->first_child->merge_text( $p2->first_child); is( $t->sprint, '

text1text2

', 'merge_text'); my $p3= XML::Twig::Elt->new( '#CDATA' => 'foo'); eval { $p1->first_child->merge_text( $p3); }; matches( $@, "invalid merge: can only merge 2 text elements", 'merge cdata and pcdata elts'); } { my $t= XML::Twig->new; $t->save_global_state; eval { $t->set_pretty_print( 'foo'); }; matches( $@, "invalid pretty print style 'foo'", 'invalid pretty_print style'); eval { $t->set_pretty_print( 987); }; matches( $@, "invalid pretty print style 987", 'invalid pretty_print style'); eval { $t->set_empty_tag_style( 'foo'); }; matches( $@, "invalid empty tag style 'foo'", 'invalid empty_tag style'); eval { $t->set_empty_tag_style( '987'); }; matches( $@, "invalid empty tag style 987", 'invalid empty_tag style'); eval { $t->set_quote( 'foo'); }; matches( $@, "invalid quote 'foo'", 'invalid quote style'); eval { $t->set_output_filter( 'foo'); }; matches( $@, "invalid output filter 'foo'", 'invalid output filter style'); eval { $t->set_output_text_filter( 'foo'); }; matches( $@, "invalid output text filter 'foo'", 'invalid output text filter style'); } { my $t= XML::Twig->new->parse( ''); my @methods= qw( depth in_element within_element context current_line current_column current_byte recognized_string original_string xpcroak xpcarp xml_escape base current_element element_index position_in_context ); my $method; foreach $method ( @methods) { eval "\$t->$method"; matches( $@, "calling $method after parsing is finished", $method); } $SIG{__WARN__}= $init_warn; } { my $t= XML::Twig->new->parse( ''); my $elt= $t->root->first_child( 'elt')->cut; foreach my $pos ( qw( before after)) { eval { $elt->paste( $pos => $t->root); }; matches( $@, "cannot paste $pos root", "paste( $pos => root)"); } } { my $t= XML::Twig->new->parse( 'f1f2'); eval { $t->root->simplify( group_tags => { a => 'f1' }); }; matches( $@, "error in grouped tag a", "grouped tag error f1"); eval { $t->root->simplify( group_tags => { a => 'f2' }); }; matches( $@, "error in grouped tag a", "grouped tag error f2"); eval { $t->root->simplify( group_tags => { a => 'f3' }); }; matches( $@, "error in grouped tag a", "grouped tag error f3"); } { eval { XML::Twig::Elt->parse( 'foo')->subs_text( "foo", '&elt( 0/0)'); }; matches( $@, "(invalid replacement expression |Illegal division by zero)", "invalid replacement expression in subs_text"); } { eval { my $t=XML::Twig->new( twig_handlers => { e => sub { $_[0]->parse( "") } }); $t->parse( ""); }; matches( $@, "cannot reuse a twig that is already parsing", "error re-using a twig during parsing"); } { ok( XML::Twig->new( twig_handlers => { 'elt[string()="foo"]' => sub {}} ), 'twig_handlers with string condition' ); eval { XML::Twig->new( twig_roots => { 'elt[string()="foo"]' => sub {}} ) }; matches( $@, "string.. condition not supported on twig_roots option", 'twig_roots with string condition' ); ok( XML::Twig->new( twig_handlers => { 'elt[string()=~ /foo/]' => sub {}} ), 'twig_handlers with regexp' ); eval { XML::Twig->new( twig_roots => { 'elt[string()=~ /foo/]' => sub {}} ) }; matches( $@, "string.. condition not supported on twig_roots option", 'twig_roots with regexp condition' ); #ok( XML::Twig->new( twig_handlers => { 'elt[string()!="foo"]' => sub {}} ), 'twig_handlers with !string condition' ); #eval { XML::Twig->new( twig_roots => { 'elt[string()!="foo"]' => sub {}} ) }; #matches( $@, "string.. condition not supported on twig_roots option", 'twig_roots with !string condition' ); #ok( XML::Twig->new( twig_handlers => { 'elt[string()!~ /foo/]' => sub {}} ), 'twig_handlers with !regexp' ); #eval { XML::Twig->new( twig_roots => { 'elt[string()!~ /foo/]' => sub {}} ) }; #matches( $@, "regexp condition not supported on twig_roots option", 'twig_roots with !regexp condition' ); } { XML::Twig::_disallow_use( "XML::Parser"); nok( XML::Twig::_use( "XML::Parser"), '_use XML::Parser (disallowed)'); XML::Twig::_allow_use( "XML::Parser"); ok( XML::Twig::_use( "XML::Parser"), '_use XML::Parser (allowed)'); ok( XML::Twig::_use( "XML::Parser"), '_use XML::Parser (allowed, 2cd try)'); nok( XML::Twig::_use( "XML::Parser::foo::nonexistent"), '_use XML::Parser::foo::nonexistent'); } { XML::Twig::_disallow_use( "Tie::IxHash"); eval { XML::Twig->new( keep_atts_order => 1); }; matches( $@, "Tie::IxHash not available, option keep_atts_order not allowed", 'no Tie::IxHash' ); } { eval { XML::Twig::_first_n { $_ } 0, 1, 2, 3; }; matches( $@, "illegal position number 0", 'null argument to _first_n' ); } { if( ( $] <= 5.008) || ($^O eq 'VMS') ) { skip(1, 'test perl -CSDAL'); } elsif( ! can_check_for_pipes() ) { skip( 1, 'your perl cannot check for pipes'); } else { my $infile= File::Spec->catfile('t','test_new_features_3_22.xml'); my $script= File::Spec->catfile('t','test_error_with_unicode_layer'); my $error=File::Spec->catfile('t','error.log'); my $perl = used_perl(); my $cmd= qq{$perl $q-CSDAL$q $script $infile 2>$error}; system $cmd; matches( slurp( $error), "cannot parse the output of a pipe", 'parse a pipe with perlIO layer set to UTF8 (RT #17500)'); } } { my $e1= XML::Twig::Elt->new( 'foo'); my $e2= XML::Twig::Elt->new( 'foo'); eval { $e1->paste_before( $e2); }; matches( $@, "cannot paste before an orphan element", 'paste before an orphan element' ); eval { $e1->paste_after( $e2); }; matches( $@, "cannot paste after an orphan element", 'paste after an orphan element' ); } { my $r= XML::Twig->parse( '')->root; eval { $r->find_nodes( '//foo/1following::') }; matches( $@, "error in xpath expression", 'error in xpath expression //foo/following::'); } # tests for https://rt.cpan.org/Public/Bug/Display.html?id=97461 (wrong error message due to filehandle seen as a file) { eval { XML::Twig->new->parse( do { open( my $fh, '<', $0); $fh}); }; not_matches( $@, "you seem to have used the parse method on a filename", "parse on a filehandle containing invalid XML"); open FOO, "<$0"; eval { XML::Twig->new->parse( \*FOO); }; not_matches( $@, "you seem to have used the parse method on a filename", "parse on a GLOBAL filehandle containing invalid XML"); } exit 0; sub can_check_for_pipes { my $perl = used_perl(); open( FH, qq{$perl -e$q print 1$q |}) or die "error opening pipe: $!"; return -p FH; } XML-Twig-3.52/t/test_expand_external_entities.dtd0000644000175000017500000000015312732215763022350 0ustar mrodrigumrodrigu ent2 text

"> XML-Twig-3.52/t/test_3_38.t0000755000175000017500000000666412732215763015447 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; $|=1; my $DEBUG=0; use XML::Twig; my $TMAX=21; print "1..$TMAX\n"; my $d= ''; { my $r= XML::Twig->parse( $d)->root; my $result = $r->att('a'); is( $r->sprint, $d, 'att'); } { my $r= XML::Twig->parse( $d)->root; my $result = foo($r->att('a')); is( $r->sprint, $d, 'att in sub(1)'); } { my $r= XML::Twig->parse( $d)->root; my $result = sub { return @_ }->($r->att('a')); is( $r->sprint, $d, 'att in anonymous sub'); } { my $r= XML::Twig->parse( $d)->root; my $a= $r->att( 'a'); is( $r->sprint, $d, 'att in scalar context'); } { my $r= XML::Twig->parse( $d)->root; my( $a1, $a2)= ($r->att( 'a1'), $r->att( 'a2')); is( $r->sprint, $d, 'att in list context'); } { my $r= XML::Twig->parse( $d)->root; $r->att( 'a'); is( $r->sprint, $d, 'att in void context'); } { my $r= XML::Twig->parse( $d)->root; my $result = $r->att('a'); is( $r->sprint, $d, 'att'); } { my $r= XML::Twig->parse( $d)->root; my $result = foo($r->class); is( $r->sprint, $d, 'class in sub(1)'); } { my $r= XML::Twig->parse( $d)->root; my $result = sub { return @_ }->($r->class); is( $r->sprint, $d, 'att in anonymous sub'); } { my $r= XML::Twig->parse( $d)->root; my $a= $r->class; is( $r->sprint, $d, 'class in scalar context'); } { my $r= XML::Twig->parse( $d)->root; my( $a1, $a2)= ($r->class, $r->class); is( $r->sprint, $d, 'class in list context'); } { my $r= XML::Twig->parse( $d)->root; $r->class; is( $r->sprint, $d, 'class in void context'); } { my $t= XML::Twig->new->parse( ''); $t->root->latt( 'a')= 1; is( $t->sprint, '', 'latt'); } { my $r= XML::Twig->parse( $d)->root; my $att= $r->att( 'foo'); is( $att, undef, 'unexisting att'); } # my $value = $root->att('any_attribute'); # $result = length($value); sub foo { return @_; } { my $r; my $doc='<_e id="e1"><_e id="e2"><_foo a="2" id="foo"/>'; my $t= XML::Twig->new( twig_handlers => { _e => sub { $r.= $_->id } }) ->parse( $doc); is( $r, 'e1e2', 'handler, condition on tag starting with an underscore'); is( $t->first_elt( '_foo')->id, 'foo', 'navigation, element name starts with underscore'); is( $t->first_elt( '*[@_a="2"]')->id, 'bar', 'navigation, attribute name starts with underscore'); } { if( _use( 'LWP') && _use( 'HTML::TreeBuilder') ) { my $html=q{

Title

foo
bar

}; my $expected= qq{

Title

foo
bar

}; my $html_file= "t/test_3_38.html"; spit( $html_file, $html); is( scrub_xhtml( XML::Twig->new( )->parseurl_html( "file:$html_file")->sprint), $expected, 'parseurl_html'); unlink $html_file; } else { skip( 1, "LWP and/or HTML::TreeBuilder not available, cannot test safe_parseurl_html"); } } { my $doc=" foo bar baz"; is( XML::Twig->parse( $doc)->simplify( normalize_space => 2)->{e}, 'foo bar baz', 'simplify with normalize_space => 2'); } { my $doc="foo bar foofoo foobar totofoo"; my $t= XML::Twig->parse( $doc); is( $t->subs_text( qr/(f)o(o)/, '&elt(b => $1) $2')->sprint, 'f o bar f of o f obar totof o', 'complex subs_text'); } { my $t= XML::Twig->parse( 'e1e2'); is( join( '-', $t->findvalues( '//e')), 'e1-e2', 'findvalues'); } 1; XML-Twig-3.52/t/xmlxpath_13axisparent.t0000755000175000017500000000070712732215763020173 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 4); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @nodes; @nodes = $t->findnodes( '//DDD/parent::*'); ok(@nodes, 4); ok($nodes[3]->getName, "EEE"); exit 0; __DATA__ XML-Twig-3.52/t/xmlxpath_23func.t0000755000175000017500000000143512732215763016750 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 5); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @nodes; @nodes = $t->findnodes( '//BBB[position() mod 2 = 0 ]'); ok(@nodes, 4); @nodes = $t->findnodes('//BBB [ position() = floor(last() div 2 + 0.5) or position() = ceiling(last() div 2 + 0.5) ]'); ok(@nodes, 2); @nodes = $t->findnodes('//CCC [ position() = floor(last() div 2 + 0.5) or position() = ceiling(last() div 2 + 0.5) ]'); ok(@nodes, 1); exit 0; __DATA__ XML-Twig-3.52/t/test_need_3_args_open.t0000755000175000017500000000234212732215763020152 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; $|=1; use XML::Twig; # abort (before compiling so the 3 arg open doesn't cause a crash) unless perl 5.8+ BEGIN { if( $] < 5.008) { print "1..1\nok 1\n"; warn "skipping tests that require 3 args open\n"; exit 0; } } my $TMAX=4; print "1..$TMAX\n"; { my $out=''; open( my $fh, '>', \$out); my $doc=q{foobar}; my $t= XML::Twig->new( twig_handlers => { elt => sub { $_->flush( $fh) } }); $t->parse( $doc); is( $out, $doc, "flush to a scalar (with autoflush)"); $t->flush( $fh); is( $out, $doc, "double flush"); $t->flush(); is( $out, $doc, "triple flush"); } { my $out= ''; my $twig = XML::Twig->new( output_encoding => 'utf-8',); $twig->parse( ""); my $greet = $twig->root->insert_new_elt( last_child => 'g'); $greet->set_text("Gr\x{00FC}\x{00DF}"); open(my $fh, '>:utf8', \$out); $twig->print(\*$fh); print {*$fh} "Copyright \x{00A9} 2008 Me"; close($fh); is( $out, qq{GrüßCopyright © 2008 Me}, '$t->print and regular print mixed, with utf-8 encoding' ); } XML-Twig-3.52/t/test_3_48.t0000755000175000017500000000037012732215763015434 0ustar mrodrigumrodrigu#!/usr/bin/perl use strict; use warnings; use XML::Twig; use Test::More tests => 1; use utf8; { XML::Twig::_disallow_use( 'Tie::IxHash'); my $t; eval { $t= XML::Twig->new( keep_atts_order => 0); }; ok( $t, 'keep_atts_order => 0'); } exit; XML-Twig-3.52/t/test_bugs_3_15.t0000755000175000017500000001231612732215763016451 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; #$|=1; my $DEBUG=0; use XML::Twig; my $TMAX=17; print "1..$TMAX\n"; { # test bug outputing end tag with pretty_print => nsgmls on my $out= XML::Twig->new( pretty_print => 'nsgmls')->parse( "text")->sprint; ok( XML::Twig->new( error_context => 1)->safe_parse( $out), "end tag with nsgmls option" . ($@ || '') ); } { # test bug RT #8830: simplify dies on mixed content ok( XML::Twig->new->parse( "text1")->root->simplify, "simplify mixed content"); } { # testing to see if bug RT #7523 is still around my $t= XML::Twig->new->parse( ''); if( eval( '$t->iconv_convert( "utf8");')) { $t->set_output_encoding( 'utf8'); eval { $t->sprint;}; ok( !$@, 'checking bug RT 7523'); } else { if( $@=~ m{^Can't locate Text/Iconv.pm} || $@=~ m{^Text::Iconv not available} ) { skip( 1, "Text::Iconv not available"); } elsif( $@=~ m{^Unsupported (encoding|conversion): utf8}) { skip( 1, "your version of iconv does not support utf8"); } else { skip( 1, "odd error creating filter with iconv: $@"); } } } { # bug on comments my $doc= "\n \n foo\n\n"; my $t= XML::Twig->new( comments => 'keep', pretty_print => 'indented') ->parse( $doc); is( $t->sprint => $doc, "comment with comments => 'keep'"); } { # bug with disapearing entities in attributes my $text= '&ent3;'; my $doc= qq{]>$text}; XML::Twig::Elt::init_global_state(); my $regular=XML::Twig->new( pretty_print => 'none')->parse( $doc)->root->sprint; (my $expected= $text)=~ s{&(uuml|ent2);}{}g; # yes, entities in attributes just vanish! is( $regular => $expected, "entities in atts, no option"); XML::Twig::Elt::init_global_state(); my $with_keep=XML::Twig->new(keep_encoding => 1)->parse( $doc)->root->sprint; is( $with_keep => $text, "entities in atts with keep_encoding"); XML::Twig::Elt::init_global_state(); my $with_dneaia=XML::Twig->new(do_not_escape_amp_in_atts => 1)->parse( $doc)->root->sprint; if( $with_dneaia eq '&ent3;') { skip( 1, "option do_not_escape_amp_in_atts not available (it's only available in an old version of expat), no worries"); } else { is( $with_dneaia => $text, "entities in atts with do_not_escape_amp_in_atts"); } # checking that all goes back to normal XML::Twig::Elt::init_global_state(); $regular=XML::Twig->new()->parse( $doc)->root->sprint; is( $regular => $expected, "entities in atts, no option"); } # bug on xmlns in path expression trigger { my $matched=0; my $twig = XML::Twig->new( map_xmlns => { uri1 => 'aaa', }, twig_handlers => { '/aaa:doc/aaa:elt' => sub { $matched=1; } } ) ->parse( q{}); ok( $matched, "using name spaces in path expression trigger"); $matched=0; $twig = XML::Twig->new( map_xmlns => { uri1 => 'aaa', }, twig_handlers => { 'aaa:doc/aaa:elt' => sub { $matched=1; } } ) ->parse( q{}); ok( $matched, "using name spaces in partial path expression trigger"); } # bug where the leading spaces are discarded in an element like

foobar

{ # check that leading spaces after a \n are discarded my $doc= "

\n foo\n

"; my $expected= "

foo

"; my $result= XML::Twig->new->parse( $doc)->sprint; is( $result => $expected, 'leading spaces kept when not after a \n'); } { # check that leading spaces NOT after a \n are kept around my $doc= "

foobar

"; my $result= XML::Twig->new->parse( $doc)->sprint; is( $result => $doc, 'leading spaces kept when not after a \n'); } { my $t= XML::Twig->new->parse( " elt 1 elt 2 "); is( scalar $t->descendants( '#PCDATA'), 3, 'properly parsed pcdata'); } { my $t= XML::Twig->new->parse( "\n elt 1 \n elt 2 \n"); is( scalar $t->descendants( '#PCDATA'), 2, 'properly parsed pcdata'); } { # bug RT 8137 my $doc= q{}; (my $expected= $doc)=~ s{ }{ }; is( XML::Twig->new( keep_encoding => 1)->parse( $doc)->sprint, $expected, 'keep_encoding and 2 spaces between gi and attribute' ); } { # copy of an element with extra_data_before_end_tag my $doc= 'datamore'; my $expected= 'datamore'; # pi's are not being moved around anymore my $elt= XML::Twig->new( pi => 'keep')->parse( $doc)->root->copy; is( $elt->sprint, $expected, 'copy of an element with extra_data_before_end_tag'); } { # copy of an element with extra_data_before_end_tag my $doc= ''; my $elt= XML::Twig->new( pi => 'keep')->parse( $doc)->root->copy; is( $elt->sprint, $doc, 'copy of an element with extra_data_before_end_tag'); } XML-Twig-3.52/t/test_3_26.t0000755000175000017500000001301212732215763015425 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; $|=1; my $DEBUG=0; use XML::Twig; my $DECL=qq{\n}; $DECL=''; my $TMAX=18; print "1..$TMAX\n"; { # testing set_inner_xml my $doc= 'with content

toto

'; my $t= XML::Twig->nparse( $doc); my $inner= '

foo

duh'; $t->first_elt( 'elt')->set_inner_xml( $inner); (my $expected= $doc)=~ s{}{$inner}; is( $t->sprint, $expected, "set_inner_xml"); $t->first_elt( 'elt2')->set_inner_xml( $inner); $expected=~ s{.*}{$inner}; is( $t->sprint, $expected, "set_inner_xml (of an elt with content)"); } { # testing set_inner_html if( !XML::Twig::_use( 'HTML::TreeBuilder', 3.13)) { skip( 4 => "need HTML::TreeBuilder 3.13+ to use set_inner_html method"); } elsif( !XML::Twig::_use( 'LWP')) { skip( 4 => "need LWP to use set_inner_html method"); } else { my $doc= 'a titlepar 1

par 2
after the break'; my $t= XML::Twig->nparse( $doc); my $inner= '

  • foo
  • bar
'; $t->first_elt( 'p')->set_inner_html( $inner); (my $expected= $t->sprint)=~ s{

.*

}{

$inner

}; is( $t->sprint, $expected, "set_inner_html"); $inner= q{2cd title}; $t->first_elt( 'head')->set_inner_html( $inner); $inner=~ s{>$}{/>}; $expected=~ s{.*}{$inner}; $expected=~ s{(]*)(/>)}{$1 $2}g; is( $t->sprint, $expected, "set_inner_html (in head)"); $inner= q{

just a p

}; $t->root->set_inner_html( $inner); $expected= qq{$DECL$inner}; is( $t->sprint, $expected, "set_inner_html (all doc)"); $inner= q{the content of the
body}; $t->first_elt( 'body')->set_inner_html( $inner); $expected= qq{$DECL$inner}; $expected=~ s{
}{
}g; is( $t->sprint, $expected, "set_inner_html (body)"); } } { if( !XML::Twig::_use( "File::Temp")) { skip( 5, "File::Temp not available"); } else { # parsefile_inplace my $file= "test_3_26.xml"; spit( $file, q{nice hey?}); XML::Twig->new( twig_handlers => { foo => sub { $_->set_tag( 'bar')->flush; }}) ->parsefile_inplace( $file); matches( slurp( $file), qr//, "parsefile_inplace"); XML::Twig->new( twig_handlers => { bar => sub { $_->set_tag( 'toto')->flush; }}) ->parsefile_inplace( $file, '.bak'); matches( slurp( $file), qr//, "parsefile_inplace (with backup, checking file)"); matches( slurp( "$file.bak"), qr//, "parsefile_inplace (with backup, checking backup)"); unlink( "$file.bak"); XML::Twig->new( twig_handlers => { toto => sub { $_->set_tag( 'tata')->flush; }}) ->parsefile_inplace( $file, 'bak_*'); matches( slurp( $file), qr//, "parsefile_inplace (with complex backup, checking file)"); matches( slurp( "bak_$file"), qr//, "parsefile_inplace (with complex backup, checking backup)"); unlink( "bak_$file"); unlink $file; } } { if( !XML::Twig::_use( "File::Temp")) { skip( 5, "File::Temp not available"); } elsif( !XML::Twig::_use( "HTML::TreeBuilder")) { skip( 5, "HTML::TreeBuilder not available"); } elsif( !XML::Twig::_use( "LWP")) { skip( 5, "LWP not available"); } elsif( !XML::Twig::_use( "LWP::UserAgent")) { skip( 5, "LWP::UserAgent not available"); } else { # parsefile_html_inplace my $file= "test_3_26.html"; spit( $file, q{foo

this is it

>}); XML::Twig->new( twig_handlers => { p => sub { $_->set_tag( 'h1')->flush; }}) ->parsefile_html_inplace( $file); matches( slurp( $file), qr/

/, "parsefile_html_inplace"); XML::Twig->new( twig_handlers => { h1 => sub { $_->set_tag( 'blockquote')->flush; }}, error_context => 6) ->parsefile_html_inplace( $file, '.bak'); matches( slurp( $file), qr/
/, "parsefile_html_inplace (with backup, checking file)"); matches( slurp( "$file.bak"), qr/

/, "parsefile_html_inplace (with backup, checking backup)"); unlink( "$file.bak"); XML::Twig->new( twig_handlers => { blockquote => sub { $_->set_tag( 'div')->flush; }}) ->parsefile_html_inplace( $file, 'bak_*'); matches( slurp( $file), qr/
/, "parsefile_html_inplace (with complex backup, checking file)"); matches( slurp( "bak_$file"), qr/
/, "parsefile_html_inplace (with complex backup, checking backup)"); unlink( "bak_$file"); unlink $file; } } { use Cwd; if( XML::Twig::_use( "LWP::Simple") && XML::Twig::_use( "LWP::UserAgent")) { my $file = "test_uri"; my $uri = sprintf( "file://%s/%s", getcwd, $file); my $content= "ok"; spit( test_uri => $content); is( XML::Twig::_slurp_uri( $uri), $content, "testing _slurp_uri"); } else { skip( 1, "LWP::Simple or LWP::UserAgent not available"); } } { # test syntax error in XPath predicate (RT #19499) my $t= XML::Twig->nparse( ''); eval { $t->get_xpath( '/*[@!a]'); }; matches( $@, qr/^error in xpath expression/, "syntax error in XPath predicate"); } XML-Twig-3.52/t/xmlxpath_18axispreceding.t0000755000175000017500000000127612732215763020651 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 4); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @nodes; @nodes = $t->findnodes( '/AAA/XXX/preceding::*'); ok(@nodes, 4); @nodes = $t->findnodes( '//GGG/preceding::*'); ok(@nodes, 8); exit 0; __DATA__ XML-Twig-3.52/t/test4.t0000755000175000017500000001414612732215763014771 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; $|=1; use XML::Twig; my $TMAX=19; # do not forget to update! print "1..$TMAX\n"; my $s='
Title <b>bold</b>

para1

para2

Title

para2

para3

'; my @toc; my $t= new XML::Twig( TwigHandlers => { title => sub { push @toc, $_[1]->text; } }); $t->parse( $s); my $toc= join ':', @toc; stest( $toc, "Title bold:Title", "text method"); undef @toc; $t= new XML::Twig( TwigHandlers => { title => sub { push @toc, $_[1]->sprint( 1); } }); $t->parse( $s); $toc= join ':', @toc; stest( $toc, "Title bold:Title", "sprint method"); undef @toc; $t= new XML::Twig( TwigHandlers => { title => sub { push @toc, $_[1]->sprint( 1); $_[0]->purge; } }); $t->parse( $s); $toc= join ':', @toc; stest( $toc, "Title bold:Title", "sprint method with purge"); my $purged_doc= $t->sprint; stest( $purged_doc, '

para2

para3

', "sprint purged doc"); $t= new XML::Twig( TwigRoots => { title => 1}); $t->parse( $s); my $doc= $t->sprint; stest( $doc, 'Title <b>bold</b>Title', "using title as TwigRoots"); $t= new XML::Twig( TwigHandlers => { doc => sub { $_[1]->set_att( mod => "yes"); } }, TwigRoots => { title => 1}); $t->parse( $s); $doc= $t->sprint; stest( $doc, 'Title <b>bold</b>Title', "using title as TwigRoots (with doc handler)"); $s='
t1 <b>b1</b>

para1

ts1 <b>b2</b> para1

para2

t2

para3

para4
ts2

para6

para7

'; $t= new XML::Twig( TwigHandlers => { doc => sub { $_[1]->set_att( mod => "yes"); } }, TwigRoots => { title => 1}); $t->parse( $s); $doc= $t->sprint; stest( $doc, 't1 <b>b1</b>ts1 <b>b2</b>t2ts2', "using title as TwigRoots (with doc handler)"); $t= new XML::Twig( TwigHandlers => { doc => sub { $_[1]->set_att( mod => "yes"); } }, TwigRoots => { title => 1, p2 => 1}); $t->parse( $s); $doc= $t->sprint; stest( $doc, 't1 <b>b1</b>ts1 <b>b2</b>para1t2para4ts2', "using title, p2 as TwigRoots (with doc handler)"); $s="string with ' here"; $t= new XML::Twig(); $t->parse( $s); $doc= $t->sprint; stest( $doc, "string with ' here", "apos without KeepEncoding"); $t= new XML::Twig( KeepEncoding => 1); $t->parse( $s); $doc= $t->sprint; stest( $doc, "string with ' here", "apos WITH KeepEncoding"); $s="string with " here"; $t= new XML::Twig(); $t->parse( $s); $doc= $t->sprint; stest( $doc, "string with \" here", "quote without KeepEncoding"); $t= new XML::Twig( KeepEncoding => 1); $t->parse( $s); $doc= $t->sprint; stest( $doc, 'string with " here', "quote WITH KeepEncoding"); $s="string with & here"; $t= new XML::Twig(); $t->parse( $s); $doc= $t->sprint; stest( $doc, $s, "& in text"); $s='string'; $t= new XML::Twig(); $t->parse( $s); $doc= $t->sprint; stest( $doc, $s, "& in attribute"); $s="string with < here"; $t= new XML::Twig(); $t->parse( $s); $doc= $t->sprint; stest( $doc, $s, "< in text"); $s='string'; $t= new XML::Twig(); $t->parse( $s); $doc= $t->sprint; stest( $doc, $s, "< in attribute"); $s="string with " here"; $t= new XML::Twig(); $t->parse( $s); $doc= $t->sprint; stest( $doc, 'string with " here', "" in text"); $s='string'; $t= new XML::Twig(); $t->parse( $s); $doc= $t->sprint; stest( $doc, $s, "" in attribute"); #$s='string'; #$t= new XML::Twig(); #$t->parse( $s); #$doc= $t->sprint; #stest( $doc, $s, "‚ in attribute"); #$s="string with ‰ here"; #$t= new XML::Twig(); #$t->parse( $s); #$doc= $t->sprint; #stest( $doc, "string with ‚ here", "eacute without KeepEncoding"); #$t= new XML::Twig( KeepEncoding => 1); #$t->parse( $s); #$doc= $t->sprint; #stest( $doc, 'string with ‰ here', "eacute WITH KeepEncoding"); #$s='string with ‚ here'; #$t= new XML::Twig(); #$t->parse( $s); #$doc= $t->sprint; #stest( $doc, "string with ‚ here", "‚ without KeepEncoding"); #$t= new XML::Twig( KeepEncoding => 1); #$t->parse( $s); #$doc= $t->sprint; #stest( $doc, 'string with ‚ here', "‚ WITH KeepEncoding"); #$s='text'; #$t= new XML::Twig(); #$t->parse( $s); #$doc= $t->sprint; #stest( $doc, $s, "PI"); if( $] > 5.008) { my (@called); my $t= XML::Twig->new( twig_handlers => { a => sub { push @called, 'a'; 1; }, 'b/a' => sub { push @called, 'b/a'; 1; }, '/b/a' => sub { push @called, '/b/a'; 1; }, '/a' => sub { push @called, '/a'; 1; }, }, ); $t->parse( ''); my $calls= join( ':', @called); my $expected= "/b/a:b/a:a"; if( $calls eq $expected) { print "ok 19\n"; } else { print "not ok 19\n"; warn "\n[$calls] instead of [$expected]\n"; } } else { warn "skipped for perl < 5.8\n"; print "ok 19\n"; } exit 0; XML-Twig-3.52/t/test_xml_split_entities.xml0000644000175000017500000000016312732215763021230 0ustar mrodrigumrodrigu text with < > & and ' & and ']]> XML-Twig-3.52/t/test_new_features_3_22.t0000755000175000017500000001421012732215763020171 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use Carp; use FindBin qw($Bin); use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; use XML::Twig; my $DEBUG=0; print "1..20\n"; { my $doc= q{ ]> &foo;}; XML::Twig->new( keep_encoding => 1)->parse( $doc); } { # testing parse_html if( XML::Twig::_use( 'HTML::TreeBuilder', 3.13) && XML::Twig::_use( 'LWP::Simple') && XML::Twig::_use( 'LWP::UserAgent')) { my $html= q{Tt
t2

t3}; my $expected= HTML::TreeBuilder->new->parse( $html)->as_XML; $expected=~ s{>new->parse_html( $html)->sprint, $expected, 'parse_html string using HTML::TreeBuilder'); my $html_file= File::Spec->catfile( "t", "test_new_features_3_22.html"); spit( $html_file => $html); if( -f $html_file) { is_like( XML::Twig->new->parsefile_html( $html_file)->sprint, $expected, 'parsefile_html using HTML::TreeBuilder'); open( HTML, "<$html_file") or die "cannot open HTML file '$html_file': $!"; is_like( XML::Twig->new->parse_html( \*HTML)->sprint, $expected, 'parse_html fh using HTML::TreeBuilder'); } else { skip( 2, "could not write HTML file in t directory, check permissions"); } } else { skip( 3 => 'need HTML::TreeBuilder 3.13+ and LWP to test parse_html'); } } { # testing _use ok( XML::Twig::_use( 'XML::Parser'), '_use XML::Parser'); ok( XML::Twig::_use( 'XML::Parser'), '_use XML::Parser (2cd time)'); # second time tests the caching nok( XML::Twig::_use( 'I::HOPE::THIS::MODULE::NEVER::MAKES::IT::TO::CPAN'), '_use non-existent-module'); nok( XML::Twig::_use( 'I::HOPE::THIS::MODULE::NEVER::MAKES::IT::TO::CPAN'), '_use non-existent-module (2cd time)'); } { # testing auto-new features my $doc= ''; is( XML::Twig->nparse( empty_tags => 'normal', $doc)->sprint, $doc, 'nparse string'); is( XML::Twig->nparse( empty_tags => 'expand', $doc)->sprint, '', 'nparse string and option'); my $doc_file= 'doc.xml'; spit( $doc_file => $doc); # doc is still expanded because empty_tags was set above is( XML::Twig->nparse( $doc_file)->sprint, '', 'nparse file'); is( XML::Twig->nparse( twig_handlers => { doc => sub { $_->set_tag( 'foo'); } }, $doc_file)->sprint, '', 'nparse file and option'); unlink $doc_file; if( XML::Twig::_use( 'HTML::TreeBuilder', 3.13) && XML::Twig::_use( 'LWP::Simple') && XML::Twig::_use( 'LWP::UserAgent')) { $doc=q{foo

toto

}; is( XML::Twig->nparse( $doc)->sprint, $doc, 'nparse well formed html string'); $doc_file="doc.html"; spit( $doc_file => $doc); is( XML::Twig->nparse( $doc_file)->sprint, $doc, 'nparse well formed html file'); #is( XML::Twig->nparse( "file://$doc_file")->sprint, $doc, 'nparse well formed url'); unlink $doc_file; XML::Twig::_disallow_use( 'HTML::TreeBuilder'); eval{ XML::Twig->new->parse_html( ''); }; matches( $@, "^cannot parse HTML: missing HTML::TreeBuilder", "parse_html without HTML::TreeBuilder"); XML::Twig::_allow_use( 'HTML::TreeBuilder'); } else { skip( 3, "need HTML::TreeBuilder 3.13+"); } if( XML::Twig::_use( 'HTML::TreeBuilder', 3.13) && XML::Twig::_use( 'LWP::Simple') && XML::Twig::_use( 'LWP::UserAgent')) { $doc=q{foo

toto
tata

}; (my $expected= $doc)=~ s{
}{
}; $doc_file="doc.html"; spit( $doc_file => $doc); is( XML::Twig->nparse( $doc_file)->sprint, $expected, 'nparse html file'); #is( XML::Twig->nparse( "file://$doc_file")->sprint, $doc, 'nparse html url'); unlink $doc_file; } else { skip ( 1, "need HTML::TreeBuilder 3.13+"); } } { my $file= File::Spec->catfile( $Bin, "test_new_features_3_22.html"); if( -f $file) { XML::Twig::_disallow_use( 'LWP::Simple'); eval { XML::Twig->nparse( "file://$file"); }; matches( $@, "^missing LWP::Simple", "nparse html url without LWP::Simple"); XML::Twig::_allow_use( 'LWP::Simple'); if( XML::Twig::_use( 'LWP::Simple') && XML::Twig::_use( 'LWP::UserAgent') && XML::Twig::_use( 'HTML::TreeBuilder', 3.13)) { my $url= "file://$file"; $url=~ s{\\}{/}g; # we need a URL, not a file name my $content= XML::Twig->nparse( $url)->sprint; (my $expected= slurp( $file))=~ s{(<(meta|br)[^>]*>)}{$1}g; $expected=~s{

t3}{

t3

}; $expected=~ s{> "cannot test html url parsing without LWP::Simple and HTML::TreeBuilder 3.13+"); } } else { skip( 2 => "cannot find $file"); } } { my $file= File::Spec->catfile( $Bin, "test_new_features_3_22.xml"); if( -f $file) { XML::Twig::_disallow_use( 'LWP::Simple'); eval { XML::Twig->nparse( "file://$file"); }; matches( $@, "^missing LWP::Simple", "nparse url without LWP::Simple"); XML::Twig::_allow_use( 'LWP::Simple'); if( perl_io_layer_used()) { skip( 1 => "cannot test url parsing when UTF8 perlIO layer used"); } elsif( XML::Twig::_use( 'LWP::Simple') && XML::Twig::_use( 'LWP::UserAgent')) { my $url= "file://$file"; $url=~ s{\\}{/}g; # we need a URL, not a file name if( LWP::Simple::get( $url)) { my $content= XML::Twig->nparse( $url)->sprint; is( $content, "", "nparse url (nothing there)"); } else { skip( 1 => "it looks like your LWP::Simple's get cannot handle '$url'"); } } else { skip( 1 => "cannot test url parsing without LWP"); } } else { skip( 2 => "cannot find $file"); } } { my $file= File::Spec->catfile( "t", "test_new_features_3_22.xml"); open( FH, "<$file") or die "cannot find test file '$file': $!"; my $content= XML::Twig->nparse( \*FH)->sprint; is( $content, "", "nparse glob"); } XML-Twig-3.52/t/test2_2.xml0000644000175000017500000000173612732215763015543 0ustar mrodrigumrodrigu ]>
S1 I1 S1 I2 S1 Title S1 P1 S2 P2 Note P1 S1 para 3
S2 intro S2 Title S2 P1 S2 P2 S2 P3
Annex Title Annex P1 Annex P2
XML-Twig-3.52/t/xmlxpath_09a_string_length.t0000755000175000017500000000127612732215763021173 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 5); use XML::Twig::XPath; my $doc_one = qq|para one|; my $t= XML::Twig::XPath->new( keep_spaces => 1); $t->parse( $doc_one); ok( $t); my $doc_one_chars = $t->find( 'string-length(/doc/text())'); ok($doc_one_chars == 0, 1); my $doc_two = qq| para one has bold text |; $t->parse( $doc_two); ok( $t); my $doc_two_chars = $t->find( 'string-length(/doc/text())'); ok($doc_two_chars == 3, 1); my $doc_two_para_chars = $t->find( 'string-length(/doc/para/text())'); ok($doc_two_para_chars == 13, 1); exit 0; XML-Twig-3.52/t/xmlxpath_09string_length.t0000755000175000017500000000103212732215763020661 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 5); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @nodes; @nodes = $t->findnodes( '//*[string-length(name()) = 3]'); ok(@nodes, 2); @nodes = $t->findnodes( '//*[string-length(name()) < 3]'); ok(@nodes, 2); @nodes = $t->findnodes( '//*[string-length(name()) > 3]'); ok(@nodes, 3); exit 0; __DATA__ XML-Twig-3.52/t/test_3_50.t0000755000175000017500000000713013015347375015426 0ustar mrodrigumrodrigu#!/usr/bin/perl use strict; use warnings; use XML::Twig; use Test::More tests => 18; use utf8; SKIP: { if( XML::Twig::_use( 'XML::XPathEngine') && XML::Twig::_use( 'XML::Twig::XPath')) { ok( XML::Twig::XPath->new()->parse('')->findnodes('//namespace::*'), '//namespace::* does not crash'); } else { skip 'cannot use XML::Twig::XPath', 1; } } { my $doc=q{titlep 1p 2}; my $out; open( my $out_fh, '>', \$out); my $t= XML::Twig->new ( twig_handlers => { _default_ => sub { $_->flush( $out_fh); } }); $t->parse( $doc); is( $out, $doc, 'flush with _default_ handler'); } { my $doc=q{titlep 1p 2}; my $out; open( my $out_fh, '>', \$out); my $t= XML::Twig->new ( twig_handlers => { 'd' => sub { $_->flush( $out_fh); } }); $t->parse( $doc); #is( $out, $doc, 'flush with handler on the root'); } { # test notations my $doc=q{ ]> DirectionalLight { direction 0 -1 0 } XML::Twig->parse( 'file.xml'); }; my $t= XML::Twig->parse( $doc); my $n= $t->notation_list; is( join( ':', sort $t->notation_names), 'perl:vrml', 'notation_names'); is( join( ':', sort map { $_->name } $n->list), 'perl:vrml', 'notation_list (names)'); is( join( ':', sort map { $_->pubid } $n->list), 'Perl 22.4:VRML 1.0', 'notation_list (pubid)'); is( join( ':', sort map { $_->sysid || '' } $n->list), ':/usr/bin/perl', 'notation_list (pubid)'); is( $n->notation( 'perl')->pubid, 'Perl 22.4', 'individual notation pubid'); is( $n->notation( 'vrml')->base, undef, 'individual notation base'); is( $n->text, qq{\n}, 'all notations'); my $notations= () = ( $t->sprint() =~ m{sprint( update_DTD => 1) =~ m{delete( 'perl'); $notations= () = ( $t->sprint( update_DTD => 1) =~ m{notation( 'vrml')->pubid(), 'VRML 1.0', 'notation method'); $n->add_new_notation( 'svg', '', 'image/svg', 'SVG'); is( $n->notation( 'svg')->text, qq{}, 'new notation'); } { # somehow these were never tested (they are inlined within the module) my $t= XML::Twig->parse( ''); my $d= $t->root; my $e2= $t->first_elt( 'e2'); my $e1= XML::Twig::Elt->new( 'e1'); $d->set_first_child( $e1); $e2->set_prev_sibling( $e1); $e1->set_next_sibling( $e2); is( $t->sprint, '', 'set_first_child'); my $e3= XML::Twig::Elt->new( 'e3'); $d->set_last_child( $e3); $e2->set_next_sibling( $e3); $e3->set_prev_sibling( $e2); is( $t->sprint, '', 'set_last_child'); $e2->insert_new_elt( first_child => '#PCDATA')->_set_pcdata( 'foo'); is( $t->sprint, 'foo', '_set_pcdata'); $e1->insert_new_elt( first_child => '#CDATA')->_set_cdata( 'bar'); is( $t->sprint, 'foo', '_set_cdata'); } exit; XML-Twig-3.52/t/latin1_accented_char.iso-8859-10000644000175000017500000000000212732215763021020 0ustar mrodrigumrodrigué XML-Twig-3.52/t/test_class_methods.t0000755000175000017500000000416712732215763017617 0ustar mrodrigumrodrigu#!/usr/bin/perl -w # testing methods on class attribute: # class set_class add_to_class att_to_class add_att_to_class move_att_to_class # tag_to_class add_tag_to_class set_tag_class in_class use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; use XML::Twig; my $DEBUG=0; my $TMAX=26; print "1..$TMAX\n"; { my $root= XML::Twig->new->parse( q{})->root; nok( $root->class, "no class"); $root->set_class( 'foo'); is( $root->class, 'foo', 'set_class'); $root->set_class( 'bar'); is( $root->class, 'bar', 'set_class'); ok( $root->in_class( 'bar'), 'in_class (ok)'); nok( $root->in_class( 'foo'), 'in_class (nok)'); $root->add_to_class( 'foo'); ok( $root->in_class( 'bar'), 'in_class (ok)'); ok( $root->in_class( 'foo'), 'in_class (ok)'); nok( $root->in_class( 'baz'), 'in_class (nok)'); $root->tag_to_class; is( $root->class, 'doc', 'tag_to__class'); ok( $root->in_class( 'doc'), 'in_class (ok)'); nok( $root->in_class( 'foo'), 'in_class (nok)'); $root->tag_to_class; is( $root->class, 'doc', 'tag_to_class (with existing class)'); $root->add_tag_to_class; is( $root->class, 'doc', 'add_tag_to_class'); $root->att_to_class( 'att1'); is( $root->class, 'val1', 'att_to_class'); $root->att_to_class( 'att1'); is( $root->class, 'val1', 'att_to_class (with existing class)'); $root->add_att_to_class( 'att'); is( $root->class, 'val1', 'att_to_class (non existing att)'); $root->add_att_to_class( 'att2'); is( $root->class, 'val1 val2', 'att_to_class (2 classes now)'); ok( $root->in_class( 'val1'), 'in_class'); ok( $root->in_class( 'val2'), 'in_class'); nok( $root->in_class( 'val'), 'in_class (nok)'); $root->set_tag_class( 'new'); is( $root->sprint, '', 'set_tag_class'); $root->move_att_to_class( 'att2'); is( $root->sprint, '', 'set_tag_class'); ok( $root->matches( '.doc'), 'match on class (first)'); ok( $root->matches( '.val1'), 'match on class (middle)'); ok( $root->matches( '.val2'), 'match on class (last)'); nok( $root->matches( '.val'), 'match on class (not good)'); } exit 0; XML-Twig-3.52/t/xmlxpath_01basic.t0000755000175000017500000000102612732215763017066 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 5); ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok($t); my @root = $t->findnodes('/AAA'); ok(@root, 1); my @ccc = $t->findnodes('/AAA/CCC'); ok(@ccc, 3); my @bbb = $t->findnodes('/AAA/DDD/BBB'); ok(@bbb, 2); exit 0; __DATA__ Text XML-Twig-3.52/t/xmlxpath_test_twig_roots.t0000755000175000017500000001760012732215763021110 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; #use diagnostics; use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; my $DEBUG=0; print "1..12\n"; $|=1; $/= "\n\n"; my $t= XML::Twig::XPath->new( twig_roots => { }, twig_print_outside_roots => \*RESULT, error_context => 1, ); test_twig( $t, 1); $t= XML::Twig::XPath->new( twig_roots => { elt2 => sub { } }, twig_print_outside_roots => \*RESULT, error_context => 1, ); test_twig( $t, 2); $t= XML::Twig::XPath->new( twig_roots => { elt3 => sub { } }, twig_print_outside_roots => \*RESULT, error_context => 1, ); test_twig( $t, 3); $t= XML::Twig::XPath->new( twig_roots => { }, twig_print_outside_roots => \*RESULT, error_context => 1, ); test_twig( $t, 4); $t= XML::Twig::XPath->new( twig_roots => { elt2 => sub { } }, twig_print_outside_roots => \*RESULT, error_context => 1, ); test_twig( $t, 5); $t= XML::Twig::XPath->new( twig_roots => { elt3 => sub { } }, twig_print_outside_roots => \*RESULT, error_context => 1, ); test_twig( $t, 6); $t= XML::Twig::XPath->new( twig_roots => { }, twig_print_outside_roots => \*RESULT, error_context => 1, ); test_twig( $t, 7); $t= XML::Twig::XPath->new( twig_roots => { elt2 => sub { } }, twig_print_outside_roots => \*RESULT, error_context => 1, ); test_twig( $t, 8); $t= XML::Twig::XPath->new( twig_roots => { elt3 => sub { } }, twig_print_outside_roots => \*RESULT, error_context => 1, ); test_twig( $t, 9); $t= XML::Twig::XPath->new( twig_roots => { elt => sub { print RESULT "elt handler called on ", $_->gi, "\n"; }, }, start_tag_handlers => { doc => sub { print RESULT "start tag handler called on ", $_->gi, "\n"; }, }, end_tag_handlers => { doc => sub { print RESULT "end tag handler called on $_[1]\n"; }, }, ); test_twig( $t, 10); # test with doc root as root $t= XML::Twig::XPath->new( twig_roots => { doc => sub { $_->print( \*RESULT); } }); test_twig( $t, 11); # test with elt as root $t= XML::Twig::XPath->new( twig_roots => { elt => sub { $_->print( \*RESULT); } }); test_twig( $t, 12); exit 0; sub test_twig { my( $t, $test_nb)= @_; my $doc= read_doc(); my $expected_result= read_expected_result(); my $result_file= "test_twig_roots.res1"; open( RESULT, ">$result_file") or die "cannot create $result_file: $!"; $t->parse( $doc); check_result( $result_file, $test_nb, $expected_result); close RESULT; } sub check_result { my( $result_file, $test_no, $expected_result)= @_; # now check result my $result= read_result( $result_file); if( $result eq $expected_result) { print "ok $test_no\n"; } else { print "nok $test_no\n"; print STDERR "\ntest $test_no:\n", "expected: \n$expected_result\n", "real: \n$result\n"; } } { my $last_doc; my $buffered_result; sub read_doc { local $/="\n\n"; my $doc= ; # if the data starts with #doc then it's a doc, otherwise use the previous one if( $doc=~ /^\s*#\s*doc/) { $doc= clean_data( $doc); $last_doc= $doc; $buffered_result=''; return $doc; } else { $buffered_result= clean_data( $doc); return $last_doc; } } sub read_expected_result { if( $buffered_result) { return $buffered_result; } else { local $/="\n\n"; my $expected_result= ; $expected_result= clean_data( $expected_result); return $expected_result; } } } sub clean_data { my $data= shift; $data=~ s{^\s*#.*\n}{}m; # get rid of comments $data=~ s{\s*$}{}s; # remove trailing spaces (and \n) $data=~ s{(^|\n)\s*(\n|$)}{}g; # remove empty lines return $data; } sub read_result { my $file= shift; local $/="\n"; open( RESULT, "<$file") or die "cannot read $file: $!"; my @result= grep {m/\S/} ; my $result= join( '', @result); $result=~ s{(^|\n)\s*(\n|$)}{}g; # remove empty lines close RESULT; unlink $file; return $result; } __DATA__ # doc 1 text subelt text another elt text an other type of element text of subelt # expected_res 1 text subelt text another elt text an other type of element text of subelt # expected_res 2 text subelt text another elt text text of subelt # expected_res 3 text subelt text another elt text an other type of element # doc 2 text subelt text another elt text an other type of element text of subelt # expected_res 4 text subelt text another elt text an other type of element text of subelt # expected_res 5 text subelt text another elt text text of subelt # expected_res 6 text subelt text another elt text an other type of element # doc 3 text subelt text another elt text an other type of element text of subelt # expected_res 7 text subelt text another elt text an other type of element text of subelt # expected_res 8 text subelt text another elt text text of subelt # expected_res 9 text subelt text another elt text an other type of element # doc 4 # expected_res 10 start tag handler called on doc elt handler called on elt end tag handler called on doc # expected_res 11 # expected_res 12 XML-Twig-3.52/t/tests_3_23.t0000755000175000017500000000204712732215763015613 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use Carp; use XML::Twig; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; my $DEBUG=0; print "1..6\n"; if( _use( 'XML::XPathEngine') || _use( 'XML::XPath') ) { _use( 'XML::Twig::XPath'); my $t= XML::Twig::XPath->nparse( q{ foobar bazfoobar }); is( $t->findvalue( '//e[.="foo"]/@a'), "ea1", 'xpath on attributes'); is( $t->findvalue( '//s[./e="foo"]/@a'), "sa1", 'xpath with elt content test'); is( $t->findvalue( '/d/s[e="foo"]/@a'), "sa1", 'xpath with elt content test (short form)'); } else { skip( 3); } { my $t= XML::Twig->nparse( ''); my @xpath_result= $t->get_xpath( '/'); is( ref( $xpath_result[0]), 'XML::Twig', "get_xpath( '/')"); @xpath_result= $t->get_xpath( '/doc[1]'); is( $xpath_result[0]->tag, 'doc', "get_xpath( '/doc[1]')"); @xpath_result= $t->get_xpath( '/notdoc[1]'); is( scalar( @xpath_result), 0, "get_xpath( '/notdoc[1]')"); } XML-Twig-3.52/t/test_autoencoding_conversion.t0000755000175000017500000000223712732215763021707 0ustar mrodrigumrodrigu#!/usr/bin/perl -w # use strict; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; print "1..2\n"; if( $] < 5.008) { skip( 2, "needs perl 5.8 or above to test auto conversion"); } elsif( $ENV{PERL_UNICODE} && $ENV{PERL_UNICODE}=~ m{SA}) { skip( 2, 'auto conversion does not happen when $PERL_UNICODE set to SA'); } else { _use( 'Encode'); my $char_utf8 = qq{\x{e9}}; my $char_latin1 = encode("iso-8859-1", $char_utf8); my $doc_utf8 = qq{$char_utf8}; my $doc_latin1 = qq{$char_latin1}; my $file_utf8 = "doc_utf8.xml"; spit( $file_utf8, $doc_utf8); my $file_latin1 = "doc_latin1.xml"; spit( $file_latin1, $doc_latin1); my( $q, $q2) = ( ($^O eq "MSWin32") || ($^O eq 'VMS') ) ? ('"', "'") : ("'", '"'); my $lib= File::Spec->catfile( 'blib', 'lib'); my $run_it=qq{$^X -I $lib -MXML::Twig -e$q print XML::Twig->parse( $q2$file_latin1$q2)->root->text$q}; my $parsed= `$run_it`; is( $parsed, $char_utf8, 'testing auto transcoding of latin1 output'); is( $parsed, $char_latin1, 'testing auto transcoding of latin1 output'); } XML-Twig-3.52/t/test_bugs_3_22.t0000755000175000017500000005170312732215763016452 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; $|=1; my $DEBUG=0; use lib File::Spec->catdir(File::Spec->curdir,"blib/lib"); use XML::Twig; my $TMAX=181; print "1..$TMAX\n"; { # testing how well embedded comments and pi's are kept when changing the content my @tests= ( [ "foo bar baz", "foo bar", "foo bar" ], [ "foo bar baz", "foo bar baz foobar", "foo bar baz foobar" ], [ "foo bar foobar tutu", "bar tutu", "bar tutu" ], [ "foo bar foobar baz", "foobar baz", "foobar baz"], [ "foo baz", "foo bar baz", "foo bar baz"], [ "foo baz", "foo bar baz", "foo bar baz"], [ "foo bar baz", "bar baz", "bar baz"], [ "foo bar baz toto", "foo toto", "foo toto"], ); foreach my $test (@tests) { my( $initial, $set, $expected)= @$test; my $t= XML::Twig->nparse( "$initial"); $t->root->set_content( $set); is( $t->sprint, "$expected", "set_content '$initial' => '$set'"); } } { # RT #17145 my $twig= new XML::Twig()->parse(""); is( scalar( $twig->get_xpath('//root/elt[1]/child')), 0, "Context position of non-existent elements in XPATH expressions"); } { # some extra coverage my @siblings= XML::Twig->nparse( "")->root->following_elts; is( scalar( @siblings), 0, "following_elts on last sibling"); is( XML::Twig->nparse( "")->root->del_id->sprint, "", "del_id on elt with no atts"); # next_elt with deep tree ( my $t= XML::Twig->nparse( q{ }); foreach my $e ($t->root->descendants_or_self) { is( scalar( $e->_descendants), $e->att( 'n'), "_descendant " . $e->tag . "\n"); is( scalar( $e->_descendants( 1)), $e->att( 'n') + 1, "_descendant(1) " . $e->tag . "\n"); } } { my $exp= '/foo/1^%'; eval { XML::Twig->nparse( "")->get_xpath( $exp); }; matches( $@, "^error in xpath expression", "xpath with valid expression then stuff left"); } { my $t = XML::Twig->nparse( ""); my $root = $t->root; my $elt =XML::Twig::Elt->new( 'foo'); foreach my $pos ( qw( before after)) { eval { $elt->paste( $pos => $root); }; matches( $@, "^cannot paste $pos root", "paste $pos root"); eval " \$elt->paste_$pos( \$root)"; matches( $@, "^cannot paste $pos root", "paste $pos root"); } } { is( XML::Twig->nparse( comments => "process", pi => "process", "")->_dump, "document\n|-doc\n| |-COMMENT: ''\n| |-PI: 't' - 'data'\n| |-PI: 't' - ''\n", "_dump PI/comment" ); } { is( XML::Twig->nparse( '')->root->get_xpath( '.', 0)->gi, 'doc', 'get_xpath: .'); } { my $t= XML::Twig->nparse( ''); $t->first_elt( '#CDATA')->set_text( 'bar'); is( $t->sprint, '', " set_text on CDATA"); $t->root->set_text( 'bar'); is( $t->sprint, 'bar', " set_text on elt containing CDATA"); $t= XML::Twig->nparse( ''); $t->first_elt( '#CDATA')->set_text( 'bar', force_pcdata => 1); is( $t->sprint, 'bar', " set_text on CDATA with force_pcdata");} # print/flush entity # SAX export entity { my $enc= "a_non_existent_encoding_bwaaahhh"; eval { XML::Twig->iconv_convert( $enc); }; matches( $@, "^(Unsupported|Text::Iconv not available|Can't locate)", "unsupported encoding"); } { # test comments handlers my $doc= qq{}; is( XML::Twig->nparse( twig_handlers => { '#COMMENT' => sub { return uc( $_[1]); } }, $doc)->sprint, qq{}, "comment handler" ); is( XML::Twig->nparse( twig_handlers => { '#COMMENT' => sub { return uc( $_[1]); } }, keep_encoding => 1, $doc)->sprint, qq{}, "comment handler (with keep_encoding)" ); is( XML::Twig->nparse( twig_handlers => { '#COMMENT' => sub { return; } }, keep_encoding => 0, $doc)->sprint, qq{}, "comment handler returning undef comment" ); is( XML::Twig->nparse( twig_handlers => { '#COMMENT' => sub { return ''; } }, keep_encoding => 1, $doc)->sprint, qq{}, "comment handler returning empty comment (with keep_encoding)" ); is( XML::Twig->nparse( comments => 'process', twig_handlers => { '#COMMENT' => sub { $_->set_comment( uc( $_->comment)); } }, keep_encoding => 0, $doc)->sprint, qq{}, "comment handler, process mode" ); is( XML::Twig->nparse( comments => 'process', twig_handlers => { '#COMMENT' => sub { $_->set_comment( uc( $_->comment)); } }, keep_encoding => 1, $doc)->sprint, qq{}, "comment handler (with keep_encoding), process mode" ); is( XML::Twig->nparse( comments => 'process', twig_handlers => { elt => sub { $_->cut; } }, keep_encoding => 0, $doc)->sprint, qq{}, "comment handler deletes comment, process mode" ); is( XML::Twig->nparse( comments => 'process', twig_handlers => { '#COMMENT' => sub { $_->cut; } }, keep_encoding => 0, $doc)->sprint, qq{}, "comment handler deletes comment, process mode" ); is( XML::Twig->nparse( comments => 'process', twig_handlers => { '#COMMENT' => sub { $_->set_comment( ''); } }, keep_encoding => 1, $doc)->sprint, qq{}, "comment handler returning empty comment (with keep_encoding), process mode" ); } { # check pi element handler in keep_encoding mode is( XML::Twig->nparse( pi => 'process', twig_handlers => { '?t' => sub { $_->set_data( uc( $_->data)); } }, '')->sprint, '', 'pi element handler'); is( XML::Twig->nparse( pi => 'process', keep_encoding => 1,twig_handlers => { '?t' => sub { $_->set_data( uc( $_->data)); } }, '')->sprint, '', 'pi element handler in keep_encoding mode'); } { # test changes on comments before the root element my $doc= q{}; is( XML::Twig->nparse( $doc)->sprint, $doc, 'comment after root element'); is_like( XML::Twig->nparse( pi => 'process', comments => 'process', $doc)->sprint, $doc, 'comment before root element (pi/comment => process)'); is_like( XML::Twig->nparse( pi => 'process', $doc)->sprint, $doc, 'comment before root element (pi => process)'); is_like( XML::Twig->nparse( comments => 'process', $doc)->sprint, $doc, 'comment before root element (comment => process)'); } { # test bug on comments after the root element RT #17064 my $doc= q{}; is( XML::Twig->nparse( $doc)->sprint, $doc, 'comment after root element'); is( XML::Twig->nparse( pi => 'process', comments => 'process', $doc)->sprint, $doc, 'comment after root element (pi/comment => process)'); is_like( XML::Twig->nparse( pi => 'process', $doc)->sprint, $doc, 'comment before root element (pi => process)'); is_like( XML::Twig->nparse( comments => 'process', $doc)->sprint, $doc, 'comment before root element (comment => process)'); } { # test bug on doctype declaration (RT #17044) my $doc= qq{\n}; is( XML::Twig->nparse( $doc)->sprint, $doc, "doctype with public id"); is( XML::Twig->nparse( $doc)->sprint( Update_DTD => 1), $doc, "doctype with public id (update_DTD => 1)"); $doc= qq{\n}; is( XML::Twig->nparse( $doc)->sprint, $doc, "doctype with public id"); is( XML::Twig->nparse( $doc)->sprint( updateDTD => 1) , $doc, "doctype with public id (update_DTD => 1)"); } { # test bug on tag names similar to internal names RT #16540 ok( XML::Twig->nparse( twig_handlers => { level => sub {} }, ''), " bug on tag names similar to internal names RT #16540"); } { # test parsing of an html string if( XML::Twig::_use( 'HTML::TreeBuilder', 3.13) && XML::Twig::_use( 'HTML::Entities::Numbered')) { ok( XML::Twig->parse( error_context => 1, ' foo

bar
été '), "parsing an html string"); } else { skip( 1, "need HTML::TreeBuilder 3.13+ and HTML::Entities::Numbered for this test"); } } { # testing print_to_file my $tmp= "print_to_file.xml"; my $doc= "foo"; unlink( $tmp); # no check, it could not be there my $t1= XML::Twig->nparse( $doc)->print_to_file( $tmp); ok( -f $tmp, "print_to_file created document"); my $t2= XML::Twig->nparse( $tmp); is( $t2->sprint, $t1->sprint, "generated document identical to original document"); unlink( $tmp); my $e1= XML::Twig->parse( '
foobar')->first_elt( 'b')->print_to_file( $tmp); ok( -f $tmp, "print_to_file on elt created document"); $t2= XML::Twig->nparse( $tmp); is( $t2->sprint, 'bar', "generated sub-document identical to original sub-document"); unlink( $tmp); # failure modes eval { XML::Twig->nparse( $tmp); }; mtest( $@, "Couldn't open $tmp:"); my $non_existent="non_existent_I_hope_01/tmp"; while( -f $non_existent) { $non_existent++; } # most likely unnecessary ;--) eval { $t1->print_to_file( $non_existent); }; mtest( $@, "cannot create file $non_existent:"); } { my $doc=q{}; my $t= XML::Twig->nparse( $doc); test_get_xpath( $t, q{/doc/elt[1][@att2="v2"]}, ''); } { my $doc=q{foobarbazfoobar}; my $t= XML::Twig->nparse( $doc); test_get_xpath( $t, q{/d/e[@a="1"][2]}, 'e2'); test_get_xpath( $t, q{/d/e[@a="1"][-2]}, 'e2'); test_get_xpath( $t, q{/d/e[@a="1"][-1]}, 'e4'); test_get_xpath( $t, q{/d/e[@a="1"][-3]}, 'e1'); } { # test support for new conditions condition in get_xpath my $doc=q{foobarbaz}; my $t= XML::Twig->nparse( $doc); # just checking test_get_xpath( $t, q{//elt[@a]}, 'elt1'); is( ids( $t->get_xpath( q{//*[@a]})), 'd1:elt1', '//*[@a] xpath exp'); # test support for !@att condition in get_xpath is( ids( $t->get_xpath( q{//elt[!@a]})), 'elt2:elt3', '//elt[!@a] xpath exp'); is( ids( $t->get_xpath( q{//elt[not@a]})), 'elt2:elt3', '//elt[not@a] xpath exp'); is( ids( $t->get_xpath( q{/doc/elt[not@a]})), 'elt2:elt3', '/doc/elt[not@a] xpath exp'); is( ids( $t->get_xpath( q{//*[!@a]})), 'elt2:elt3', '//*[!@a] xpath exp'); is( ids( $t->get_xpath( q{//*[not @a]})), 'elt2:elt3', '//*[not @a] xpath exp'); is( ids( $t->get_xpath( q{/doc/*[not @a]})), 'elt2:elt3', '/doc/*[not @a] xpath exp'); # support for ( and ) test_get_xpath( $t, q{//*[@id="d1" or @a and @id="elt1"]}, 'd1:elt1'); test_get_xpath( $t, q{//*[(@id="d1" or @a) and @id="elt1"]}, 'elt1'); } { # more test on new XPath support: axis in node test part my $doc=q{ }; my $t= XML::Twig->nparse( $doc); # parent axis in node test part test_get_xpath( $t, q{/doc//selt/..}, 'elt1:elta1'); test_get_xpath( $t, q{/doc//selt/parent::elt}, 'elt1'); test_get_xpath( $t, q{/doc//selt/parent::elta}, 'elta1'); test_get_xpath( $t, q{//sseltb/ancestor::eltc}, 'eltc1'); test_get_xpath( $t, q{//sseltb/ancestor::*}, 'd1:eltb1:seltb1:eltc1:seltb2'); test_get_xpath( $t, q{//sseltb/ancestor-or-self::eltc}, 'eltc1'); test_get_xpath( $t, q{//sseltb/ancestor-or-self::*}, 'd1:eltb1:seltb1:sseltb1:eltc1:seltb2:sseltb2'); test_get_xpath( $t, q{/doc//*[@id="eltc1"]/descendant::*}, 'seltb2:sseltb2'); test_get_xpath( $t, q{/doc//*[@id="eltc1"]/descendant::sseltb}, 'sseltb2'); test_get_xpath( $t, q{/doc//*[@id="eltc1"]/descendant::eltc}, ''); test_get_xpath( $t, q{/doc//*[@id="eltc1"]/descendant-or-self::*}, 'eltc1:seltb2:sseltb2'); test_get_xpath( $t, q{/doc//*[@id="eltc1"]/descendant-or-self::eltc}, 'eltc1'); test_get_xpath( $t, q{/doc//*[@id="eltc1"]/descendant-or-self::seltb}, 'seltb2'); test_get_xpath( $t, q{/doc/elt/following-sibling::*}, 'elta1:elt2:eltb1:eltc1'); test_get_xpath( $t, q{/doc/elt/preceding-sibling::*}, 'elt1:elta1'); test_get_xpath( $t, q{/doc/elt[@id="elt1"]/preceding-sibling::*}, ''); test_get_xpath( $t, q{/doc/elt/following-sibling::elt}, 'elt2'); test_get_xpath( $t, q{/doc/elt/preceding-sibling::elt}, 'elt1'); test_get_xpath( $t, q{/doc/elt[@id="elt1"]/preceding-sibling::elt}, ''); is( $t->elt_id( "sseltb1")->following_elt->id, 'eltc1', 'following_elt'); is( ids( $t->elt_id( "sseltb1")->following_elts), 'eltc1:seltb2:sseltb2', 'following_elts'); is( ids( $t->elt_id( "sseltb1")->following_elts( '')), 'eltc1:seltb2:sseltb2', 'following_elts( "")'); my @elts= $t->elt_id( "eltc1")->descendants_or_self; is( ids( @elts), 'eltc1:seltb2:sseltb2', 'descendants_or_self'); is( ids( XML::Twig::_unique_elts( @elts)), 'eltc1:seltb2:sseltb2', '_unique_elts'); test_get_xpath( $t, q{/doc//[@id="sseltb1"]/following::*}, 'eltc1:seltb2:sseltb2'); test_get_xpath( $t, q{/doc//[@id="sseltb1"]/following::seltb}, 'seltb2'); test_get_xpath( $t, q{/doc//[@id="selt1"]/following::elt}, 'elt2'); ok( $t->root->last_descendant( 'doc'), "checking if last_descendant returns the element itself"); test_get_xpath( $t, q{/doc/preceding::*}, ''); test_get_xpath( $t, q{/doc/elt[1]/preceding::*}, ''); test_get_xpath( $t, q{/doc/elt/preceding::*}, 'd1:elt1:selt1:elta1:selt2'); test_get_xpath( $t, q{/doc//[@id="sseltb2"]/preceding::seltb}, 'seltb1'); test_get_xpath( $t, q{/doc//[@id="selt1"]/preceding::elt}, ''); test_get_xpath( $t, q{/doc//[@id="selt2"]/preceding::elt}, 'elt1'); test_get_xpath( $t, q{/doc/self::doc}, 'd1'); test_get_xpath( $t, q{/doc/self::*}, 'd1'); test_get_xpath( $t, q{/doc/self::elt}, ''); test_get_xpath( $t, q{//[@id="selt1"]/self::*}, 'selt1'); test_get_xpath( $t, q{//[@id="selt1"]/self::selt}, 'selt1'); test_get_xpath( $t, q{//[@id="selt1"]/self::elt}, ''); } { # more tests: more than 1 predicate my $doc=q{}; my $t= XML::Twig->nparse( $doc); test_get_xpath( $t, q{/doc/elt[@id][@att="v1"]}, 'elt1:elt2'); test_get_xpath( $t, q{/doc/elt[@id][@att2="v1"]}, 'elt1'); test_get_xpath( $t, q{/doc/elt[@id][1]}, 'elt1'); test_get_xpath( $t, q{/doc/elt[@att="v1"][1]}, 'elt1'); test_get_xpath( $t, q{/doc/elt[@att="v2"][1]}, ''); test_get_xpath( $t, q{/doc/elt[@att="v1"][2]}, 'elt2'); test_get_xpath( $t, q{/doc/elt[1][@att2="v1"]}, 'elt1'); test_get_xpath( $t, q{/doc/elt[1][@att2="v2"]}, ''); test_get_xpath( $t, q{/doc/elt[@att2="v2"][1]}, 'elt2'); test_get_xpath( $t, q{/doc/elt[@att2="v2"][2]}, ''); test_get_xpath( $t, q{/doc/elt[@att2][1]}, 'elt1'); test_get_xpath( $t, q{/doc/elt[@att2][2]}, 'elt2'); test_get_xpath( $t, q{/doc/elt[@att2][3]}, ''); test_get_xpath( $t, q{/doc/elt[@att2][-1]}, 'elt2'); test_get_xpath( $t, q{/doc/elt[@att2][-2]}, 'elt1'); test_get_xpath( $t, q{/doc/elt[@att2][-3]}, ''); } { # testing creation of elements in the proper class package foo; use base 'XML::Twig::Elt'; package main; my $t= XML::Twig->new( elt_class => "foo")->parse( ''); my $elt= $t->first_elt( 'elt'); $elt->set_text( 'bar'); is( $elt->first_child->text, 'bar', "content of element created with set_text"); is( ref( $elt->first_child), 'foo', "class of element created with set_text"); $elt->set_content( 'baz'); is( $elt->first_child->text, 'baz', "content of element created with set_content"); is( ref( $elt->first_child), 'foo', "class of element created with set_content"); $elt->insert( 'toto'); is( $elt->first_child->tag, 'toto', "tag of element created with set_content"); is( ref( $elt->first_child), 'foo', "class of element created with insert"); $elt->insert_new_elt( first_child => 'tata'); is( $elt->first_child->tag, 'tata', "tag of element created with insert_new_elt"); is( ref( $elt->first_child), 'foo', "class of element created with insert"); $elt->wrap_in( 'tutu'); is( $t->root->first_child->tag, 'tutu', "tag of element created with wrap_in"); is( ref( $t->root->first_child), 'foo', "class of element created with wrap_in"); $elt->prefix( 'titi'); is( $elt->first_child->text, 'titi', "content of element created with prefix"); is( ref( $elt->first_child), 'foo', "class of element created with prefix"); $elt->suffix( 'foobar'); is( $elt->last_child->text, 'foobar', "content of element created with suffix"); is( ref( $elt->last_child), 'foo', "class of element created with suffix"); $elt->last_child->split_at( 3); is( $elt->last_child->text, 'bar', "content of element created with split_at"); is( ref( $elt->last_child), 'foo', "class of element created with split_at"); is( ref( $elt->copy), 'foo', "class of element created with copy"); $t= XML::Twig->new( elt_class => "foo")->parse( 'toto'); $t->root->subs_text( qr{(to)} => '&elt( p => $1)'); is( $t->sprint, '

to

to

', "subs_text result"); my $result= join( '-', map { join( ":", ref($_), $_->tag) } $t->root->descendants); is( $result, "foo:p-foo:#PCDATA-foo:p-foo:#PCDATA", "subs_text classes and tags"); } { # wrap children with > in attribute my $doc=q{}; my $result =XML::Twig->new->parse( $doc)->root->wrap_children( '+', "w")->strip_att( 'id')->sprint; my $expected = q{}; is( $result => $expected, "wrap_children with > in attributes"); $result =XML::Twig->new->parse( $doc)->root->wrap_children( '+', "w")->strip_att( 'id')->sprint; $expected = q{}; is( $result => $expected, "wrap_children with > in attributes, > in condition"); $result =XML::Twig->new->parse( $doc)->root->wrap_children( '+', "w")->strip_att( 'id')->sprint; $expected = q{}; is( $result => $expected, "wrap_children with > in attributes un-escaped > in condition"); $result =XML::Twig->new->parse( $doc)->root->wrap_children( '+', "w")->strip_att( 'id')->sprint; $expected = q{}; is( $result => $expected, "wrap_children with > in attributes, 2 atts in condition"); $result =XML::Twig->new->parse( $doc)->root->wrap_children( '+', "w")->strip_att( 'id')->sprint; $expected = q{}; is( $result => $expected, "wrap_children with > in attributes, 2 atts in condition (no child matches)"); } { # test improvements to wrap_children my $doc= q{okNOK}; my $expected= q{okNOK}; my $t= XML::Twig->new->parse( $doc); $t->root->wrap_children( '+', w => { a => "&" }); $t->root->strip_att( 'id'); is( $t->sprint, $expected, "wrap_children with &"); } { # test bug on tests on attributes with a value of 0 (RT #15671) my $t= XML::Twig->nparse( ''); my $root = $t->root(); is( scalar $root->children('*[@id="1"]'), 1, 'testing @att="1"'); is( scalar $root->children('*[@id="0"]'), 1, 'testing @att="0"'); is( scalar $root->children('*[@id="0" or @id="1"]'), 2, 'testing @att="0" or'); is( scalar $root->children('*[@id="0" and @id="1"]'), 0, 'testing @att="0" and'); } { # test that the '>' after the doctype is properly output when there is no DTD RT# my $doctype=''; my $doc="$doctype"; is_like( XML::Twig->nparse( $doc)->sprint, $doc); is_like( XML::Twig->nparse( $doc)->doctype, $doctype); } XML-Twig-3.52/t/test_xml_split/0000755000175000017500000000000013015347632016576 5ustar mrodrigumrodriguXML-Twig-3.52/t/test_xml_split/test_xml_split_expected-2-03.xml0000644000175000017500000000003313015347624024627 0ustar mrodrigumrodriguelt1 content 3XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-4-03.xml0000644000175000017500000000003313015347624024631 0ustar mrodrigumrodriguelt1 content 3XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-1-03.xml0000644000175000017500000000003313015347624024626 0ustar mrodrigumrodriguelt1 content 3XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-5-02.xml0000644000175000017500000000012613015347624024634 0ustar mrodrigumrodrigu elt1 content 6 elt1 content 7 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-19-01.xml0000644000175000017500000000006113015347626024720 0ustar mrodrigumrodrigu elt1 content 1XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-21-01.xml0000644000175000017500000000024613015347627024717 0ustar mrodrigumrodrigu elt1 content 4 elt1 content 5 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-1-04.xml0000644000175000017500000000012013015347624024624 0ustar mrodrigumrodrigu elt1 content 4 elt1 content 5 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-17-08.xml0000644000175000017500000000006113015347626024725 0ustar mrodrigumrodrigu elt1 content 8XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-16-03.xml0000644000175000017500000000006113015347626024717 0ustar mrodrigumrodrigu elt1 content 3XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-1-01.xml0000644000175000017500000000003313015347624024624 0ustar mrodrigumrodriguelt1 content 1XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-9-04.xml0000644000175000017500000000012013015347625024635 0ustar mrodrigumrodrigu elt1 content 4 elt1 content 5 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-1-00.xml0000644000175000017500000000052713015347624024633 0ustar mrodrigumrodrigu XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-6-01.xml0000644000175000017500000000012013015347625024627 0ustar mrodrigumrodrigu elt1 content 4 elt1 content 5 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-4-01.xml0000644000175000017500000000003313015347624024627 0ustar mrodrigumrodriguelt1 content 1XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-3-09.xml0000644000175000017500000000003313015347624024636 0ustar mrodrigumrodriguelt1 content 9XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-3-05.xml0000644000175000017500000000003313015347624024632 0ustar mrodrigumrodriguelt1 content 5XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-17-01.xml0000644000175000017500000000006113015347626024716 0ustar mrodrigumrodrigu elt1 content 1XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-15-00.xml0000644000175000017500000000022713015347626024717 0ustar mrodrigumrodrigu XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-4-04.xml0000644000175000017500000000003313015347624024632 0ustar mrodrigumrodriguelt1 content 4XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-17-04.xml0000644000175000017500000000006113015347626024721 0ustar mrodrigumrodrigu elt1 content 4XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-1-05.xml0000644000175000017500000000025313015347624024634 0ustar mrodrigumrodrigu elt1 content 6 elt1 content 7 elt1 content 8 elt1 content 9 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-19-03.xml0000644000175000017500000000006113015347626024722 0ustar mrodrigumrodrigu elt1 content 3XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-16-05.xml0000644000175000017500000000030113015347626024716 0ustar mrodrigumrodrigu elt1 content 6 elt1 content 7 elt1 content 8 elt1 content 9 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-9-00.xml0000644000175000017500000000052713015347625024644 0ustar mrodrigumrodrigu XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-13-02.xml0000644000175000017500000000005513015347626024716 0ustar mrodrigumrodrigu & and ']]>XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-4-02.xml0000644000175000017500000000003313015347624024630 0ustar mrodrigumrodriguelt1 content 2XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-17-02.xml0000644000175000017500000000006113015347626024717 0ustar mrodrigumrodrigu elt1 content 2XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-7-00.xml0000644000175000017500000000011613015347625024634 0ustar mrodrigumrodrigu XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-8-00.xml0000644000175000017500000000011513015347625024634 0ustar mrodrigumrodrigu XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-18-03.xml0000644000175000017500000000042613015347626024726 0ustar mrodrigumrodrigu elt1 content 6 elt1 content 7 elt1 content 8 elt1 content 9 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-14-02.xml0000644000175000017500000000005512732215763024720 0ustar mrodrigumrodrigu & and ']]>XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-15-02.xml0000644000175000017500000000005513015347626024720 0ustar mrodrigumrodrigu & and ']]>XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-8-01.xml0000644000175000017500000000065413015347625024645 0ustar mrodrigumrodrigu elt1 content 1 elt1 content 2 elt1 content 3 elt1 content 4 elt1 content 5 elt1 content 6 elt1 content 7 elt1 content 8 elt1 content 9 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-3-01.xml0000644000175000017500000000003313015347624024626 0ustar mrodrigumrodriguelt1 content 1XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-19-00.xml0000644000175000017500000000056213015347626024725 0ustar mrodrigumrodrigu XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-9-02.xml0000644000175000017500000000003313015347625024636 0ustar mrodrigumrodriguelt1 content 2XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-7-02.xml0000644000175000017500000000005612732215763024643 0ustar mrodrigumrodrigu & and ']]> XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-13-01.xml0000644000175000017500000000005713015347626024717 0ustar mrodrigumrodrigutext with < > & and 'XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-16-01.xml0000644000175000017500000000006113015347626024715 0ustar mrodrigumrodrigu elt1 content 1XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-17-03.xml0000644000175000017500000000006113015347626024720 0ustar mrodrigumrodrigu elt1 content 3XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-19-05.xml0000644000175000017500000000030113015347626024721 0ustar mrodrigumrodrigu elt1 content 6 elt1 content 7 elt1 content 8 elt1 content 9 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-10-00.xml0000644000175000017500000000052713015347625024714 0ustar mrodrigumrodrigu XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-19-02.xml0000644000175000017500000000006113015347626024721 0ustar mrodrigumrodrigu elt1 content 2XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-20-00.xml0000644000175000017500000000014513015347626024712 0ustar mrodrigumrodrigu XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-5-03.xml0000644000175000017500000000022513015347624024635 0ustar mrodrigumrodrigu elt1 content 8 elt1 content 9 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-3-03.xml0000644000175000017500000000003313015347624024630 0ustar mrodrigumrodriguelt1 content 3XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-14-01.xml0000644000175000017500000000026613015347626024722 0ustar mrodrigumrodrigu text with < > & and ' & and ']]> XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-12-01.xml0000644000175000017500000000065513015347625024721 0ustar mrodrigumrodrigu elt1 content 1 elt1 content 2 elt1 content 3 elt1 content 4 elt1 content 5 elt1 content 6 elt1 content 7 elt1 content 8 elt1 content 9 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-3-00.xml0000644000175000017500000000126513015347624024635 0ustar mrodrigumrodrigu XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-4-08.xml0000644000175000017500000000003313015347624024636 0ustar mrodrigumrodriguelt1 content 8XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-10-03.xml0000644000175000017500000000003313015347625024707 0ustar mrodrigumrodriguelt1 content 3XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-9-01.xml0000644000175000017500000000003313015347625024635 0ustar mrodrigumrodriguelt1 content 1XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-20-01.xml0000644000175000017500000000070213015347626024712 0ustar mrodrigumrodrigu elt1 content 1 elt1 content 2 elt1 content 3 elt1 content 4 elt1 content 5 elt1 content 6 elt1 content 7 elt1 content 8 elt1 content 9 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-17-06.xml0000644000175000017500000000006113015347626024723 0ustar mrodrigumrodrigu elt1 content 6XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-21-03.xml0000644000175000017500000000021013015347627024710 0ustar mrodrigumrodrigu elt1 content 9 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-3-07.xml0000644000175000017500000000003313015347624024634 0ustar mrodrigumrodriguelt1 content 7XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-17-09.xml0000644000175000017500000000006113015347626024726 0ustar mrodrigumrodrigu elt1 content 9XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-2-01.xml0000644000175000017500000000003313015347624024625 0ustar mrodrigumrodriguelt1 content 1XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-1-02.xml0000644000175000017500000000003313015347624024625 0ustar mrodrigumrodriguelt1 content 2XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-5-00.xml0000644000175000017500000000035513015347624024636 0ustar mrodrigumrodrigu elt1 content 1 elt1 content 2 elt1 content 3 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-4-05.xml0000644000175000017500000000003313015347624024633 0ustar mrodrigumrodriguelt1 content 5XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-21-02.xml0000644000175000017500000000034113015347627024714 0ustar mrodrigumrodrigu elt1 content 6 elt1 content 7 elt1 content 8 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-4-06.xml0000644000175000017500000000003313015347624024634 0ustar mrodrigumrodriguelt1 content 6XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-17-00.xml0000644000175000017500000000132413015347626024720 0ustar mrodrigumrodrigu XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-7-01.xml0000644000175000017500000000065413015347625024644 0ustar mrodrigumrodrigu elt1 content 1 elt1 content 2 elt1 content 3 elt1 content 4 elt1 content 5 elt1 content 6 elt1 content 7 elt1 content 8 elt1 content 9 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-9-03.xml0000644000175000017500000000003313015347625024637 0ustar mrodrigumrodriguelt1 content 3XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-9-05.xml0000644000175000017500000000025313015347625024645 0ustar mrodrigumrodrigu elt1 content 6 elt1 content 7 elt1 content 8 elt1 content 9 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-5-01.xml0000644000175000017500000000012013015347624024625 0ustar mrodrigumrodrigu elt1 content 4 elt1 content 5 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-16-02.xml0000644000175000017500000000006113015347626024716 0ustar mrodrigumrodrigu elt1 content 2XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-19-04.xml0000644000175000017500000000014613015347626024727 0ustar mrodrigumrodrigu elt1 content 4 elt1 content 5 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-4-07.xml0000644000175000017500000000003313015347624024635 0ustar mrodrigumrodriguelt1 content 7XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-4-00.xml0000644000175000017500000000123213015347624024630 0ustar mrodrigumrodrigu XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-6-03.xml0000644000175000017500000000022213015347625024634 0ustar mrodrigumrodrigu elt1 content 8 elt1 content 9 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-2-04.xml0000644000175000017500000000012013015347624024625 0ustar mrodrigumrodrigu elt1 content 4 elt1 content 5 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-17-07.xml0000644000175000017500000000006113015347626024724 0ustar mrodrigumrodrigu elt1 content 7XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-21-00.xml0000644000175000017500000000055713015347627024723 0ustar mrodrigumrodrigu elt1 content 1 elt1 content 2 elt1 content 3 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-18-01.xml0000644000175000017500000000024413015347626024722 0ustar mrodrigumrodrigu elt1 content 1 elt1 content 2 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-14-00.xml0000644000175000017500000000012213015347626024710 0ustar mrodrigumrodrigu XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-3-06.xml0000644000175000017500000000003313015347624024633 0ustar mrodrigumrodriguelt1 content 6XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-13-00.xml0000644000175000017500000000022713015347626024715 0ustar mrodrigumrodrigu XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-10-04.xml0000644000175000017500000000012013015347625024705 0ustar mrodrigumrodrigu elt1 content 4 elt1 content 5 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-2-02.xml0000644000175000017500000000003313015347624024626 0ustar mrodrigumrodriguelt1 content 2XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-18-00.xml0000644000175000017500000000035313015347626024722 0ustar mrodrigumrodrigu XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-10-05.xml0000644000175000017500000000025313015347625024715 0ustar mrodrigumrodrigu elt1 content 6 elt1 content 7 elt1 content 8 elt1 content 9 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-11-00.xml0000644000175000017500000000012013015347625024702 0ustar mrodrigumrodrigu XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-6-00.xml0000644000175000017500000000036313015347625024637 0ustar mrodrigumrodrigu elt1 content 1 elt1 content 2 elt1 content 3 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-16-04.xml0000644000175000017500000000014613015347626024724 0ustar mrodrigumrodrigu elt1 content 4 elt1 content 5 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-10-02.xml0000644000175000017500000000003313015347625024706 0ustar mrodrigumrodriguelt1 content 2XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-3-08.xml0000644000175000017500000000003313015347624024635 0ustar mrodrigumrodriguelt1 content 8XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-8-02.xml0000644000175000017500000000005612732215763024644 0ustar mrodrigumrodrigu & and ']]> XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-15-01.xml0000644000175000017500000000005713015347626024721 0ustar mrodrigumrodrigutext with < > & and 'XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-4-09.xml0000644000175000017500000000003313015347624024637 0ustar mrodrigumrodriguelt1 content 9XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-16-00.xml0000644000175000017500000000056213015347626024722 0ustar mrodrigumrodrigu XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-3-02.xml0000644000175000017500000000003313015347624024627 0ustar mrodrigumrodriguelt1 content 2XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-12-00.xml0000644000175000017500000000011713015347625024711 0ustar mrodrigumrodrigu XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-2-00.xml0000644000175000017500000000052213015347624024627 0ustar mrodrigumrodrigu XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-6-02.xml0000644000175000017500000000012613015347625024636 0ustar mrodrigumrodrigu elt1 content 6 elt1 content 7 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-10-01.xml0000644000175000017500000000003313015347625024705 0ustar mrodrigumrodriguelt1 content 1XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-2-05.xml0000644000175000017500000000025313015347624024635 0ustar mrodrigumrodrigu elt1 content 6 elt1 content 7 elt1 content 8 elt1 content 9 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-18-02.xml0000644000175000017500000000033113015347626024720 0ustar mrodrigumrodrigu elt1 content 3 elt1 content 4 elt1 content 5 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-11-01.xml0000644000175000017500000000065513015347625024720 0ustar mrodrigumrodrigu elt1 content 1 elt1 content 2 elt1 content 3 elt1 content 4 elt1 content 5 elt1 content 6 elt1 content 7 elt1 content 8 elt1 content 9 XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-17-05.xml0000644000175000017500000000006113015347626024722 0ustar mrodrigumrodrigu elt1 content 5XML-Twig-3.52/t/test_xml_split/test_xml_split_expected-3-04.xml0000644000175000017500000000003313015347624024631 0ustar mrodrigumrodriguelt1 content 4XML-Twig-3.52/t/test_3_44.t0000755000175000017500000002774612732215763015450 0ustar mrodrigumrodrigu#!/usr/bin/perl use strict; use warnings; use XML::Twig; use Test::More tests => 86; { my $e= XML::Twig::Elt->new( 'foo'); $e->set_content( { bar => 'baz', toto => 'titi' }); is( $e->sprint, '', 'set_content with just attributes'); } { my $e= XML::Twig::Elt->parse( 't'); $e->set_content( 'x'); is( $e->sprint, 'x', 'set_content on element that contains just text'); $e->first_child( '#PCDATA')->set_content( 'y'); is( $e->sprint, 'y', 'set_content on text element'); $e->set_content( XML::Twig::Elt->new( 'e')); is( $e->sprint, '', 'set_content element on element that contains just text'); $e->set_content( 'z', XML::Twig::Elt->new( 'e')); is( $e->sprint, 'z', 'set_content with 2 elements on element that contains just text'); $e->set_content( ''); is( $e->sprint, '', 'set_content with empty content'); $e->set_content( '#EMPTY'); is( $e->sprint, '', 'set_content with empty content and #EMPTY'); $e->set_content( 'x', 'y'); is( $e->sprint, 'xy', 'set_content with 2 strings'); $e->set_content( '', 'y'); is( $e->sprint, 'y', 'set_content with 2 strings, first one empty'); } { my $t= XML::Twig->parse( ''); my $s= $t->first_elt( 's'); $s->att_to_field( 'a'); is( $s->sprint, '1', 'att_to_field with default name'); $s->field_to_att( 'a'); is( $s->sprint, '', 'field_to_att with default name'); $s->att_to_field( a => 'b'); is( $s->sprint, '1', 'att_to_field with non default name'); $s->field_to_att( b => 'c'); is( $s->sprint, '', 'field_to_att with non default name'); } { my $t= XML::Twig->parse( 'f'); my $r= $t->root; $r->suffix( '&1', 'opt' ); is( $t->sprint, 'f&1', 'suffix, non asis option'); $r->suffix( '&2', 'asis'); is( $t->sprint, 'f&1&2', 'suffix, asis option'); $r->suffix( '&3'); is( $t->sprint, 'f&1&2&3', 'suffix, after a suffix with an asis option'); } { my $t= XML::Twig->parse( 'f'); $t->root->last_child->suffix( '&1', 'opt' ); is( $t->sprint, 'f&1', 'pcdata suffix, non asis option'); $t->root->last_child->suffix( '&2', 'asis'); is( $t->sprint, 'f&1&2', 'pcdata suffix, asis option'); $t->root->last_child->suffix( '&3', 'asis'); is( $t->sprint, 'f&1&2&3', 'pcdata suffix, asis option, after an asis element'); $t->root->last_child->suffix( '&4'); is( $t->sprint, 'f&1&2&3&4', 'pcdata suffix, after a suffix with an asis option'); } { my $t= XML::Twig->parse( 'f'); my $r= $t->root; $r->prefix( '&1', 'opt' ); is( $t->sprint, '&1f', 'prefix, non asis option'); $r->prefix( '&2', 'asis'); is( $t->sprint, '&2&1f', 'prefix, asis option'); $r->prefix( '&3'); is( $t->sprint, '&3&2&1f', 'prefix, after a prefix with an asis option'); } { my $t= XML::Twig->parse( 'f'); $t->root->first_child->prefix( '&1', 'opt' ); is( $t->sprint, '&1f', 'pcdata prefix, non asis option'); $t->root->first_child->prefix( '&2', 'asis'); is( $t->sprint, '&2&1f', 'pcdata prefix, asis option'); $t->root->first_child->prefix( '&3', 'asis'); is( $t->sprint, '&3&2&1f', 'pcdata prefix, asis option, before an asis element'); $t->root->first_child->prefix( '&4'); is( $t->sprint, '&4&3&2&1f', 'pcdata prefix, after a prefix with an asis option'); } { my $weakrefs= XML::Twig::_weakrefs(); XML::Twig::_set_weakrefs(0); my $t= XML::Twig->parse( 'f'); my $e= $t->first_elt( 'e'); XML::Twig::Elt->new( x => 'g')->replace( $e); is( $t->sprint, 'g', 'replace non root element without weakrefs'); XML::Twig::Elt->new( y => 'h')->replace( $t->root); is( $t->sprint, 'h', 'replace root element without weakrefs'); XML::Twig::_set_weakrefs( $weakrefs); } { my $t= XML::Twig->parse( '

foo

barbaz

'); my $r= $t->root; is( $r->children_count, 2, '2 p'); $t->root->first_child->merge( $t->root->last_child); is( $r->children_count, 1, 'merged p'); is( $t->sprint, '

foobarbaz

', 'merged p with extra data'); } { my $t= XML::Twig->parse( '

foo

bazbar

'); my $r= $t->root; is( $r->children_count, 2, '2 p, one with mixed content'); $t->root->first_child->merge( $t->root->last_child); is( $r->children_count, 1, 'merged p, one with mixed content'); is( $t->sprint, '

foobazbar

', 'merged p with extra children in the second element'); } { my $t= XML::Twig->parse( ''); my $r= $t->root; $r->insert_new_elt( first_child => '#PCDATA') foreach 0..1; is( $r->children_count, 2, '2 empty texts'); $r->first_child->merge( $r->last_child); is( $r->children_count, 1, 'merged empty texts, number of children'); is( $t->sprint, '', 'merged empty texts'); } { my $t= XML::Twig->parse( 'a foo afoobar'); my $c=$t->root->copy->subs_text( qr/(foo)/, '&elt( e => "$1")'); is( $c->sprint, 'a foo afoobar', 'subs_text'); $c=$t->root->copy->subs_text( qr/(foo)/, 'X &elt( e => "$1") X'); is( $c->sprint, 'a X foo X aX foo Xbar', 'subs_text'); $c=$t->root->copy->subs_text( qr/(foo)/, 'X &elt( e => "Y $1 Y") X'); is( $c->sprint, 'a X Y foo Y X aX Y foo Y Xbar', 'subs_text'); $c->subs_text( qr/(foo)/, 'X &elt( e => "Y $1 Y") X'); is( $c->sprint, 'a X Y X Y foo Y X Y X aX Y X Y foo Y X Y Xbar', 'subs_text (re-using previous substitution)'); } { my $e= XML::Twig::Elt->new( 'e'); is( $e->att_nb, 0, 'att_nb on element with no attributes'); ok( $e->has_no_atts, 'has_no_atts on element with no attributes'); my $e2= XML::Twig::Elt->new( e => { a => 1 })->del_att( 'a');; is( $e->att_nb, 0, 'att_nb on element with no more attributes'); ok( $e->has_no_atts, 'has_no_atts on element with no more attributes'); is( $e->split_at( 1), '', 'split_at on a non text element'); } SKIP: { skip 'XML::XPath not available', 1 unless XML::Twig::_use( 'XML::XPath'); XML::Twig::_disallow_use( 'XML::XPathEngine'); XML::Twig::_use( 'XML::Twig::XPath'); my $t= XML::Twig::XPath->parse( 'e1e2e3'); is( $t->findvalue( '//e[@a>=3]|//e[@a<=1]'), 'e1e3', 'xpath search with XML::XPath'); } SKIP: { # various tests on _fix_xml skip 'HTML::TreeBuilder not available', 2 unless XML::Twig::_use( 'HTML::TreeBuilder'); my $html= '

&Amp;

'; my $t= HTML::TreeBuilder->new_from_content( $html); local $@='not well-formed (invalid token)'; local $HTML::TreeBuilder::VERSION=3.23; XML::Twig::_fix_xml( $t, \$html); unlike( $html, qr{Amp}, '&Amp with old versions of HTML::TreeBuilder'); like( $html, qr{

parse( 'pre1e1pre2a 1'); is( $t->findvalue( '/d/*[local-name()="e"]'), 'pre1e1pre2', 'local-name()'); } { my $doc= qq{\n\n}; (my $expected= $doc)=~ s{("default">)\n}{$1}; # this space should be discarded my $t= XML::Twig->parse( $doc); is( $t->sprint, $expected, 'xml:space effect on whitespace discarding'); } { my $d= ""; my $got=0; my $t= XML::Twig->new( start_tag_handlers => { e => sub { $got=1; } } ); $t->parse( $d); is( $got, 1, 'setStartTagHandlers'); $t->setStartTagHandlers( { e => sub { $got=2; } }); $t->parse( $d); is( $got, 2, 'setStartTagHandlers changed'); } { my $d= ""; my $got=0; my $st; my $t= XML::Twig->new( start_tag_handlers => { se => sub { $got=1; } }, ignore_elts => { e => \$st }, ); $t->parse( $d); is( $got, 0, 'check that ignore_elts skips element'); is( $st, '', 'check that ignore_elts stores the ignored content'); $st=''; $t->setIgnoreEltsHandler( e => 'discard'); is( $got, 0, 'check that ignore_elts still skips element'); is( $st, '', 'check that ignore_elts now discards the ignored content'); } { my $content= '

here a dodo bird

'; is( XML::Twig::Elt->new( $content)->sprint, $content, 'XML::Twig::Elt->new with litteral content'); } { my $doc= ''; my $doc_no_pi= ''; my $t= XML::Twig->parse( $doc); is( $t->sprint, $doc, 'pi is keep by default'); my $tk= XML::Twig->parse( pi => 'keep', $doc); is( $tk->sprint, $doc, 'pi is keep'); my $td= XML::Twig->parse( pi => 'drop', $doc); is( $td->sprint, $doc_no_pi, 'pi is keep'); my $tp= XML::Twig->parse( pi => 'process', $doc); is( $tp->sprint, $doc, 'pi is process'); foreach my $pi ($t->descendants( '#PI')) { $pi->delete; } is( $t->sprint, $doc, 'pi cannot be cut when pi => keep (by default)'); foreach my $pi ($tk->descendants( '#PI')) { $pi->delete; } is( $tk->sprint, $doc, 'pi cannot be cut when pi => keep'); foreach my $pi ($tp->descendants( '#PI')) { $pi->delete; } is( $tp->sprint, $doc_no_pi, 'pi can be cut when pi => process'); } { my $doc= ''; my $doc_no_comment= ''; my $t= XML::Twig->parse( $doc); is( $t->sprint, $doc, 'comments is keep by default'); my $tk= XML::Twig->parse( comments => 'keep', $doc); is( $tk->sprint, $doc, 'comments is keep'); my $td= XML::Twig->parse( comments => 'drop', $doc); is( $td->sprint, $doc_no_comment, 'comments is keep'); my $tp= XML::Twig->parse( comments => 'process', $doc); is( $tp->sprint, $doc, 'comments is process'); foreach my $comment ($t->descendants( '#COMMENT')) { $comment->delete; } is( $t->sprint, $doc, 'comment cannot be cut when comment => keep (by default)'); foreach my $comment ($tk->descendants( '#COMMENT')) { $comment->delete; } is( $tk->sprint, $doc, 'comment cannot be cut when comment => keep'); foreach my $comment ($tp->descendants( '#COMMENT')) { $comment->delete; } is( $tp->sprint, $doc_no_comment, 'comment can be cut when comment => process'); } { my $d='t1t2

p

'; my $t= XML::Twig->parse( $d); my $p= $t->elt_id( 't'); is( $p->level, 3, 'level'); is( $p->level( 's'), 2, 'level with cond'); is( $p->level( 's[@l]'), 2, 'level with cond on attr'); is( $p->level( 's[@l="2"]'), 1, 'level with more cond on attr'); is( $p->level( 's[@g]'), 0, 'level with unsatisfied more cond on attr'); } { my $d='e1e2e3e4e5f1f1f2'; my $r; my $t; $t= XML::Twig->parse( twig_handlers => { 'e#i' => sub { $r.= $_->text}}, $d); is( $r, 'e1', '# in twig handlers (1 letter id)'); is( $t->findvalue( '//e#i'), 'e1', 'findvalue with # (1 letter id)'); $r=''; $t= XML::Twig->parse( twig_handlers => { 'e#iii' => sub { $r.= $_->text}}, $d); is( $r, 'e5', '# in twig handlers (3 letter id)'); is( $t->findvalue( '//e#iii'), 'e5', 'findvalue with # (3 letter id)'); $r=''; $t= XML::Twig->parse( twig_handlers => { 'e#i2' => sub { $r.= $_->text}}, $d); is( $r, 'e2', '# in twig handlers (letter + digits)'); is( $t->findvalue( '//e#i2'), 'e2', 'findvalue with # (letter + digits)'); $r=''; $t= XML::Twig->parse( twig_handlers => { '*#ff' => sub { $r.= $_->text}}, $d); is( $r, 'f1', '*# in twig handlers'); is( $t->findvalue( '//*#ff'), 'f1', 'findvalue with *#'); } XML-Twig-3.52/t/dummy.dtd0000644000175000017500000000042512732215763015361 0ustar mrodrigumrodrigu XML-Twig-3.52/t/test_need_use_bytes.t0000755000175000017500000000333212732215763017755 0ustar mrodrigumrodrigu#!/usr/bin/perl -w # tests that require IO::Scalar to run use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; #$|=1; my $DEBUG=0; use XML::Twig; BEGIN { eval "use bytes"; if( $@) { print "1..1\nok 1\n"; warn "skipping, need to be able to use bytes\n"; exit; } } print "1..2\n"; my $text= "été"; my $text_safe= "été"; my $text_safe_hex= "été"; my $doc=qq{\n$text}; my $doc_safe=qq{\n$text_safe}; my $doc_safe_hex=qq{\n$text_safe_hex}; my $t= XML::Twig->new()->parse( $doc); if( $] == 5.008) { skip( 2); } else { $t->set_output_text_filter( sub { my $text= shift; use bytes; $text=~ s{([\xC0-\xDF].|[\xE0-\xEF]..|[\xF0-\xFF]...)} {XML::Twig::_XmlUtf8Decode($1)}egs; return $text; } ); is( $t->sprint, $doc_safe, 'safe with _XmlUtf8Decode'); # test 338 $t->set_output_text_filter( sub { my $text= shift; use bytes; $text=~ s{([\xC0-\xDF].|[\xE0-\xEF]..|[\xF0-\xFF]...)} {XML::Twig::_XmlUtf8Decode($1, 1)}egs; return $text; } ); is( $t->sprint, $doc_safe_hex, 'safe_hex with _XmlUtf8Decode'); # test 339 } exit 0; XML-Twig-3.52/t/test_bugs_3_21.t0000755000175000017500000001046612732215763016452 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; $|=1; my $DEBUG=0; use XML::Twig; my $TMAX=25; print "1..$TMAX\n"; { # testing creation of elements in the proper class package foo; use base 'XML::Twig::Elt'; package main; my $t= XML::Twig->new( elt_class => "foo")->parse( ''); my $elt= $t->first_elt( 'elt'); $elt->set_text( 'bar'); is( $elt->first_child->text, 'bar', "content of element created with set_text"); is( ref( $elt->first_child), 'foo', "class of element created with set_text"); $elt->set_content( 'baz'); is( $elt->first_child->text, 'baz', "content of element created with set_content"); is( ref( $elt->first_child), 'foo', "class of element created with set_content"); $elt->insert( 'toto'); is( $elt->first_child->tag, 'toto', "tag of element created with set_content"); is( ref( $elt->first_child), 'foo', "class of element created with insert"); $elt->insert_new_elt( first_child => 'tata'); is( $elt->first_child->tag, 'tata', "tag of element created with insert_new_elt"); is( ref( $elt->first_child), 'foo', "class of element created with insert"); $elt->wrap_in( 'tutu'); is( $t->root->first_child->tag, 'tutu', "tag of element created with wrap_in"); is( ref( $t->root->first_child), 'foo', "class of element created with wrap_in"); $elt->prefix( 'titi'); is( $elt->first_child->text, 'titi', "content of element created with prefix"); is( ref( $elt->first_child), 'foo', "class of element created with prefix"); $elt->suffix( 'foobar'); is( $elt->last_child->text, 'foobar', "content of element created with suffix"); is( ref( $elt->last_child), 'foo', "class of element created with suffix"); $elt->last_child->split_at( 3); is( $elt->last_child->text, 'bar', "content of element created with split_at"); is( ref( $elt->last_child), 'foo', "class of element created with split_at"); is( ref( $elt->copy), 'foo', "class of element created with copy"); $t= XML::Twig->new( elt_class => "foo")->parse( 'toto'); $t->root->subs_text( qr{(to)} => '&elt( p => $1)'); is( $t->sprint, '

to

to

', "subs_text result"); my $result= join( '-', map { join( ":", ref($_), $_->tag) } $t->root->descendants); is( $result, "foo:p-foo:#PCDATA-foo:p-foo:#PCDATA", "subs_text classes and tags"); } { # wrap children with > in attribute my $doc=q{}; my $result =XML::Twig->new->parse( $doc)->root->wrap_children( '+', "w")->strip_att( 'id')->sprint; my $expected = q{}; is( $result => $expected, "wrap_children with > in attributes"); $result =XML::Twig->new->parse( $doc)->root->wrap_children( '+', "w")->strip_att( 'id')->sprint; $expected = q{}; is( $result => $expected, "wrap_children with > in attributes, > in condition"); $result =XML::Twig->new->parse( $doc)->root->wrap_children( '+', "w")->strip_att( 'id')->sprint; $expected = q{}; is( $result => $expected, "wrap_children with > in attributes un-escaped > in condition"); $result =XML::Twig->new->parse( $doc)->root->wrap_children( '+', "w")->strip_att( 'id')->sprint; $expected = q{}; is( $result => $expected, "wrap_children with > in attributes, 2 atts in condition"); $result =XML::Twig->new->parse( $doc)->root->wrap_children( '+', "w")->strip_att( 'id')->sprint; $expected = q{}; is( $result => $expected, "wrap_children with > in attributes, 2 atts in condition (no child matches)"); } { # test improvements to wrap_children my $doc= q{okNOK}; my $expected= q{okNOK}; my $t= XML::Twig->new->parse( $doc); $t->root->wrap_children( '+', w => { a => "&" }); $t->root->strip_att( 'id'); is( $t->sprint, $expected, "wrap_children with &"); } XML-Twig-3.52/t/test_3_24.t0000755000175000017500000000663612732215763015441 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; $|=1; my $DEBUG=0; use XML::Twig; my $TMAX=15; print "1..$TMAX\n"; { # adding comments or pi's before/after the root my $doc= XML::Twig->nparse( ''); my $xsl = XML::Twig::Elt->new('#PI'); $xsl->set_target('xml-stylesheet'); $xsl->set_data('type= "text/xsl" href="xsl_style.xsl"'); $xsl->paste( before => $doc->root); is( $doc->sprint, '', 'PI before the root' ); my $comment= XML::Twig::Elt->new( '#COMMENT'); $comment->set_comment( 'foo'); $comment->paste( before => $doc->root); is( $doc->sprint, '', 'Comment before the root' ); XML::Twig::Elt->new( '#COMMENT')->set_comment( 'bar')->paste( after => $doc->root); XML::Twig::Elt->new( '#PI')->set_target( 'foo')->set_data( 'bar')->paste( after => $doc->root); is( $doc->sprint, '', 'Pasting things after the root' ); } { # adding comments or pi's before/after the root my $doc= XML::Twig->nparse( ''); $doc->add_stylesheet( xsl => 'xsl_style.xsl'); is( $doc->sprint, '', 'add_stylesheet'); eval{ $doc->add_stylesheet( foo => 'xsl_style.xsl') }; matches( $@, q{^unsupported style sheet type 'foo'}, 'unsupported stylesheet type'); } { # creating a CDATA element my $elt1= XML::Twig::Elt->new( foo => { '#CDATA' => 1 }, '<&>'); is( $elt1->sprint, ']]>', "creating a CDATA element"); my $elt2= XML::Twig::Elt->new( foo => { '#CDATA' => 1, att => 'v1' }, '<&>'); is( $elt2->sprint, ']]>', "creating a CDATA element"); eval { my $elt3= XML::Twig::Elt->new( foo => { '#CDATA' => 1 }, "bar", $elt1); }; matches( $@, qr/^element #CDATA can only be created from text/, "error in creating CDATA element"); my $elt4= XML::Twig::Elt->new( foo => { '#CDATA' => 1 }, '<&>', 'bar'); is( $elt4->sprint, 'bar]]>', "creating a CDATA element (from list)"); } { # errors creating text/comment/pi elements eval { my $elt= XML::Twig::Elt->new( '#PCDATA', []); }; matches( $@, qr/^element #PCDATA can only be created from text/, "error in creating PCDATA element"); eval { my $elt= XML::Twig::Elt->new( '#COMMENT', "foo", []); }; matches( $@, qr/^element #COMMENT can only be created from text/, "error in creating COMMENT element"); eval { my $elt= XML::Twig::Elt->new( '#PI', "foo", [], "bah!"); }; matches( $@, qr/^element #PI can only be created from text/, "error in creating PI element"); } { # set_cdata on non CDATA element my $elt = XML::Twig::Elt->new("qux"); $elt->set_cdata("test this '<' & this '>'"); is( $elt->sprint, q{']]>}, "set_cdata on non CDATA element"); } { # set_comment on non comment element my $elt = XML::Twig::Elt->new(qux => "toto"); $elt->set_comment( " booh "); is( $elt->sprint, q{}, "set_comment on non comment element"); } { # set_pi on non pi element my $elt = XML::Twig::Elt->new(qux => "toto"); $elt->set_pi( ta => "tie ramisu"); is( $elt->sprint, q{}, "set_pi on non pi element"); } XML-Twig-3.52/t/test_comment_handler.t0000755000175000017500000000357212732215763020125 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use Carp; # test for the various conditions in navigation methods use XML::Twig; if( $] < 5.008) { warn "skipped, not tested under perl < 5.8\n"; print "1..1\nok 1\n"; exit 0; } my $nb_tests=4; print "1..$nb_tests\n"; { my $result; my $t= XML::Twig->new( comments => 'process', twig_handlers => { '#COMMENT' => sub { $result .=$_->text; } }, ); $t->parse( q{}); my $expected= ' comment in doc '; if( $result eq $expected) { print "ok 1\n"; } else { print "not ok 1\n"; warn "expected: $expected\nfound : $result\n"; } } { my $result=''; my $t= XML::Twig->new( comments => 'process', twig_handlers => { '#COMMENT' => sub { $result .=$_->text; } }, ); $t->parse( q{}); my $expected= ' comment in doc '; if( $result eq $expected) { print "ok 2\n"; } else { print "not ok 2\n"; warn "expected: $expected\nfound : $result\n"; } } { my $result=''; my $t= XML::Twig->new( twig_handlers => { 'doc' => sub { $result= $_->{extra_data}; } },); $t->parse( q{}); my $expected= ''; if( $result eq $expected) { print "ok 3\n"; } else { print "not ok 3\n"; warn "expected: $expected\nfound : $result\n"; } } { my $result=''; my $t= XML::Twig->new( comments => 'process', twig_roots => { '/#COMMENT' => sub { $result= $_->{extra_data}; }, elt => sub { }, }); $t->parse( q{}); my $expected= ''; # This is a bug! if( $result eq $expected) { print "ok 4\n"; } else { print "not ok 4\n"; warn "expected: $expected\nfound : $result\n"; } } exit 0; XML-Twig-3.52/t/test_mark.t0000755000175000017500000000462212732215763015715 0ustar mrodrigumrodrigu#!/usr/bin/perl -w # test the mark method use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; #$|=1; my $DEBUG=0; use XML::Twig; my $perl= $]; my @data= map { chomp; [split /\t+/] } ; my $TMAX= 2 * @data; print "1..$TMAX\n"; foreach my $test (@data) { my( $doc, $regexp, $elts, $hits, $result)= @$test; (my $quoted_elts= $elts)=~ s{(\w+)}{'$1'}g; my @elts= eval( "($quoted_elts)"); my $t= XML::Twig->new->parse( $doc); my $root= $t->root; my @hits= $root->mark( $regexp, @elts); is( $t->sprint, $result, "mark( /$regexp/, $quoted_elts) on $doc"); is( scalar @hits, $hits, 'nb hits'); } exit 0; # doc regexp elts hits result __DATA__ text X (X) s 1 text X text X X s 1 text text X s 0 text text (X) s 0 text text X X s 1 text text X (X) s 1 text X text X \s*X\s* s 1 text text X \s*(X)\s* s 1 textX text X (\s*X\s*) s 1 text X text X text X s 1 text text text X text (X) s 1 text X text text X text \s*X\s* s 1 texttext text X text \s*(X)\s* s 1 textXtext text X text (\s*X\s*) s 1 text X text text XX X s 2 text text XX (X) s 2 text XX text X X X s 2 text text X X (X) s 2 text X X text XX text X s 2 text text text XX text (X) s 2 text XX text text XY text Y text X ([XY]+) s 3 text XY text Y text X text X X s, {a => 1} 1 text text X (X) s, {a => 1, b => 2} 1 text X text X1Y2 text X0 Y0X3Y4 text X X(\d)Y(\d) s 4 text 12 text X0 Y034 text X XML-Twig-3.52/t/xmlxpath_test1.t0000755000175000017500000003214712732215763016714 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; my $DEBUG=0; print "1..114\n"; # This just tests a complete twig, no callbacks $|=1; my $doc=' ]>
S1 I1 S1 I2 S1 Title S1 P1 S2 P2 Note P1 S1 para 3
S2 intro S2 Title S2 P1 S2 P2 S2 P3
Annex Title Annex P1 Annex P2
'; my $i=0; my $failed=0; # test twig creation my $t= XML::Twig::XPath->new; ok( $t, 'twig creation'); # test parse $t->parse( $doc, ErrorContext=>2); ok( $t, 'parse'); ok( ($t->node_cmp( $t->root) == -1), 'cmp twig to root'); ok( ($t->node_cmp( $t) == 0), 'cmp twig to root'); # test the root my $root= $t->root; etest( $t->root, 'doc', 'doc1', 'root'); # print in a file open( TMP, '>tmp'); select TMP; $t->print(); $root->print(); select STDOUT; $t->print( \*TMP); $root->print( \*TMP); ok( 'ok', "print"); # test the element root and twig functions on the root ok( $root->twig, 'root->twig'); etest( $root->root, 'doc', 'doc1', 'root->root'); # navigation my $section1= etest( $root->first_child, 'section', 'section1', 'first_child'); my $annex= etest( $root->first_child( 'annex'), 'annex', 'annex1', 'first_child( annex)'); etest( $root->last_child, 'annex', 'annex1', 'last_child'); my $section2= etest( $root->last_child( 'section'), # 10 'section', 'section2', 'last_child( section)'); etest( $section2->prev_sibling, 'section', 'section1', 'prev_sibling'); etest( $section1->next_sibling, 'section', 'section2', 'next_sibling'); my $note= etest( $root->next_elt( 'note'), 'note', 'note1', 'next_elt( note)'); etest( $note->root, 'doc', 'doc1', 'root'); ok( $note->twig, 'twig'); etest( $note->twig->root, 'doc', 'doc1', 'twig->root'); # playing with next_elt and prev_elt my $para2= etest( $note->prev_sibling, 'para', 'para2', 'prev_sibling'); etest( $note->prev_elt( 'para'), 'para', 'para2', 'prev_elt( para)'); my $para3= etest( $note->next_sibling, 'para', 'para3', 'next_sibling'); my $paranote1= etest( $note->next_elt( 'para'), # 20 'para', 'paranote1', 'next_elt( para)'); etest( $paranote1->next_elt( 'para'), 'para', 'para3', 'next_elt( para)'); # difference between next_sibling and next_sibling( gi) etest( $para2->next_sibling, 'note', 'note1', 'next_sibling'); etest( $para2->next_sibling( 'para'), 'para', 'para3', 'next_sibling( para)'); # testing in/parent/in_context ok( $paranote1->in( $note), 'in'); ok( $paranote1->in( $section1), 'in'); ok( !$paranote1->in( $section2), 'not in'); ok( $paranote1->in_context( 'note'), 'in_context'); ok( $paranote1->in_context( 'section'), 'in_context'); ok( !$paranote1->in_context( 'intro'), 'not in_context'); etest( $paranote1->parent, # 30 'note', 'note1', 'parent'); # testing list methods (ancestors/children) stest( (join ":", map { $_->id} $paranote1->ancestors), 'note1:section1:doc1', 'ancestors'); stest( (join ":", map { $_->id} $paranote1->ancestors('section')), 'section1', 'ancestors( section)'); stest( (join ":", map { $_->id} $section1->children), 'intro1:title1:para1:para2:note1:para3', 'children'); stest( (join ":", map { $_->id} $section1->children( 'para')), 'para1:para2:para3', 'children( para)'); stest( $paranote1->level, 3, 'level'); # testing attributes my $title1= etest( $root->next_elt( 'title'), 'title', 'title1', 'next_elt( title)'); stest( $title1->id, 'title1', 'id'); stest( $title1->att('id'), 'title1', 'att( id)'); stest( $title1->att('no'), '1', 'att( no)'); $title1->set_att('no', 'Auto'); stest( $title1->att('no'), 'Auto', 'set att( no)'); $title1->set_att('no', '1'); $title1->set_att('newatt', 'newval'); stest( $title1->att('newatt'), 'newval', 'set att( newval)'); $title1->del_att('newatt'); stest( stringifyh( %{$title1->atts}), 'id:title1:no:1', 'del_att'); $title1->set_att('id', 'newid'); stest( $title1->id, 'newid', 'set_att(id)'); stest( $title1->att( 'id'), 'newid', 'set_att(id)'); $title1->set_id( 'title1'); stest( $title1->id, 'title1', 'set_id'); stest( $title1->att( 'id'), 'title1', 'set_id'); stest( stringifyh( %{$title1->atts}), 'id:title1:no:1', 'atts'); $title1->del_atts; stest( $title1->att( 'id'), '', 'del_atts'); $title1->set_atts( { 'no' => '1', 'id' => 'newtitleid'}); stest( stringifyh( %{$title1->atts}), 'id:newtitleid:no:1', 'set_atts'); stest( $title1->id, 'newtitleid', 'id'); stest( $title1->att('id'), 'newtitleid', 'att( id)'); $title1->set_id( 'title1'); # now cut and paste $title1->cut; stest( (join ":", map { $_->id} $section1->children), 'intro1:para1:para2:note1:para3', 'cut (1)'); my $intro1= $section1->first_child( 'intro'); $intro1->cut; stest( (join ":", map { $_->id} $section1->children), 'para1:para2:note1:para3', 'cut (2)'); $intro1->paste( $section1); stest( (join ":", map { $_->id} $section1->children), 'intro1:para1:para2:note1:para3', 'paste'); $title1->paste( 'first_child', $section2, ); stest( (join ":", map { $_->id} $section2->children), 'title1:intro2:title2:para4:para5:para6', 'paste( first_child)'); $title1->cut; stest( (join ":", map { $_->id} $section2->children), 'intro2:title2:para4:para5:para6', 'paste'); $title1->paste( $section2); stest( (join ":", map { $_->id} $section2->children), 'title1:intro2:title2:para4:para5:para6', 'paste'); $title1->cut; stest( (join ":", map { $_->id} $section2->children), 'intro2:title2:para4:para5:para6', 'cut (3)'); $title1->paste( 'last_child', $section2); stest( (join ":", map { $_->id} $section2->children), 'intro2:title2:para4:para5:para6:title1', 'paste( last_child)'); $title1->cut; stest( (join ":", map { $_->id} $section2->children), 'intro2:title2:para4:para5:para6', 'cut(4)'); my $intro2= etest( $section2->first_child( 'intro'), 'intro', 'intro2', 'first_sibling( intro)'); $title1->paste( 'after', $intro2); stest( (join ":", map { $_->id} $section2->children), 'intro2:title1:title2:para4:para5:para6', 'paste( after)'); $title1->cut; stest( (join ":", map { $_->id} $section2->children), 'intro2:title2:para4:para5:para6', 'cut (5)'); $title1->paste( 'before', $intro2); stest( (join ":", map { $_->id} $section2->children), 'title1:intro2:title2:para4:para5:para6', 'paste( before)'); $title1->cut; stest( (join ":", map { $_->id} $section2->children), 'intro2:title2:para4:para5:para6', 'cut (6)'); my $para4= etest( $t->elt_id( 'para4'), 'para', 'para4', 'elt_id'); $title1->paste( 'after', $para4); stest( (join ":", map { $_->id} $section2->children), 'intro2:title2:para4:title1:para5:para6', 'paste( after)'); $title1->cut; stest( (join ":", map { $_->id} $section2->children), 'intro2:title2:para4:para5:para6', 'cut (7)'); $title1->paste( 'before', $para4); stest( (join ":", map { $_->id} $section2->children), 'intro2:title2:title1:para4:para5:para6', 'paste( before)'); $title1->cut; stest( (join ":", map { $_->id} $section2->children), 'intro2:title2:para4:para5:para6', 'cut (8)'); # now we mess up the document # erase that pesky intro $intro2->erase; stest( (join ":", map { $_->id} $section2->children), 'paraintro3:title2:para4:para5:para6', 'erase'); $para4->delete; stest( (join ":", map { $_->id} $section2->children), 'paraintro3:title2:para5:para6', 'delete'); $t->change_gi( 'paraintro', 'para'); stest( (join ":", map { $_->gi} $section2->children), 'para:title:para:para', 'change_gi'); $para3= etest( $t->elt_id( 'para3'), 'para', 'para3', 'elt_id'); $para3->cut; stest( $section1->text, 'S1 I1S1 I2S1 P1S2 P2Note P1', 'text'); stest( $section1->sprint, '
S1 I1S1 I2S1 P1S2 P2Note P1
', 'sprint'); # have a look at those entities # first their names stest( join( ':', $t->entity_names), 'e1:e2:e3', 'entity_list'); # look at their content my $e1= $t->entity( 'e1'); stest( $e1->text, '', 'e1 text'); my $e2= $t->entity( 'e2'); stest( $e2->text, '', 'e2 text'); my $e3= $t->entity( 'e3'); stest( $e3->text, '', 'e3 text'); # additionnal erase test $section1= $root->first_child; stest( (join ":", map { $_->id} $section1->children), 'intro1:para1:para2:note1', 'erase (2)'); $intro1= $section1->first_child( 'intro'); $intro1->erase; stest( (join ":", map { $_->id} $section1->children), 'paraintro1:paraintro2:para1:para2:note1', 'erase (3)'); # new elt test my $new_elt= new XML::Twig::XPath::Elt; stest( ref $new_elt, 'XML::Twig::XPath::Elt', "new"); my $new_elt1= new XML::Twig::XPath::Elt( 'subclass'); stest( ref $new_elt, 'XML::Twig::XPath::Elt', "new subclass"); my $new_elt2= new XML::Twig::XPath::Elt; stest( ref $new_elt2, 'XML::Twig::XPath::Elt', "create no gi"); my $new_elt3= new XML::Twig::XPath::Elt( 'elt3'); $new_elt3->set_id( 'elt3'); etest( $new_elt3, 'elt3', 'elt3', "create with gi"); my $new_elt4= new XML::Twig::XPath::Elt( 'elt4', 'text of elt4'); ttest( $new_elt4, 'text of elt4', "create with gi and text"); my $new_elt5= new XML::Twig::XPath::Elt( 'elt5', 'text of elt5 ', $new_elt4); ttest( $new_elt5, 'text of elt5 text of elt4', "create with gi and content"); my $new_elt6= new XML::Twig::XPath::Elt( PCDATA, 'text of elt6'); ttest( $new_elt6, 'text of elt6', "create PCDATA"); # test CDATA my $st1='bold]]>'; my $t1= new XML::Twig::XPath; $t1->parse( $st1); sttest( $t1->root, $st1, "CDATA Section"); my $st2='text bold]]> more text'; my $t2= new XML::Twig::XPath; $t2->parse( $st2); sttest( $t2->root, $st2, "CDATA Section"); my $st3='bold]]> text'; my $t3= new XML::Twig::XPath; $t3->parse( $st3); sttest( $t3->root, $st3, "CDATA Section"); my $st4='textbold]]>more text'; my $t4= new XML::Twig::XPath; $t4->parse( $st4); sttest( $t4->root, $st4, "CDATA Section"); my $st5='text more text'; my $t5= new XML::Twig::XPath; $t5->parse( $st5); sttest( $t5->root, $st5, "CDATA Section with ]]<"); # test prefix my $st6='textmore text'; my $t6= new XML::Twig::XPath; $t6->parse( $st6); $doc= $t6->root; $doc->prefix( 'p1:'); sttest( $t6->root,'p1:textmore text', "prefix doc"); my $el1= $doc->first_child( 'el1'); $el1->prefix( 'p2:'); sttest( $t6->root,'p1:p2:textmore text', "prefix el1"); my $el2= $doc->first_child( 'el2'); my $pcdata= $el2->first_child( PCDATA); $pcdata->prefix( 'p3:'); sttest( $t6->root,'p1:p2:textp3:more text', "prefix pcdata"); is( $t6->node_cmp( 1), -1, "compare twig with scalar"); ok( UNIVERSAL::isa( $t->root->getParentNode, 'XML::Twig::XPath'), 'getParentNode on the root'); ok( UNIVERSAL::isa( $t->root->first_child->getParentNode, 'XML::Twig::XPath::Elt'), 'getParentNode on an elt'); eval '$t6->root->node_cmp( []);'; matches( $@, "^unknown node type ", "compare elt with scalar"); my $elt= XML::Twig::XPath::Elt->new( elt => { att1 => 1, att2 => 2 }, "99"); my( $att1, $att2)= $elt->getAttributes; is( $att1->node_cmp( $att2), -1, "attribute comparison"); is( $att2->node_cmp( $att1), 1, "attribute comparison (reverse order)"); is( $att2->node_cmp( $elt), 1, "compare attribute with elt"); is( $att2->node_cmp( $t6), 1, "compare attribute with elt"); is( $elt->node_cmp( $att1), -1, "compare elt with attribute"); is( $att1->node_cmp( $att1), 0, "compare attribute with itself"); is( $elt->node_cmp( $elt), 0, "compare elt with itself"); eval( '$att1->node_cmp( 1)'); matches( $@, "^unknown node type ", "compare att with scalar"); $elt->set_att( att3 => 3); my $att3= XML::Twig::XPath::Attribute->new( $elt => 'att3'); is( $att1->node_cmp( $att3), -1, "attribute comparison"); ok( $att2->to_number == 2, "to_number on att"); ok( $elt->to_number == 99, "to_number on elt"); exit 0; XML-Twig-3.52/t/test_new_features_3_22.html0000644000175000017500000000014013015347623020660 0ustar mrodrigumrodriguTt
t2

t3XML-Twig-3.52/t/test_simplify.t0000755000175000017500000001002212732215763016606 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use XML::Twig; foreach my $module ( qw( XML::Simple Test::More Data::Dumper ) ) # add YAML if using Dump() below. { if( eval "require $module") { import $module; } else { print "1..1\nok 1\n"; warn "skipping: $module is not installed\n"; exit; } } if( $XML::Simple::VERSION < 2.09) { print "1..1\nok 1\n"; warn "skipping: need XML::Simple 2.09 or above\n"; exit; } undef $XML::Simple::PREFERRED_PARSER; $XML::Simple::PREFERRED_PARSER= 'XML::Parser'; $/="\n\n"; my @doc= ; my @options= ( { }, { content_key => 'foo' }, { group_tags => { templates => 'template'} }, { group_tags => { dirs => 'dir', templates => 'template'} }, { forcearray => 1 }, { forcearray => [ qw(server) ] }, { noattr => 1, }, { noattr => 0, }, { content_key => 'mycontent' }, { content_key => '-mycontent' }, { var_attr => 'var' }, { var_attr => 'var', var_regexp => qr/\$\{?(\w+)\}?/ }, { variables => { var => 'foo' } }, { keyattr => [ qw(name)] }, { keyattr => [ 'name' ] }, { keyattr => [ qw(foo bar)] }, { keyattr => {server => 'name' } }, { keyattr => {server => '+name' } }, { keyattr => {server => '-name' } }, { normalize_space => 1 }, { normalise_space => 2 }, { group_tags => { f1_ar => 'f1' } }, { group_tags => { f1_ar => 'f1', f2_ar => 'f2'} }, ); plan( tests => @options * @doc); $SIG{__WARN__} = sub { }; foreach my $doc (@doc) { foreach my $options (@options) { (my $options_text= Dumper( $options))=~ s{\s*\n\s*}{ }g; $options_text=~ s{^\$VAR1 = }{}; my( $options_twig, $options_simple)= UNIVERSAL::isa( $options, 'ARRAY') ? @$options : ($options, $options); my $t = XML::Twig->new->parse( $doc); my $twig = $t->root->simplify( %$options_twig); my $doc_name = $t->root->att( 'doc'); delete $options_simple->{var_regexp}; my $simple = XMLin( $doc, %$options_simple); my $res=is_deeply( $twig, $simple, "doc: $doc_name - options: $options_text" ); #. Dump( {twig => $twig, simple => $simple})); #exit unless( $res); } } exit 0; __DATA__

10.0.0.101
10.0.1.101
10.0.0.102
10.0.0.103
10.0.1.103
localhost /home/mrodrigu/standards ${base}/tools foovar is ${var} text with spaces text with spaces text with spaces text with spaces f1 1f1 2 f2 1f2 2
something
0
XML-Twig-3.52/t/test_meta_json.t0000755000175000017500000000022312732215763016733 0ustar mrodrigumrodriguuse Test::More; eval "use Test::CPAN::Meta::JSON"; plan skip_all => "Test::CPAN::Meta::JSON required for testing META.json" if $@; meta_json_ok(); XML-Twig-3.52/t/xmlxpath_19axisd_or_s.t0000755000175000017500000000102312732215763020145 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 4); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @nodes; @nodes = $t->findnodes( '/AAA/XXX/descendant-or-self::*'); ok(@nodes, 8); @nodes = $t->findnodes( '//CCC/descendant-or-self::*'); ok(@nodes, 4); exit 0; __DATA__ XML-Twig-3.52/t/test_spaces.t0000755000175000017500000000216412732215763016240 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use XML::Twig; $/="\n\n"; print "1..3\n"; my $twig=XML::Twig->new( keep_spaces_in => [ 'e']); test( $twig, 1); $twig=XML::Twig->new( keep_spaces_in => [ 'e', 'sub1']); test( $twig, 2); $twig=XML::Twig->new( keep_spaces => 1); test( $twig, 3); sub test { my( $twig, $test_nb)= @_; my $doc= ; chomp $doc; my $expected_res= ; chomp $expected_res; $twig->parse( $doc); my $res= $twig->sprint; $res=~ s/\n+$//; if( $res eq $expected_res) { print "ok $test_nb\n"; } else { print "not ok $test_nb\n"; warn " expected: \n$expected_res\n result: \n$res\n"; } } exit 0; __DATA__ &c;b &c;b &c;b &c; &c;b &c; &c;b &c; &c;b &c; XML-Twig-3.52/t/test_cdata.t0000755000175000017500000000344312732215763016037 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use XML::Twig; $|=1; $/= "\n\n"; my $xml= ; print "1..4\n"; my( $t, $result, $expected_result); $t= XML::Twig->new( twig_handlers => { 'ehtml/#CDATA' => sub { $_->set_asis; } }); $t->parse( $xml); $result= $t->sprint; ($expected_result=)=~ s{\n*$}{}s; if( $result eq $expected_result) { print "ok 1\n"; } else { print "not ok 1\n"; warn "expected: $expected_result\n result : $result"; } $t= XML::Twig->new( twig_handlers => { 'ehtml/#CDATA' => sub { $_->remove_cdata; } }); $t->parse( $xml); $result= $t->sprint; ($expected_result=)=~ s{\n*$}{}s; if( $result eq $expected_result) { print "ok 2\n"; } else { print "not ok 2\n"; warn "expected: $expected_result\n result : $result"; } $t= XML::Twig->new( keep_encoding => 1, twig_handlers => { 'ehtml/#CDATA' => sub { $_->set_asis; } }); $t->parse( $xml); $result= $t->sprint; ($expected_result=)=~ s{\n*$}{}s; if( $result eq $expected_result) { print "ok 3\n"; } else { print "not ok 3\n"; warn "test keep_encoding / asis\n expected: $expected_result\n result : $result"; } $t= XML::Twig->new( keep_encoding => 1, twig_handlers => { 'ehtml/#CDATA' => sub { $_->remove_cdata; } }); $t->parse( $xml); $result= $t->sprint; ($expected_result=)=~ s{\n*$}{}s; if( $result eq $expected_result) { print "ok 4\n"; } else { print "not ok 4\n"; warn "test keep_encoding / remove_cdata\n expected: $expected_result\n result : $result"; } exit 0; __DATA__ text world & all]]> texthello
world & all
texthello<br>world & all texthello
world & all
texthello<br>world & all XML-Twig-3.52/t/test_entities.t0000755000175000017500000001154712732215763016613 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; #use diagnostics; use XML::Twig; $|=1; my $TMAX=6; # do not forget to update! print "1..$TMAX\n"; my $doc= read_data(); # test 1 : roots and twig_print_outside_roots my $result_file= "test_entities.res1"; open( RESULT, ">$result_file") or die "cannot create $result_file: $!"; my $t= XML::Twig->new( twig_roots => { elt2 => sub { $_->print} }, twig_print_outside_roots => 1, #load_DTD => 1, error_context => 2, ); select RESULT; $t->safe_parse( $doc) or die "This error is probably due to an incompatibility between XML::Twig and the version of libexpat that you are using\n See the README and the XML::Twig FAQ for more information\n";; close RESULT; select STDOUT; check_result( $result_file, 1); # test 2 : roots only, test during parsing $result_file= "test_entities.res2"; open( RESULT, ">$result_file") or die "cannot create $result_file: $!"; $t= XML::Twig->new( twig_roots => { elt2 => sub { $_->print} }, error_context => 1, ); select RESULT; $t->parse( $doc); close RESULT; select STDOUT; check_result( $result_file, 2); # test 3 : roots only, test parse result $result_file= "test_entities.res3"; open( RESULT, ">$result_file") or die "cannot create $result_file: $!"; $t= XML::Twig->new( twig_roots => { elt2 => 1 }, pretty_print => 'indented', error_context => 1, ); $t->parse( $doc); $t->print( \*RESULT); close RESULT; check_result( $result_file, 3); # test 4 : roots and twig_print_outside_roots $result_file= "test_entities.res4"; open( RESULT, ">$result_file") or die "cannot create $result_file: $!"; $t= XML::Twig->new( twig_roots => { elt2 => sub { $_->print} }, twig_print_outside_roots => 1, keep_encoding => 1, error_context => 1, ); select RESULT; $t->parse( $doc); close RESULT; select STDOUT; check_result( $result_file, 4); # test 5 : roots only, test during parsing $result_file= "test_entities.res5"; open( RESULT, ">$result_file") or die "cannot create $result_file: $!"; $t= XML::Twig->new( twig_roots => { elt2 => sub { $_->print} }, keep_encoding => 1, error_context => 1, ); select RESULT; $t->parse( $doc); close RESULT; select STDOUT; check_result( $result_file, 5); # test 6 : roots only, test parse result $result_file= "test_entities.res6"; open( RESULT, ">$result_file") or die "cannot create $result_file: $!"; $t= XML::Twig->new( twig_roots => { elt2 => 1 }, pretty_print => 'indented', keep_encoding => 1, error_context => 1, ); $t->parse( $doc); $t->print( \*RESULT); close RESULT; check_result( $result_file, 6); exit 0; sub check_result { my( $result_file, $test_no)= @_; # now check result my $expected_result= read_data(); my $result= read_result( $result_file); if( $result eq $expected_result) { print "ok $test_no\n"; } else { print "not ok $test_no\n"; print STDERR "\ntest $test_no:\n", "expected: \n$expected_result\n", "real: \n$result\n"; } } sub read_data { local $/="\n\n"; my $data= ; $data=~ s{^\s*#.*\n}{}m; # get rid of comments $data=~ s{\s*$}{}s; # remove trailing spaces (and \n) $data=~ s{(^|\n)\s*(\n|$)}{}g; # remove empty lines return $data; } sub read_result { my $file= shift; local $/="\n"; open( RESULT, "<$file") or die "cannot read $file: $!"; my @result= grep {m/\S/} ; my $result= join( '', @result); $result=~ s{(^|\n)\s*(\n|$)}{}g; # remove empty lines close RESULT; unlink $file; return $result; } __DATA__ # doc 1 toto &ent1; tata &ent2; tutu &ent3; tutu &ent4; tutu &ent5; # expected_res 1 toto &ent1; tata &ent2; tutu &ent3; tutu &ent4; tutu &ent5; # expected_res 2 tata &ent2;tutu &ent4; # expected_res 3 tata &ent2; tutu &ent4; # expected_res 4 toto &ent1; tata &ent2; tutu &ent3; tutu &ent4; tutu &ent5; # expected_res 5 tata &ent2; tutu &ent4; # expected_res 6 tata &ent2; tutu &ent4; XML-Twig-3.52/t/test_with_lwp.t0000755000175000017500000000470412732215763016621 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; $|=1; use XML::Twig; eval { require LWP; }; if( $@) { import LWP; print "1..1\nok 1\n"; warn "skipping, LWP not available\n"; exit } # skip on Win32, it looks like we have a problem there (named pipes?) if( ($^O eq "MSWin32") && ($]<5.008) ) { print "1..1\nok 1\n"; warn "skipping, *parseurl methods not available on Windows with perl < 5.8.0\n"; exit } if( perl_io_layer_used()) { print "1..1\nok 1\n"; warn "cannot test parseurl when UTF8 perIO layer used (due to PERL_UNICODE or -C option used)\n"; exit; } my $TMAX=13; chdir 't'; print "1..$TMAX\n"; { my $t= XML::Twig->new->parseurl( 'file:test_with_lwp.xml', LWP::UserAgent->new); is( $t->sprint, 'text', "parseurl"); } { my $t= XML::Twig->new->parseurl( 'file:test_with_lwp.xml'); is( $t->sprint, 'text', "parseurl"); } { my $t= XML::Twig->new->safe_parseurl( 'file:test_with_lwp.xml'); is( $t->sprint, 'text', "parseurl"); } { warn "\n\n### warning is normal here ###\n\n"; my $t=0; if ($^O ne 'VMS') { # On VMS we get '%SYSTEM-F-ABORT, abort' and an exit when a file does not exist # Behaviour is probably different on VMS due to it not having 'fork' to do the # LWP::UserAgent request and (safe) parse of that request not happening in a child process. $t = XML::Twig->new->safe_parseurl( 'file:test_with_lwp_no_file.xml'); ok( !$t, "no file"); matches( $@, '^\s*(no element found|Ran out of memory for input buffer)', "no file, error message"); } else { skip( 2 => "running on VMS, cannot test error message for non-existing file"); } } { my $t= XML::Twig->new->safe_parseurl( 'file:test_with_lwp_not_wf.xml'); ok( !$t, "not well-formed"); matches( $@, '^\s*mismatched tag', "not well-formed, error message"); } { my $t= XML::Twig->new->parsefile( 'test_with_lwp.xml'); is( $t->sprint, 'text', "parseurl"); } { my $t= XML::Twig->new->safe_parsefile( 'test_with_lwp.xml'); is( $t->sprint, 'text', "parseurl"); } { my $t= XML::Twig->new->safe_parsefile( 'test_with_lwp_no_file.xml'); ok( !$t, "no file"); matches( $@, '^\s*Couldn', "no file, error message"); } { my $t= XML::Twig->new->safe_parsefile( 'test_with_lwp_not_wf.xml'); ok( !$t, "not well-formed"); matches( $@, '^\s*mismatched tag', "not well-formed, error message"); } exit 0; XML-Twig-3.52/t/xmlxpath_30lang.t0000755000175000017500000000100412732215763016724 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 4); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new( keep_spaces => 1)->parse( \*DATA); ok( $t); my @en = $t->findnodes( '//*[lang("en")]'); ok(@en, 2); my @de = $t->findnodes( '//content[lang("de")]'); ok(@de, 1); exit 0; __DATA__ Here we go... und hier deutschsprachiger Text :-) XML-Twig-3.52/t/test_with_lwp.xml0000644000175000017500000000003712732215763017146 0ustar mrodrigumrodrigu text XML-Twig-3.52/t/test_3_47.t0000755000175000017500000000260212732215763015433 0ustar mrodrigumrodrigu#!/usr/bin/perl use strict; use warnings; use XML::Twig; use Test::More tests => 3; use utf8; # test CDATA sections in HTML escaping https://rt.cpan.org/Ticket/Display.html?id=86773 # module => XML::Twig->new options my %html_conv= ( 'HTML::TreeBuilder' => {}, 'HTML::Tidy' => { use_tidy => 1 }, ); foreach my $module ( sort keys %html_conv) { SKIP: { eval "use $module"; skip "$module not available", 1 if 1 ; my $in = q{

Here&there v&r;

marco&company; and marco&company £ £ £ £

}; my $expected= q{

Here&there v&r;

marco&company; and marco&company £ £ £ £

}; my $parser= XML::Twig->new( %{$html_conv{$module}}); my $t = $parser->safe_parse_html($in); print $@ if $@; like $t->sprint, qr{\Q$expected\E}, "In and out are the same ($module)"; } } { # test RT #94295 https://rt.cpan.org/Public/Bug/Display.html?id=94295 # in twig_handlers, '=' in regexps on attributes are turned into 'eq' my $xml= 'e1e2'; my $r; my $t= XML::Twig->new( twig_handlers => { 'e[@dn =~ /host=0/]' => sub { $r.= $_->text } }) ->parse( $xml); is( $r, 'e1', 'regexp on attribute, including an = sign'); } exit; XML-Twig-3.52/t/test_safe_encode.t0000755000175000017500000000362112732215763017214 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; use XML::Twig; my $DEBUG=0; print "1..8\n"; if( $] >= 5.006) { eval "use utf8;"; } # suitable for perl 5.6.* my $doc=q{<élément att="été">été}; (my $safe_xml_doc= $doc)=~ s{é}{é}g; (my $safe_hex_doc= $doc)=~ s{é}{é}g; (my $text_safe_xml_doc= $doc)=~ s{été}{ét&233;}g; (my $text_safe_hex_doc= $doc)=~ s{é}{ét&xe9;}g; is( XML::Twig->new( output_filter => 'safe')->parse( $doc)->sprint, $safe_xml_doc, "output_filter => 'safe'"); is( XML::Twig->new( output_filter => 'safe_hex')->parse( $doc)->sprint, $safe_hex_doc, "output_filter => 'safe_hex'"); is( XML::Twig->new( output_text_filter => 'safe')->parse( $doc)->sprint, $safe_xml_doc, "output_text_filter => 'safe'"); is( XML::Twig->new( output_text_filter => 'safe_hex')->parse( $doc)->sprint, $safe_hex_doc, "output_text_filter => 'safe_hex'"); # suitable for 5.8.* and above (you can't have utf-8 hash keys before that) if( $] < 5.008) { skip( 4 => "cannot process utf-8 attribute names with a perl before 5.8"); } else { my $doc='<élément atté="été">été'; (my $safe_xml_doc= $doc)=~ s{é}{é}g; (my $safe_hex_doc= $doc)=~ s{é}{é}g; (my $text_safe_xml_doc= $doc)=~ s{été}{ét&233;}g; (my $text_safe_hex_doc= $doc)=~ s{é}{ét&xe9;}g; is( XML::Twig->new( output_filter => 'safe')->parse( $doc)->sprint, $safe_xml_doc, "output_filter => 'safe'"); is( XML::Twig->new( output_filter => 'safe_hex')->parse( $doc)->sprint, $safe_hex_doc, "output_filter => 'safe_hex'"); is( XML::Twig->new( output_text_filter => 'safe')->parse( $doc)->sprint, $safe_xml_doc, "output_text_filter => 'safe'"); is( XML::Twig->new( output_text_filter => 'safe_hex')->parse( $doc)->sprint, $safe_hex_doc, "output_text_filter => 'safe_hex'"); } XML-Twig-3.52/t/xmlxpath_20axisa_or_s.t0000755000175000017500000000102712732215763020136 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 4); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @nodes; @nodes = $t->findnodes( '/AAA/XXX/DDD/EEE/ancestor-or-self::*'); ok(@nodes, 4); @nodes = $t->findnodes( '//GGG/ancestor-or-self::*'); ok(@nodes, 5); exit 0; __DATA__ XML-Twig-3.52/t/xmlxpath_26predicate.t0000755000175000017500000000074312732215763017761 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 4); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @bbb = $t->findnodes( '//a/b[2]'); ok(@bbb, 2); @bbb = $t->findnodes( '(//a/b)[2]'); ok(@bbb, 1); exit 0; __DATA__ some 1 value 1 some 2 value 2 XML-Twig-3.52/t/test_nav.t0000755000175000017500000001070612732215763015547 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; # test for the various conditions in navigation methods use XML::Twig; my $t= XML::Twig->new; $t->parse( ' text1 text text text} text" text\' text 2 text level1 '); my $root= $t->root; my @data= grep { !/^##/ && m{\S} } ; my %result= map { chomp; split /\s*=>\s*/} @data; my $nb_tests= keys %result; print "1..$nb_tests\n"; foreach my $cond ( sort keys %result) { my $expected_result= $result{$cond}; my $result; my $res= $root->first_child( $cond); if( $res) { if( $res->id) { $result= $res->id; } else { $result= $res->text; $result=~ s/^\s+//; $result=~ s/\s+$//; } } else { $result= 'none'; } is( $result => $expected_result, "$cond"); } exit 0; __DATA__ => elt-1 elt => elt-1 #ELT => elt-1 !#ELT => text level1 #TEXT => text level1 !#TEXT => elt-1 elt2 => elt2-1 foo => none elt[@id] => elt-1 elt[@id!="elt-1"] => elt-2 elt[@duh!="elt-1"] => elt-1 elt[@toto] => elt-1 elt[!@toto] => elt-2 /2$/ => elt2-1 elt[@id="elt-1"] => elt-1 elt[@id="elt-1" or @foo="bar"] => elt-1 elt[@id="elt-1" and @foo!="bar"] => elt-1 elt[@id="elt-1" and @foo="bar"] => none elt2[@id=~/elt2/] => elt2-1 elt[@id="elt2-1"] => none elt2[@id="elt2-1"] => elt2-1 elt[@id=~/elt2/] => none *[@id="elt1-1"] => none *[@foo] => none *[@id] => elt-1 *[@id="elt-1" or @foo="bar"] => elt-1 *[@id=~/elt2$/] => none *[@id=~/2-2$/] => elt2-2 *[@id=~/^elt2/] => elt2-1 [@id="elt1-1"] => none [@foo] => none [@id] => elt-1 [@id="elt-1" or @foo="bar"] => elt-1 [@id=~/elt2$/] => none [@id=~/2-2$/] => elt2-2 [@id=~/^elt2/] => elt2-1 #PCDATA => text level1 elt[text(subelt)="text}" ] => none elt2[text(subelt)="text}"] => elt2-3 elt2[text()="text}"] => none elt2[text(subelt)='text"'] => elt2-3 elt2[text(subelt)="text'"] => elt2-3 [text(subelt)="text}"] => elt2-3 [text(subelt)="text1"] => elt-1 [text(subelt)="text 2"] => elt2-3 *[text(subelt)="text1"] => elt-1 *[text(subelt)="text 2"] => elt2-3 elt2[text(subelt)="text 2"]=> elt2-3 elt[text(subelt)="text 2"] => none *[text(subelt)="foo"] => none *[text(subelt)=~/text/] => elt-1 *[text(subelt)=~/^ext/] => none [text(subelt)="foo"] => none [text(subelt)=~/text/] => elt-1 [text(subelt)=~/^ext/] => none elt2[text(subelt)="text"] => elt2-2 elt[text(subelt)="text"] => none elt[text(subelt)="foo"] => none elt[text(subelt)=~/text/] => elt-1 elt[text(subelt)=~/^ext/] => none elt2[text(subelt)="text"] => elt2-3 elt2[text(subelt)="foo"] => none elt2[text(subelt)=~/tex/] => elt2-3 elt2[text(subelt)=~/^et/] => none elt2[text(subelt)=~/^et}/] => none /ELT/i => elt-1 elt2[text(subelt)='text"'] => elt2-3 elt[@val>'1'] => elt-2 @val>"1" => elt-2 elt[@val<"2"] => elt-1 @val<"2" => elt-1 elt[@val>1] => elt-2 @val>1 => elt-2 elt[@val<2] => elt-1 @val<2 => elt-1 @val => elt-1 [@val="1" or @dummy="2"] => elt-1 [@val="2" or @dummy="2"] => elt-2 *[@val="1" or @dummy="2"] => elt-1 *[@val="2" or @dummy="2"] => elt-2 @val="1" and @dummy="2" => none @val="1" or @dummy="2" => elt-1 @val="2" or @dummy="2" => elt-2 [@val=~/2/] => elt-2 *[@val=~/2/] => elt-2 @val=~/^2/ => elt-2 @val!~/^1/ => elt-2 XML-Twig-3.52/t/test_expand_external_entities.t0000755000175000017500000000254512732215763022052 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; $|=1; use XML::Twig; my $TMAX=3; print "1..$TMAX\n"; my $xml_file= File::Spec->catfile( "t", "test_expand_external_entities.xml"); my $dtd_file= File::Spec->catfile( "t", "test_expand_external_entities.dtd"); my( $xml, $dtd, $xml_expanded, %ent); { local undef $/; open XML, "<$xml_file" or die "cannot open $xml_file: $!"; $xml= ; close XML; open DTD, "<$dtd_file" or die "cannot open $dtd_file: $!"; $dtd= ; close DTD; } # extract entities while( $dtd=~ m{}gx) { $ent{$1}= $2; } #" # replace in xml ($xml_expanded= $xml)=~ s{&(\w+);}{$ent{$1}}g; { my $t= XML::Twig->new( load_DTD => 1); $t->set_expand_external_entities; $t->parsefile( $xml_file); is( normalize_xml( $t->sprint), normalize_xml( $xml_expanded), "expanded document"); } { my $t= XML::Twig->new( load_DTD => 1, expand_external_ents => 1); $t->parsefile( $xml_file); is( normalize_xml( $t->sprint), normalize_xml( $xml_expanded), "expanded document"); } { (my $xml_no_dtd= $xml_expanded)=~ s{^}{}s; my $t= XML::Twig->new( load_DTD => 1, expand_external_ents => 1, do_not_output_DTD => 1); $t->parsefile( $xml_file); is( normalize_xml( $t->sprint), normalize_xml( $xml_no_dtd), "expanded document"); } exit 0; XML-Twig-3.52/t/test2_3.res0000644000175000017500000000162113015347616015524 0ustar mrodrigumrodrigu ]>
S1 I1S1 I2S1 TitleS1 P1S2 P2Note P1S1 para 3
S2 introS2 TitleS2 P1S2 P2S2 P3
Annex TitleAnnex P1Annex P2
XML-Twig-3.52/t/test_pos.t0000755000175000017500000000275512732215763015571 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use Carp; $|=1; # test for the various conditions in navigation methods use XML::Twig; my $t= XML::Twig->new; $t->parse( ' an element an element an element an element an element an element an element '); my @data=; my @data_without_comments= grep { !m{^\s*(#.*)?$} } @data; my @test= map { s{\#.*$}{}; $_ } @data_without_comments; #my @test= map { s{#.*$}{}; $_ } grep { !m{^\s*(#.*)?$} } ; my $nb_test= @test; print "1..$nb_test\n"; my $i=1; foreach my $test (@test) { my( $id, $exp, $expected_pos)= split /\t+/, $test; chomp $expected_pos; $exp= '' if( $exp eq '_'); test( $i++, $id, $exp, $expected_pos); } sub test { my( $i, $id, $exp, $expected_pos)= @_; my $elt= $t->elt_id( $id); my $pos= $elt->pos( $exp); if( $pos == $expected_pos) { print "ok $i\n"; } else { print "not ok $i\n"; my $filter= $exp ? " filter: $exp" : ''; warn "test $i: $id $filter - expected $expected_pos, actual $pos\n"; } } exit 0; __DATA__ #id exp expected doc _ 1 doc elt1 0 doc toto 0 elt1_1 _ 1 elt1_1 elt1 1 elt1_1 toto 0 elt1_2 _ 2 elt1_2 elt1 2 elt1_2 toto 0 elt2_1 _ 4 elt2_1 elt1 0 elt2_1 elt2 1 elt2_1 toto 0 elt2_2 _ 6 elt2_2 elt1 0 elt2_2 elt2 2 elt2_2 toto 0 XML-Twig-3.52/t/is_field.t0000755000175000017500000000270612732215763015503 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use XML::Twig; $|=1; my $i=1; my $TMAX=43; # do not forget to update! print "1..$TMAX\n"; print "ok $i\n"; # loading $i++; my $t= XML::Twig->new(); $t->parse( \*DATA); foreach my $elt ($t->descendants) { if( ($elt->tag eq 'field') && !$elt->is_field) { print "not ok $i "; warn $elt->id, " not recognized as field\n"; } elsif( ($elt->tag ne 'field') && $elt->is_field) { print "not ok $i "; my $elt_id= $elt->id || $elt->text; warn " $elt_id recognized as field\n"; } else { print "ok $i\n"; } $i++; } exit 0; __DATA__ field 1 text 1 text 2 text 3 field 2 text 4 text 5field field 3 field 4 field 5field 6 field 7 field 8 field 9 0 a field 10 XML-Twig-3.52/t/test_3_27.t0000755000175000017500000004326612732215763015444 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; $|=1; my $DEBUG=0; use XML::Twig; my $TMAX=82; print "1..$TMAX\n"; { # test reverse call mode my $doc=q{ }; my $res=''; my $t= XML::Twig->new( twig_handlers => { '_all_' => sub { $res.= $_->id } }, top_down_handlers => 1, ) ->parse( $doc); is( $res, 'da1b1b2c1b3e1a2b4b5c2b6', 'top_down_handlers _all_'); $res=''; $t= XML::Twig->new( twig_handlers => { 'b' => sub { $res.= $_->id } }, top_down_handlers => 1, ) ->parse( $doc); is( $res, 'b1b2b3b4b5b6', 'top_down_handlers b)'); $res=''; $t= XML::Twig->new( twig_handlers => { _default_ => sub { $res.= $_->id } }, top_down_handlers => 1, ) ->parse( $doc); is( $res, 'da1b1b2c1b3e1a2b4b5c2b6', 'top_down_handlers _default_)'); $res=''; $t= XML::Twig->new( twig_handlers => { a => sub { $res.= $_->id; }, b => sub { $res.= $_->id; }, c => sub { $res.= $_->id; }, e => sub { $res.= $_->id; }, }, top_down_handlers => 1, ) ->parse( $doc); is( $res, 'a1b1b2c1b3e1a2b4b5c2b6', 'top_down_handlers with purge)'); } { my $called=0; my $t= XML::Twig->new( twig_handlers => { 'doc[@a="="]' => sub { $called++; } }) -> parse( ''); is( $called, 1, 'handler on attribute with a value of "="'); } { # test error message for XPath query starting with a / on a node when the twig is not available my $sect; { my $t= XML::Twig->nparse( ''); $sect= $t->root->first_child( 'sect'); } $sect->cut; is( $sect->get_xpath( './elt', 0)->sprint, '', " XPath query ok"); eval { $sect->get_xpath( '/doc/elt'); }; matches( $@, qr/^cannot use an XPath query starting with a \/ on a node not attached to a whole twig/, "XPath query starting with a /") ; } { # test updating #att in start_tag_handlers my( $b, $e11, $e12)= '' x 3; my $t= XML::Twig->new( start_tag_handlers => { a => sub { $_->parent->set_att( '#a' => 1); }, }, twig_handlers => { 'e1[@#a]/b' => sub { $b .= $_->id || $_->tag }, 'e1[@#a]' => sub { $e11 .= $_->id || $_->tag }, 'e1[!@#a]' => sub { $e12 .= $_->id || $_->tag }, 'e1[@#a=1]/b' => sub { $b .= $_->id || $_->tag }, }, ) ->parse( q{}) ; is( $b , 'b1b1', 'trigger on e1[@#a]/b'); is( $e11, 'e1-1', 'trigger on e1[@#a]' ); is( $e12, 'e1-2', 'trigger on e1[!@#a]' ); } { # numerical tests in handlers my( $ngt, $nlt, $nge, $nle, $neq, $nne)= '' x 6; my( $agt, $alt, $age, $ale, $aeq, $ane)= '' x 6; my $t= XML::Twig->new( twig_handlers => { 'n[@a>2]' => sub { $ngt .= $_->id }, 'n[@a>=2]' => sub { $nge .= $_->id }, 'n[@a<2]' => sub { $nlt .= $_->id }, 'n[@a<=2]' => sub { $nle .= $_->id }, 'n[@a=2]' => sub { $neq .= $_->id }, 'n[@a!=2]' => sub { $nne .= $_->id }, 'a[@a>"b"]' => sub { $agt .= $_->id }, 'a[@a>="b"]' => sub { $age .= $_->id }, 'a[@a<"b"]' => sub { $alt .= $_->id }, 'a[@a<="b"]' => sub { $ale .= $_->id }, 'a[@a="b"]' => sub { $aeq .= $_->id }, 'a[@a!="b"]' => sub { $ane .= $_->id }, }, ) ->parse( q{
}); is( $ngt, 'n3', ' numerical test: >' ); is( $nge, 'n2n3', ' numerical test: >='); is( $nlt, 'n1n4', ' numerical test: <' ); is( $nle, 'n1n2n4', ' numerical test: <='); is( $neq, 'n2', ' numerical test: ='); is( $nne, 'n1n3n4', ' numerical test: !='); is( $agt, 'a3', ' string test: >' ); is( $age, 'a2a3', ' string test: >='); is( $alt, 'a1a4', ' string test: <' ); is( $ale, 'a1a2a4', ' string test: <='); is( $aeq, 'a2', ' string test: ='); is( $ane, 'a1a3a4', ' string test: !='); } { # test former_* methods my $t= XML::Twig->nparse( ''); my $e2= $t->elt_id( 'e2'); ok( ! defined( $e2->former_parent), "former_parent on uncut element" ); ok( ! defined( $e2->former_prev_sibling), "former_prev_sibling on uncut element"); ok( ! defined( $e2->former_next_sibling), "former_next_sibling on uncut element"); $e2->cut; is( $e2->former_parent->id, "d", "former_parent on cut element" ); is( $e2->former_prev_sibling->id, "e1", "former_prev_sibling on cut element"); is( $e2->former_next_sibling->id, "e3", "former_next_sibling on cut element"); $e2->paste( after => $e2->former_next_sibling); is( $e2->former_parent->id, "d", "former_parent on cut element (after paste)" ); is( $e2->former_prev_sibling->id, "e1", "former_prev_sibling on cut element (after paste)"); is( $e2->former_next_sibling->id, "e3", "former_next_sibling on cut element (after paste)"); } { # test merge my $t= XML::Twig->nparse( 'foobar'); my $e= $t->first_elt( 'e'); $e->merge( $e->next_sibling); is( $e->text, 'foobar', "merge"); } if( $] > 5.008) { # testing ignore on the current element my $calls; my $h= sub { $calls.= $_[1]->tag; }; my $t= XML::Twig->new( twig_handlers => { _all_ => sub { $calls.= $_[1]->tag; } }, start_tag_handlers => { b => sub { shift()->ignore } } ) ->parse( q{}) ; is( $calls, 'ga', 'ignore on an element'); is( $t->sprint, '', 'tree build with ignore on an element'); # testing ignore on a non-current element $calls=''; my $t2= XML::Twig->new( twig_handlers => { _all_ => sub { $calls.= $_[1]->tag; } }, start_tag_handlers => { d => sub { $_[1]->parent->ignore } } ) ->parse( q{}) ; is( $calls, 'cfcfa', 'ignore on a parent element'); is( $t2->sprint, '', 'tree build with ignore on the parent of an element'); $calls=''; my $t3= XML::Twig->new( twig_handlers => { _all_ => sub { $calls.= $_[1]->tag; } }, start_tag_handlers => { d => sub { $_[1]->parent( 'b')->ignore } } ) ->parse( q{}) ; is( $calls, 'cfcfha', 'ignore on a grand-parent element'); is( $t3->sprint, '', 'tree build with ignore on the grand parent of an element'); $calls=''; # ignore from a regular handler my $t4= XML::Twig->new( twig_handlers => { _default_ => sub { $calls.= $_[1]->tag; }, g => sub { $calls.= $_[1]->tag; $_[1]->parent( 'b')->ignore; }, } ) ->parse( q{}) ; is( $calls, 'cdgfcdgfha', 'ignore from a regular handler'); is( $t4->sprint, '', 'tree build with ignore on the parent of an element in a regular handler'); $calls=''; # ignore from a regular handler my $t5= XML::Twig->new( twig_handlers => { _default_ => sub { $calls.= $_[1]->tag; }, g => sub { $calls.= $_[1]->tag; $_[1]->parent( 'b')->ignore; }, } ) ->parse( q{}) ; is( $calls, 'xcdgfcdgfha', 'ignore from a regular handler (2)'); is( $t5->sprint, '', 'tree build with ignore from a regular handler (2)'); eval { my $t6= XML::Twig->new( twig_handlers => { c => sub { $_->prev_elt( 'f')->ignore } }) ->parse( ''); }; matches( $@, '^element to be ignored must be ancestor of current element', 'error ignore-ing an element (not ancestor)'); eval { my $t6= XML::Twig->new( twig_handlers => { f => sub { $_->first_child( 'c')->ignore } }) ->parse( ''); }; matches( $@, '^element to be ignored must be ancestor of current element', 'error ignore-ing an element ( descendant)'); } else { skip( 12, "not tested under perl < 5.8"); } { my $doc=''; (my $indented_doc= $doc)=~ s{()}{" " x $2 . $1}eg; $indented_doc=~ s{>}{>\n}g; $indented_doc=~ s{\s*}{}g; is( XML::Twig->nparse( $doc)->sprint, $doc, "nparse output"); is( XML::Twig->nparse_e( $doc)->sprint, $doc, "nparse_e output"); is( XML::Twig->nparse_pp( $doc)->sprint, $indented_doc, "nparse_pp output"); is( XML::Twig->nparse_ppe( $doc)->sprint, $indented_doc, "nparse_ppe output"); } if( _use( 'HTML::TreeBuilder', 4.00) ) { # first alternative is pre-3.23_1, second one with 3.23_1 (and beyond?) { my $doc=qq{

dummy

}; is_like( XML::Twig->nparse( $doc)->sprint, '

dummy

', 'invalid att'); is_like( XML::Twig->nparse_e( $doc)->sprint, '

dummy

', 'invalid att (nparse_e)'); } { my $doc=qq{

dummy

}; # used to trigger an error, now XML::Twig is fault tolerant to bad attributes #eval { XML::Twig->nparse_e( $doc); }; #ok( $@, "error in html (nparse_e mode 2, HTB < 3.23 or >= 4.00: $@)"); is_like( XML::Twig->nparse_e( $doc)->sprint, '

dummy

', 'wrong attributes, nparse_e mode 2, HTB < 3.23 or >= 4.00' ); } { my $doc=qq{

dummy

}; # used to trigger an error, now XML::Twig is fault tolerant to bad attributes #eval { XML::Twig->nparse_e( $doc); }; #ok( $@, "error in html (nparse_e mode 3, HTB < 3.23 or >= 4.00: $@)"); is_like( XML::Twig->nparse_e( $doc)->sprint, '

dummy

', 'wrong attributes, nparse_e mode 2, HTB < 3.23 or >= 4.00' ); } } else { skip( 4 => "need HTML::TreeBuilder > 4.00 to test error display with HTML data"); } { my $e= XML::Twig::Elt->new( 'e'); is( $e->tag_to_span->sprint, '', "tag_to_span"); is( $e->tag_to_span->sprint, '', "tag_to_span again "); is( $e->tag_to_div->sprint, '
', "tag_to_div"); is( $e->tag_to_div->sprint, '
', "tag_to_div again "); } # added coverage { my $doc= "\n"; my $t= XML::Twig->nparse( $doc); (my $expected= $doc)=~ s{foo}{bar}; $t->root->first_child( '#CDATA')->set_content( ' bar '); is( $t->root->sprint , $expected, 'set_content on a CDATA element'); } { my $doc= "



"; my $t= XML::Twig->nparse( pretty_print => 'none', $doc); (my $expected= $doc)=~ s{(

|)}{

}g; is( $t->root->sprint( { empty_tags => 'expand' } ) , $expected, 'sprint br with empty_tags expand'); ($expected= $doc)=~ s{(

|)}{
}g; is( $t->root->sprint( { empty_tags => 'html' } ) , $expected, 'sprint br with empty_tags html'); ($expected= $doc)=~ s{(

|)}{
}g; is( $t->root->sprint( { empty_tags => 'normal' } ) , $expected, 'sprint br with empty_tags normal'); } { my $doc= "

foo

bar

"; my $t= XML::Twig->nparse( pretty_print => 'none', $doc); is( $t->root->sprint( { pretty_print => 'indented' } ) , "\n

foo

\n

bar

\n
\n", 'sprint br with pretty_print indented'); is( $t->root->sprint( { pretty_print => 'none' } ) , $doc, 'sprint br with pretty_print none'); } { my $doc='&'; my $t= XML::Twig->new; $t->set_keep_encoding( 1); is( $t->parse( $doc)->sprint, $doc, 'set_keep_encoding(1)'); $t->set_keep_encoding( 0); is( $t->parse( $doc)->sprint, $doc, 'set_keep_encoding(1)'); } { my $doc=''; is( XML::Twig->nparse( quote => 'single', $doc)->sprint, q{}, 'quote option'); } { my $doc= qq{]>\n}; (my $expected= $doc)=~ s{ \[.*?\]}{}; my $t= XML::Twig->nparse( $doc); my $entity_list = $t->entity_list; foreach my $entity ($entity_list->list()) { $entity_list->delete($entity->name); } is( $t->sprint( Update_DTD => 1 ), $expected, 'parse entities with all chars in their name'); } { my $tmp= "tmp"; foreach my $doc ( qq{]>}, qq{}, qq{}, ) { foreach my $keep_encoding ( 0..1) { open( MYOUT, ">$tmp") or die "cannot open $tmp: $!"; my $t= XML::Twig->new( twig_roots=> { dummy => sub {} }, twig_print_outside_roots => \*MYOUT, keep_encoding => $keep_encoding, ) ->parse( $doc); close MYOUT; is_like( slurp( $tmp), $doc, "file with no DTD but entities (keep_encoding: $keep_encoding)"); unlink $tmp; } } } { my $doc=qq{foobarbaztoto tutu}; my $t= XML::Twig->parse( $doc); is( $t->elt_id( "e1")->text( 'no_recurse'), 'foobaz', "text_only"); is( $t->elt_id( "e2")->text_only, 'toto tata tutu', "text_only (cdata section)"); is( $t->elt_id( "e")->text_only, 'bar', "text_only (no embedded elt)"); } { my $doc=qq{tutu <&ent; notata}; my $t= XML::Twig->parse( $doc); is( $t->elt_id( "e1")->text(), 'tutu <&ent; notata', "text with ent"); is( $t->elt_id( "e1")->text( 'no_recurse'), 'tutu <&ent; tata', "text no_recurse with ent"); is( $t->elt_id( "e1")->xml_text( ), 'tutu <&ent; notata', "xml_text with ent"); is( $t->elt_id( "e1")->xml_text( 'no_recurse'), 'tutu <&ent; tata', "xml_text no_recurse with ent"); } if( $] > 5.008) { my $r; XML::Twig->parse( twig_handlers => { '/a/b//c' => sub { $r++; } }, q{foo} ); ok( $r, "handler condition with // and nested elts (/a//b/c)"); } else { skip( 1, "not tested under perl < 5.8"); } if( $] > 5.008) { my @r; XML::Twig->parse( twig_handlers => { 's[@#a="1"]' => sub { push @r, $_->id}, 's/e[@x="1"]' => sub { $_->parent->set_att( '#a' => 1); }, }, q{ }, ); is( join( ':', @r), 's2:s3', 'inner handler changing parent attribute value'); } else { skip( 1, "not tested under perl < 5.8"); } if( $] > 5.008) { my @r; XML::Twig->parse( twig_roots => { '/d/s[@a="1"]/e[@a="1"]' => => sub { push @r, $_->id}, }, q{ }, ); is( join( ':', @r), 'e3:e8', 'complex condition with twig_roots'); } else { skip( 1, "not tested under perl < 5.8"); } exit 0; # or you get a weird error under 5.6.2 XML-Twig-3.52/t/xmlxpath_29desc_with_predicate.t0000755000175000017500000000067412732215763022020 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 4); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @bbb = $t->findnodes( '/descendant::BBB[1]'); ok(@bbb, 1); ok($bbb[0]->string_value, "OK"); exit 0; __DATA__ OK NOT OK XML-Twig-3.52/t/xmlxpath_28ancestor2.t0000755000175000017500000000223612732215763017722 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 5); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @nodes; @nodes = $t->findnodes( '//Footnote'); ok(@nodes, 1); my $footnote = $nodes[0]; #@nodes = $footnote->findnodes('ancestor::*', $t); @nodes = $footnote->findnodes( 'ancestor::*'); ok(@nodes, 3); @nodes = $footnote->findnodes('ancestor::text:footnote', $t); ok(@nodes, 1); exit 0; __DATA__ 2 AxKit is very flexible in how it lets you transform the XML on the server, and there are many modules you can plug in to AxKit to allow you to do these transformations. For this reason, the AxKit installation does not mandate any particular modules to use, instead it will simply suggest modules that might help when you install AxKit. XML-Twig-3.52/t/test_xpath_cond.t0000755000175000017500000000613112732215763017107 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use XML::Twig; $|=1; my $t= XML::Twig->new; $t->parse( ' elt 1 elt 2 elt 3 2 3 <:elt id=":elt">yep, that is a valid name '); my @data= grep { !/^##/ && m{\S} } ; my @exp; my %result; foreach( @data) { chomp; my ($exp, $id_list) = split /\s*=>\s*/ ; $id_list=~ s{\s+$}{}; $result{$exp}= $id_list; push @exp, $exp; } my $nb_tests= keys %result; print "1..$nb_tests\n"; my $i=1; foreach my $exp ( @exp) { my $expected_result= $result{$exp}; my @result= $t->get_xpath( $exp); my $result; if( @result) { $result= join ' ', map { $_->id || $_->gi } @result; } else { $result= 'none'; } if( $result eq $expected_result) { print "ok $i\n"; } else { print "not ok $i\n"; print STDERR "$exp: expected '$expected_result' - real '$result'\n"; } $i++; } exit 0; __DATA__ /elt => none /elt[@foo="bar"] => none /*[@foo="bar"] => none //*[@foo="bar"] => none /* => doc /*[@id="doc"] => doc //*[@id="doc"] => doc //elt => elt-1 elt-2 elt-3 //*/elt => elt-1 elt-2 elt-3 /doc/elt => elt-1 elt-2 /*/elt => elt-1 elt-2 /doc/elt[ last()] => elt-2 /doc/*[ last()] => :elt //elt[@id='elt-1'] => elt-1 //*[@id='elt-1'] => elt-1 //[@id='elt-1'] => elt-1 //elt[@id='elt-1' or @id='elt-2'] => elt-1 elt-2 //elt[@id='elt-1' and @id='elt-2'] => none //elt[@id='elt-1' and @id!='elt-2'] => elt-1 //elt[@id=~ /elt/] => elt-1 elt-2 elt-3 //[@id='elt-1' or @id='elt-2'] => elt-1 elt-2 //[@id='elt-1' and @id='elt-2'] => none //[@id='elt-1' and @id!='elt-2'] => elt-1 //[@id=~ /elt/] => elt-1 elt-2 elt2-1 elt-3 elt2-2 elt2-3 elt2-4 :elt //*[@id='elt-1' or @id='elt-2'] => elt-1 elt-2 //*[@id='elt-1' and @id='elt-2'] => none //*[@id='elt-1' and @id!='elt-2'] => elt-1 //*[@id=~ /elt/] => elt-1 elt-2 elt2-1 elt-3 elt2-2 elt2-3 elt2-4 :elt //elt2[@att_int > 2] => elt2-4 /doc/elt2[ last()]/* => elt2-3 elt2-4 //*[@id=~/elt2/] => elt2-1 elt2-2 elt2-3 elt2-4 /doc/*[@id=~/elt2/] => elt2-1 elt2-2 /doc//*[@id=~/elt2/] => elt2-1 elt2-2 elt2-3 elt2-4 //*[@id=~/elt2-[34]/] => elt2-3 elt2-4 //*[@id!~/^elt/] => doc :elt //[@id=~/elt2-[34]/] => elt2-3 elt2-4 //[@id!~/elt2-[34]/] => doc elt-1 elt-2 elt2-1 elt-3 elt2-2 :elt //elt2[@id=~/elt2-[34]/] => elt2-3 elt2-4 //*[@id!~/elt2-[34]/] => doc elt-1 elt-2 elt2-1 elt-3 elt2-2 :elt //:elt => :elt //elt[string()="elt 1"] => elt-1 //elt[string()=~/elt 1/] => elt-1 //elt[string()=~/^elt 1/] => elt-1 //*[string()="elt 1"] => elt-1 #PCDATA //*[string()=~/elt 1/] => doc elt-1 #PCDATA //*[string()=~/^elt 1/] => doc elt-1 #PCDATA //[string()="elt 1"] => elt-1 #PCDATA //[string()=~/elt 1/] => doc elt-1 #PCDATA //[string()=~/^elt 1/] => doc elt-1 #PCDATA //[string()="elt 2"] => elt-2 #PCDATA //[string()=~/elt 2/] => doc elt-2 #PCDATA //[string()=~/^elt 2/] => elt-2 #PCDATA XML-Twig-3.52/t/xmlxpath_10pipe.t0000755000175000017500000000105512732215763016744 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 6); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @nodes; @nodes = $t->findnodes( '//CCC | //BBB'); ok(@nodes, 3); ok($nodes[0]->getName, "BBB"); # test document order @nodes = $t->findnodes( '/AAA/EEE | //BBB'); ok(@nodes, 2); @nodes = $t->findnodes( '/AAA/EEE | //DDD/CCC | /AAA | //BBB'); ok(@nodes, 4); exit 0; __DATA__ XML-Twig-3.52/t/test_variables.t0000755000175000017500000000263512732215763016735 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use XML::Twig; $|=1; print "1..6\n"; #warn "\n\n### warnings are normal here ###\n\n"; my $t= XML::Twig->new->parse( \*DATA); # intercept warnings $SIG{__WARN__} = sub { print STDERR @_ if( $_[0]=~ /^test/); }; my $s= $t->simplify( var_attr => 'var', variables => { 'v2' => 'elt2'}); if( $s->{elt2} eq 'elt using elt1') { print "ok 1\n" } else { print "not ok 1\n"; warn "test 1: /$s->{elt2}/ instead of 'elt using elt1'\n"; } if( $s->{elt3} eq 'elt using elt1') { print "ok 2\n" } else { print "not ok 2\n"; warn "test 2: /$s->{elt3}/ instead of 'elt using elt1'\n"; } if( $s->{elt4} eq 'elt using elt2') { print "ok 3\n"; warn "\n"; } else { print "not ok 3\n"; warn "test 3: /$s->{elt4}/ instead of 'elt using elt2'\n"; } if( $s->{elt5}->{att1} eq 'att with elt1') { print "ok 4\n" } else { print "not ok 4\n"; warn "test 4: /$s->{elt5}->{att1}/ instead of 'att with elt1'\n"; } $s= $t->simplify( variables => { 'v2' => 'elt2'}); if( $s->{elt2} eq 'elt using $v1') { print "ok 5\n" } else { print "not ok 5\n"; warn "test 5: /$s->{elt2}/ instead of 'elt using \$v1'\n"; } if( $s->{elt4} eq 'elt using elt2') { print "ok 6\n" } else { print "not ok 6\n"; warn "test 6: /$s->{elt4}/ instead of 'elt using elt2'\n"; } exit 0; __DATA__ elt1 elt using $v1 elt using ${v1} elt using $v2 XML-Twig-3.52/t/xmlxpath_24namespaces.t0000755000175000017500000001016412732215763020134 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; if( defined $XML::XPathEngine::VERSION && $XML::XPathEngine::VERSION < 0.09) { print "1..1\nok 1\n"; warn "cannot use set_namespace, needs XML::XPathEngine 0.09+ (installed version is $XML::XPathEngine::VERSION)\n"; exit; } plan( tests => 15); my $t= XML::Twig::XPath->new->parse( *DATA); my $node= $t->findvalue( '//attr:node/@attr:findme'); ok( $node, 'someval'); my @nodes; # Do not set namespace prefixes - uses element context namespaces @nodes = $t->findnodes('//foo:foo', $t); # should find foobar.com foos ok( @nodes, 3); @nodes = $t->findnodes('//goo:foo', $t); # should find no foos ok( @nodes, 0); @nodes = $t->findnodes('//foo', $t); # should find default NS foos ok( @nodes, 2); $node= $t->findvalue( '//*[@attr:findme]'); ok( $node, 'attr content'); # Set namespace mappings. $t->set_namespace("foo" => "flubber.example.com"); $t->set_namespace("goo" => "foobar.example.com"); @nodes = $t->findnodes('//foo:foo', $t); # should find flubber.com foos ok( @nodes, 2); @nodes = $t->findnodes('//goo:foo', $t); # should find foobar.com foos ok( @nodes, 3); @nodes = $t->findnodes('//foo', $t); # should find default NS foos ok( @nodes, 2); ok( $t->findvalue('//attr:node/@attr:findme'), 'someval'); ## added to test set_namespace if( ! defined $XML::XPathEngine::VERSION ) { my_skip( 5, "can only test set_strict_namespaces with XML::XPathEngine 0.09+ installed"); } else { my $xml= ' Node 1 Node 2 '; { my $twig = XML::Twig::XPath->new(); $twig->parse( $xml); $twig->set_namespace('example','http://example.com/'); $twig->set_strict_namespaces(1); my $v = $twig->findvalue('//foo'); ok( $v, '', '//foo (strict_namespaces)'); $twig->set_strict_namespaces(0); my $v1 = $twig->findvalue('//foo'); ok( $v1, 'Node 1', '//foo (default behaviour)'); } { my $twig = XML::Twig::XPath->new(); $twig->set_namespace('example','http://example.com/'); $twig->parse( $xml); my $v = $twig->findvalue('//example:foo'); ok( $v, 'Node 1Node 2', '//example:foo'); } { my $twig = XML::Twig::XPath->new(); $twig->parse( $xml); my $v = $twig->findvalue('//foo'); ok( $v, 'Node 1', '//foo'); } { my $twig = XML::Twig::XPath->new(); $twig->parse( $xml); $twig->set_namespace('example','http://example.com/'); my $v = $twig->findvalue('//foo'); ok( $v, 'Node 1', '//foo (default behaviour)'); } } # added to test namespaces on attributes { my $xml= ' Node 1 Node 2 Node 3 Node 4 '; my $twig = XML::Twig::XPath->new(); $twig->parse( $xml); $twig->set_namespace('b','http://example.com/'); ok( $twig->findvalue( '//*[@b:att]'), 'Node 1Node 3', 'namespaces on attributes'); } { my %seen_message; sub my_skip { my( $nb_skip, $message)= @_; $message ||=''; unless( $seen_message{$message}) { warn "\n$message: skipping $nb_skip tests\n"; $seen_message{$message}++; } for (1..$nb_skip) { ok( 1); } } } exit 0; __DATA__ attr content XML-Twig-3.52/t/test_xml_split.xml0000644000175000017500000000055012732215763017324 0ustar mrodrigumrodrigu elt1 content 1 elt1 content 2 elt1 content 3 elt1 content 4 elt1 content 5 elt1 content 6 elt1 content 7 elt1 content 8 elt1 content 9 XML-Twig-3.52/t/xmlxpath_14axisancestor.t0000755000175000017500000000105412732215763020515 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 5); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @nodes; @nodes = $t->findnodes( '/AAA/BBB/DDD/CCC/EEE/ancestor::*'); ok(@nodes, 4); ok($nodes[1]->getName, "BBB"); # test document order @nodes = $t->findnodes( '//FFF/ancestor::*'); ok(@nodes, 5); exit 0; __DATA__ XML-Twig-3.52/t/test_bugs_3_18.t0000755000175000017500000006323412732215763016461 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; $|=1; my $DEBUG=0; use XML::Twig; my $TMAX=158; print "1..$TMAX\n"; { #bug with long CDATA # get an accented char in iso-8859-1 my $char_file=File::Spec->catfile('t', "latin1_accented_char.iso-8859-1"); open( CHARFH, "<$char_file") or die "cannot open $char_file: $!"; my $latin1_char=; chomp $latin1_char; close CHARFH; my %cdata=( "01- 1023 chars" => 'x' x 1022 . 'a', "02- 1024 chars" => 'x' x 1023 . 'a', "03- 1025 chars" => 'x' x 1024 . 'a', "04- 1026 chars" => 'x' x 1025 . 'a', "05- 2049 chars" => 'x' x 2048 . 'a', "06- 1023 chars spaces" => 'x' x 1020 . ' a', "07- 1024 chars spaces" => 'x' x 1021 . ' a', "08- 1025 chars spaces" => 'x' x 1022 . ' a', "09- 1026 chars spaces" => 'x' x 1023 . ' a', "10- 2049 chars spaces" => 'x' x 2048 . ' a', "11- 1023 accented chars" => $latin1_char x 1022 . 'a', "12- 1024 accented chars" => $latin1_char x 1023 . 'a', "13- 1025 accented chars" => $latin1_char x 1024 . 'a', "14- 1026 accented chars" => $latin1_char x 1025 . 'a', "15- 2049 accented chars" => $latin1_char x 2048 . 'a', "16- 1023 accented chars spaces" => $latin1_char x 1020 . ' a', "17- 1024 accented chars spaces" => $latin1_char x 1021 . ' a', "18- 1025 accented chars spaces" => $latin1_char x 1022 . ' a', "19- 1026 accented chars spaces" => $latin1_char x 1023 . ' a', "20- 2049 accented chars spaces" => $latin1_char x 2048 . ' a', "21- 511 accented chars" => $latin1_char x 511 . 'a', "22- 512 accented chars" => $latin1_char x 512 . 'a', "23- 513 accented chars" => $latin1_char x 513 . 'a', #"00- lotsa chars" => 'x' x 2000000 . 'a', # do not try this at home # but if you do with a higher number, let me know! ); if( ($] == 5.008) || ($] < 5.006) || ($XML::Parser::VERSION <= 2.27) ) { skip( scalar keys %cdata, "KNOWN BUG in 5.8.0 and 5.005 or with XML::Parser 2.27 with keep_encoding and long (>1024 char) CDATA, " . "see RT #14008 at http://rt.cpan.org/Ticket/Display.html?id=14008" ); } elsif( perl_io_layer_used()) { skip( scalar keys %cdata, "cannot test parseurl when UTF8 perIO layer used " . "(due to PERL_UNICODE being set or -C command line option being used)\n" ); } else { foreach my $test (sort keys %cdata) { my $cdata=$cdata{$test}; my $doc= qq{}; my $twig= XML::Twig->new( keep_encoding => 1)->parse($doc); my $res = $twig->root->first_child->cdata; is( $res, $cdata, "long CDATA with keep_encoding $test"); } } } # subs_text on text with new lines { my $doc= " foo1 \n foo2 "; my $t= XML::Twig->new->parse( $doc); (my $expected= $doc)=~ s{foo}{bar}g; $t->subs_text( qr{foo}, "bar"); is( $t->sprint, $expected, "subs_text on string with \n"); $expected=~ s{ }{ }g; $t->subs_text( qr{ }, q{&ent( " ")} ); if( 0 && $] =~ m{^5.006}) { skip( 1, "known bug in perl 5.6.*: subs_text with an entity matches line returns\n" . " this bug is under investigation\n"); } else { is( $t->sprint, $expected, "subs_text on string with \n"); } } # testing ID processing { # setting existing id to a different value my $t= XML::Twig->new->parse( ''); $t->root->set_id( "i2"); is( id_list( $t), "i2", "changing an existing id"); $t->root->del_id(); is( id_list( $t), "", "deleting an id"); $t->root->del_id(); is( id_list( $t), "", "deleting again an id"); $t->root->set_id( "0"); is( id_list( $t), "0", "changing an existing id to 0"); $t->root->del_id(); is( id_list( $t), "", "deleting again an id"); } { # setting id through the att my $t= XML::Twig->new->parse( ''); $t->root->set_att( id => "i2"); is( fid( $t, "i2"), "i2", "changing an existing id using set_att"); $t->root->set_att( id => "0"); is( fid( $t, "0"), "0", "using set_att with a id of 0"); $t->root->set_atts( { id => "i3" }); is( fid( $t, "i3"), "i3", "using set_atts"); $t->root->set_atts( { id => "0" }); is( fid( $t, "0"), "0", "using set_atts with an if of 0"); } { # setting id through a new element my $t= XML::Twig->new->parse( ''); my $n= $t->root->insert_new_elt( elt => { id => "i2" }); is( id_list( $t), "i1-i2", "setting id through a new element"); $n= $t->root->insert_new_elt( elt => { id => "0" }); is( id_list( $t), "0-i1-i2", "setting id through a new element"); } { # setting ids through a parse my $t= XML::Twig->new->parse( ''); my $elt= XML::Twig::Elt->parse( ''); $elt->paste( $t->root); is( id_list( $t), "0-i1-i2-i3", "setting id through a parse"); } { # test ]]> in text my $doc=q{]]>}; is( XML::Twig->new->parse( $doc)->sprint, $doc, "]]> in char data"); } sub fid { my $elt= $_[0]->elt_id( $_[1]) or return "unknown"; return $elt->att( $_[0]->{twig_id}); } # testing ignore messing up with whitespace handling { my $doc=qq{\n ba\n foo\n bar\n}; my $res; my $t= XML::Twig->new( twig_roots => { elt => sub { $_->ignore; }, elt2 => sub { $res.= $_->text; }, }, start_tag_handlers => { elt2 => sub { $_[0]->ignore if( $_->att( 'ignore')); }, }, ); $t->parse( $doc); is( $res => 'bar', 'checking that ignore and whitespace handling work well together'); } # test on handlers with ns { my $doc=q{ elt with ns att elt with no ns att }; my( $res1, $res2); my $t= XML::Twig->new( map_xmlns => { uri => 'n' }, twig_handlers => { 'n:elt[@n:att="val"]' => sub { $res1 .= $_->text; }, 'n:elt[@att="val"]' => sub { $res2 .= $_->text; }, }, ) ->parse( $doc); is( $res1 => 'elt with ns att', 'twig handler on n:elt[@n:att="val"]'); is( $res2 => 'elt with no ns att', 'twig handler on n:elt[@att="val"]'); } # same with start_tag handlers { my $doc=q{ elt with ns att elt with no ns att }; my( $res1, $res2); my $t= XML::Twig->new( map_xmlns => { uri => 'n' }, start_tag_handlers => { 'n:elt[@n:att="val"]' => sub { $res1 .= $_->att( 'att2'); }, 'n:elt[@att="val"]' => sub { $res2 .= $_->att( 'att2'); }, }, ) ->parse( $doc); is( $res1 => 'ns_att', 'start_tag handler on n:elt[@n:att="val"]'); is( $res2 => 'non_ns_att', 'start_tag handler on n:elt[@att="val"]'); } # same with start_tag handlers and twig_roots { my $doc=q{ elt with ns att elt with no ns att }; my( $res1, $res2); my $t= XML::Twig->new( map_xmlns => { uri => 'n' }, twig_roots => { foo => 1 }, start_tag_handlers => { 'n:elt[@n:att="val"]' => sub { my( $t, $gi, %atts)= @_; $res1 .= $atts{att2}; }, 'n:elt[@att="val"]' => sub { my( $t, $gi, %atts)= @_; $res2 .= $atts{att2}; }, }, ) ->parse( $doc); is( $res1 => 'ns_att', 'start_tag handler on n:elt[@n:att="val"]'); is( $res2 => 'non_ns_att', 'start_tag handler on n:elt[@att="val"]'); } # tests for additional coverage { my $doc=q{foobar}; my $res=''; my $t= XML::Twig->new; $t->setTwigHandlers( { elt => sub { $res.= $_->text}, }); $t->setTwigHandlers(); $t->parse( $doc); is( $res => '', 'setTwigHandlers with no argument'); } { my $doc=q{foobar}; my $res; my $t= XML::Twig->new; $t->setTwigHandlers( { elt => sub { $res.= $_->text}, }); $t->parse( $doc); is( $res => 'foo', 'setTwigHandlers by itself'); } { my $doc=q{foobar}; my $res=''; my $t= XML::Twig->new; $t->setTwigHandlers( { '/doc/elt' => sub { $res.= $_->text}, }); $t->setTwigHandlers( { '/doc/elt' => undef, }); $t->parse( $doc); is( $res => '', 'setTwigHandlers with an undef path'); } { my $doc=q{foobar}; my $res=''; my $t= XML::Twig->new; $t->setTwigHandlers( { 'doc/elt' => sub { $res.= $_->text}, }); $t->setTwigHandlers( { 'doc/elt' => undef, }); $t->parse( $doc); is( $res => '', 'setTwigHandlers with an undef subpath'); } { my $doc=q{foobar}; my $res=''; my $t= XML::Twig->new; $t->setTwigHandlers( { 'elt[@att="baz"]' => sub { $res.= $_->text}, }); $t->setTwigHandlers( { 'elt[@att="bak"]' => sub { $res.= $_->text}, }); $t->setTwigHandlers( { 'elt[@att="baz"]' => undef, }); $t->setTwigHandlers( { 'elt[@att="bal"]' => undef, }); $t->parse( $doc); is( $res => '', 'setTwigHandlers with an undef att cond'); } { my $doc=q{foobar}; my $res=''; my $t= XML::Twig->new; $t->setTwigHandlers( { 'elt[@att=~/baz/]' => sub { $res.= $_->text}, }); $t->setTwigHandlers( { 'elt[@att=~/bar/]' => sub { $res.= $_->text}, }); $t->setTwigHandlers( { 'elt[@att=~/baz/]' => undef, }); $t->setTwigHandlers( { 'elt[@att=~/bas/]' => undef, }); $t->parse( $doc); is( $res => '', 'setTwigHandlers with undef regexp on att conds'); } { my $doc=q{foobar}; my $res=''; my $t= XML::Twig->new; $t->setTwigHandlers( { 'elt[string()="foo"]' => sub { $res.= $_->text}, }); $t->setTwigHandlers( { 'elt[string()="fool"]' => sub { $res.= $_->text}, }); $t->setTwigHandlers( { 'elt[string()="foo"]' => undef} ); $t->setTwigHandlers( { 'elt[string()="food"]' => undef} ); $t->parse( $doc); is( $res => '', 'setTwigHandlers with undef string conds'); } { my $doc=q{foobar}; my $res=''; my $t= XML::Twig->new; $t->setTwigHandlers( { 'elt[string()=~/foo/]' => sub { $res.= $_->text}, }); $t->setTwigHandlers( { 'elt[string()=~/fool/]' => sub { $res.= $_->text}, }); $t->setTwigHandlers( { 'elt[string()=~/foo/]' => undef}); $t->setTwigHandlers( { 'elt[string()=~/food/]' => undef}); $t->parse( $doc); is( $res => '', 'setTwigHandlers with undef string regexp conds'); } { my $doc=q{foobar}; my $res=''; my $t= XML::Twig->new; $t->setTwigHandlers( { '*[@att="baz"]' => sub { $res.= $_->text}, }); $t->setTwigHandlers( { '*[@att="bak"]' => sub { $res.= $_->text}, }); $t->setTwigHandlers( { '*[@att="baz"]' => undef, }); $t->setTwigHandlers( { '*[@att="bal"]' => undef, }); $t->parse( $doc); is( $res => '', 'setTwigHandlers with an undef start att cond'); } { my $doc=q{foobar}; my $res=''; my $t= XML::Twig->new; $t->setTwigHandlers( { '*[@att=~/baz/]' => sub { $res.= $_->text}, }); $t->setTwigHandlers( { '*[@att=~/bak/]' => sub { $res.= $_->text}, }); $t->setTwigHandlers( { '*[@att=~/baz/]' => undef, }); $t->setTwigHandlers( { '*[@att=~/bal/]' => undef, }); $t->parse( $doc); is( $res => '', 'setTwigHandlers with an undef start att regexp cond'); } { my $doc=q{foobar}; my $res=''; my $t= XML::Twig->new; $t->setStartTagHandlers( { 'elt[@att="baz"]' => sub { $res.= 'not this one'}, }); $t->setStartTagHandlers( { 'elt[@att="bal"]' => sub { $res.= $_->att( 'att') || 'none'}, }); $t->setStartTagHandlers( { 'elt[@att="baz"]' => sub { $res.= $_->att( 'att') || 'none'}, }); $t->parse( $doc); is( $res => 'baz', 'setStartTagHandlers'); } { my $doc=q{titlefoobar}; my $res=''; my $t= XML::Twig->new( twig_handlers => { 'level(2)' => sub { $res .= $_->text;} }) ->parse( $doc); is( $res => 'foobar', 'level cond'); } { my $doc=q{titlefoobar}; my $res=''; my $t= XML::Twig->new( twig_roots => { 'level(2)' => sub { $res .= $_->text;} }) ->parse( $doc); is( $res => 'foobar', 'level cond'); } { my $doc=q{}; my $res=''; XML::Twig->new( pi => 'process', twig_handlers => { '?' => sub { $res.=$_->data } })->parse( $doc); is( $res => 'd1d2', '? (any pi) handler'); } { my $doc=q{foo bar}; my $t= XML::Twig->new->parse( $doc); is( $t->sprint, $doc, 'embedded comments, output asis'); $t->root->first_child( 'elt')->first_child->set_pcdata( 'toto'); is( $t->sprint, 'toto', 'embedded comment removed'); } { my $doc=q{ ] > a &ent; is here }; my $t= XML::Twig->new->parse( $doc); $t->entity_list->add_new_ent( ent2 => 'bar'); my $res= $t->sprint(); is_like( $res, qq{]>} .qq{ a foo is here}, 'new ent, no update dtd'); $res=$t->sprint( updateDTD => 1); is_like( $res, qq{} . qq{]> a foo is here}, 'new ent update dtd' ); } { my $t=XML::Twig->new->parse( ''); $t->{entity_list}= XML::Twig::Entity_list->new; $t->entity_list->add_new_ent( foo => 'bar'); is_like( $t->sprint( update_DTD => 1), ']>', "new entity with update DTD"); } { my $t=XML::Twig->new( keep_encoding => 1)->parse( ''); $t->{entity_list}= XML::Twig::Entity_list->new; $t->entity_list->add_new_ent( foo => 'bar'); is_like( $t->sprint( update_DTD => 1), ']>', "new entity (keep_encoding)with update DTD" ); } { my $dtd= q{ ]> }; my $doc= q{tata}; my $t= XML::Twig->new->parse( $dtd . $doc); is_like( $t->dtd_text, $dtd, "dtd_text"); } { my $t=XML::Twig->new->parse( ''); is( $t->root->first_child( 'elt')->sprint, '', "nav, first pass"); is( $t->root->first_child( 'elt')->sprint, '', "nav, second pass"); is_undef( scalar $t->root->first_child( 'elt')->parent( 'toto'), "undef parent 1"); is_undef( scalar $t->root->parent( 'toto'), "undef parent 2"); is_undef( scalar $t->root->parent(), "undef parent 3"); } { my $t= XML::Twig->new->parse( ''); my $id= $t->root->id; $t->root->add_id(); is( $t->root->id, $id, "add_id on existing id"); my $elt= $t->root->first_child( 'elt'); $elt->cut; $elt->set_id( 'elt1'); is_undef( $t->elt_id( 'elt1'), "id added to elt outside the doc"); $elt->paste( $t->root); is( $t->elt_id( 'elt1')->gi => 'elt', "elt put back in the tree"); # these tests show a bug: the id list is not updated when an element is cut $elt->cut; $elt->del_id; $elt->del_id; # twice to go through a different path $elt->paste( $t->root); is( $t->elt_id( 'elt1')->gi => 'elt', "elt put back in the tree without id"); $elt->del_id; is( $t->elt_id( 'elt1')->gi => 'elt', "deleting an inexisting id which remains in the list"); is( scalar $elt->ancestors_or_self( 'elt'), 1, "ancestors_or_self with cond"); is( scalar $elt->ancestors_or_self(), 2, "ancestors_or_self without cond"); my @current_ns_prefixes= $elt->current_ns_prefixes; is( scalar @current_ns_prefixes, 0, "current_ns_prefixes"); is_undef( $elt->next_elt( $elt), 'next_elt on an empty elt (limited to the subtree)'); is_undef( $elt->next_elt( $elt, 'foo'), 'next_elt on an empty elt (subtree and elt name)'); is_undef( $elt->next_elt( 'foo'), 'next_elt on an empty elt (elt name)'); is_undef( $elt->prev_elt( $elt), 'prev_elt on an empty elt (limited to the subtree)'); is_undef( $elt->prev_elt( $elt, 'foo'), 'prev_elt on an empty elt (subtree and elt name)'); is_undef( $elt->prev_elt( 'foo'), 'prev_elt on an empty elt (elt name)'); is_undef( $elt->next_n_elt( 1, 'foo'), 'next_n_elt'); is_undef( $elt->next_n_elt( 0, 'foo'), 'next_n_elt'); is( $elt->level(), 1, "level"); is( $elt->level( 'elt'), 0, "level"); is( $elt->level( 'doc'), 1, "level"); is( $elt->level( 'foo'), 0, "level"); ok( $elt->in_context( 'doc'), "in_context doc "); ok( $elt->in_context( 'doc', 0), "in_context doc with level (0)"); ok( $elt->in_context( 'doc', 1), "in_context doc with level"); ok( $elt->in_context( 'doc', 2), "in_context doc with level"); nok( $elt->in_context( 'foo'), "in_context foo"); nok( $elt->in_context( 'foo', 0), "in_context foo with level (0)"); nok( $elt->in_context( 'foo', 1), "in_context foo with level"); nok( $elt->in_context( 'foo', 2), "in_context foo with level (0)"); nok( $elt->in_context( 'elt'), "in_context elt"); nok( $elt->in_context( 'elt', 0), "in_context elt with level (0)"); nok( $elt->in_context( 'elt', 1), "in_context elt with level"); nok( $elt->in_context( 'elt', 2), "in_context elt with level (0)"); } { foreach my $doc ( '', 'totototo', 'totototototo', 'tototatatotototo', 'tototiti tututoto', 'toto tututoto', 'toto', 'foototo', 'totototo', '', '', 'toto', '', 'foo', '', '', '', '', '', '', '', '', # this one does not work: nothing in XML::Twig to output stuff after the ï¬inal end tag #'', 'foo', 'foo', 'foo', 'foo', 'foototo', 'foofoo', 'foofoo', 'foofoo', 'foofoo', 'foofoo', 'foofoofoo', 'foofoofoo', 'foo', ) { my $t=XML::Twig->new->parse( $doc); $t->first_elt( 'ERS')->erase; (my $expected= $doc)=~ s{}{}g; is( $t->sprint, $expected, "erase in $doc"); } } { my $t=XML::Twig->new->parse( '

toto

'); my $pcdata= $t->first_elt( '#PCDATA'); $pcdata->split_at( 2); is( $t->sprint => '

toto

', 'split_at'); } { my $doc= q{tototatatu}; my $t= XML::Twig->new->parse( $doc); $t->subs_text( qr/(to)ta/, '&elt(p => $1)ti'); is( $t->sprint,'to

to

titatu
' , 'subs_text'); $t->subs_text( qr/(to)ta/, '&elt(p => $1)ti'); is( $t->sprint,'to

to

titatu
' , 'subs_text (2cd try, same exp)'); $t->subs_text( qr/(ta)/, '&elt(p1 => $1)ti'); is( $t->sprint,'to

to

titatitu
' , 'subs_text cannot merge text with next sibling'); } { my $doc= q{totatu}; my $t= XML::Twig->new->parse( $doc); $t->subs_text( qr/(to)/, '&elt(e => $1)'); is( $t->sprint,'totatu' , 'subs_text (new elt)'); $t->subs_text( qr/(ta)/, '&elt(e => $1)'); is( $t->sprint,'totatu' , 'subs_text (new elt 2)'); $t->subs_text( qr/(t.)/, '&elt(se => $1)'); is( $t->sprint,'totatu' , 'subs_text (several subs)'); } { my $doc= q{totatitu}; my $t= XML::Twig->new->parse( $doc); $t->subs_text( qr/(t[aeiou])/, '$1$1'); is( $t->sprint,'tototatatititutu' , 'subs_text (duplicate string)'); $t->subs_text( qr/((t[aeiou])\2)/, '$2'); is( $t->sprint,'totatitu' , 'subs_text (use \2)'); $t->subs_text( qr/(t[aeiou])/, '$1$1'); is( $t->sprint,'tototatatititutu' , 'subs_text (duplicate string)'); $t->subs_text( qr/(t[aeiou]t[aeiou])/, '&elt( p => $1)'); is( $t->sprint,'

toto

tata

titi

tutu

' , 'subs_text (use \2)'); } { my $doc= q{ toto toto foo bar baz bar baz bar }; my $t= XML::Twig->new->parse( $doc); my $copy= $t->root->copy; is( $copy->sprint, $t->root->sprint, "copy with extra data"); $t->root->insert_new_elt( first_child => a => { '#ASIS' => 1 }, 'a c a'); $copy= $t->root->copy; is( $copy->sprint, $t->root->sprint, "copy with extra data, and asis"); } { my $save= XML::Twig::_weakrefs(); XML::Twig::_set_weakrefs( 0); my $t= XML::Twig->new->parse( 'foo '); $t->root->first_child->cut->DESTROY; $t->root->first_child->cut->DESTROY; is( $t->sprint, '', 'DESTROY'); XML::Twig::_set_weakrefs( $save); } { # test _keep_encoding even with perl > 5.8.0 if( $] < 5.008) { skip( 2 => "testing utf8 flag mongering only needed in perl 5.8.0+"); } else { require Encode; import Encode; my $s="a"; Encode::_utf8_off( $s); nok( Encode::is_utf8( $s), "utf8 flag off"); XML::Twig::Elt::_utf8_ify( $s); if( $] >= 5.008 and $] < 5.010) { ok( Encode::is_utf8( $s), "utf8 flag back on"); } else { nok( Encode::is_utf8( $s), "_utf8_ify is a noop"); } } } { # test keep_encoding is( XML::Twig::Elt::_keep_encoding(), 0, "_keep_encoding not initialized"); XML::Twig->new( keep_encoding => 0); is( XML::Twig::Elt::_keep_encoding(), 0, "_keep_encoding initialized (0)"); XML::Twig->new( keep_encoding => 1); is( XML::Twig::Elt::_keep_encoding(), 1, "_keep_encoding initialized (1)"); XML::Twig->new( keep_encoding => 0); is( XML::Twig::Elt::_keep_encoding(), 0, "_keep_encoding initialized (0)"); } XML-Twig-3.52/t/xmlxpath_31vars.t0000755000175000017500000000755212732215763016775 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test::More; use XML::Twig::XPath; eval "use XML::XPathEngine"; if( $@) { print "1..1\nok 1\n"; warn "skipping, using variables in XPath requires XML::XPathEngine\n"; exit; } plan( tests => 8); my( $employees, $areas)= do { local $/="\n\n"; ; }; { # test all data in 1 single file my $data= "$employees$areas"; my $t = XML::Twig::XPath->new->parse( $data); { $t->set_var( salary => 12000); my @nodes= $t->findnodes('/data/employees/employee[@salary=$salary]/name'); is( results( @nodes), 'e3:e4', '1 doc, var is a litteral'); } { $t->set_var( E => $t->find( '/data/employees/employee[@salary>10000]')); $t->set_var( A => $t->find( '/data/areas/area[district="Brooklyn"]/street')); my @nodes = $t->findnodes('$E[work_area/street = $A]/name'); is( results( @nodes), 'e3:e4', '1 doc, var is a node set'); } { $t->set_var( org => 'A'); my @nodes= $t->findnodes('/data/employees/employee[@org=$org]/name'); is( results( @nodes), 'e5', '1 doc, var is a simple litteral'); } { $t->set_var( org => 'A/B'); my @nodes= $t->findnodes('/data/employees/employee[@org=$org]/name'); is( results( @nodes), 'e6', '1 doc, var is an XPath-like litteral'); } } { # test with data in 2 single file my $te = XML::Twig::XPath->new->parse( $employees); my $ta = XML::Twig::XPath->new->parse( $areas); { $te->set_var( salary => 12000); my @nodes= $te->findnodes('/employees/employee[@salary=$salary]/name'); is( results( @nodes), 'e3:e4', '2 docs, var is a litteral'); } SKIP: { skip "node sets in an XPath variable are not supported with perl < 5.12", 1 unless $] >= 5.012; $te->set_var( E => $te->find( '/employees/employee[@salary>10000]')); $te->set_var( A => $ta->find( '/areas/area[district="Brooklyn"]/street')); my @nodes = $te->findnodes('$E[work_area/street = $A]/name'); is( results( @nodes), 'e3:e4', '2 docs, var is a node set'); } { $te->set_var( org => 'A'); my @nodes= $te->findnodes('/employees/employee[@org=$org]/name'); is( results( @nodes), 'e5', '2 docs, var is a simple litteral'); } { $te->set_var( org => 'A/B'); my @nodes= $te->findnodes('/employees/employee[@org=$org]/name'); is( results( @nodes), 'e6', '2 docs, var is an XPath-like litteral'); } } sub results { return join ':', map { $_->id || 'XX' } @_; } __DATA__ Employee 1 Fifth Avenue Employee 2 Abbey Court Employee 3 Abbey Court Employee 4 Broad Street Abbey Court Employee 5 Broad Street Abbey Court Employee 6 Broad Street Abbey Court Brooklyn Abbey Court Aberdeen Street Adams Street Manhattan Fifth Avenue Broad Street XML-Twig-3.52/t/test_3_40.t0000755000175000017500000003475112732215763015436 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; $|=1; my $DEBUG=0; use XML::Twig; my $TMAX=96; print "1..$TMAX\n"; { my $d="titlep 1 p 2"; is( lf_in_t( XML::Twig->parse( pretty_print => 'indented', discard_spaces => 1, $d)), 1, 'space prevents indentation'); is( lf_in_t( XML::Twig->parse( pretty_print => 'indented', discard_all_spaces => 1, $d)), 5, 'discard_all_spaces restores indentation'); } sub lf_in_t { my($t)= @_; my @lfs= $t->sprint=~ m{\n}g; return scalar @lfs; } { my $d=''; my @tests= ( [ 't1|t2', HN => 't1t2' ], [ 't1|t2|t3[@att="a|b"]', HN => 't1t2t3-1' ], [ 't1|t2|t3[@att!="a|b"]', HN => 't1t2t3-2t3-3' ], [ 't1|level(1)', H => 't1t1t2t3-1t3-2t3-3' ], [ 't1|level(2)', H => 't1t4' ], [ 't1|_all_', H => 't1t1t2t3-1t3-2t4t3-3d'], [ qr/t[12]/ . '|t3/t4', H => 't1t2t4' ], [ 't3[@a2="a|b"]', HN => 't3-2' ], [ 't3[@a2="a|b"]|t3|t3/t4', H => 't3-1t3-2t3-2t4t3-3' ], ); foreach my $test (@tests) { my $nb=0; my $ids=''; my( $trigger, $test_cat, $expected_ids)= @$test; my $handlers= $test_cat =~ m{H} ? { $trigger => sub { $ids.=$_->id; 1; } } : {}; my $t= XML::Twig->new( twig_handlers => $handlers )->parse( $d); is( $ids, $expected_ids, "(H) trigger with alt: '$trigger'"); my $uniq_ids= join '', sort $expected_ids=~m{(t\d(?:-\d)?)}g; if( $test_cat =~ m{X}) { (my $xpath= "//$trigger")=~ s{\|t}{|//t}g; is( join( '', map { $_->id } $t->findnodes( $xpath)), $uniq_ids, " (X) path with |: '$trigger'"); } if( $test_cat =~ m{N}) { is( join( '', map { $_->id } $t->root->children( $trigger)), $uniq_ids, "(N)navigation with |: '$trigger'"); } } } { my $t1= XML::Twig->parse( ''); is( XML::Twig->active_twig()->root->id, 'd1', 'active_twig, one twig'); my $t2= XML::Twig->parse( ''); is( XML::Twig->active_twig()->root->id, 'd2', 'active_twig, second twig'); } { eval { XML::Twig->new(error_context => 1)->parse( $0); }; matches( $@, "you seem to have used the parse method on a filename", 'parse on a file name'); } { my $got; XML::Twig->parse( twig_handlers => { 'e[@a]' => sub { $got .= $_->id; } }, ''); is( $got, 'i1i3', 'bare attribute in handler condition'); } if( $] > 5.008) { my $doc= q{]>&ext;}; ok( XML::Twig->parse( expand_external_ents => -1, $doc), 'failsafe expand_external_ents'); } else { skip( 1, 'not tested under perl < 5.8'); } { my $t=XML::Twig->parse( q{e11e21e12}); is( join( ':', $t->findvalues( [$t->root->children], "./e1")), 'e11:e12', 'findvalues on array'); } { my $t=XML::Twig->parse( ""); $t->set_encoding( "UTF-8"); is( $t->sprint, qq{\n}, 'set_encoding without XML declaration'); } { my $t=XML::Twig->parse( ""); $t->set_standalone( 1); is( $t->sprint, qq{\n}, 'set_standalone (yes) without XML declaration'); } { my $t=XML::Twig->parse( ""); $t->set_standalone( 0); is( $t->sprint, qq{\n}, 'set_standalone (no) without XML declaration'); } { my $t=XML::Twig->parse( ""); nok( $t->xml_version, 'xml_version with no XML declaration'); $t->set_xml_version( 1.1); is( $t->sprint, qq{\n}, 'set_xml_version without XML declaration'); is( $t->xml_version, 1.1, 'xml_version after being set'); } { my $t= XML::Twig->new; is( $t->_dump, "document\n", '_dump on an empty twig'); } { my $t=XML::Twig->parse( pretty_print => 'none', 'foobar'); $t->root->field_to_att( 'f[@a="b"]', 'g'); is( $t->sprint, 'foo', 'field_to_att on non-simple condition'); $t->root->att_to_field( g => 'gg'); is( $t->sprint, 'barfoo', 'att_to_field with att != field'); } { my $t=XML::Twig->parse( ''); $t->root->wrap_in( 'nroot'); is( $t->sprint, '', 'wrapping the root'); } { my $t=XML::Twig->new; XML::Twig::_set_weakrefs(0); my $doc='\n texttext more text foo\n more'; $t->parse( $doc); $doc=~ s{\n }{}; # just the first one is( $t->sprint, $doc, 'parse with no weakrefs'); $t->root->insert_new_elt( first_child => x => 'text'); $doc=~ s{}{text}; is( $t->sprint, $doc, 'insert first child with no weakrefs'); $t->root->insert_new_elt( last_child => x => 'text'); $doc=~ s{}{text}; is( $t->sprint, $doc, 'insert last child with no weakrefs'); $t->root->wrap_in( 'dd'); $doc=~ s{}{
}; $doc=~s{}{
}; is( $t->sprint, $doc, 'wrap with no weakrefs'); $t->root->unwrap; $doc=~s{}{}g; is( $t->sprint, $doc, 'unwrap with no weakrefs'); my $new_e= XML::Twig::Elt->new( ee => { c => 1 }, 'ee text'); $new_e->replace( $t->root->first_child( 'e')); $doc=~ s{}{ee text}; is( $t->sprint, $doc, 'replace with no weakrefs'); XML::Twig::_set_weakrefs(1); } { my $t= XML::Twig->new( no_expand => 1); XML::Twig::_set_weakrefs(0); my $doc=']> bar &bar; bar&bar;&foo; &bar; bar &foo;&bar; na &foo;'; $t->parse( $doc); (my $got= $t->sprint)=~ s{\n}{}g; is( $got, $doc, 'external entities without weakrefs'); XML::Twig::_set_weakrefs(1); } { XML::Twig::_set_weakrefs(0); { my $t= XML::Twig->new; undef $t; } ok( 1, "DESTROY doesn't crash when weakrefs is off"); XML::Twig::_set_weakrefs(1); } { my $doc= 'foobarbar'; my( $got1, $got2); XML::Twig->new( twig_handlers => { e1 => sub { $_->parent->set_att( get1 => 1); }, e2 => sub { $_->parent->set_att( '#get2' => 1); }, '[@get1]' => sub { $got1 .= 'a' . $_->id; }, '[@#get2]' => sub { $got2 .= 'a' . $_->id; }, 'e[@get1]' => sub { $got1 .= 'b' . $_->id; }, 'e[@#get2]' => sub { $got2 .= 'b' . $_->id; }, }, ) ->parse( $doc); is( $got1, 'be1ae1', 'handler on bare attribute'); is( $got2, 'be3ae3', 'handler on private (starting with #) bare attribute'); } { my $t=XML::Twig->parse( 'foo'); my $root= $t->root; ok( $root->closed, 'closed on completely parsed tree'); ok( $root->_extra_data_before_end_tag, '_extra_data_before_end_tag (success)'); nok( $root->first_child->_extra_data_before_end_tag, '_extra_data_before_end_tag (no data)'); } { my $t= XML::Twig->parse( pi => 'process', ''); is( $t->first_elt( '#PI')->pi_string, '', 'pi_string with empty data'); } { my $t= XML::Twig->parse( ''); is( ids( $t->root->children( '.a')), 'e1:f1', 'nav on class'); } { my $t=XML::Twig->parse( 'foobarfoobar123'); is ( ids( $t->root->children( 'e[string()="foo"]')), 'e1', 'navigation condition using string() ='); is ( ids( $t->root->children( 'e[string()=~/foo/]')), 'e1:e3', 'navigation condition using string() =~'); is ( ids( $t->root->children( 'e[string()!~/foo/]')), 'e2:e4', 'navigation condition using string() !~'); is ( ids( $t->root->children( 'e[string()!="foo"]')), 'e2:e3:e4', 'navigation condition using string() !='); is ( ids( $t->root->children( 'e[string()]')), 'e1:e2:e3', 'navigation condition using bare string()'); is ( ids( $t->root->findnodes( './e[string()="foo"]')), 'e1', 'xpath condition using string() ='); is ( ids( $t->root->findnodes( './e[string()=~/foo/]')), 'e1:e3', 'xpath condition using string() =~'); is ( ids( $t->root->findnodes( './e[string()!~/foo/]')), 'e2:e4', 'xpath condition using string() !~'); is ( ids( $t->root->findnodes( './e[string()!="foo"]')), 'e2:e3:e4', 'xpath condition using string() !='); is ( ids( $t->root->findnodes( './e[string()]')), 'e1:e2:e3', 'xpath condition using bare string()'); is( ids( $t->root->children( 'n[string()=2]')), 'n2', 'navigation string() ='); is( ids( $t->root->children( 'n[string()!=2]')), 'n1:n3', 'navigation string() !='); is( ids( $t->root->children( 'n[string()>2]')), 'n3', 'navigation string() >'); is( ids( $t->root->children( 'n[string()>=2]')), 'n2:n3', 'navigation string() >='); is( ids( $t->root->children( 'n[string()<2]')), 'n1', 'navigation string() <'); is( ids( $t->root->findnodes( './n[string()=2]')), 'n2', 'xpath string() ='); is( ids( $t->root->findnodes( './n[string()!=2]')), 'n1:n3', 'xpath string() !='); is( ids( $t->root->findnodes( './n[string()>2]')), 'n3', 'xpath string() >'); is( ids( $t->root->findnodes( './n[string()>=2]')), 'n2:n3', 'xpath string() >='); is( ids( $t->root->findnodes( './n[string()<2]')), 'n1', 'xpath string() <'); is( ids( $t->root->findnodes( './n[string()<=2]')), 'n1:n2', 'xpath string() <='); } { my $got; my $t=XML::Twig->parse( twig_handlers => { d => sub { $got .="wrong"; }, 'd[@id]' => sub { $got .= "ok"; 0 }, }, '' ); is( $got, 'ok', 'returning 0 prevents the next handler to be called'); } { my $d=q{foo

fooblank


}; my $expected=qq{\n \n foo\n \n \n

fooblank

\n
\n
}; XML::Twig::_indent_xhtml( \$d); is( $d, $expected, '_indent_xhtml'); } { my $d='c'; my @handlers= ( '/d/e[@a="a" or @b="b"]', '/d/e[@a="a" or @b="c"]|e', '/d/e[@a="a"]', '/d/e[@b="b"]', '/d/e', 'd/e[@a="a" and @b="b"]', 'd/e[@a="a"]', 'd/e[@b="b"]', 'd/e', 'e[@a="a" or @b="b"]', 'e[@b="b" or @a="a"]', 'e[@a="a"]|f', 'e[@b="b"]', 'e', qr/e|f/, qr/e|f|g/, 'level(1)', ); my $t= XML::Twig->new(); for my $stem ( 1, 100) { my $i= $stem; my $expected= join '', ($stem..$stem+$#handlers); my $got; $t->setTwigHandlers( { map { $_ => sub { $got .= $i++; } } @handlers }); $t->parse( $d); is( $got, $expected, 'handler order'); } } { my $t=XML::Twig->parse( ""); $t->{twig_dtd}=""; is( $t->doctype(UpdateDTD => 1), "\n", 'doctype with an updated DTD'); } { my $t=XML::Twig->parse( ''); $t->elt_accessors( 'e', 'e'); $t->elt_accessors( { e2 => 'e[2]', se => 'se', sea => 'se[@a]' }); my $root= $t->root; is( $root->e->id, 'e1', 'accessor, no alias, scalar context'); my $e2= ($root->e)[-1]; is( $e2->id, 'e2', 'accessor no alias, list context'); $e2= $root->e2; is( $e2->id, 'e2', 'accessor alias, list context'); is( $e2->se->id, 'se1', 'accessor alias, scalar context'); is( $e2->sea->id, 'se2', 'accessor, with complex step, alias, scalar context'); } { my $t=XML::Twig->new( elt_accessors => [ 'e', 'se' ]) ->parse( ''); my $root= $t->root; is( $root->e->id, 'e1', 'accessor (created in new), no alias, scalar context'); my $se= ($root->e)[-1]->se; is( $se->id, 'se1', 'accessor (created in new) no alias, scalar context, 2'); } { my $t=XML::Twig->new( elt_accessors => { e2 => 'e[2]', se => 'se', sea => 'se[@a]' }) ->parse( ''); my $e2= $t->root->e2; is( $e2->id, 'e2', 'accessor (created in new) alias, list context'); is( $e2->se->id, 'se1', 'accessor (created in new) alias, scalar context'); is( $e2->sea->id, 'se2', 'accessor (created in new), with complex step, alias, scalar context'); } { my $doc= ']>'; my $t= XML::Twig->parse( do_not_output_DTD => 1, $doc); is( $t->sprint, qq{\n}, 'do_not_output_DTD'); } { my $t= XML::Twig->parse( no_prolog => 1, ']>'); is( $t->sprint, qq{}, 'no_prolog'); } { my $t= XML::Twig->parse( ']>'); is( $t->sprint, qq{\n\n]>\n}, 'no_prolog'); } { my $e= XML::Twig::Elt->new( 'e'); $e->set_empty; is( $e->sprint, '', 'set_empty with no value'); $e->set_empty( 0); is( $e->sprint, '', 'set_empty(0)'); $e->set_empty; is( $e->sprint, '', 'set_empty with no value'); $e->set_empty( 1); is( $e->sprint, '', 'set_empty(1'); $e->set_empty; is( $e->sprint, '', 'set_empty with no value'); $e->set_empty( 1); is( $e->sprint, '', 'set_empty(1)'); my $e2= XML::Twig::Elt->parse( ''); $e2->set_not_empty(); is( $e2->sprint, '', 'set_not_empty'); ok( ! $e2->closed, 'closed on an orphan elt'); } { my $t= XML::Twig->parse( ''); my $l2= $t->first_elt( 'l2'); my $l4= $t->first_elt( 'l4'); $l2->cut; $l4->cut; is( $l4->_root_through_cut->tag, 'd', '_root_through_cut'); is( $l4->_inherit_att_through_cut( 'a', 'd'), 'd', '_inherit_att_through_cut'); } { my $s= "foo"; is( XML::Twig::_to_utf8( 'iso-8859-1', $s), $s, 'trivial test of _to_utf8'); } XML-Twig-3.52/t/test_bugs_3_19.t0000755000175000017500000001022512732215763016452 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; $|=1; my $DEBUG=0; use XML::Twig; my $TMAX=26; print "1..$TMAX\n"; { #bug with long CDATA # get an accented char in iso-8859-1 my $latin1_char= perl_io_layer_used() ? '' : slurp( File::Spec->catfile('t', "latin1_accented_char.iso-8859-1")); chomp $latin1_char; my %cdata=( "01- 1025 chars" => 'x' x 1025 . 'a', "02- short CDATA with nl" => "first line\nsecond line", "03- short CDATA with ]" => "first part]second part", "04- short CDATA with ] and spaces" => "first part ] second part", "05- 1024 chars with accent" => $latin1_char x 1023 . 'a', "06- 1025 chars with accent" => $latin1_char x 1024 . 'a', "07- 1023 chars, last a nl" => 'x' x 1022 . "\n", "08- 1023 chars, last a ]" => 'x' x 1022 . "]", "09- 1024 chars, last a nl" => 'x' x 1023 . "\n", "10- 1024 chars, last a ]" => 'x' x 1023 . "]", "11- 1025 chars, last a nl" => 'x' x 1024 . "\n", "12- 1025 chars, last a ]" => 'x' x 1024 . "]", "13- 1050 chars, last a nl" => ('1' x 1024) . ('2' x 25) . "\n", "14- 1050 chars, last a ]" => ('1' x 1024) . ('2' x 25) . "]", '15- 1060 chars, ] and \n' => ('1' x 1024) . ('2' x 26) . "\n \n ]\n]]\n", '16- 1060 chars, ] and \n' => ('1' x 1024) . ('2' x 26) . "\n \n ]\n]]", '17- 1060 chars, ] and \n' => ('1' x 1024) . ('2' x 26) . "\n \n ]\n]] ", '18- 1060 chars, ] and \n' => ('1' x 1024) . ('2' x 26) . "\n \n ]\n]] a", '19- 1060 chars, ] and \n' => '1' x 500 . "\n \n ]\n]] a" . '2' x 500 . "\n \n ]\n]] a", "20- 800 chars with accent" => $latin1_char x 800, "21- 800 chars with accent" => "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa$latin1_char" x 16, "22- 1600 chars with accent" => "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa$latin1_char" x 32, '23- 1600 chars with accent and \n' => "aaaaaaaa]aaaaaaaaaaaaaaaaaaaaaaaaa\naaaaaaaaaaaaaaa$latin1_char" x 32, ); if( ($] == 5.008) || ($] < 5.006) ) { skip( scalar keys %cdata, "KNOWN BUG in 5.8.0 and 5.005 with keep_encoding and long (>1024 char) CDATA, " . "see http://rt.cpan.org/Ticket/Display.html?id=14008" ); } elsif( perl_io_layer_used()) { skip( scalar keys %cdata, "cannot test parseurl when UTF8 perIO layer used " . "(due to PERL_UNICODE or -C option used)\n" ); } else { foreach my $test (sort keys %cdata) { my $cdata=$cdata{$test}; my $doc= qq{}; my $twig= XML::Twig->new( keep_encoding => 1)->parse($doc); my $res = $twig->root->first_child->cdata; is( $res, $cdata, "long CDATA with keep_encoding $test"); } } } { # testing _dump my $doc= q{foobartototatatitiand now a long (more than 40 characters) text to see if it gets shortened by default (or not)}; my $t= XML::Twig->new->parse( $doc); my $dump= q{document |-doc | |-elt att="xyz" | |-- (cpi before) '' | | |-PCDATA: 'foo' | |-elt | | |-PCDATA: 'bar' | | |-CDATA: 'baz' | |-elt2 | |-- (cpi before) '' | | |-PCDATA: 'toto' | | |-b | | | |-PCDATA: 'tata' | | |-PCDATA: 'titi' | |-elt3 | |-elt | | |-PCDATA: 'and now a long (more than 40 characters) tex ... see if it gets shortened by default (or not)' }; is( $t->_dump( { extra => 1 }), $dump, "_dump with extra on"); (my $no_extra= $dump)=~ s{^.*cpi before.*\n}{}gm; is( $t->_dump( ), $no_extra, "_dump without extra"); (my $no_att= $no_extra)=~ s{ att=.*}{}g; is( $t->_dump( { atts => 0 }), $no_att, "_dump without atts"); } XML-Twig-3.52/t/test1.t0000755000175000017500000002715112732215763014766 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,'t'); use tools; # This just tests a complete twig, no callbacks $|=1; use XML::Twig; my $doc=' ]>
S1 I1 S1 I2 S1 Title S1 P1 S2 P2 Note P1 S1 para 3
S2 intro S2 Title S2 P1 S2 P2 S2 P3
Annex Title Annex P1 Annex P2
'; my $TMAX=97; # don't forget to update! print "1..$TMAX\n"; # test twig creation my $t= new XML::Twig(); ok( $t, 'twig creation'); # test parse $t->parse( $doc, ErrorContext=>2); ok( $t, 'parse'); # test the root my $root= $t->root; etest( $t->root, 'doc', 'doc1', 'root'); # print in a file open( TMP, '>tmp'); select TMP; $t->print(); $root->print(); select STDOUT; $t->print( \*TMP); $root->print( \*TMP); ok( 'ok', "print"); # test the element root and twig functions on the root ok( $root->twig, 'root->twig'); etest( $root->root, 'doc', 'doc1', 'root->root'); # navigation my $section1= etest( $root->first_child, 'section', 'section1', 'first_child'); my $annex= etest( $root->first_child( 'annex'), 'annex', 'annex1', 'first_child( annex)'); etest( $root->last_child, 'annex', 'annex1', 'last_child'); my $section2= etest( $root->last_child( 'section'), 'section', 'section2', 'last_child( section)'); etest( $section2->prev_sibling, 'section', 'section1', 'prev_sibling'); etest( $section1->next_sibling, 'section', 'section2', 'next_sibling'); my $note= etest( $root->next_elt( 'note'), 'note', 'note1', 'next_elt( note)'); etest( $note->root, 'doc', 'doc1', 'root'); ok( $note->twig, 'twig'); etest( $note->twig->root, 'doc', 'doc1', 'twig->root'); # playing with next_elt and prev_elt my $para2= etest( $note->prev_sibling, 'para', 'para2', 'prev_sibling'); etest( $note->prev_elt( 'para'), 'para', 'para2', 'prev_elt( para)'); my $para3= etest( $note->next_sibling, 'para', 'para3', 'next_sibling'); my $paranote1= etest( $note->next_elt( 'para'), 'para', 'paranote1', 'next_elt( para)'); etest( $paranote1->next_elt( 'para'), 'para', 'para3', 'next_elt( para)'); # difference between next_sibling and next_sibling( gi) etest( $para2->next_sibling, 'note', 'note1', 'next_sibling'); etest( $para2->next_sibling( 'para'), 'para', 'para3', 'next_sibling( para)'); # testing in/parent/in_context ok( $paranote1->in( $note), 'in'); ok( $paranote1->in( $section1), 'in'); ok( !$paranote1->in( $section2), 'not in'); ok( $paranote1->in_context( 'note'), 'in_context'); ok( $paranote1->in_context( 'section'), 'in_context'); ok( !$paranote1->in_context( 'intro'), 'not in_context'); etest( $paranote1->parent, 'note', 'note1', 'parent'); # testing list methods (ancestors/children) stest( (join ":", map { $_->id} $paranote1->ancestors), 'note1:section1:doc1', 'ancestors'); stest( (join ":", map { $_->id} $paranote1->ancestors('section')), 'section1', 'ancestors( section)'); stest( (join ":", map { $_->id} $section1->children), 'intro1:title1:para1:para2:note1:para3', 'children'); stest( (join ":", map { $_->id} $section1->children( 'para')), 'para1:para2:para3', 'children( para)'); stest( $paranote1->level, 3, 'level'); # testing attributes my $title1= etest( $root->next_elt( 'title'), 'title', 'title1', 'next_elt( title)'); stest( $title1->id, 'title1', 'id'); stest( $title1->att('id'), 'title1', 'att( id)'); stest( $title1->att('no'), '1', 'att( no)'); $title1->set_att('no', 'Auto'); stest( $title1->att('no'), 'Auto', 'set att( no)'); $title1->set_att('no', '1'); $title1->set_att('newatt', 'newval'); stest( $title1->att('newatt'), 'newval', 'set att( newval)'); $title1->del_att('newatt'); stest( stringifyh( %{$title1->atts}), 'id:title1:no:1', 'del_att'); $title1->set_att('id', 'newid'); stest( $title1->id, 'newid', 'set_att(id)'); stest( $title1->att( 'id'), 'newid', 'set_att(id)'); $title1->set_id( 'title1'); stest( $title1->id, 'title1', 'set_id'); stest( $title1->att( 'id'), 'title1', 'set_id'); stest( stringifyh( %{$title1->atts}), 'id:title1:no:1', 'atts'); $title1->del_atts; stest( $title1->att( 'id'), '', 'del_atts'); $title1->set_atts( { 'no' => '1', 'id' => 'newtitleid'}); stest( stringifyh( %{$title1->atts}), 'id:newtitleid:no:1', 'set_atts'); stest( $title1->id, 'newtitleid', 'id'); stest( $title1->att('id'), 'newtitleid', 'att( id)'); $title1->set_id( 'title1'); # now let's cut and paste $title1->cut; stest( (join ":", map { $_->id} $section1->children), 'intro1:para1:para2:note1:para3', 'cut (1)'); my $intro1= $section1->first_child( 'intro'); $intro1->cut; stest( (join ":", map { $_->id} $section1->children), 'para1:para2:note1:para3', 'cut (2)'); $intro1->paste( $section1); stest( (join ":", map { $_->id} $section1->children), 'intro1:para1:para2:note1:para3', 'paste'); $title1->paste( 'first_child', $section2, ); stest( (join ":", map { $_->id} $section2->children), 'title1:intro2:title2:para4:para5:para6', 'paste( first_child)'); $title1->cut; stest( (join ":", map { $_->id} $section2->children), 'intro2:title2:para4:para5:para6', 'paste'); $title1->paste( $section2); stest( (join ":", map { $_->id} $section2->children), 'title1:intro2:title2:para4:para5:para6', 'paste'); $title1->cut; stest( (join ":", map { $_->id} $section2->children), 'intro2:title2:para4:para5:para6', 'cut (3)'); $title1->paste( 'last_child', $section2); stest( (join ":", map { $_->id} $section2->children), 'intro2:title2:para4:para5:para6:title1', 'paste( last_child)'); $title1->cut; stest( (join ":", map { $_->id} $section2->children), 'intro2:title2:para4:para5:para6', 'cut(4)'); my $intro2= etest( $section2->first_child( 'intro'), 'intro', 'intro2', 'first_sibling( intro)'); $title1->paste( 'after', $intro2); stest( (join ":", map { $_->id} $section2->children), 'intro2:title1:title2:para4:para5:para6', 'paste( after)'); $title1->cut; stest( (join ":", map { $_->id} $section2->children), 'intro2:title2:para4:para5:para6', 'cut (5)'); $title1->paste( 'before', $intro2); stest( (join ":", map { $_->id} $section2->children), 'title1:intro2:title2:para4:para5:para6', 'paste( before)'); $title1->cut; stest( (join ":", map { $_->id} $section2->children), 'intro2:title2:para4:para5:para6', 'cut (6)'); my $para4= etest( $t->elt_id( 'para4'), 'para', 'para4', 'elt_id'); $title1->paste( 'after', $para4); stest( (join ":", map { $_->id} $section2->children), 'intro2:title2:para4:title1:para5:para6', 'paste( after)'); $title1->cut; stest( (join ":", map { $_->id} $section2->children), 'intro2:title2:para4:para5:para6', 'cut (7)'); $title1->paste( 'before', $para4); stest( (join ":", map { $_->id} $section2->children), 'intro2:title2:title1:para4:para5:para6', 'paste( before)'); $title1->cut; stest( (join ":", map { $_->id} $section2->children), 'intro2:title2:para4:para5:para6', 'cut (8)'); # now let's mess up the document # let's erase that pesky intro $intro2->erase; stest( (join ":", map { $_->id} $section2->children), 'paraintro3:title2:para4:para5:para6', 'erase'); $para4->delete; stest( (join ":", map { $_->id} $section2->children), 'paraintro3:title2:para5:para6', 'delete'); $t->change_gi( 'paraintro', 'para'); stest( (join ":", map { $_->gi} $section2->children), 'para:title:para:para', 'change_gi'); $para3= etest( $t->elt_id( 'para3'), 'para', 'para3', 'elt_id'); $para3->cut; stest( $section1->text, 'S1 I1S1 I2S1 P1S2 P2Note P1', 'text'); stest( $section1->sprint, '
S1 I1S1 I2S1 P1S2 P2Note P1
', 'sprint'); # let's have a look at those entities # first their names stest( join( ':', $t->entity_names), 'e1:e2:e3', 'entity_list'); # let's look at their content my $e1= $t->entity( 'e1'); stest( $e1->text, '', 'e1 text'); my $e2= $t->entity( 'e2'); stest( $e2->text, '', 'e2 text'); my $e3= $t->entity( 'e3'); stest( $e3->text, '', 'e3 text'); # additionnal erase test $section1= $root->first_child; stest( (join ":", map { $_->id} $section1->children), 'intro1:para1:para2:note1', 'erase (2)'); $intro1= $section1->first_child( 'intro'); $intro1->erase; stest( (join ":", map { $_->id} $section1->children), 'paraintro1:paraintro2:para1:para2:note1', 'erase (3)'); # new elt test my $new_elt= new XML::Twig::Elt; stest( ref $new_elt, 'XML::Twig::Elt', "new"); my $new_elt1= new XML::Twig::Elt( 'subclass'); stest( ref $new_elt, 'XML::Twig::Elt', "new subclass"); my $new_elt2= new XML::Twig::Elt; stest( ref $new_elt2, 'XML::Twig::Elt', "create no gi"); my $new_elt3= new XML::Twig::Elt( 'elt3'); $new_elt3->set_id( 'elt3'); etest( $new_elt3, 'elt3', 'elt3', "create with gi"); my $new_elt4= new XML::Twig::Elt( 'elt4', 'text of elt4'); ttest( $new_elt4, 'text of elt4', "create with gi and text"); my $new_elt5= new XML::Twig::Elt( 'elt5', 'text of elt5 ', $new_elt4); ttest( $new_elt5, 'text of elt5 text of elt4', "create with gi and content"); my $new_elt6= new XML::Twig::Elt( PCDATA, 'text of elt6'); ttest( $new_elt6, 'text of elt6', "create PCDATA"); # test CDATA my $st1='bold]]>'; my $t1= new XML::Twig; $t1->parse( $st1); sttest( $t1->root, $st1, "CDATA Section"); my $st2='text bold]]> more text'; my $t2= new XML::Twig; $t2->parse( $st2); sttest( $t2->root, $st2, "CDATA Section"); my $st3='bold]]> text'; my $t3= new XML::Twig; $t3->parse( $st3); sttest( $t3->root, $st3, "CDATA Section"); my $st4='textbold]]>more text'; my $t4= new XML::Twig; $t4->parse( $st4); sttest( $t4->root, $st4, "CDATA Section"); my $st5='text more text'; my $t5= new XML::Twig; $t5->parse( $st5); sttest( $t5->root, $st5, "CDATA Section with ]]<"); # test prefix my $st6='textmore text'; my $t6= new XML::Twig; $t6->parse( $st6); $doc= $t6->root; $doc->prefix( 'p1:'); sttest( $t6->root,'p1:textmore text', "prefix doc"); my $el1= $doc->first_child( 'el1'); $el1->prefix( 'p2:'); sttest( $t6->root,'p1:p2:textmore text', "prefix el1"); my $el2= $doc->first_child( 'el2'); my $pcdata= $el2->first_child( PCDATA); $pcdata->prefix( 'p3:'); sttest( $t6->root,'p1:p2:textp3:more text', "prefix pcdata"); exit 0; __END__ XML-Twig-3.52/t/test_xml_split_g.t0000755000175000017500000000466512732215763017313 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; use Config; my $devnull = File::Spec->devnull; my $DEBUG=1; # be cautious: run this only on systems I have tested it on my %os_ok=( linux => 1, solaris => 1, darwin => 1, MSWin32 => 1); if( !$os_ok{$^O}) { print "1..1\nok 1\n"; warn "skipping, test runs only on some OSs\n"; exit; } if( $] < 5.006) { print "1..1\nok 1\n"; warn "skipping, xml_merge runs only on perl 5.6 and later\n"; exit; } print "1..13\n"; my $perl = used_perl(); my $xml_split = File::Spec->catfile( "tools", "xml_split", "xml_split"); my $xml_merge = File::Spec->catfile( "tools", "xml_merge", "xml_merge"); sys_ok( "$perl -c $xml_split", "xml_split compilation"); sys_ok( "$perl -c $xml_merge", "xml_merge compilation"); my $xml= q{} . join( "\n ", map { elt( $_) } (1..10)) . qq{\n}; my $xml_file= "test_xml_split_g.xml"; spit( $xml_file => $xml); systemq( "$perl $xml_split -g 3 -n 3 $xml_file"); my $main_file= "test_xml_split_g-000.xml"; my @files= map { sprintf( "test_xml_split_g-%03d.xml", $_) } (1..4); foreach ( $main_file, @files) { ok( -f $_, "created $_"); } is_like( slurp( "test_xml_split_g-000.xml"), q{} . join( '', map { ""} @files) . q{}, "main file content"); is_like( slurp( "test_xml_split_g-001.xml"), sub_file( 1..3), "test_xml_split_g-001.xml content"); is_like( slurp( "test_xml_split_g-002.xml"), sub_file( 4..6), "test_xml_split_g-002.xml content"); is_like( slurp( "test_xml_split_g-003.xml"), sub_file( 7..9), "test_xml_split_g-003.xml content"); is_like( slurp( "test_xml_split_g-004.xml"), sub_file( 10), "test_xml_split_g-004.xml content"); unlink $xml_file; systemq( "$perl $xml_merge $main_file > $xml_file"); is_like( slurp( $xml_file), $xml, "merge result"); unlink $xml_file, $main_file, @files; sub sub_file { my @elt_nb= @_; return q{} . join( '', map { elt( $_)} @elt_nb) . q{}; } sub elt { my( $nb)= @_; return qq{element $nb}; } # slurp and remove spaces from the file sub slurp_trimmed { my( $file)= @_; local undef $/; open( FHSLURP, "<$file") or return "$file not found:$!"; my $content=; $content=~ s{\s}{}g; return $content; } sub systemq { warn "$_[0]\n" if( !$DEBUG); system $_[0]; } XML-Twig-3.52/t/zz_dump_config.t0000755000175000017500000000671212732215763016743 0ustar mrodrigumrodrigu#!/usr/bin/perl my $ok; # global, true if the last call to version found the module, false otherwise use Config; warn "\n\nConfiguration:\n\n"; # required warn "perl: $]\n"; warn "OS: $Config{'osname'} - $Config{'myarchname'}\n"; print "\n"; warn "required\n"; warn version( XML::Parser, ''); # We obviously have expat on VMS, but a symbol/logical might # not be set to xmlwf, and when this is the case a # '%DCL-W-IVVERB, unrecognized command verb - check validity and spelling # \XMLWF\' # will be returned. my $skip_xmlwf_test = 0; if ($^O eq 'VMS') { if(`write sys\$output "''xmlwf'"` !~ m/[a-z]+/i) { $skip_xmlwf_test = 1; warn format_warn( 'expat', "Skipping expat (version) test as don't have a symbol for 'xmlwf'."); } } if (! $skip_xmlwf_test) { # try getting this info my $xmlwf_v= `xmlwf -v`; if( $xmlwf_v=~ m{xmlwf using expat_(.*)$}m) { warn format_warn( 'expat', $1, '(required)'); } else { warn format_warn( 'expat', ''); } } print "\n"; # must-have warn "Strongly Recommended\n"; warn version( Scalar::Util, 'for improved memory management'); if( $ok) { unless( defined( &Scalar::Util::weaken)) { warn format_warn( '', 'NOT USED, weaken not available in this version'); warn version( WeakRef); } } else { warn version( WeakRef, 'for improved memory management'); } # encoding warn version( Encode, 'for encoding conversions'); unless( $ok) { warn version( Text::Iconv, 'for encoding conversions'); } unless( $ok) { warn version( Unicode::Map8, 'for encoding conversions'); } print "\n"; # optional warn "Modules providing additional features\n"; warn version( XML::XPathEngine, 'to use XML::Twig::XPath'); warn version( XML::XPath, 'to use XML::Twig::XPath if Tree::XPathEngine not available'); warn version( LWP, 'for the parseurl method'); warn version( HTML::TreeBuilder, 'to use parse_html and parsefile_html'); warn version( HTML::Entities::Numbered, 'to allow parsing of HTML containing named entities'); warn version( HTML::Tidy, 'to use parse_html and parsefile_html with the use_tidy option'); warn version( HTML::Entities, 'for the html_encode filter'); warn version( Tie::IxHash, 'for the keep_atts_order option'); warn version( Text::Wrap, 'to use the "wrapped" option for pretty_print'); print "\n"; # used in tests warn "Modules used only by the auto tests\n"; warn version( Test, ''); warn version( Test::Pod, ''); warn version( XML::Simple, ''); warn version( XML::Handler::YAWriter, ''); warn version( XML::SAX::Writer, ''); warn version( XML::Filter::BufferText, ''); warn version( IO::Scalar, ''); warn version( IO::CaptureOutput, ''); my $zz_dump_config= File::Spec->catfile( t => "zz_dump_config.t"); warn "\n\nPlease add this information to bug reports (you can run $zz_dump_config to get it)\n\n"; warn "if you are upgrading the module from a previous version, make sure you read the\n", "Changes file for bug fixes, new features and the occasional COMPATIBILITY WARNING\n\n"; print "1..1\nok 1\n"; exit 0; sub version { my $module= shift; my $info= shift || ''; $info &&= "($info)"; my $version; if( eval "require $module") { $ok=1; import $module; $version= ${"$module\::VERSION"}; $version=~ s{\s*$}{}; } else { $ok=0; $version= ''; } return format_warn( $module, $version, $info); } sub format_warn { return sprintf( " %-25s: %16s %s\n", @_); } XML-Twig-3.52/t/test_3_42.t0000755000175000017500000000165012732215763015430 0ustar mrodrigumrodrigu#!/usr/bin/perl use strict; use warnings; use XML::Twig; use Test::More tests => 3; { my $t= XML::Twig->new( twig_handlers => { e => sub { XML::Twig::Elt->parse( '')->paste( before => $_); } }) ->parse(''); is( $t->sprint, '', 'elements created with parse are still available once parsing is done'); } import myElt; { my $doc='fooe1e2foo'; my $t= XML::Twig->new( elt_class => 'myElt', field_accessors => { e => 'e' }, elt_accessors => { ee => 'e', ef => 'f', }, ) ->parse( $doc); is( join( ':', map { $_->e } $t->root->ef), 'e1:e2', 'elt_accessors with elt_class'); is( join( ':', map { $_->ee->text } $t->root->children( 'f')), 'e1:e2', 'field_accessors with elt_class'); } package myElt; use base 'XML::Twig::Elt'; 1; XML-Twig-3.52/t/xmlxpath_05attrib.t0000755000175000017500000000103612732215763017277 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use Test; plan( tests => 6); use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @ids = $t->findnodes( '//BBB[@id]'); ok(@ids, 2); my @names = $t->findnodes( '//BBB[@name]'); ok(@names, 1); my @attribs = $t->findnodes( '//BBB[@*]'); ok(@attribs, 3); my @noattribs = $t->findnodes( '//BBB[not(@*)]'); ok(@noattribs, 1); exit 0; __DATA__ XML-Twig-3.52/t/test_3_30.t0000755000175000017500000003407212732215763015431 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; $|=1; my $DEBUG=0; use XML::Twig; my $TMAX=87; print "1..$TMAX\n"; if( _use( 'Tie::IxHash')) { # test the new indent format example from http://tinyurl.com/2kwscq my $doc=q{ }; my $formatted= XML::Twig->parse( keep_atts_order => 1, pretty_print => cvs => $doc)->sprint; is( $formatted, $doc, 'cvs pretty_print'); } else { skip( 1, "Tie::IxHash not available, cannot test the cvs pretty_print option"); } if( $XML::Parser::VERSION > 2.27) { my $test_dir= "ent_test"; mkdir( $test_dir, 0777) or die "cannot create $test_dir: $!" unless( -d $test_dir); my $xml_file_base = "test.xml"; my $xml_file= File::Spec->catfile( $test_dir => $xml_file_base); my $ent_file_base = "ent.xml"; my $ent_file= File::Spec->catfile( $test_dir => $ent_file_base); my $doc= qq{ ]>&ent;}; my $ent= qq{}; spit( $xml_file, $doc); spit( $ent_file, $ent); my $expected= ''; is( XML::Twig->parse( pretty_print => 'none', $xml_file)->root->sprint, $expected, 'entity resolving when file is in a subdir'); unlink $xml_file or die "cannot remove $xml_file: $!"; unlink $ent_file or die "cannot remove $ent_file: $!"; rmdir $test_dir or die "cannot remove $test_dir: $!"; } else { skip( 1 => "known bug with old XML::Parser versions: base uri not taken into account,\n" . "see RT #25113 at http://rt.cpan.org/Public/Bug/Display.html?id=25113" ); } { my $doc= ""; my $doc_file= "doc.xml"; spit( $doc_file, $doc); my $t= XML::Twig->new; foreach (1..3) { $t->parse( $doc); is( $t->sprint, $doc, "re-using a twig with parse (run $_)"); $t->parse( $doc); is( $t->sprint, $doc, "re-using a twig with parse (run $_)"); $t->parsefile( $doc_file); is( $t->sprint, $doc, "re-using a twig with parsefile (run $_)"); $t->parsefile( $doc_file); is( $t->sprint, $doc, "re-using a twig with parsefile (run $_)"); } unlink $doc_file; } { my $invalid_doc= "
"; my $invalid_doc_file= "invalid_doc.xml"; spit( $invalid_doc_file, $invalid_doc); my $expected="e"; my( $result); my $expected_sprint="
"; my $t= XML::Twig->new( twig_handlers => { e => sub { $result.= $_->tag; shift->finish_now } }); foreach (1..3) { $result=''; $t->parse( $invalid_doc); is( $result, $expected, "finish_now with parse (run $_)"); is( $t->sprint, $expected_sprint, "finish_now with parse (sprint, run $_)"); $result=''; $t->parsefile( $invalid_doc_file); is( $result, $expected, "finish_now with parsefile (run $_)"); is( $t->sprint, $expected_sprint, "finish_now with parse (sprint, run $_)"); } unlink $invalid_doc_file; } { my $doc1=qq{\n\n]>\n t1 &e1; &e2;}; my $doc2=qq{\n\n]>\n t1 &e1; &e3;}; (my $edoc1 = $doc1)=~ s{&e(\d);}{[e$1]}g; (my $edoc2 = $doc2)=~ s{&e(\d);}{[e$1]}g; my $t= XML::Twig->new( keep_spaces => 1); is( $t->parse( $doc1)->sprint, $edoc1, "XML::Twig reuse (run 1: doc1)"); is( $t->parse( $doc2)->sprint, $edoc2, "XML::Twig reuse (run 2: doc2)"); is( $t->parse( $doc1)->sprint, $edoc1, "XML::Twig reuse (run 3: doc1)"); is( $t->parse( $doc1)->sprint, $edoc1, "XML::Twig reuse (run 4: doc1)"); is( $t->parse( $doc2)->sprint, $edoc2, "XML::Twig reuse (run 5: doc2)"); is( $t->parse( $doc2)->sprint, $edoc2, "XML::Twig reuse (run 6: doc2)"); } # some additional coverage { # entity sprint my $tata= "tata content"; spit( "tata.txt", $tata); my %ent_desc=( foo => q{"toto"}, bar => q{SYSTEM "tata.txt"}, baz => q{SYSTEM "tutu.txt" NDATA gif}); my %decl= map { $_ => "" } keys %ent_desc; my $decl_string= join( '', values %decl); my $doc= qq{}; my $t= XML::Twig->parse( $doc); foreach my $ent (sort keys %decl) { is( $t->entity( $ent)->sprint, $decl{$ent}, "sprint entity $ent ($decl{$ent})"); } } { # purge on an element { my $t= XML::Twig->parse( twig_handlers => { e2 => sub { $_->purge } }, q{}); is( $t->root->first_child->tag, 'e3', "purge on the current element"); } { my $t= XML::Twig->parse( twig_handlers => { e2 => sub { $_->prev_sibling->purge } }, q{}); is( $t->root->first_child->tag, 'e2', "purge on an element"); } { my $t= XML::Twig->parse( twig_handlers => { e2 => sub { $_->prev_sibling->purge( $_) } }, q{}); is( $t->root->first_child->tag, 'e3', "purge on an element up to the current element"); } { my $t= XML::Twig->parse( twig_handlers => { e3 => sub { $_->prev_sibling( 'e1')->purge( $_->prev_sibling) } }, q{}); is( $t->root->first_child->tag, 'e3', "purge on an element up to an other element"); } { my $t= XML::Twig->parse( twig_handlers => { e2 => sub { $_[0]->purge_up_to( $_->prev_sibling) } }, q{}); is( $t->root->first_child->tag, 'e2', "purge_up_to"); } } { my $t= XML::Twig->parse( ']>'); is( $t->doctype_name, 'foo', 'doctype_name (with value)'); is( $t->system_id, 'foo.dtd', 'system_id (with value)'); is( $t->public_id, '-//xmltwig//DTD xmltwig test 1.0//EN', 'public_id (with value)'); is( $t->internal_subset, '', 'internal subset (with value)'); } { my $t= XML::Twig->parse( ''); is( $t->doctype_name, '', 'doctype_name (no value)'); is( $t->system_id, '', 'system_id (no value)'); is( $t->public_id, '', 'public_id (no value)'); is( $t->internal_subset, '', 'internal subset (no value)'); } { my $t= XML::Twig->parse( ''); is( $t->doctype_name, 'foo', 'doctype_name (with value)'); is( $t->system_id, 'foo.dtd', 'system_id (with value)'); is( $t->public_id, '', 'public_id (no value)'); is( $t->internal_subset, '', 'internal subset (no value)'); } { my $t= XML::Twig->parse( ']>'); is( $t->doctype_name, 'foo', 'doctype_name (with value)'); is( $t->system_id, '', 'system_id (no value)'); is( $t->public_id, '', 'public_id (no value)'); is( $t->internal_subset, '', 'internal subset (with value)'); } { my $prolog= ']>'; my $doc= ''; my $t= XML::Twig->parse( $prolog . $doc); (my $expected_prolog= $prolog)=~ s{foo}{d}; $t->set_doctype( 'd'); is_like( $t->doctype, $expected_prolog, 'set_doctype'); is_like( $t->sprint, $expected_prolog . $doc); } { # test external entity declaration with SYSTEM _and_ PUBLIC # create external entities my @ext_files= qw( tata1 tata2); foreach my $file (@ext_files) { spit( $file => "content of $file"); } my $doc= q{%bar1;%bar2;]>}; is_like( XML::Twig->parse( $doc)->sprint, $doc, 'external entity declaration with SYSTEM _and_ PUBLIC, regular parse/sprint'); my $out_file= "tmp_test_ext_ent.xml"; open( OUT, ">$out_file") or die "cannot create temp result file '$out_file': $!"; XML::Twig->parse( twig_roots => { elt => sub { $_->print( \*OUT) } }, twig_print_outside_roots => \*OUT, $doc); close OUT; is_like( slurp( $out_file), $doc, 'external entity declaration with SYSTEM _and_ PUBLIC, with twig_roots'); unlink $out_file; open( OUT, ">$out_file") or die "cannot create temp result file '$out_file': $!"; XML::Twig->parse( twig_roots => { elt => sub { $_->print( \*OUT) } }, twig_print_outside_roots => \*OUT, keep_encoding => 1, $doc); close OUT; is_like( slurp( $out_file), $doc, 'external entity declaration with SYSTEM _and_ PUBLIC, with twig_roots and keep_encoding'); unlink @ext_files, $out_file; } { my $doc= q{selt 1selt 2}; my $t= XML::Twig->parse( pretty_print => 'indented', $doc); my $elt_indented = "\n selt 1\n selt 2"; my $elt_not_indented = "selt 1selt 2"; is( $t->first_elt( 'elt')->xml_string, $elt_indented, 'xml_string, indented'); is( $t->first_elt( 'elt')->xml_string( { pretty_print => 'none'} ), $elt_not_indented, 'xml_string, NOT indented'); is( $t->first_elt( 'elt')->xml_string, $elt_indented, 'xml_string, indented again'); } { my $doc=q{ ]>&zzent;}; eval { XML::Twig->new->parse( $doc); }; matches( $@, qr{zznot_there}, "missing SYSTEM entity: file info in the error message ($@)"); matches( $@, qr{zzent}, "missing SYSTEM entity: entity info in the error message ($@)"); } { if( _use( 'HTML::TreeBuilder', 3.13)) { XML::Twig->set_pretty_print( 'none'); my $html=q{

Title

foo
bar

}; my $expected= q{

Title

foo
bar

}; is( XML::Twig->new->safe_parse_html( $html)->sprint, $expected, 'safe_parse_html'); my $html_file= "t/test_3_30.html"; spit( $html_file, $html); is( XML::Twig->new->safe_parsefile_html( $html_file)->sprint, $expected, 'safe_parsefile_html'); if( _use( 'LWP')) { is( XML::Twig->new->safe_parseurl_html( "file:$html_file")->sprint, $expected, 'safe_parseurl_html'); } else { skip( 1, "LWP not available, cannot test safe_parseurl_html"); } unlink $html_file; } else { skip( 3, "HTML::TreeBuilder not available, cannot test safe_parse.*_html methods"); } } { my $dump= XML::Twig->parse( q{text})->_dump; my $sp=qr{[\s|-]*}; matches( $dump, qr{^document $sp doc $sp id="1" $sp elt $sp PCDATA: $sp 'text'\s*}x, "twig _dump"); } { my $dump= XML::Twig->parse( q{]>&foo;})->entity( 'foo')->_dump; is( $dump, q{name => 'foo' - val => 'bar'}, "entity dump"); } { if( $XML::Parser::VERSION > 2.27) { my $t= XML::Twig->parse( q{ ]>&afoo;}); my $non_param_ent= $t->entity( 'afoo'); nok( $non_param_ent->param, 'param on a non-param entity'); my $param_ent= $t->entity( 'bfoo'); ok( $param_ent->param, 'param on a parameter entity'); } else { skip( 2, "cannot use the param method with XML::Parser 2.27"); } } { my $entity_file = "test_3_30.t.ent"; my $missing_file = "not_there"; spit( $entity_file => "entity text"); my $doc= qq{]>&foo;}; ok( eval { XML::Twig->parse( $doc)}, 'doc with missing external SYSTEM ents'); eval { XML::Twig->parse( expand_external_ents => 1, $doc)}; matches( $@, qr{cannot load SYSTEM entity 'bar' from 'not_there': }, 'missing SYSTEM entity'); ok( eval { XML::Twig->parse( $doc)}, 'doc with missing external SYSTEM ents'); my $t= XML::Twig->parse( expand_external_ents => -1, $doc); my $missing_entities= $t->{twig_missing_system_entities}; is( scalar( values %$missing_entities), 1, 'number of missing system entities'); is( (values %$missing_entities)[0]->{name}, 'bar', 'name of missing system entity'); is( (values %$missing_entities)[0]->{sysid}, $missing_file, 'sysid of missing system entity'); eval { XML::Twig->parse( $doc)}; ok( eval { XML::Twig->parse( $doc)}, 'doc with missing external SYSTEM NDATA ents'); unlink( $entity_file); } { my $entity_file = "test_3_30.t.gif"; my $missing_file = "not_there.gif"; spit( $entity_file => "entity text"); my $doc= qq{ ]> }; my $t= XML::Twig->parse( $doc); my $missing_entities= $t->{twig_missing_system_entities}; is( scalar( values %$missing_entities), 1, 'number of missing system entities'); is( $missing_entities->{barn}->name, 'barn', 'name of missing system entity'); is( $missing_entities->{barn}->sysid, $missing_file, 'sysid of missing system entity'); unlink( $entity_file); } { my $doc= q{foo bar baz foobarxyztototatatutu}; my $t= XML::Twig->parse( twig_handlers => { b => sub { $_->erase } }, $doc); is( scalar( $t->descendants( '#TEXT')), 3, 'text descendants, no melding'); $t->normalize; is( scalar( $t->descendants( '#TEXT')), 3, 'text descendants, normalized'); } { my $doc=q{ee1e1-2}; XML::Twig::Elt->init_global_state(); # depending on which modules are available, the state could have been modified my $tmp= "tmp"; open( TMP, ">$tmp") or die "cannot create temp file"; XML::Twig->parse( twig_roots => { e1 => sub { $_->flush( \*TMP) } }, twig_print_outside_roots => \*TMP, $doc); close TMP; my $res= slurp( $tmp); is( $res, $doc, "bug in flush with twig_print_outside_roots"); unlink $tmp; } { # test bug where #default appeared in attributes (RT #27617) my $doc= ''; my $t= XML::Twig->new( map_xmlns => { 'foo' => 'ns2' },)->parse( $doc); ok( grep { $_ eq 'att' } keys %{$t->root->first_child->atts}, 'no #default in attribute names'); } exit; 1; XML-Twig-3.52/t/xmlxpath_21allnodes.t0000755000175000017500000000245212732215763017614 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 13); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @nodes; @nodes = $t->findnodes( '//GGG/ancestor::*'); ok(@nodes, 4); @nodes = $t->findnodes( '//GGG/descendant::*'); ok(@nodes, 3); @nodes = $t->findnodes( '//GGG/following::*'); ok(@nodes, 3); ok($nodes[0]->getName, "VVV"); ok($nodes[1]->getName, "CCC"); ok($nodes[2]->getName, "DDD"); @nodes = $t->findnodes( '//GGG/preceding::*'); ok(@nodes, 5); ok($nodes[0]->getName, "BBB"); # document order, not HHH @nodes = $t->findnodes( '//GGG/self::*'); ok(@nodes, 1); ok($nodes[0]->getName, "GGG"); @nodes = $t->findnodes( '//GGG/ancestor::* | //GGG/descendant::* | //GGG/following::* | //GGG/preceding::* | //GGG/self::*'); ok(@nodes, 16); exit 0; __DATA__ XML-Twig-3.52/t/xmlxpath_xpath_cond.t0000755000175000017500000000541312732215763017777 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,'t'); use tools; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; $|=1; my $t= XML::Twig::XPath->new; $t->parse( ' elt 1 elt 2 elt 3 2 3 '); my @data= grep { !/^##/ && m{\S} } ; my @exp; my %result; foreach( @data) { chomp; my ($exp, $id_list) = split /\s*=>\s*/ ; $result{$exp}= $id_list; push @exp, $exp; } my $nb_tests= 2 + keys %result; print "1..$nb_tests\n"; my $i=1; foreach my $exp ( @exp) { my $expected_result= $result{$exp}; my @result= $t->findnodes( $exp); my $result; if( @result) { $result= join ' ', map { $_->id } @result; } else { $result= 'none'; } if( $result eq $expected_result) { print "ok $i\n"; } else { print "nok $i\n"; print STDERR "$exp: expected $expected_result - real $result\n"; } $i++; } my $exp= '//* |//@* | /'; my @result= $t->findnodes( $exp); my @elts= $t->descendants( '#ELT'); # first check the number of results my $result= @result; my $nb_atts=0; foreach (@elts) { $nb_atts+= $_->att_nb; } my $expected_result= scalar @elts + $nb_atts + 1; if( $result == $expected_result) { print "ok $i\n"; } else { print "nok $i\n"; print STDERR "$exp: expected $expected_result - real $result\n"; } $i++; # then check the results (to make sure they are in hte right order) my @expected_results; push @expected_results, "XML::Twig::XPath '" . $t->sprint ."'"; foreach my $elt (@elts) { push @expected_results, ref( $elt) . " '" . $elt->sprint . "'" ; foreach my $att ($elt->att_names) { push @expected_results, qq{XML::Twig::XPath::Attribute '$att="} . $elt->att( $att) . q{"'} ; } } $expected_result= join( "\n ", @expected_results); $result= join( "\n ", map { ref( $_) . " '" . $_->toString ."'" } @result); if( $result eq $expected_result) { print "ok $i\n"; } else { print "nok $i\n"; print STDERR "$exp:\nexpected: $expected_result\n\nreal : $result\n"; } $i++; exit 0; __DATA__ /elt => none //elt => elt-1 elt-2 elt-3 /doc/elt => elt-1 elt-2 /doc/elt[ last()] => elt-2 //elt[@id='elt-1'] => elt-1 //elt[@id="elt-1"] | //elt[@id="elt-2"] | //elt[@id="elt-3"] => elt-1 elt-2 elt-3 //elt[@id="elt-1" or @id="elt-2" or @id="elt-3"] => elt-1 elt-2 elt-3 //elt2[@att_int > 2] => elt2-4 /doc/elt2[ last()]/* => elt2-3 elt2-4 //*[@id="elt2-2"] => elt2-2 /doc/elt2[./elt[@id="elt-3"]] => elt2-1 XML-Twig-3.52/t/test_erase.t0000755000175000017500000000365712732215763016071 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use XML::Twig; $|=1; my $TMAX=1; # do not forget to update! print "1..$TMAX\n"; undef $/; my $doc=; my $t= XML::Twig->new(keep_spaces => 1); $t->parse( $doc); foreach my $erase ($t->descendants( 'erase')) { $erase->erase; } my $result=$t->sprint; $result=~ s{\s*$}{}s; # remove trailing spaces (and \n) my $expected_result= $doc; $expected_result=~ s{}{}g; $expected_result=~ s{\s*$}{}s; # remove trailing spaces (and \n) if( $result eq $expected_result) { print "ok 1\n"; } else { print "not ok 1\n"; print STDERR "expected: \n$expected_result\n", "real: \n$result\n"; } exit 0; __DATA__ text text (1) text text (2) text (3) text text (4) text (5) text (6) text (7)text (8) text (9) text (10) text (11) text text (12) text (13)text (14) text (15) text (16) text (17)text (18) text (19) text (20)child/> text (21)child/> text (22) XML-Twig-3.52/t/xmlxpath_nav.t0000755000175000017500000000247012732215763016434 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; $|=1; my $t= XML::Twig::XPath->new; $t->parse( ' elt 1 elt 2 elt 3 2 3 elt 3 '); my @data= grep { !/^##/ && m{\S} } ; my @exp; my %result; foreach( @data) { chomp; my ($exp, $id_list) = split /\s*=>\s*/ ; $result{$exp}= $id_list; push @exp, $exp; } my $nb_tests= keys %result; print "1..$nb_tests\n"; my $i=1; foreach my $exp ( @exp) { my $expected_result= $result{$exp}; my $result_elt= $t->root->first_child( $exp); my $result= $result_elt ? $result_elt->att( 'id') : 'none'; if( $result eq $expected_result) { print "ok $i\n"; } else { print "nok $i\n"; print STDERR "$exp: expected $expected_result - real $result\n"; } $i++; } exit 0; __DATA__ elt => elt-1 elt[@id="elt-4"] => elt-4 elt[@id="elt-3"] => none *[@att > 1] => elt-2 elt2[2] => elt2-2 ##elt2[./elt2] => elt2-2 elt3 => none XML-Twig-3.52/t/test_3_41.t0000755000175000017500000001217412732215763015432 0ustar mrodrigumrodrigu#!/usr/bin/perl use strict; use warnings; use XML::Twig; use Test::More tests => 16; { my $in= 'RoseBlackberryCarrot'; my $expected= 'RoseTomatoBlackberryCarrot'; { my $t = XML::Twig->new( twig_handlers => { '//plant/fruit' => sub { XML::Twig::Elt->new( berry => 'Tomato')->paste( $_); } }) ->parse( $in); is( $t->sprint, $expected, 'paste within handler from new element'); } { my $t = XML::Twig->new( twig_handlers => { '//plant/fruit' => sub { XML::Twig->new->parse( 'Tomato')->root->cut->paste( first_child => $_); } }) ->parse( $in); is( $t->sprint, $expected, 'paste new element from twig within handler from parsed element (cut)'); } { my $t = XML::Twig->new( twig_handlers => { '//plant/fruit' => sub { XML::Twig->new->parse( 'Tomato')->root->paste( $_); } }) ->parse( $in); is( $t->sprint, $in, 'paste new element from twig within handler from parsed element (non cut)'); } } { my $d='foo'; my $calls; XML::Twig->new( twig_roots => { f => 1 }, end_tag_handlers => { e => sub { $calls .= ":e"; }, 'd/e' => sub { $calls .= "d/e" }, }, ) ->parse( $d); is( $calls, 'd/e:e', 'several end_tag_handlers called'); $calls=''; XML::Twig->new( twig_roots => { f => 1 }, end_tag_handlers => { e => sub { $calls .= ":e"; }, 'd/e' => sub { $calls .= "d/e"; return 0; }, }, ) ->parse( $d); is( $calls, 'd/e', 'end_tag_handlers chain broken by false return'); } { my $d='foo'; my $calls; XML::Twig->new( twig_roots => { f => 1 }, ignore_elts => { e => 1 }, end_tag_handlers => { e => sub { $calls .= ":e"; }, 'f/e' => sub { $calls .= "f/e" }, }, ) ->parse( $d); is( $calls, 'f/e:e', 'several end_tag_handlers called with ignore_elts active'); $calls=''; XML::Twig->new( twig_roots => { f => 1 }, ignore_elts => { e => 1 }, end_tag_handlers => { e => sub { $calls .= ":e"; }, 'f/e' => sub { $calls .= "f/e"; return 0; }, }, ) ->parse( $d); is( $calls, 'f/e', 'end_tag_handlers chain with ignore_elts active broken by false return'); } is( XML::Twig->parse( '')->encoding, undef, 'encoding, no xml declaration'); is( XML::Twig->parse( '')->encoding, undef, 'encoding, xml declaration but no encoding given'); is( XML::Twig->parse( '')->encoding, 'utf-8', 'encoding, encoding given'); is( XML::Twig->parse( '')->standalone, undef, 'standalone, no xml declaration'); is( XML::Twig->parse( '')->standalone, undef, 'standalone, xml declaration but no standalone bit'); ok( XML::Twig->parse( '')->standalone, 'standalone, yes'); ok( ! XML::Twig->parse( '')->standalone, 'standalone, no'); { XML::Twig::_set_weakrefs(0); my $t= XML::Twig->parse( ''); $t->root->first_child( 'e')->next_sibling( 'e')->erase; is( $t->sprint, '', 'erase without weakrefs'); XML::Twig::_set_weakrefs(1) } { my $doc=' 1 ... 2 ... '; my $expected= $doc; $expected=~ s{ns1}{cmdsvc}g; $expected=~ s{ns2}{shlsvc}g; my %map= reverse ( cmdsvc => "http://namespace/CommandService", shlsvc => "http://namespace/ShelfService", xsi => "http://www.w3.org/2001/XMLSchema-instance", ); my $x = XML::Twig->new( map_xmlns => { %map }, twig_handlers => { '*[@xsi:type]' => sub { upd_xsi_type( @_, \%map) } }, pretty_print => "indented" ); $x->parse($doc); is( $x->sprint, $expected, 'original_uri'); sub upd_xsi_type { my( $t, $elt, $map)= @_; my $type= $elt->att( 'xsi:type'); my( $old_prefix)= $type=~ m{^([^:]*):}; if( my $new_prefix= $map->{$t->original_uri( $old_prefix)}) { $type=~ s{^$old_prefix}{$new_prefix}; $elt->set_att( 'xsi:type' => $type); } return 1; # to make sure other handlers are called } } XML-Twig-3.52/t/test_additional.t0000755000175000017500000035213012732215763017073 0ustar mrodrigumrodrigu#!/usr/bin/perl -w # test designed to improve coverage of the module use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; $|=1; my $DEBUG=0; use XML::Twig; my $perl= $]; my $open; BEGIN { if( $] < 5.008) { $open= sub { return }; } else { $open= eval( 'sub { open( $_[0], $_[1], $_[2]) }'); } } my $TMAX=663; print "1..$TMAX\n"; { my $t= XML::Twig->new->parse( q{ foo ]]> bar }); # use CDATA my $cdata= $t->first_elt( CDATA)->text; is( $cdata, 'cdata 01', 'first_elt( CDATA)');# test 1 is( $t->first_elt( CDATA)->cdata_string, '', 'cdata_string');# test 2 is( $t->root->cdata_string, '', 'cdata_string for non cdata element');# test 3 my $cdata2= $t->root->first_child( 'cdata[2]')->next_elt( CDATA)->text; is( $cdata2, 'cdata <02>', 'first_child( cdata[2])');# test 4 } # test warning for invalid options my $old_warning_handler= $SIG{__WARN__}; { my $warning=""; $SIG{__WARN__} = sub { $warning.= join '', @_ }; XML::Twig->new( dummy_opt => 1); $SIG{__WARN__}= $old_warning_handler; chomp $warning; matches( $warning, qr{^invalid option DummyOpt}, "expecting 'invalid option DummyOpt...', got '$warning'");# test 5 # test no warming if more_options is used $warning=""; $SIG{__WARN__} = sub { $warning.= join '', @_ }; XML::Twig->new( more_options => 1, dummy_opt => 1); $SIG{__WARN__}= $old_warning_handler; nok( $warning, "expecting no warning, got '$warning'");# test 6 $warning=""; $SIG{__WARN__} = sub { $warning.= join '', @_ }; XML::Twig::add_options( 'dummy_opt'); XML::Twig->new( dummy_opt => 1); $SIG{__WARN__}= $old_warning_handler; nok( $warning, "expecting no warning (2), got '$warning'");# test 7 } { # test do_not_chain_handlers my $nb_calls=0; my $t= XML::Twig->new( twig_handlers => { chain => sub { $nb_calls++; 1;}, 'doc/chain' => sub { $nb_calls++; 1;}, }, )->parse( 'chained'); is( $nb_calls, 2, "chained calls");# test 8 $nb_calls=0; $t= XML::Twig->new( twig_handlers => { chain => sub { $nb_calls++; 1 }, 'doc/chain' => sub { $nb_calls++; 1 }, }, do_not_chain_handlers => 1, )->parse( 'chained'); is( $nb_calls, 1, "not chained calls");# test 9 $nb_calls=0; $t= XML::Twig->new( twig_handlers => { chain => sub { $nb_calls++; 0; }, 'doc/chain' => sub { $nb_calls++; 0; }, }, )->parse( 'chained'); is( $nb_calls, 1, "chained handlers returning 0");# test 10 } # test ignore_elt { my $t= XML::Twig->new( ignore_elts => { i1 => 1, i2 => 2}) ->parse( ' '); my @t= $t->findnodes( '//t'); my $nb_t= scalar @t; is( $nb_t, 4, 'findnodes //t');# test 11 } # test elt_class { my $t= XML::Twig->new( elt_class => 'twig_test')->parse( ''); package twig_test; use base 'XML::Twig::Elt'; sub test { return 25 } package main; is( $t->root->test, 25, 'elt_class');# test 12 } # test char_handler { my $t= XML::Twig->new( char_handler => sub { my $s= shift; $s=~ s/\w/./g; return $s; }) ->parse( 'foobaz'); my $text= $t->root->text; is( $text, '......', 'silly char_handler');# test 13 my $att= $t->root->last_child( 'elt')->att( 'att'); is( $att, 'bar', 'last_child');# test 14 $att= $t->root->last_child( 'elt')->att( 'att'); # to use the cache is( $att, 'bar', 'last_child');# test 15 } # test various methods { my $t= XML::Twig->new->parse( ''); my @new_children= $t->root->children_copy; $t->set_id_seed( 'toto_'); $_->add_id foreach @new_children; my $id= $new_children[0]->att( 'id'); is( $id, 'toto_1', 'copy att');# test 16 $new_children[1]->change_att_name( id => 'foo'); my $foo= $new_children[1]->att( 'foo'); is( $foo, 'toto_2', 'change_att_name');# test 17 ok( $t->root->all_children_are( 'elt'), "all_children_are( 'elt')");# test 18 nok( $t->root->all_children_are( 'none'), "all_children_are( 'none')");# test 19 my $count= $t->root->children_count( 'elt'); is( $count, 2, "children_count( 'elt')");# test 20 $count= $t->root->children_count( 'none'); is( $count, 0, "children_count( 'none')");# test 21 $count= $t->root->children_count; is( $count, 2, "children_count");# test 22 ok( $t->root->first_child_matches( 'elt'), "first_child_matches");# test 23 $t->root->insert_new_elt( 'p'); nok( $t->root->all_children_are( 'elt'), "all_children_are( 'elt') (with p child)");# test 24 } # test cdata append_cdata, append_extra_data, append_pcdata { my $t=XML::Twig->new->parse( 'text more text'); my $cdata= $t->root->next_elt( CDATA)->cdata; is( $cdata, 'some cdata', 'created CDATA element');# test 25 $t->root->next_elt( CDATA)->append_cdata( ' appended<>'); $t->root->next_elt( PCDATA)->append_pcdata( 'more '); $t->root->first_child( 'elt')->append_extra_data( ''); is( $t->sprint, 'text more ]]> more text', "append_extra_data");# test 26 } # test att_names and att_to_field { my $t= XML::Twig->new->parse( ''); my $elt= $t->root->first_child_matches( 'elt'); ok( $elt, "first_child_matches");# test 27 my $att_names= join ':', sort $elt->att_names; is( $att_names, 'att1:att2', "att_names");# test 28 $elt->att_to_field( 'att1'); $elt->att_to_field( att2 => "new"); my $elt_string= $elt->sprint; is( $elt_string, 'barfoo', "att_to_field")# test 29 } # test child_matches child_text child_trimmed_text children_text { my $t= XML::Twig->new->parse( ' text text text'); my $root= $t->root; ok( $root->child_matches( 1, 'elt2'), "child_matches");# test 30 my $text= $root->child_text( 0); is( $text, ' text ', "child_text");# test 31 $text= $root->child_trimmed_text( -1, 'elt2'); is( $text, 'text text', "child_trimmed_text", 25 );# test 32 $text= join( '-', $root->children_text( qr/elt/)); is( $text, ' text - text text', "children_text");# test 33 } # test _ancestors _children _descendants { my $t= XML::Twig->new->parse( ''); is( tags( $t->root->_children), 'elt1:elt2', "_children");# test 34 is( tags( $t->root->_descendants), 'elt1:elt2:elt3', "_descendants");# test 35 is( $t->root->last_child_matches( 'elt3') ? "matches" : "no match",# test 36 "no match", "last_child_matches (no match)"); my $elt3= $t->root->last_child_matches( 'elt2')->first_child; is( $elt3->gi, 'elt3', "last_child_matches (match)");# test 37 is( tags( $elt3->_ancestors), 'elt2:doc', "_ancestors");# test 38 is( tags( $elt3->_ancestors(1)), 'elt3:elt2:doc', "_ancestors(1)");# test 39 is( tags( $t->root->descendants( 'elt1')), 'elt1', 'descendants with gi');# test 40 is( tags( $t->root->descendants()), 'elt1:elt2:elt3', 'descendants without gi');# test 41 is( tags( $t->root->descendants( qr/^elt/)), 'elt1:elt2:elt3', 'descendants with qr');# test 42 is( tags( $t->root->descendants( qr/^elt/)), 'elt1:elt2:elt3', 'descendants with qr (using cache)');# test 43 } # test comment methods { my $t= XML::Twig->new( comments => 'process') ->parse( 'text '); my $comment= $t->first_elt( '#COMMENT'); is( $comment->comment, ' foo ', "comment");# test 44 is( $comment->comment_string, '', "comment");# test 45 } # test element creation { my $t= XML::Twig->new->parse( ''); my $root= $t->root; my $elt= $root->insert_new_elt( first_child => 'elt'); my $elt3= $elt->insert_new_elt( after => elt3 => "elt3 text"); my $elt2= $elt3->insert_new_elt( before => elt2 => { att => "foo" }, "elt2 text"); is( $t->sprint, 'elt2 textelt3 text',# test 46 "insert_new_elt"); $root->cut_children; is( $t->sprint, '', "cut_children");# test 47 $elt= $root->insert_new_elt( last_child => 'elt' => { '#ASIS' => 1 }, "

bar

"); is( $elt->is_asis ? 'asis' : 'not asis', 'asis', "is_asis (initial, yes)");# test 48 is( $t->sprint, '

bar

', "insert_new_elt (ASIS)");# test 49 $elt->set_not_asis; is( $elt->is_asis ? 'asis' : 'not asis', 'not asis', "is_asis (unset, no)");# test 50 is( $t->sprint, '<p>bar</p>', "set_not_asis");# test 51 $elt->set_asis; is( $elt->is_asis ? 'asis' : 'not asis', 'asis', "is_asis (set, yes)");# test 52 is( $t->sprint, '

bar

', "set_asis");# test 53 $root->cut_children; $root->insert_new_elt( first_child => '#CDATA' => "toto"); is( $t->sprint, '', "create CDATA");# test 54 is($root->last_child_matches( '#CDATA') ? "match" : "no match", "match", "last_child_matches (yes)");# test 55 is($root->last_child_matches( "foo") ? "match" : "no match", "no match", "last_child_matches (no)");# test 56 my $cdata= $root->last_child_matches( '#CDATA'); ok( $cdata->is_cdata, "cdata is_cdata");# test 57 nok( $cdata->is_comment, "cdata is_comment");# test 58 nok( $cdata->is_pi, "cdata is_pi");# test 59 nok( $cdata->is_empty, "cdata is_empty");# test 60 nok( $cdata->is_ent, "cdata is_ent");# test 61 ok( $cdata->is_first_child, "cdata is_first_child");# test 62 ok( $cdata->is_last_child, "cdata is_last_child");# test 63 } # test field last_child_text last_child_trimmed_text { my $t= XML::Twig->new->parse( 'val1val2'); my $root= $t->root; $root->set_field( field2 => "new val2 "); is( $root->last_child_text( 'field2'), "new val2 ", "set_field");# test 64 is( $root->last_child_trimmed_text( 'field2'), "new val2", "set_field (trimmed text)");# test 65 is( $root->last_child_text( 'field1'), "val1", "last_child_text");# test 66 $root->set_field( field3 => "val3"); is( $t->sprint, 'val1new val2 val3',# test 67 "set_field (new field)"); } # test next/prev navigation functions { my $t= XML::Twig->new->parse( q{elt 1 elt 2 elt 3elt 4 elt 5 } ); my $root= $t->root; my $elt1= $t->getElementById( 'elt_1'); is( $elt1->sprint, 'elt 1', "getElementById");# test 68 my $sect= $elt1->getElementById( 'sect_1'); is( $sect->sprint, 'elt 3elt 4', "getElementById (sect)");# test 69 ok( $elt1->next_elt_matches( '#PCDATA[text()="elt 1"]'), "next_elt_matches (elt1 => elt)");# test 70 ok( $elt1->prev_elt_matches( 'doc'), "prev_elt_matches (elt1 => doc)");# test 71 ok( $sect->next_elt_matches( 'elt[@id="elt_3"]'), "next_elt_matches (sect => elt_3)");# test 72 ok( $sect->prev_elt_matches( '#PCDATA[text()="elt 2"]'), "prev_elt_matches (sect => elt_2)");# test 73 is( $sect->next_elt_text( 'elt[@id="elt_5"]'), 'elt 5 ', "next_elt_text");# test 74 is( $sect->next_elt_trimmed_text( 'elt[@id="elt_5"]'), 'elt 5', "next_elt_trimmed_text");# test 75 nok( $sect->next_elt( $sect, 'elt[@id="elt_5"]'), "next_elt (outside the subtree)");# test 76 nok( $sect->next_elt_text( $sect, 'elt[@id="elt_5"]'), "next_elt_text (outside the subtree)");# test 77 is( $sect->first_child_trimmed_text, "elt 3", "first_child_trimmed_text");# test 78 is( $sect->first_child_trimmed_text( 'goofy'), "", "first_child_trimmed_text (no child)");# test 79 # test comparisons $elt1 < $sect < $elt3 my $elt3= $t->elt_id( 'elt_3'); ok( $elt1->le( $sect), "\$elt1 le \$sect");# test 80 ok( $elt1->lt( $sect), "\$elt1 lt \$sect");# test 81 nok( $elt1->ge( $sect), "\$elt1 ge \$sect");# test 82 nok( $elt1->gt( $sect), "\$elt1 gt \$sect");# test 83 nok( $elt3->le( $sect), "\$elt3 le \$sect");# test 84 nok( $elt3->lt( $sect), "\$elt3 lt \$sect");# test 85 ok( $elt3->ge( $sect), "\$elt3 ge \$sect");# test 86 ok( $elt3->gt( $sect), "\$elt3 gt \$sect");# test 87 } # test keep_attribute_order { eval { require Tie::IxHash; }; if( $@) { skip( 7, "Tie::IxHash not available"); } else { import Tie::IxHash; my $t= XML::Twig->new( keep_atts_order => 1) ->parse( ''); is( $t->sprint, '', "keep_atts_order");# test 88 ok( $t->keep_atts_order, "keep_atts_order");# test 89 $t= XML::Twig->new->parse( ''); is( $t->sprint, '', "do not keep_atts_order");# test 90 nok( $t->keep_atts_order, "keep_atts_order not used");# test 91 $t->set_keep_atts_order(1); my $elt1= $t->root->new( 'elt'); $elt1->set_att( a1 => 'v1'); $elt1->set_att( a2 => 'v2'); is( $elt1->sprint, '', 'keep_atts_order with new elt');# test 92 my $elt2= $t->root->new( 'elt'); $elt2->set_att( a2 => 'v2'); $elt2->set_att( a1 => 'v1'); is( $elt2->sprint, '', 'keep_atts_order with new elt (reverse order)');# test 93 XML::Twig::Elt::set_keep_atts_order(0); my $elt3= $t->root->new( 'elt'); $elt3->set_att( a2 => 'v2'); $elt3->set_att( a1 => 'v1'); is( $elt3->sprint, '', 'no keep_atts_order with new elt (reverse order)');# test 94 } } # test wrap_children xml_string { my $t= XML::Twig->new->parse( ''); $t->set_id_seed( 'id_');; $t->root->wrap_children( '+', wrap => { foo => "bar"}); is( $t->sprint, '', "wrap_children");# test 95 $t->root->strip_att( 'id'); is( $t->sprint, '', "wrap_children");# test 96 is( $t->root->xml_string, '', "xml_string");# test 97 } # test set_output_encoding xml_text { my $t= XML::Twig->new->parse( 'elt 1 elt 2'); is( $t->root->xml_text, 'elt 1 elt 2', "xml_text");# test 98 is( $t->root->xml_string, 'elt 1 elt 2', "xml_text");# test 99 $t->set_output_filter( sub { return '.' x length $_[0] }); is( $t->root->xml_text, '...........', "xml_text (encoded)");# test 100 is( $t->root->xml_string, '.................................', "xml_text (encoded)");# test 101 } # is_first_child is_last_child test contains_a_single contains_only { my $t= XML::Twig->new->parse( q{ e2_1 e2_2e2_3 e2_4e2_5e3_1 }); my $elt1= $t->root->first_child('*'); my $elt2= $t->root->child( 1); my $elt3= $t->root->first_child( sub { $_[0]->children_count( 'elt3') == 1 }); my $elt4= $t->root->last_child; nok( $t->root->child( 6), 'child(6)');# test 102 nok( $t->root->child( -6), 'child(-6)');# test 103 nok( $t->root->child( 1, 'foo'), 'child(1, foo)');# test 104 nok( $t->root->child( -1, 'foo'), 'child(-1, foo)');# test 105 nok( $elt4->child( 1), 'child(1) on empty elt on empty elt');# test 106 nok( $elt4->child( 1, 'foo'), 'child(1, foo) on empty elt');# test 107 nok( $elt4->child( -1), 'child(-1) on empty elt');# test 108 nok( $elt4->child( -1, 'foo'), 'child(-1, foo) on empty elt');# test 109 ok( $elt1->is_first_child, "\$elt1->is_first_child");# test 110 nok( $elt2->is_first_child, "\$elt2->is_first_child is false");# test 111 nok( $elt3->is_first_child, "\$elt3->is_first_child is false");# test 112 nok( $elt1->is_last_child, "\$elt1->is_last_child");# test 113 nok( $elt2->is_last_child, "\$elt2->is_last_child");# test 114 ok( $elt4->is_last_child, "\$elt3->is_last_child is false");# test 115 ok( $elt1->contains_a_single( 'elt2'), "\$elt1->contains_a_single( 'elt2')");# test 116 nok( $elt1->contains_a_single( 'elt'), "\$elt1->contains_a_single( 'elt')");# test 117 nok( $elt2->contains_a_single( 'elt2'), "\$elt2->contains_a_single( 'elt2')");# test 118 nok( $elt3->contains_a_single( 'elt2'), "\$elt3->contains_a_single( 'elt2')");# test 119 nok( $elt4->contains_a_single( 'elt2'), "\$elt4->contains_a_single( 'elt2')");# test 120 ok( scalar $elt1->contains_only( 'elt2'), "\$elt1->contains_only( 'elt2')");# test 121 nok( $elt1->contains_only( 'elt'), "\$elt1->contains_only( 'elt') is false");# test 122 ok( scalar $elt2->contains_only( 'elt2'), "\$elt2->contains_only( 'elt2')");# test 123 nok( $elt3->contains_only( 'elt2'), "\$elt3->contains_only( 'elt2') is false");# test 124 ok( $elt4->contains_only( 'elt2'), "elt4->contains_only( 'elt2')");# test 125 is( $elt1->next_sibling_text, $elt2->text, "next_sibling_text");# test 126 is( $elt1->next_sibling_text, $elt2->text, "next_sibling_text using the cache");# test 127 is( $elt1->parent_text, $t->root->text, "parent_text");# test 128 is( $elt1->parent_text('doc'), $t->root->text, "parent_text");# test 129 is( $elt1->first_child->parent_text('doc'), $t->root->text, "parent_text");# test 130 ok( $elt2->parent_matches( 'doc'), "elt->parent_matches( 'doc')");# test 131 nok( $elt2->parent_matches( 'elt'), "elt->parent_matches( 'elt') is false");# test 132 nok( $t->root->parent, 'root parent');# test 133 nok( $t->root->parent_matches( 'doc'), 'root parent( doc)');# test 134 nok( $t->root->parent_matches( 'foo'), 'root parent( foo)');# test 135 is( $elt2->level, 1, "level");# test 136 is( $elt2->level( 'elt'), 0, "level( elt)");# test 137 is( $elt2->level( 'doc'), 1, "level( doc)");# test 138 is( $elt2->level( 'foo'), 0, "level( foo)");# test 139 nok( $elt2->first_child_text( 'foo'), "first_child_text on empty elt( 'foo')");# test 140 nok( $elt2->first_child_trimmed_text( 'foo'), "first_child_trimmed_text on empty elt( 'foo')");# test 141 nok( $elt4->next_sibling, 'next_sibling on last');# test 142 nok( $elt4->first_child, 'first_child on empty');# test 143 nok( $elt4->last_child, 'last_child on empty');# test 144 nok( $elt4->next_sibling_text, 'next_sibling_text on last');# test 145 nok( $elt4->first_child_text, 'first_child_text on empty');# test 146 nok( $elt4->last_child_text, 'last_child_text on empty');# test 147 nok( $elt4->next_sibling_trimmed_text, 'next_sibling_trimmed_text on last');# test 148 nok( $elt4->first_child_trimmed_text, 'first_child_trimmed_text on empty');# test 149 nok( $elt4->last_child_trimmed_text, 'last_child_trimmed_text on empty');# test 150 nok( $elt1->prev_sibling, 'prev_sibling on last');# test 151 nok( $elt1->prev_sibling_text, 'prev_sibling_text on last');# test 152 nok( $elt1->prev_sibling_trimmed_text, 'prev_sibling_trimmed_text on last');# test 153 } # test next_n_elt del_id delete empty_tag_style { my $t= XML::Twig->new->parse( ''); my $elt= $t->root->next_n_elt( 1); is( $elt->gi, 'elt', "next_n_elt");# test 154 is( $t->elt_id( 'id1')->gi, 'elt', "elt_id");# test 155 $elt->del_id; is( $t->sprint, '', "del_id");# test 156 nok( $t->elt_id( 'id1'), "no elt_id( 'id1')");# test 157 $elt->set_id( 'id2'); is( $t->sprint, '', "set_id");# test 158 is( $t->elt_id( 'id2')->gi, "elt", "elt_id after set_id");# test 159 $elt->delete; is( $t->sprint, '', "delete");# test 160 $t->root->insert_new_elt( first_child => '#COMMENT' => "a comment"); is( $t->sprint, '', "add comment");# test 161 $elt= $t->root->insert_new_elt( last_child => elt => { '#EMPTY' => 1 }); is( $t->sprint, '', "empty element");# test 162 ok( $elt->is_empty, "\$elt is empty");# test 163 $elt->set_not_empty; is( $t->sprint, '', "non empty element");# test 164 nok( $elt->is_empty, "\$elt is not empty");# test 165 $elt->set_empty; ok( $elt->is_empty, "\$elt is empty");# test 166 is( $t->sprint, '', "empty element again");# test 167 $t->set_empty_tag_style( 'html'); is( $t->sprint, '', "empty element (html style)");# test 168 XML::Twig::Elt::set_empty_tag_style( 'expand'); is( $t->sprint, '', "empty element (expand style)");# test 169 $t->set_empty_tag_style( 'normal'); is( $t->sprint, '', "empty element (normal style)");# test 170 $elt->set_content( "toto"); nok( $elt->is_empty, "\$elt is not empty");# test 171 is( $t->sprint, 'toto', "element with content");# test 172 nok( $elt->prev_sibling_matches( '#PI'), "prev_sibling_matches nok");# test 173 my $comment= $elt->prev_sibling_matches( '#COMMENT'); ok( $comment, "prev_sibling_matches ok");# test 174 $comment= $elt->prev_sibling; is( $comment->gi, '#COMMENT', 'prev_sibling');# test 175 $comment= $elt->prev_sibling; is( $comment->gi, '#COMMENT', 'prev_sibling using the cached cond');# test 176 $comment= $elt->prev_sibling( '#COMMENT'); is( $comment->gi, '#COMMENT', 'prev_sibling');# test 177 $comment= $elt->prev_sibling( '#COMMENT'); is( $comment->gi, '#COMMENT', 'prev_sibling using the cached cond');# test 178 $comment->set_comment( "another comment"); is( $t->sprint, 'toto', "element with content");# test 179 $t->root->field_to_att( 'elt'); is( $t->sprint, '', "field_to_att");# test 180 $t->root->del_att( 'elt'); $t->root->first_child( '#COMMENT')->delete; is( $t->sprint, '', "back to a very simple doc");# test 181 $t->root->insert( elt => { att => "v2" }); $t->root->first_child->set_content( "val 3"); $t->root->insert_new_elt( first_child => elt => { att => "v1" }, "val 2"); $t->root->insert_new_elt( last_child => elt => { att => "v3" }, "val 1"); is( $t->sprint, 'val 2val 3val 1',# test 182 "insert 3 elements"); $t->root->sort_children_on_value; is( $t->sprint, 'val 1val 2val 3',# test 183 "sort_children_on_value"); $t->root->sort_children_on_att( "att", order => "reverse" ); is( $t->sprint, 'val 1val 3val 2',# test 184 "sort_children_on_att (reverse)"); $t->root->set_text( "fini fini"); is( $t->sprint, 'fini fini', 'set_text on root');# test 185 my $p= $t->root->insert( 'p'); my $new_p= $p->split_at( 4); is( $t->sprint, '

fini

fini

', "split_at");# test 186 my $alt1_p= $p->copy; my $alt2_p= $p->copy; $p->split( qr/(i)/, 'b' ); is( $p->sprint, '

fini

', "split");# test 187 $alt1_p->first_child->split( qr/(i)/, 'b' ); is( $alt1_p->sprint, '

fini

', "split");# test 188 $new_p->split( qr/(i)/, b => { foo => "bar" } ); is( $new_p->sprint, '

fini

', "split (with att)");# test 189 } # test start_tag_handlers { my @results; my $handler; my $t=XML::Twig->new( start_tag_handlers =>{ elt => \&sth1}); sub sth1 { my( $t, $elt)= @_; push @results, "handler 1: ". $elt->id; $handler= $t->setStartTagHandler( elt => \&sth2); } sub sth2 { my( $t, $elt)= @_; push @results, "handler 2: ". $elt->id; $t->setStartTagHandler( elt => $handler); } $t->parse( ''); is( shift @results, "handler 1: id1", "handler 1");# test 190 is( shift @results, "handler 2: id2", "handler 2");# test 191 is( shift @results, "handler 1: id3", "handler 1 again");# test 192 } { my $t= XML::Twig->new( pi =>'process')->parse( ''); my $pi= $t->root->first_child( '#PI'); $pi->set_target( 't2'); $pi->set_data( 'data2'); is( $pi->sprint, '', "pi");# test 193 my $elt= $pi->next_sibling; $elt->set_extra_data( ''); is( $elt->sprint, "", "elt with comment");# test 194 } { my $t= XML::Twig->new->parse( " elt 1 \n elt 2 "); my $elt1= $t->root->first_child; my $elt2= $t->root->last_child; is( $elt2->prev_sibling_text, ' elt 1 ', "prev_sibling_text");# test 195 is( $elt2->prev_sibling_trimmed_text, 'elt 1', "prev_sibling_trimmed_text");# test 196 is( $elt1->next_sibling_trimmed_text, 'elt 2', "next_sibling_trimmed_text");# test 197 ok( $elt1->next_sibling_matches( 'elt'), "next_sibling_matches ok");# test 198 nok( $elt2->next_sibling_matches( 'elt'), "next_sibling_matches nok");# test 199 is( $elt2->prev_elt_text( 'elt'), " elt 1 ", "prev_elt_text");# test 200 is( $elt2->prev_elt_trimmed_text( 'elt'), "elt 1", "prev_elt_trimmed_text");# test 201 is( $elt2->parent_trimmed_text, "elt 1 elt 2", "parent_trimmed_text");# test 202 is( $elt1->sibling( 1)->trimmed_text, "elt 2", "sibling(1)");# test 203 is( $elt2->sibling( -1)->trimmed_text, "elt 1", "sibling(-1)");# test 204 is( $elt1->sibling_text( 1), " elt 2 ", "sibling(1)");# test 205 is( $elt2->sibling_text( -1), " elt 1 ", "sibling(-1)");# test 206 is( scalar $elt1->next_siblings, 1, "next_siblings");# test 207 is( scalar $elt1->next_siblings( 'elt2'), 0, "next_siblings (none)");# test 208 } { my $t= XML::Twig->new->parse( ''); my $elt1= $t->first_elt( 'elt1'); my $elt2= $t->first_elt( 'elt2'); $elt2->move( before => $elt1); is( $t->sprint, '', "cut");# test 209 $elt2->cut; is( $t->sprint, '', "cut");# test 210 $elt2->replace( $elt1); is( $t->sprint, '', "replace");# test 211 $elt2->set_content( "toto"); $elt2->suffix( ":foo"); is( $elt2->xml_string, "toto:foo", "suffix");# test 212 $elt2->first_child( '#TEXT')->suffix( 'bar'); is( $elt2->xml_string, "toto:foobar", "suffix on pcdata elt");# test 213 $elt2->replace_with( $elt1); is( $t->sprint, '', "replace_with");# test 214 $elt1->set_content( "tto"); my $o= XML::Twig::Elt->new( b => "oo"); $o->paste_within( $elt1, 1); is( $t->sprint, 'tooto', "replace_with");# test 215 $o->new( t => {a => 1 }, 'ta')->paste_within( $t->first_elt( 'b')->first_child, 1); is( $t->sprint, 'totaoto', "replace_with");# test 216 } # test methods inherited from XML::Parser::Expat { my $t= XML::Twig->new( twig_handlers => { elt => \&test_inherited }) ->parse( 'toto'); sub test_inherited { my( $t, $elt)= @_; is( $t->depth, 2, "depth");# test 217 ok( $t->in_element( 'sect'), "in_element");# test 218 nok( $t->in_element( 'elt'), "in_element (false)");# test 219 ok( $t->within_element( 'sect'), "within_element");# test 220 ok( $t->within_element( 'doc'), "within_element");# test 221 nok( $t->within_element( 'elt'), "within_element (false)");# test 222 is( join( '/', $t->context), "doc/sect", "context");# test 223 is( $t->current_line, 1, "current_line");# test 224 is( $t->current_byte, 20, "current_byte");# test 225 is( $t->original_string, "", "original_string");# test 226 is( $t->recognized_string, "
", "recognized_string");# test 227 is( $t->current_element, "sect", "current_element");# test 228 if( $XML::Parser::VERSION>2.27) { is( $t->element_index, 3, "element_index"); }# test 229 else { is( $t->element_index, 2, "element_index"); } # alt test 229 $t->base( "foo"); is( $t->base, "foo", "base");# test 230 ok( $t->position_in_context( 1), "position_in_context");# test 231 my $xml= 'toto'; my $expected= '<elt>toto</elt>'; my $broken= '<elt>toto
'; my $xml_escape= $t->xml_escape( $xml); if( $xml_escape eq $broken) { warn "your version of expat/XML::Parser has a broken xml_escape method\n"; ok( 1, "xml_escape"); # test# test 232 } else { is( $xml_escape, $expected, "xml_escape"); } # alt test 232 $xml= 'toto'; $expected= '<elt>toto</elt>'; $broken= '<elt>toto
'; $xml_escape= $t->xml_escape( $xml, 'o'); if( $xml_escape eq $expected) { ok( 1, "xml_escape"); }# test 233 elsif( $xml_escape eq $broken) { ok( 1, "xml_escape"); } # alt test 233 else { is( $xml_escape, $expected, "xml_escape"); } # alt test 233 } } { my $t= XML::Twig->new( start_tag_handlers => { i => sub { $_[0]->ignore }, }, twig_handlers => { s => sub { $_[0]->finish }, } ) ->parse( 'foototototobar'); is( $t->sprint, "foototo", "ignore + finish");# test 234 } # test xml declaration and entity related methods { my $t= XML::Twig->new->parse( ' tata

"> ]> &ent1;&ent2;'); is( $t->xml_version, "1.0", "xml_version");# test 235 is( $t->encoding, "ISO-8859-1", "encoding");# test 236 nok( $t->standalone, "standalone (no)");# test 237 is( $t->xmldecl, qq{\n}, "xmldecl");# test 238 $t->set_xml_version( "1.1"); is( $t->xml_version, "1.1", "set_xml_version");# test 239 $t->set_encoding( "UTF-8"); is( $t->encoding, "UTF-8", "set_encoding");# test 240 $t->set_standalone( 1); ok( $t->standalone||'', "set_standalone");# test 241 is( $t->xmldecl, qq{\n}, "xmldecl");# test 242 is( join( ':', sort $t->entity_names), "ent1:ent2:ent3", "entity_names");# test 243 my $ent1= $t->entity( 'ent1'); is( $ent1->name, "ent1", "entity name");# test 244 is( $ent1->val, "toto", "entity val");# test 245 nok( $ent1->sysid, "entity sysid (none)");# test 246 nok( $ent1->pubid, "entity pubid (none)");# test 247 nok( $ent1->ndata, "entity ndata (none)");# test 248 my $ent3= $t->entity( 'ent3'); is( $ent3->name, "ent3", "entity name");# test 249 nok( $ent3->val, "entity val (none)");# test 250 is( $ent3->sysid, "ent3.png", "entity sysid");# test 251 nok( $ent3->pubid, "entity pubid (none)");# test 252 is( $ent3->ndata, "PNG", "entity ndata");# test 253 my $doctype= qq{\ntata

">\n\n]>\n}; is( $t->doctype, $doctype, "doctype");# test 254 my $ent4= $t->entity_list->add_new_ent( ent4 => "ent 4")->ent( 'ent4'); is( $ent4->text, qq{}, "add_new_ent");# test 255 my $ent5= $t->entity_list->add_new_ent( ent5 => "", "ent5.png", "", "PNG" )->ent( 'ent5'); is( $ent5->text, qq{}, "add_new_ent (ndata)");# test 256 is( join( ':', sort $t->entity_names), "ent1:ent2:ent3:ent4:ent5", "entity_names");# test 257 is( $t->doctype, $doctype, "doctype");# test 258 my $prolog=qq{ tata

"> ]> }; is( $t->prolog( UpdateDTD => 1), $prolog, "prolog, updated DTD");# test 259 $t->entity_list->delete( 'ent3'); is( join( ':', sort $t->entity_names), "ent1:ent2:ent4:ent5", "entity_names");# test 260 $t->entity_list->delete( ($t->entity_list->list)[0]); is( join( ':', sort $t->entity_names), "ent2:ent4:ent5", "entity_names");# test 261 } { my $t= XML::Twig->new( comments => 'process', pi =>'process') ->parse( 'text'); is( $t->root->first_child( '#COMMENT')->get_type, "#COMMENT", "get_type #COMMENT");# test 262 is( $t->root->first_child( '#PI')->get_type, "#PI", "get_type #PI");# test 263 is( $t->root->first_child( '#CDATA')->get_type, "#CDATA", "get_type #CDATA");# test 264 is( $t->root->first_child( '#PCDATA')->get_type, "#PCDATA", "get_type #PCDATA");# test 265 is( $t->root->get_type, "#ELT", "get_type #ELT");# test 266 my $cdata= $t->root->first_child( '#CDATA'); $cdata->set_cdata( "new cdata"); is( $cdata->sprint, "", "set_cdata");# test 267 my $copy= $t->root->copy; is( $copy->sprint, $t->root->sprint, 'copy of an element with extra data');# test 268 is( $t->sprint( pretty_print => 'indented'),# test 269 qq{text\n}, 'indented elt'); } { my $t= XML::Twig->new->parse( ' text &ent; more'); my $ent= $t->first_elt( '#ENT'); is( $ent->get_type, "#ENT", "get_type");# test 270 is( $ent->ent, '&ent;', "ent");# test 271 is( $ent->ent_name, 'ent', "ent_name");# test 272 $ent->set_ent( '&new_ent;'); is( $ent->ent, '&new_ent;', "new_ent ent");# test 273 is( $ent->ent_name, 'new_ent', "new_ent ent_name");# test 274 } { my $t= XML::Twig->new->parse( 'text xx more text xx end'); my $alt_root= $t->root->copy; $t->root->mark( ' (xx) ', b => { att => "y" }); is( $t->sprint, 'textxxmore textxxend', 'mark');# test 275 $alt_root->first_child->mark( ' (xx) ', b => { att => "y" }); is( $alt_root->sprint, 'textxxmore textxxend', 'mark text');# test 276 } { my $t= XML::Twig->new->parse( ''); is( $t->sprint, '', "before save_global_state");# test 277 $t->save_global_state; $t->set_quote( 'single'); is( $t->sprint, "", "after set_global_state");# test 278 $t->restore_global_state; is( $t->sprint, '', "after restore_global_state");# test 279 } { my $t= XML::Twig->new->parse( 'text bold text more text and text even more text'); $t->subs_text( 'text', 'stuff'); is( $t->sprint, "stuff bold stuff more stuff and stuff even more stuff", "subs_text");# test 280 $t->subs_text( qr{stuf+}, 'text'); is( $t->sprint, "text bold text more text and text even more text", "subs_text");# test 281 my $elt= $t->root->first_child; my $bold= $elt->first_child( 'b'); $bold->erase; is( $t->sprint, "text bold text more text and text even more text", "erase");# test 282 $elt->merge( $elt->next_sibling); is( $elt->first_child_text, "text bold text more text and text even more text", "merge_text");# test 283 } # more tests on subs_text { my $doc='

link to http://www.xmltwig.org but do not link to http://bad.com, though link to toto and link to http://www.xml.com

now http://www.nolink.com and do not link to this and do not link to http://www.bad.com and do not link to http://www.bad2.com and link to http://link.com also

'; my $expected='

see www.xmltwig.org but do not link to http://bad.com, though link to toto and see www.xml.com

now http://www.nolink.com and do not link to this and do not link to http://www.bad.com and do not link to http://www.bad2.com and see link.com also

'; my $t= XML::Twig->new->parse( $doc); my $got= $t->subs_text( qr{(?{ href => $1 }, $2)'); is( $got->sprint, $expected, 'complex substitution with subs_text');# test 284 } { my $doc='text

and more text

'; (my $expected= $doc)=~ s{ }{ }g; my $t= XML::Twig->new->parse( $doc); my $got= $t->subs_text( qr{ }, '&ent( " ")'); is( $got->sprint, $expected, 'creating entities with subs_text');# test 285 $t= XML::Twig->new->parse( $doc); my $ent=" "; $got= $t->subs_text( qr{ }, "&ent( '$ent')"); is( $got->sprint, $expected, 'creating entities from a variable with subs_text');# test 286 } { my $t= XML::Twig->new->parse( ' 03val 1 2val 2 4val 3 01val 4 05 '); $t->root->sort_children_on_field( 'key', type =>'numeric' ); my $expected= ' 01val 4 2val 2 03val 1 4val 3 05 '; $t->set_pretty_print( 'record_c'); $t->set_indent( ' '); is( $t->sprint, $expected, "sort_children_on_field");# test 287 XML::Twig::Elt::set_indent( ' '); } { my $t= XML::Twig->new( empty_tags => 'expand', pretty_print => 'none')->parse( ''); is( $t->sprint, "", "empty_tags expand");# test 288 is( $t->sprint( empty_tags => 'normal'), "", "empty_tags normal");# test 289 is( $t->sprint( pretty_print => 'indented', empty_tags => 'normal'), "\n \n\n", "empty_tags expand");# test 290 $t->set_pretty_print( 'none'); $t->set_empty_tag_style( 'normal'); } { if( $perl < 5.008) { skip( 3, "need perl 5.8 or above to perform these tests (you have $perl)"); } else { my $out=''; my $out2=''; $open->( my $fh, ">", \$out); $open->( my $fh2, ">", \$out2); my $t= XML::Twig->new( empty_tags => 'expand', pretty_print => 'none')->parse( ''); $t->print( $fh); is( $out, "", "empty_tags expand");# test 291 $t->print( $fh2); is( $t->sprint( empty_tags => 'normal'), "", "empty_tags normal");# test 292 $out=''; $t->print( $fh); is( $t->sprint( pretty_print => 'indented', empty_tags => 'normal'), "\n \n\n", "empty_tags expand");# test 293 $t->set_pretty_print( 'none'); $t->set_empty_tag_style( 'normal'); } } { if( $perl < 5.008) { skip( 3, "need perl 5.8 or above to perform these tests (you have $perl)"); } else { my $out=''; my $out2=''; $open->( my $fh, ">", \$out); $open->( my $fh2, ">", \$out2); my $t= XML::Twig->new( empty_tags => 'expand', pretty_print => 'none'); $t->parse( '')->flush( $fh); is( $out, "", "empty_tags expand");# test 294 $t->parse( '')->flush( $fh2); is( $t->sprint( empty_tags => 'normal'), "", "empty_tags normal");# test 295 $out=''; $t->parse( '')->flush( $fh); is( $t->sprint( pretty_print => 'indented', empty_tags => 'normal'), "\n \n\n", "empty_tags expand");# test 296 $t->set_pretty_print( 'none'); $t->set_empty_tag_style( 'normal'); } } { my $t= XML::Twig->new->parse( ' 03val 1 2val 2 4val 3 01val 4 '); $t->root->sort_children_on_field( 'key', type =>'numeric' ); my $expected= ' 01val 4 2val 2 03val 1 4val 3 '; $t->set_pretty_print( 'record_c'); $t->set_indent( ' '); is( $t->sprint, $expected, "sort_children_on_field");# test 297 $t->set_indent( ' '); } { my $t= XML::Twig->new( empty_tags => 'expand', pretty_print => 'none')->parse( ''); is( $t->sprint, "", "empty_tags expand");# test 298 is( $t->sprint( empty_tags => 'normal'), "", "empty_tags normal");# test 299 is( $t->sprint( pretty_print => 'indented', empty_tags => 'normal'), "\n \n\n", "empty_tags expand");# test 300 $t->set_pretty_print( 'none'); $t->set_empty_tag_style( 'normal'); } { my $t= XML::Twig->new->parse( ''); my $elt1= $t->root->first_child( 'elt'); my $elt2= $t->root->first_child( 'ns1:elt'); is( $elt1->namespace, "uri_def", "default namespace");# test 301 is( $elt2->namespace, "uri1", "namespace");# test 302 is( $elt1->namespace, "uri_def", "namespace default");# test 303 is( $elt1->namespace( 'ns1'), "uri1", "namespace not default");# test 304 is( join( ' - ', $elt1->current_ns_prefixes), '', "current_ns_prefixes");# test 305 is( join( ' - ', $elt2->current_ns_prefixes), ' - ns1', "current_ns_prefixes");# test 306 } { my $t=XML::Twig->new( ignore_elts => { i => 1 }); $t->parse( ''); is( $t->sprint, '', "setIgnoreEltsHandler");# test 307 } { my $t=XML::Twig->new; $t->setIgnoreEltsHandler( i => 'discard'); $t->parse( ''); is( $t->sprint, '', "setIgnoreEltsHandler");# test 308 } # test setEndTagHandler { my $called=""; my $t= XML::Twig->new( twig_roots => { title => 1 }); my $doc=q{title 1

title 2

}; $t->parse( $doc); is( $called, "", "no end_tag_handler");# test 309 $called= ''; $t->setEndTagHandler( sect => sub { $called.= ":" if( $called); $called .= $_[1]}); $t->parse( $doc); is( $called, "sect:sect", "end_tag_handler");# test 310 $called= ''; $t->setEndTagHandler( sect => sub { return }); $t->parse( $doc); is( $called, "", "empty end_tag_handler");# test 311 } # test replace_prefix { my $called=''; my $not_called=''; my $t= XML::Twig->new( namespaces => 1, map_xmlns => { "uri1" => "foo", "uri2" => "whatever" }, twig_handlers => { "foo:bar" => sub { $called.= ":" if( $called); $called .= $_->id }, "toto:bar" => sub { $not_called.= ":" if( $not_called); $not_called .= $_->id }, }, ); $t->parse( q{ }); is( $called, "ok1:ok2", "map_xmlns");# test 312 is( $not_called, "", "map_xmlns (no hit)");# test 313 } # test parser { my $t= XML::Twig->new( twig_handlers => { doc => sub { is( ref( $_[0]->parser), 'XML::Parser::Expat', "parser"); } },# test 314 ) ->parse( ""); is( ref( $t->parser), '', "parser (empty, after the parse)");# test 315 $t->set_doctype( doc => "doc.dtd"); is( $t->sprint, qq{\n}, "set_doctype");# test 316 $t->set_doctype( doc => "doc.dtd", "-//public id/"); is( $t->sprint, qq{\n}, "set_doctype");# test 317 $t->set_doctype( doc => "doc.dtd", '', qq{[]}); is( $t->sprint, qq{\n]>\n}, "set_doctype");# test 318 #set_doctype ($name, $system, $public, $internal) } { if( $perl < 5.008) { skip( 3, "need perl 5.8 or above to perform these tests (you have $perl)"); } else { my $out=''; $open->( my $fh, ">", \$out); my $doc= q{

p1

p2

}; my $t= XML::Twig->new( twig_handlers => { flush => sub { $_->flush( $fh) } } ); $t->{twig_autoflush}=0; $t->parse( $doc); is( $out, q{

p1

p2

}, "flush");# test 319 close $fh; $out=""; $open->( $fh, ">", \$out); $t= XML::Twig->new( twig_handlers => { flush => sub { $_[0]->flush_up_to( $_->prev_sibling, $fh) } } ); $t->{twig_autoflush}=0; $t->parse( $doc); is( $out, q{

p1

p2

}, "flush_up_to");# test 320 $t= XML::Twig->new( twig_handlers => { purge => sub { $_[0]->purge_up_to( $_->prev_sibling->prev_sibling, $fh) } } ) ->parse( q{

p1

sp 1

}); is( $t->sprint, q{

sp 1

}, "purge_up_to");# test 321 } } # test next_n_elt for a twig { my $t= XML::Twig->new->parse( q{e 2e 3}); is_undef( $t->next_n_elt( 1), "next_n_elt(1)");# test 322 is( $t->next_n_elt( 3)->gi, "e2", "next_n_elt(3)");# test 323 is( $t->next_n_elt( 1, "e3")->gi, "e3", "next_n_elt(1, e3)");# test 324 nok( $t->next_n_elt( 2, "e3"), "next_n_elt(2, e3)");# test 325 is( join(':', map { $_->gi } $t->_children), 'doc', "\$t->_children");# test 326 } # test dtd_print { if( $perl < 5.008) { skip( 2, "need perl 5.8 or above to perform these tests (you have $perl)"); } else { { my $out=''; $open->( my $fh, ">", \$out); my $t= XML::Twig->new()->parse( q{]>toto}); $t->dtd_print( $fh); is( $out, "\n\n]>\n", "dtd_print");# test 327 close $fh; } { my $out=""; $open->( my $fh, ">", \$out); my $t= XML::Twig->new( twig_handlers => { stop => sub { print $fh "[X]"; $_->set_text( '[Y]'); $_[0]->flush( $fh); $_[0]->finish_print( $fh); } }); $t->{twig_autoflush}=0; $t->parse( q{beforefinish}); select STDOUT; is( $out, q{[X]before[Y]finish}, "finish_print");# test 328 } } } # test set_input_filter { my $t=XML::Twig->new( input_filter => \&rot13) ->parse( q{text}); is( $t->sprint, q{grkg}, "input filter");# test 329 $t=XML::Twig->new; $t->parse( q{text}); is( $t->sprint, q{text}, "input filter (none)");# test 330 $t->set_input_filter( \&rot13); $t->parse( q{grkg}); is( $t->sprint, q{text}, "set_input_filter");# test 331 $t->parse( ''); is( $t->sprint, '',# test 332 "set_input_filter on comments and cdata"); } sub rot13 { $_[0]=~ tr/a-z/n-za-m/; $_[0]; } # test global_state methods { my $doc= q{

p 1

p 2

}; my $t=XML::Twig->new->parse( $doc); is( $t->sprint, $doc, "initial state");# test 333 my $state= $t->global_state; $t->set_pretty_print( 'indented'); $t->set_indent( 8); nok( $t->sprint eq $doc, "changed state");# test 334 $t->set_global_state( $state); is( $t->sprint, $doc, "re-set initial state");# test 335 $t->save_global_state; $t->set_pretty_print( 'nice'); $t->set_quote( 'single'); nok( $t->sprint eq $doc, "changed state");# test 336 $t->restore_global_state( $state); is( $t->sprint, $doc, "restored initial state");# test 337 } # test encoding functions { if( $perl < 5.008) { skip( 21, "need perl 5.8 or above to perform these tests (you have $perl)"); } else { require Encode; import Encode; my $text= "\x{E9}t\x{E9}"; my $text_latin1 = encode( latin1 => $text); my $text_utf8 = encode( utf8 => $text); my $text_html="été"; my $text_safe= "été"; my $text_safe_hex= "été"; my $doc_latin1=qq{\n$text_latin1}; my $doc_utf8=qq{\n$text_utf8}; my $doc_html=qq{\n$text_html}; my $doc_safe=qq{\n$text_safe}; my $doc_safe_hex=qq{\n$text_safe_hex}; my $doc_escaped= xml_escape( $doc_html); my $t= XML::Twig->new( output_encoding => "ISO-8859-1")->parse( $doc_utf8); $t->save_global_state; is( $t->output_encoding, 'ISO-8859-1', "output_encoding (ISO-8859-1)");# test 338 is( $t->sprint, $doc_latin1, "output_encoding ISO-8859-1");# test 339 $t->set_output_encoding( "UTF-8"); is( $t->output_encoding, 'UTF-8', "output_encoding (UTF-8)");# test 340 is( $t->sprint, $doc_utf8, "output_encoding UTF-8");# test 341 $t->set_output_text_filter( 'safe'); is( $t->sprint, $doc_safe, 'safe');# test 342 $t->set_output_text_filter( 'safe_hex'); is( $t->sprint, $doc_safe_hex, 'safe_hex');# test 343 if( $perl == 5.008) { skip( 2 => "cannot use latin1_output_text_filter with perl $perl"); } else { $t->set_output_text_filter( $t->latin1 ); $t->set_output_encoding( "ISO-8859-1"); is( normalize_xml( $t->sprint( pretty_print => 'indented')), normalize_xml( $doc_latin1), 'latin1');# test 344 $t->set_output_filter( 'latin1' ); $t->set_output_encoding( "ISO-8859-1"); is( $t->sprint, $doc_latin1, 'latin1 (just the string)');# test 345 } $t->set_output_text_filter( ); $t->set_output_encoding( "UTF-8"); $t->restore_global_state; eval "require HTML::Entities"; if( $@) { skip( 4, "need HTML::Entities for those tests"); } elsif( $perl == 5.008) { skip( 4, "HTML::Entities don't seem to work well with perl 5.8.0 (the e acute becomes é instead of é)"); } else { import HTML::Entities; $t->save_global_state; $t->set_output_encoding( "UTF-8"); my $original_output_text_filter= $t->output_text_filter; $t->set_output_text_filter( "html"); my $html_output_text_filter= $t->output_text_filter; is( $t->sprint, $doc_html, "output_text_filter html");# test 346 $t->set_output_text_filter( $original_output_text_filter); is( $t->sprint, $doc_utf8, "no output_text_filter ");# test 347 my $original_output_filter= $t->output_filter; $t->set_output_filter( "html"); is( $t->sprint, $doc_escaped, "output_filter html");# test 348 $t->restore_global_state; $t->set_output_encoding( "UTF-8"); is( $t->sprint, $doc_utf8, "no output_text_filter ");# test 349 } $t->restore_global_state; $t->set_output_encoding(); eval "require Text::Iconv"; if( $@) { skip( 3, "need Text::Iconv for those tests"); } else { my $encoding= 'ISO-8859-1'; if( eval( '$t->iconv_convert( "$encoding");')) { $t->set_output_filter( $t->iconv_convert( $encoding) ); $t->set_encoding( $encoding); is( $t->encoding, $encoding, "set_encoding");# test 350 is( $t->sprint, $doc_latin1, "output_filter ISO-8859-1 (using Text::Iconv)");# test 351 $t->restore_global_state; $t->set_output_encoding( "UTF-8"); is( $t->sprint, $doc_utf8, "no output_filter ");# test 352 } else { if( $@=~ m{^Unsupported encoding: $encoding}) { skip( 3, "your version of iconv does not support $encoding"); } else { skip( 3, "odd error creating filter with iconv: $@"); } } } $t->restore_global_state; $t->set_output_encoding(); eval( 'require Unicode::Map8 && require Unicode::String;'); if( $@) { skip( 3, "need Unicode::Map8 and Unicode::String for those tests" ); } else { $t->set_output_filter( $t->unicode_convert( 'latin1') ); $t->set_encoding( "ISO-8859-1"); is( $t->encoding, "ISO-8859-1", "set_encoding");# test 353 is( $t->sprint, $doc_latin1, "output_filter latin-1 (using Unicode::*)");# test 354 $t->restore_global_state; $t->set_output_encoding( "UTF-8"); is( $t->sprint, $doc_utf8, "no output_filter ");# test 355 $t->restore_global_state; $t->set_output_encoding(); } $t->set_output_filter( $t->regexp2latin1 ); $t->set_encoding( "ISO-8859-1"); is( $t->encoding, "ISO-8859-1", "set_encoding");# test 356 is( $t->sprint, $doc_latin1, "output_filter latin-1 (using regexp2latin1)");# test 357 $t->restore_global_state; $t->set_output_encoding( "UTF-8"); is( $t->sprint, $doc_utf8, "no output_filter ");# test 358 $t->restore_global_state; $t->set_output_encoding(); } } # test SAX1 export { eval "require XML::Handler::YAWriter"; if( $@) { skip(3, "require XML::Handler::YAWriter"); } else { import XML::Handler::YAWriter; my $xmldecl= qq{}; my $body= qq{

text

}; my $doc= $xmldecl.$body; my $t= XML::Twig->new->parse( $doc); $t->root->set_att( '#priv' => 'private'); $t->root->insert_new_elt( last_child => '#private'); my $writer = XML::Handler::YAWriter->new( AsString => 1); is( normalize_xml( $t->toSAX1( $writer)), $doc, 'toSAX1');# test 359 $writer->start_document; $t->root->toSAX1( $writer); is( normalize_xml( $writer->end_document), $doc, 'root toSAX1');# test 360 my $doc_flush=']>

p 1

text more text &foo;

'; my $doc_flushed=qq{

p 1

a

text more text bar

}; $writer = XML::Handler::YAWriter->new( AsString => 1, Pretty => { CatchEmptyElement => 1 }); $writer->start_document; $SIG{__WARN__} = sub { }; $t= XML::Twig->new( twig_handlers => { add => sub { $_[0]->flush_toSAX1( $writer); $_->new( g => "a")->toSAX1( $writer); }, flush => sub { $_[0]->flush_toSAX1( $writer); }, } ) ->parse( $doc_flush); my $output= $t->flush_toSAX1( $writer) || ''; $SIG{__WARN__}= $old_warning_handler; is( normalize_xml( $output), $doc_flushed, 'root toSAX1');# test 361 } } # test SAX2 export { eval "require XML::SAX::Writer;"; if( $@) { skip(5, "XML::SAX::Writer not available"); } elsif( $XML::SAX::Writer::VERSION < 0.39) { skip( 5, "XML::SAX::Writer version 0.39 and above required to use SAX2 export"); } else { eval "require XML::Filter::BufferText;"; if( $@) { skip(5, "XML::Filter::BufferText not available"); } else { import XML::SAX::Writer; import XML::Filter::BufferText; my $output=''; my $writer = XML::SAX::Writer->new( Output => \$output); my $xmldecl= qq{}; my $body= qq{

text

foo:e textt

[
}; my $doc= $xmldecl.$body; my $xfbtv= $XML::Filter::BufferText::VERSION; if( $xfbtv < 1.01) { skip( 2, "XML::Filter::BufferText version $xfbtv has a bug in CDATA processing"); } else { my $t= XML::Twig->new( comments =>'process', pi => 'process')->parse( $doc); # add private data $t->root->set_att( '#priv' => 'private'); $t->root->insert_new_elt( last_child => '#private'); $t->toSAX2( $writer); is( normalize_xml( $output), $doc, 'toSAX2');# test 362 $output=''; $t->root->toSAX2( $writer); is( normalize_xml( $output), $body, 'flush_toSAX2');# test 363 } my $doc_flush="

p 1

text more text

"; my $doc_flushed=qq{

p 1

a

text more text

}; $output=''; my $t= XML::Twig->new( twig_handlers => { add => sub { $_[0]->flush_toSAX2( $writer); $_->new( g => "a")->toSAX2( $writer); }, flush => sub { $_[0]->flush_toSAX2( $writer); }, } ) ->parse( $doc_flush); $t->flush_toSAX2( $writer); is( normalize_xml( $output), $doc_flushed, 'flush_toSAX2');# test 364 $doc= qq{]>toto = &toto;}; $t= XML::Twig->new()->parse( $doc); $output=''; $writer = XML::SAX::Writer->new( Output => \$output); $t->toSAX2( $writer); $output=~ s{}{}s; # shows that in fact we have a problem with outputing the DTD is( normalize_xml( $output), 'toto = foo', 'toSAX2 with an entity');# test 365 $doc= qq{toto = &toto;}; $t= XML::Twig->new()->parse( $doc); $output=''; $writer = XML::SAX::Writer->new( Output => \$output); $t->toSAX2( $writer); is( normalize_xml( $output), normalize_xml( $doc), 'toSAX2 with a non expanded entity');# test 366 } } } # test flushed an twig_current status (not a very good test, but the methods are not used in practice) { my $t= XML::Twig->new->parse( ''); nok( $t->root->_flushed, "root is not flushed");# test 367 $t->root->_set_flushed; ok( $t->root->_flushed, "root is flushed");# test 368 $t->root->_del_flushed; nok( $t->root->_flushed, "root is not flushed");# test 369 nok( $t->root->{twig_current}, "root is not twig current");# test 370 $t->root->set_twig_current; ok( $t->root->{twig_current}, "root is twig current");# test 371 $t->root->del_twig_current; nok( $t->root->{twig_current}, "root is not twig current");# test 372 ok( $t->root->closed, "root is closed");# test 373 } # test ignore { my $t= XML::Twig->new( start_tag_handlers => { ignore => sub { $_[0]->ignore }, ignore_parent => sub { $_->parent->ignore }, }, ) ->parse( '

yes 1

no 1

yes 2

no 2

'); is( $t->sprint, '

yes 1

yes 2

', "ignore");# test 374 } # test subs_text with replacement { my $doc= 'text rep

text rep

text rep
'; (my $rep1= $doc)=~ s/rep/newr/g; (my $rep2= $doc)=~ s{rep}{new}g; (my $rep3= $rep2)=~ s{ }{ }g; my $t= XML::Twig->new->parse( $doc); $t->root->subs_text( qr/(r)ep/, 'new$1'); is( $t->sprint, $rep1, "subs_text");# test 375 $t->root->subs_text( qr/(new)r/, '&elt( b => $1)'); is( $t->sprint, $rep2, "subs_text (with elt)");# test 376 $t->root->subs_text( qr/ /, '&ent( " ")'); is( $t->sprint, $rep3, "subs_text (with ent)");# test 377 } # test handlers that are not activated in 5.8 package test_handlers; sub new { bless { } } sub recognized_string { return 'recognized_string'; } sub original_string { return 'original_string'; } package main; { if( $perl < 5.008) { skip( 4, "need perl 5.8 or above to perform these tests (you have $perl)"); } else { my $out=''; $open->( my $fh, ">", \$out); my $stdout= select $fh; XML::Twig::_twig_print_original_default( test_handlers->new); select $stdout; close $fh; is( $out, 'original_string', 'twig_print_original_default');# test 378 $out=''; $open->( $fh, ">", \$out); select $fh; XML::Twig::_twig_print( test_handlers->new); select $stdout; close $fh; is( $out, 'recognized_string', 'twig_print');# test 379 $out=''; $open->( $fh, ">", \$out); select $fh; XML::Twig::_twig_print_end_original( test_handlers->new); select $stdout; close $fh; is( $out, 'original_string', 'twig_print_end_original');# test 380 $out=''; $open->( $fh, ">", \$out); select $fh; XML::Twig::_twig_print( test_handlers->new); select $stdout; close $fh; is( $out, 'recognized_string', 'twig_print');# test 381 } XML::Twig::_twig_print_entity; # does nothing! } { my %ents= ( foo => '"toto"', pile => 'SYSTEM "file.bar" NDATA bar'); my %ent_text = hash_ent_text( %ents); my $ent_text = string_ent_text( %ents); my $doc= ""; my $t= XML::Twig->new->parse( $doc); is( normalize_xml( $t->entity_list->text), $ent_text, 'entity_list');# test 382 my @entities= $t->entity_list->list; is( scalar @entities, scalar keys %ents, 'entity_list');# test 383 if( $perl < 5.008) { skip( (scalar( keys %ents) + 1), "need perl 5.8 or above to perform these tests (you have $perl)"); } else { foreach my $ent (@entities) { my $out=''; $open->( my $fh, ">", \$out); my $stdout= select $fh; $ent->print; close $fh; select $stdout; is( normalize_xml( $out), $ent_text{$ent->name}, "print $ent->{name}"); # 2 tests 384 - 385 } my $out=''; $open->( my $fh, ">", \$out); my $stdout= select $fh; $t->entity_list->print; close $fh; select $stdout; is( normalize_xml( $out), $ent_text, 'print entity_list');# test 386 } delete $ents{pile}; %ent_text = hash_ent_text( %ents); $ent_text = string_ent_text( %ents); $t->entity_list->delete( 'pile'); @entities= $t->entity_list->list; is( scalar @entities, scalar keys %ents, '1 entity deleted');# test 387 is( $t->entity_list->text, $ent_text, 'entity_list (one entity deleted)');# test 388 } { if( $perl < 5.008) { skip( 3, "need perl 5.8 or above to perform these tests (you have $perl)"); } else { my $out1=''; $open->( my $fh1, ">", \$out1); my $out2=''; $open->( my $fh2, ">", \$out2); my $out3=''; $open->( my $fh3, ">", \$out3); my $stdout= select $fh3; my $t= XML::Twig->new( twig_handlers => { e => sub { $_->print( $fh2); print $fh1 "X"; $_[0]->finish_print( $fh1); }, }, ) ->parse( 'texte

text

more text

foo

'); print 'should be in $out3'; select $stdout; is( $out1, 'Xmore text

foo

', 'finish_print');# test 389 is( $out2, 'e

text

', 'print to fh');# test 390 is( $out3, 'should be in $out3', 'restoring initial fh');# test 391 } } # test prefix mappings package test_sax_prefix_mapping; sub new { return bless { out => 'start ' }; } sub start_element { my( $h, $element)= @_; $h->{out}.= "- start $element->{Name} "; } sub end_element { my( $h, $element)= @_; $h->{out}.= "- end $element->{Name} "; } sub start_prefix_mapping { my( $h, $map)= @_; $h->{out}.= "- map $map->{Prefix} to $map->{NamespaceURI} "; } sub end_prefix_mapping { my( $h, $map)= @_; $h->{out}.= "- end map $map->{Prefix} "; } sub end_document { my( $h, $document)= @_; return $h->{out}; } package main; { my $h= test_sax_prefix_mapping->new; my $t= XML::Twig->new->parse( q{

text

text

}); my $out= $t->toSAX2( $h); is( $out, 'start - map to uri1 - start doc - map p1 to uri2 - start p - start p1:e - end p1:e - end p - end map p1 - map p1 to uri3 - start p - start p1:e - end p1:e - end p - end map p1 - end doc - end map ', 'prefix mapping');# test 392 } # test parsing with keep_encoding (to check no spurious warnings are produced) { my $warnings=''; $SIG{__WARN__}= sub { $warnings .= shift; }; my $doc= '

text

more text > é

'; my $t= XML::Twig->new( keep_encoding => 1)->parse( $doc); $SIG{__WARN__}= $old_warning_handler; is( $warnings, '', 'keep_encoding with elements with no attributes');# test 393 is( $t->sprint, $doc, 'twig output');# test 394 } # test end_tag_handlers with ignore { my $out=''; my $t= XML::Twig->new( twig_roots => { p => 1 }, start_tag_handlers => { x => sub { $out .= "s" } }, twig_handlers => { n => sub { $out .="n";}, x => sub { $out .="e"; } }, ) ->parse( '

text text more text

'); is( $out, 'snese', 'end_tag_handlers without ignore');# test 395 $out=''; $t= XML::Twig->new( twig_roots => { p => 1 }, start_tag_handlers => { x => sub { $out .= "s"; $_->ignore } }, end_tag_handlers => { x => sub { $out .="e"; }, }, twig_handlers => { n => sub { $out .="n"; }, }, ) ->parse( '

text text more text

'); is( $out, 'sese', 'end_tag_handlers with ignore');# test 396 eval ' XML::Twig->new( start_tag_handlers => { x => sub { $out .= "s"; $_->ignore } }, end_tag_handlers => { x => sub { $out .="e"; }, }, twig_handlers => { n => sub { $out .="n"; }, }, ); '; matches( $@, '^you should not use EndTagHandlers without', "error using end_tag_handlers");# test 397 $out=''; $t= XML::Twig->new( force_end_tag_handlers_usage => 1, start_tag_handlers => { x => sub { $out .= "s"; $_->ignore } }, end_tag_handlers => { x => sub { $out .="e"; }, }, twig_handlers => { n => sub { $out .="n"; }, }, ) ->parse( '

text text more text

'); is( $out, 'sesen', 'end_tag_handlers with ignore and force_end_tag_handlers_usage');# test 398 } { my @doc=( "", "", "", "", ); my( @r1, @r2, @r3, @r4); my $t1= XML::Twig->new( ignore_elts => { lignore => 1 }, twig_handlers => { root => sub { push @r1, $_->tag; }, l1 => sub { push @r1, $_->tag; }, } ); my $t2= XML::Twig->new( twig_handlers => { root => sub { push @r2, $_->tag; }, lignore => sub { $_->parent( 'l1')->ignore; }, }, ); my $t3= XML::Twig->new( twig_handlers => { root => sub { push @r3, $_->tag; }, lignore => sub { $_->parent( 'l1')->ignore; }, }, end_tag_handlers => { l1 => sub { push @r3, $_[1]; }, }, force_end_tag_handlers_usage => 1 ); my $t4= XML::Twig->new( twig_roots => { l1 => sub { push @r4, 'l1 from roots handler'; }, lignore => sub { $_->parent( 'l1')->ignore; }, }, end_tag_handlers => { 'root/l1' => sub { push @r4, 'l1 from end_tag_handler'; }, }, ); my $i=0; foreach my $doc (@doc) { @r1=(); @r2=(); @r3=(); @r4=(); $i++; $t1->parse( $doc); is( join( ':', @r1), "l1:root", "ignore_elt with twig_handlers $i (checking that stack is properly resized)"); $t2->parse( $doc); is( join( ':', @r2), "root", "ignore_elt on ancestor with twig_handlers $i (checking that stack is properly resized)"); $t3->parse( $doc); is( join( ':', @r3), "l1:root", "ignore_elt on ancestor with twig_handlers and end_tag_handlers $i (checking that stack is properly resized)"); $t4->parse( $doc); is( join( ':', @r4), "l1 from end_tag_handler", "ignore_elt on ancestor with twig_roots and end_tag_handlers $i (checking that stack is properly resized)"); } } { my $warning=""; $SIG{__WARN__} = sub { $warning .= join '', @_ }; my $t= XML::Twig->new( dummy_opt2 => 1); $SIG{__WARN__}= $old_warning_handler; matches( $warning, '^invalid option', "warning for extra option");# test 399 } # test various handlers { my %got; my $doc= ' foobarfoono '; my @handler_exp= ( 'e', 'doc/e', '/doc/e', 'a[@a1]', 'a[@a2]', 'a[@a1="v11"]', 'a[@a1="v21"]', '*[@a1]', '*[@a1="v11"]', '*[@a1="v21"]', '*[@a1=~/^v21$/]', 't[string()="foobar"]', 't[string()=~ /^foo/]', 't[string()=~ /bar$/]', '_default_', ); my %handlers; foreach my $exp (@handler_exp) { my $code= "\$got{\$_->id}||=[]; push \@{\$got{\$_->id}}, '$exp'; 1"; my $sub= eval "sub { $code }"; die "oops, error in test, code is '$code'" if( $@); $handlers{$exp}= $sub; } my $t= XML::Twig->new( twig_handlers => \%handlers)->parse( $doc); my %expected= ( e1 => [ '/doc/e', 'doc/e', 'e' ], no1 => [ '_default_'], a1 => [ '*[@a1]', '*[@a1="v11"]', 'a[@a1]', 'a[@a2]', 'a[@a1="v11"]' ], a2 => [ '*[@a1]', '*[@a1="v21"]', 'a[@a1]', 'a[@a2]', 'a[@a1="v21"]', '*[@a1=~/^v21$/]' ], b1 => [ '*[@a1]', '*[@a1="v11"]' ], b2 => [ '*[@a1="v21"]', '*[@a1]', '*[@a1=~/^v21$/]'], t1 => [ 't[string()="foobar"]', 't[string()=~ /^foo/]', 't[string()=~ /bar$/]' ], t2 => [ 't[string()=~ /^foo/]' ], t3 => [ '_default_'], doc1 => [ '_default_'], ); foreach my $elt (sort keys %expected) { my $expected= join( ' - ', sort @{$expected{$elt}}); my $got= $got{$elt} ? join( ' - ', sort @{$got{$elt}}) : ''; is( $got, $expected, "handlers on $elt"); # 10 tests 400 - 409 } my %handlers2; my %got2; foreach my $exp (@handler_exp) { my $code= "\$got2{\$_->id}||=[]; push \@{\$got2{\$_->id}}, '$exp:2'; 1"; my $sub= eval "sub { $code }"; die "oops, error in test, code is '$code'" if( $@); $t->setTwigHandler( $exp, $sub); } $t->parse( $doc); foreach my $elt (sort keys %expected) { my $expected= join( ' - ', map { "$_:2" } sort @{$expected{$elt}}); my $got= $got2{$elt} ? join( ' - ', sort @{$got2{$elt}}) : ''; is( $got, $expected, "handlers on $elt (2)"); # 10 tests 410 - 419 } } { my $t= XML::Twig->new->parse( ''); $t->change_gi( elt1 => 'elt2'); $t->change_gi( elt3 => 'elt4'); is( $t->sprint, '', 'change_gi');# test 420 } # these do not pass (yet?) { my $doc= '

text

<ignored>

more text

'; my $t= XML::Twig->new( start_tag_handlers => { i => sub { $_->ignore( 'string') }}) ->parse( $doc); is( $t->sprint, '

text

more text

', 'ignore');# test 421 $t->set_keep_encoding( 1); $t->parse( $doc); is( $t->sprint, '

text

more text

', 'ignore');# test 422 XML::Twig::Elt::set_keep_encoding( 0); } { my $t= XML::Twig->new->parse( '

'); my $p= $t->first_elt( '*[@ns1:att=~/^f/]'); is( $p->namespace, 'uri', 'namespace on elt');# test 423 is( $p->namespace( 'ns1'), 'uri2', 'namespace with arg');# test 424 is( $p->namespace( 'xmlns'), 'http://www.w3.org/2000/xmlns/', 'namespace for xmlns');# test 425 is( $t->root->namespace(), '', 'default namespace');# test 426 is( $t->root->namespace( ''), '', 'namespace with arg default');# test 427 is( $t->root->namespace( 'xml'), 'http://www.w3.org/XML/1998/namespace', 'namespace for xml');# test 428 } { my $t= XML::Twig->new->parse( 'text'); is( $t->root->first_child( 'e[@att="foo"]')->id, 'e2', 'cond on att value');# test 429 is( $t->root->first_child( '*[@att="foo"]')->id, 'f1', 'cond on att value (with wc)');# test 430 is( $t->root->first_child( '*[@att="foo" and @id="e2"]')->id, 'e2', 'and cond on att value');# test 431 is( $t->root->first_child( '*[@att="foo" or @id="e2"]')->id, 'f1', 'and cond on att value');# test 432 is( $t->root->first_child( 't[string()="text"]')->id, 't1', 'string cond');# test 433 is( $t->root->first_child( '*[string()="text"]')->id, 't1', 'string cond wc');# test 434 is( $t->root->first_child( 't[string()=~/^t/]')->id, 't1', 'regexp cond');# test 435 is( $t->root->first_child( '*[string()=~/^t/]')->id, 't1', 'regexp cond wc');# test 436 is( $t->root->first_child( qr/^t/)->id, 't1', 'regexp cond wc');# test 437 my $sprint= $t->root->first_child( 't')->sprint; $t->root->first_child( 't')->change_att_name( 'foo'); is( $t->root->first_child( 't')->sprint, $sprint, 'change_att_name on non existent att');# test 438 my $ids= join ':', sort keys %{$t->{twig_id_list}}; my $elt= XML::Twig::Elt->new( 'e'); is( $elt->sprint, '', 'new elt');# test 439 $elt->del_id; is( $elt->sprint, '', 'del_id, no id');# test 440 $elt->set_id( 'new_e'); is( $elt->sprint, '', 'set_id');# test 441 my( $new_ids)= join ':', sort keys %{$t->{twig_id_list}}; is( $new_ids, $ids, 'set_id on elt not in the tree');# test 442 $elt->del_id; is( $elt->sprint, '', 'del_id, id removed');# test 443 nok( $t->first_elt( 'e')->next_elt( $t->first_elt( 'e')), 'next_elt on empty subtree');# test 444 nok( $t->first_elt( 'e')->next_elt($t->first_elt( 'e'), 'e'), 'next_elt on empty subtree');# test 445 is( $t->root->get_xpath( './e[1]', 0)->id, 'e1', 'get_xpath with ./');# test 446 is( $t->root->first_child->get_xpath( '/doc/e[1]', 0)->id, 'e1', 'get_xpath with /');# test 447 is( $t->root->first_child->get_xpath( '/doc/e[-1]', 0)->id, 'e3', 'get_xpath with /');# test 448 is( $t->root->first_child->get_xpath( './../e[2]', 0)->id, 'e2', 'get_xpath with ..');# test 449 is( $t->root->first_child->get_xpath( './../*[2]', 0)->id, 'f1', 'get_xpath with ../*[2]');# test 450 is( $t->root->first_child->get_xpath( './../*', 0)->id, 'e1', 'get_xpath with ../*');# test 451 } { my $t= XML::Twig->new->parse( ''); is( $t->root->cmp( $t->root), 0, 'cmp root with itself');# test 452 my $ne= $t->root->new( 'ne'); is( $ne->cmp( $ne), 0, 'cmp with itself');# test 453 is_undef( $t->root->cmp( $ne), 'cmp elt in different trees');# test 454 my $t_sprint= $t->sprint; $t->root->field_to_att( 'foo'); is( $t->sprint, $t_sprint, 'field_to_att on wrong field');# test 455 my $ne_sprint= $ne->sprint; $ne->field_to_att( 'foo'); is( $ne->sprint, $ne->sprint, 'field_to_att on wrong field (no child)');# test 456 $ne->prefix( 'p 1pr', 'asis'); is( $ne->sprint, 'p 1pr', 'prefix asis');# test 457 $ne->prefix( 'p 2', 'asis'); is( $ne->sprint, 'p 2p 1pr', 'prefix asis');# test 458 $ne->suffix( 's 1su', 'asis'); is( $ne->sprint, 'p 2p 1prs 1su', 'prefix asis');# test 459 $ne->suffix( 's 2', 'asis'); is( $ne->sprint, 'p 2p 1prs 1sus 2', 'prefix asis');# test 460 } { my $t= XML::Twig->new( twig_handlers => { w => sub { $_->wrap_in( 'ww'); } }) ->parse( '

text

'); is( $t->sprint, '

text

', 'wrap current elt');# test 461 $t->root->wrap_in( 'd'); is( $t->sprint, '

text

', 'wrap root');# test 462 } { my $t= XML::Twig->new( twig_handlers => { w => sub { $_->parent->wrap_in( 'ww'); } }) ->parse( '

text

'); is( $t->sprint, '

text

', 'wrap real current elt');# test 463 } { my $t= XML::Twig->new( twig_handlers => { w => sub { $_->parent->wrap_in( 'ww'); } }) ->parse( '

text

'); is( $t->sprint, '

text

', 'wrap current elt');# test 464 } my $doc=q{]>}; my $t= XML::Twig->new->parse( $doc); (my $out= $t->sprint)=~ s{\n}{}g; is( $out, $doc, 'doc with entities but no DTD');# test 465 # test is_first(last)_child { my $t= XML::Twig->new->parse( q{}); my $root= $t->root; $root->reset_cond_cache; nok( $root->is_first_child(), 'root as first child');# test 466 nok( $root->is_last_child(), 'root as last child');# test 467 my $elt1= $root->first_child( 'elt1'); ok( $elt1->is_first_child(), 'first_child, no argument');# test 468 ok( $elt1->is_first_child( 'elt1'), 'first_child( elt1)');# test 469 nok( $elt1->is_first_child( 'elt2'), 'first_child( elt2)');# test 470 nok( $elt1->is_first_child( 'dummy'), 'first_child( dummy)');# test 471 nok( $elt1->is_last_child( ), 'last_child');# test 472 ok( $elt1->is_last_child( 'elt1'), 'last_child( elt1)');# test 473 nok( $elt1->is_last_child( 'elt2'), 'last_child( elt2)');# test 474 nok( $elt1->is_last_child( 'dummy'), 'last_child( dummy)');# test 475 } # testing alternate start tag parser { my $t=XML::Twig->new( keep_encoding => 1, parse_start_tag => sub { return ( toto => att => 1)}) ->parse( ''); is( $t->sprint, '', 'parse_start_tag');# test 476 } { my $t=XML::Twig->new( parse_start_tag => sub { return ( toto => att => 1)}) ->parse( ''); is( $t->sprint, '', 'parse_start_tag');# test 477 } # testing output_filter option { my $t= XML::Twig->new( output_filter => sub { return 'a' })->parse( ''); is( $t->sprint, 'a', 'output_filter option');# test 478 } # testing output_text_filter option { my $t= XML::Twig->new( output_text_filter => sub { return 'a' })->parse( ''); is( $t->sprint, '
', 'output_text_filter option');# test 479 } # testing id option { my $t= XML::Twig->new( id => "foo") ->parse( 'bar'); is( $t->elt_id( "f2")->sprint, 'bar', 'id option');# test 480 } # testing no_prolog option { my $t= XML::Twig->new( no_prolog => 1) ->parse( ''); is( $t->sprint, '', 'no_prolog option');# test 481 } # testing no_prolog option { my $t= XML::Twig->new( no_prolog => 1, keep_encoding => 1) ->parse( ''); is( $t->sprint, '', 'no_prolog option');# test 482 } # testing _all_ handler { my $nb_calls= 0; my $t= XML::Twig->new( twig_handlers => { _all_ => sub { $nb_calls++ } }) ->parse( 'text'); is( $nb_calls, 3, '_all_ handler');# test 483 } { my $nb_calls= 0; my $t= XML::Twig->new( start_tag_handlers => { _all_ => sub { $nb_calls++ } }) ->parse( 'text'); is( $nb_calls, 3, '_all_ handler (on starttag)');# test 484 } # test changing handlers # expressions in @exp must match the elements { my @exp= ( 'elt', 'doc/elt', '/doc/elt', 'elt[@att]', 'elt[@att="att1"]', '*[@att]', '*[@att="att1"]', '*[@att=~/att/]', 'elt[@att=~/^att/]', '_default_', 'elt[string()="toto"]', 'elt[string()=~/to/]', 'elt[string(sub)="toto"]', 'elt[string(sub)=~/to/]', ); my $doc= q{totototo totototo }; foreach my $exp (@exp) { my $res=''; my $t= XML::Twig->new( twig_handlers => { $exp => sub { $res .= "O"; }, change => sub { $res .= "C"; $_[0]->setTwigHandler( $exp => sub { $res .= "N"; }); nok( $_->closed, 'closed (on open element)');# 14 tests 485 - 498 }, doc => sub { }, # so _default_ doesn't find it sub => sub { }, }, ) ->parse( $doc); is( $res, 'OOCNN', "changing handlers on $exp");# 14 tests 499 - 512 } } { my $res=''; my $doc= q{totototo totototo totototo }; my %handlers= map { build_handler_on_att( 'O', $_) } (1..3); my $t= XML::Twig->new( twig_handlers => { %handlers, change => sub { foreach( 1..3) { $_[0]->setTwigHandler( build_handler_on_att( 'N', $_)) } }, change2 => sub { $_[0]->setTwigHandler( 'elt[@att="1"]', undef); $_[0]->setTwigHandler( build_handler_on_att( 'D', 2)); } }, ) ->parse( $doc); is( $res, 'O1O2O3N1N1N3D2', "changing handlers on atts");# test 513 sub build_handler_on_att { my( $prefix, $nb)= @_; return( qq{elt[\@att="$nb"]} => sub { $res.= $prefix . $nb }); } } { my $res=''; my $doc= q{totototo totototo totototo }; my %handlers= map { build_att_regexp_handler( 'O', $_) } (1..3); my $t= XML::Twig->new( twig_handlers => { %handlers, change => sub { foreach( 1..3) { $_[0]->setTwigHandler( build_att_regexp_handler( 'N', $_)) } }, change2 => sub { $_[0]->setTwigHandler( 'elt[@att=~ /1/]', undef); $_[0]->setTwigHandler( build_att_regexp_handler( 'D', 2)); } }, ) ->parse( $doc); is( $res, 'O1O2O3N1N1N3D2', "changing handlers on regexps on atts");# test 514 sub build_att_regexp_handler { my( $prefix, $nb)= @_; return( qq{elt[\@att=~ /$nb/]} => sub { $res.= $prefix . $nb }); } } { my $res=''; my $doc= q{totototo totototo totototo }; my %handlers= map { build_handler_on_star_att( 'O', $_) } (1..3); my $t= XML::Twig->new( twig_handlers => { %handlers, change => sub { foreach( 1..3) { $_[0]->setTwigHandler( build_handler_on_star_att( 'N', $_)) } }, change2 => sub { $_[0]->setTwigHandler( '*[@att="1"]', undef); $_[0]->setTwigHandler( build_handler_on_star_att( 'D', 2)); } }, ) ->parse( $doc); is( $res, 'O1O2O3N1N1N3D2', "changing handlers on star atts");# test 515 sub build_handler_on_star_att { my( $prefix, $nb)= @_; return( qq{*[\@att="$nb"]} => sub { $res.= $prefix . $nb }); } } { my $res=''; my $doc= q{totototo totototo totototo }; my %handlers= map { build_star_att_regexp_handler( 'O', $_) } (1..3); my $t= XML::Twig->new( twig_handlers => { %handlers, change => sub { foreach( 1..3) { $_[0]->setTwigHandler( build_star_att_regexp_handler( 'N', $_)) } }, change2 => sub { $_[0]->setTwigHandler( '*[@att=~ /1/]', undef); $_[0]->setTwigHandler( build_star_att_regexp_handler( 'D', 2)); } }, ) ->parse( $doc); is( $res, 'O1O2O3N1N1N3D2', "changing handlers on regexps on star atts");# test 516 sub build_star_att_regexp_handler { my( $prefix, $nb)= @_; return( qq{*[\@att=~ /$nb/]} => sub { $res.= $prefix . $nb }); } } { my $res=''; my $doc= q{123 113 121 }; my %handlers= map { build_string_handler( 'O', $_) } (1..3); my $t= XML::Twig->new( twig_handlers => { %handlers, change => sub { foreach( 1..3) { $_[0]->setTwigHandler( build_string_handler( 'N', $_)) } }, change2 => sub { $_[0]->setTwigHandler( 'elt[string()= "1"]', undef); $_[0]->setTwigHandler( build_string_handler( 'D', 2)); } }, ) ->parse( $doc); is( $res, 'O1O2O3N1N1N3D2', "changing handlers on elt[string()]");# test 517 sub build_string_handler { my( $prefix, $nb)= @_; return( qq{elt[string()= "$nb"]} => sub { $res.= $prefix . $nb }); } } { my $res=''; my $doc= q{123 113 121 }; my %handlers= map { build_regexp_handler( 'O', $_) } (1..3); my $t= XML::Twig->new( twig_handlers => { %handlers, change => sub { foreach( 1..3) { $_[0]->setTwigHandler( build_regexp_handler( 'N', $_)) } }, change2 => sub { $_[0]->setTwigHandler( 'elt[string()=~ /1/]', undef); $_[0]->setTwigHandler( build_regexp_handler( 'D', 2)); } }, ) ->parse( $doc); is( $res, 'O1O2O3N1N1N3D2', "changing handlers on elt[string()]");# test 518 sub build_regexp_handler { my( $prefix, $nb)= @_; return( qq{elt[string()=~ /$nb/]} => sub { $res.= $prefix . $nb }); } } # test PI and comment drops { my $doc= q{text more text}; (my $doc_without_pi = $doc)=~ s{<\?pi.*?\?>}{}g; (my $doc_without_comment = $doc)=~ s{}{}g; (my $doc_without_all = $doc)=~ s{<(\?pi|!--).*?(\?|--)>}{}g; my $t= XML::Twig->new( pi => 'drop', comments => 'process')->parse( $doc); is( normalize_xml( $t->sprint), $doc_without_pi, 'drop pis');# test 519 $t= XML::Twig->new( pi => 'process', comments => 'drop')->parse( $doc); is( normalize_xml( $t->sprint), $doc_without_comment, 'drop comments');# test 520 $t= XML::Twig->new( pi => 'drop' , comments => 'drop')->parse( $doc); is( normalize_xml( $t->sprint), $doc_without_all, 'drop comments and pis');# test 521 my $doc6=q{text more text}; $t= XML::Twig->new( pi => 'keep')->parse( $doc6); is( _hash( normalize_xml( $t->sprint)), _hash( $doc6), 'keep pi');# test 522 my $doc5=q{text more text}; $t= XML::Twig->new( pi => 'process')->parse( $doc5); is( normalize_xml( $t->sprint), $doc5, 'process pi');# test 523 my $doc4=q{text more text}; $t= XML::Twig->new->parse( $doc4); is( _hash( normalize_xml( $t->sprint)), _hash( $doc4), 'comment before PI (2 PIs, no comments)');# test 524 my $doc3=q{text more text}; $t= XML::Twig->new->parse( $doc3); is( _hash( normalize_xml( $t->sprint)), _hash( $doc3), 'comment before PI (2 PIs, no comments)');# test 525 my $doc1=q{ttext more text}; $t= XML::Twig->new->parse( $doc1); is( _hash( normalize_xml( $t->sprint)), _hash( $doc1), 'comment before PI (2 PIs, pcdata before pi)');# test 526 my $doc2=q{ text more text}; $t= XML::Twig->new->parse( $doc2); is( _hash( normalize_xml( $t->sprint)), _hash( $doc2), 'comment before PI (2 PIs)');# test 527 $t= XML::Twig->new->parse( $doc); is( _hash( normalize_xml( $t->sprint)), _hash( $doc), 'comment before PI (3 PIs)');# test 528 } # returns a string that has all the chars in the input, ordered, to allow # comparison of texts without taking the order into consideration sub _hash { return sort split //, $_[0]; } { my $doc=q{}; my $res=''; my $t= XML::Twig->new( twig_roots => { root => 1 }, start_tag_handlers => { 'elt1' => sub { $res.= 'E1'; }, 'elt2[@att="a"]' => sub { $res .= 'E2'; }, 'elt7[@att=~/b/]' => sub { $res .= 'E3'; }, '/doc/elt3' => sub { $res .= 'E4'; }, 'elt3/elt4' => sub { $res .= 'E5'; }, '*[@att="c"]' => sub { $res .= 'E6'; }, '*[@att=~/d/]' => sub { $res .= 'E7'; }, _default_ => sub { $res .= 'E0'; } }, )->parse( $doc); is( $res => 'E0E1E2E3E4E5E6E7E0', 'all types of handlers on start_tags');# test 529 } { my $doc= q{ }; my $t= XML::Twig->new( keep_spaces => 1)->parse( $doc); is( $t->sprint, $doc, 'spaces before cdata');# test 530 } { my $doc= q{ }; my $t= XML::Twig->new( keep_spaces => 1)->parse( $doc); is( $t->sprint, $doc, '2 cdata sections');# test 531 } { my $doc= q{ }; my $t= XML::Twig->new( keep_spaces => 1, comments => 'process')->parse( $doc); is( $t->sprint, $doc, 'spaces and extra data before cdata');# test 532 } { # fun with suffix and asis my $t=XML::Twig->new->parse( 'to'); $t->root->suffix( 'to'); is( $t->sprint, 'toto', 'regular suffix');# test 533 $t=XML::Twig->new->parse( 'to'); $t->root->suffix( 'to'); is( $t->sprint, 'toto', 'regular suffix needs new elt');# test 534 $t=XML::Twig->new->parse( 'to'); $t->root->suffix( '', 'asis'); is( $t->sprint, 'to', 'asis suffix needs new elt');# test 535 $t=XML::Twig->new->parse( 'to'); $t->root->suffix( '', 'asis'); is( $t->sprint, 'to', 'asis suffix');# test 536 $t=XML::Twig->new->parse( '<to/>'); $t->root->set_asis( 1); $t->root->suffix( '', 'asis'); is( $t->sprint, '', 'asis suffix (on asis elt)');# test 537 $t=XML::Twig->new->parse( '<to/>'); $t->root->set_asis( 1); $t->root->suffix( ''); is( $t->sprint, '<to/>', 'regular suffix (on asis elt)');# test 538 } { # fun with prefix and asis my $t=XML::Twig->new->parse( 'to'); $t->root->prefix( 'to'); is( $t->sprint, 'toto', 'regular prefix');# test 539 $t=XML::Twig->new->parse( 'to'); $t->root->prefix( '', 'asis'); is( $t->sprint, 'to', 'regular prefix needs new elt');# test 540 $t=XML::Twig->new->parse( 'to'); $t->root->prefix( 'to'); is( $t->sprint, 'toto', 'asis prefix needs new elt');# test 541 $t=XML::Twig->new->parse( 'to'); $t->root->prefix( '', 'asis'); is( $t->sprint, 'to', 'asis prefix');# test 542 $t=XML::Twig->new->parse( '<to/>'); $t->root->set_asis( 1); $t->root->prefix( '', 'asis'); is( $t->sprint, '', 'asis prefix (on asis elt)');# test 543 $t=XML::Twig->new->parse( '<to/>'); $t->root->set_asis( 1); $t->root->prefix( ''); is( $t->sprint, '<to/>', 'regular suffix (on asis elt)');# test 544 } { # wrap_in on the current my $t= XML::Twig->new( twig_handlers => { wrapped => sub { $_->wrap_in( wrapper => { foo => 'bar'} )} }) ->parse( 'tototata'); is( $t->sprint, 'tototata', 'wrap_in');# test 545 } { my $t= XML::Twig->new->parse( q{}); ok ( $t->first_elt( 'elt1')->has_no_atts, 'has_no_atts true');# test 546 nok( $t->first_elt( 'elt2')->has_no_atts, 'has_no_atts false');# test 547 nok( $t->first_elt( 'elt3')->has_no_atts, 'has_no_atts false');# test 548 nok ( $t->first_elt( 'elt1')->has_atts, 'has_atts false');# test 549 ok( $t->first_elt( 'elt2')->has_atts, 'has_atts true');# test 550 ok( $t->first_elt( 'elt3')->has_atts, 'has_atts true');# test 551 is( $t->first_elt( 'elt1')->att_nb, 0, 'att_nb, 0');# test 552 is( $t->first_elt( 'elt2')->att_nb, 1, 'att_nb, 1');# test 553 is( $t->first_elt( 'elt3')->att_nb, 2, 'att_nb, 2');# test 554 } { my $t= XML::Twig->new->parse( '

titi

'); $t->root->split( qr/(i)/); is( $t->sprint, '

t

i

t

i

', "split with no tag");# test 555 } { my $t= XML::Twig->new->parse( '

titi toto

'); $t->root->split( 'b'); is( $t->sprint, '

titi toto

', "split with no regexp");# test 556 } { my $t= XML::Twig->new->parse( '

titi toto

'); $t->root->split( qr/foo/, 'ta'); is( $t->sprint, '

titi toto

', 'split, no match');# test 557 } { my $doc= '

toto &ent;

'; my $t= XML::Twig->new->parse( $doc); my $alt_root= $t->root->copy; is( $alt_root->sprint, $t->root->sprint, 'copy with entity');# test 558 } { my $doc= 'toto'; my $t= XML::Twig->new->parse( $doc); my $pcdata= $t->first_elt( '#TEXT'); my $start_tag= $pcdata->start_tag; nok( $start_tag, 'start_tag for a text element');# test 559 $t->root->set_att( '#priv_att' => 1); is( $t->sprint, $doc, 'private attributes');# test 560 my $priv_elt= $t->root->insert( '#priv_elt'); is( $t->sprint, $doc, 'private element');# test 561 $priv_elt->set_gi( 'foo'); is( $t->sprint, 'toto', 'private element');# test 562 $priv_elt->set_gi( '#priv'); is( $t->sprint, $doc, 'private element');# test 563 $priv_elt->set_att( att => "val"); is( $t->sprint, $doc, 'private element');# test 564 $priv_elt->set_gi( 'foo'); is( $t->sprint, 'toto', 'private element');# test 565 } { my $doc= qq{val1val2}; my $out= qq{\n\n \n \n val1\n val2\n \n\n}; my $t=XML::Twig->new( pretty_print => 'record')->parse( $doc); is( $t->sprint, $out, 'record with empty record');# test 566 $t->set_pretty_print( 'none'); } { my $e= XML::Twig::Elt->new( 'toto'); nok( scalar $e->_is_private, 'private elt (not)');# test 567 $e->set_tag( '#toto'); ok( scalar $e->_is_private, 'private elt (yes)');# test 568 ok( scalar XML::Twig::Elt::_is_private_name( '#toto'), '_is_private_name (yes)');# test 569 nok( scalar XML::Twig::Elt::_is_private_name( 'toto'), '_is_private_name (no)');# test 570 } { my $t= XML::Twig->new->parse( ''); my $text_elt= $t->first_elt( '#TEXT'); is( $text_elt->xml_string, '', 'xml_string for cdata');# test 571 $text_elt->set_text( '<>'); is( normalize_xml( $t->sprint), ']]>', 'set_text on CDATA');# test 572 $text_elt->set_text( '<>', force_pcdata => 1); is( normalize_xml( $t->sprint), '<>', 'set_text on CDATA (with force_pcdata)');# test 573 $t->root->set_content( { att => "val" }, 'toto ', 'tata'); is( $t->root->sprint, 'toto tata', 'set_content with attributes');# test 574 $text_elt= $t->first_elt( '#TEXT'); $text_elt->set_content( 'titi'); is( $t->root->sprint, 'titi', 'set_content on text elt');# test 575 } { my $t=XML::Twig->new->parse( 'text 1text 2text 3'); my $elt1= $t->root->first_child( 'elt[1]'); my $elt2= $t->root->first_child( 'elt[2]'); my $elt3= $t->root->first_child( 'elt[3]'); my $new1= XML::Twig::Elt->new( new => "new 1"); my $new2= XML::Twig::Elt->new( new => "new 2"); my $new3= XML::Twig::Elt->new( new => "new 3"); $new1->replace( $elt1); $new2->replace( $elt2); $new3->replace( $elt3); is( $t->sprint, 'new 1new 2new 3', 'replace');# test 576 $new1->replace_with( $elt2, $elt1, $elt3); is( $t->sprint, 'text 2text 1text 3new 2new 3', 'replace');# test 577 } { if( $perl < 5.008) { skip( 1, "need perl 5.8 or above to perform these tests (you have $perl)"); } else { my $doc= 'tatat more'; my $out=''; $open->( my $fh, ">", \$out); my $t= XML::Twig->new( comments => 'process', pi => 'process')->parse( $doc); $t->flush( $fh); is( $out, $doc, 'flush with cdata');# test 578 } } { my $doc=< textthis
text to 
keep spaces
  in like
    this
END my $t= XML::Twig->new( pretty_print => 'indented', keep_spaces_in => [ qw(pre) ])->parse( $doc); (my $indented= $doc)=~ s{}{\n }; is( $t->sprint, $indented, 'indented with keep_spaces_in');# test 579 $t->set_pretty_print( 'indented'); } { my $doc='text'; my $nsgmls= qq{text
\n}; my $t= XML::Twig->new( pretty_print => 'nsgmls')->parse( $doc); is( $t->sprint, $nsgmls, 'nsgmls style');# test 580 $t->set_pretty_print( 'indented'); } { my $t= XML::Twig->new->parse( 'text'); $t->root->erase; is( $t->root->sprint, "text\n", 'erase root');# test 581 } { my $t= XML::Twig->new->parse( ''); my $elt2= $t->first_elt( 'elt2'); ok( $elt2->sibling( 0, 'elt2'), 'sibling 0 (ok)');# test 582 nok( $elt2->sibling( 0, 'elt1'), 'sibling 0 (nok)');# test 583 nok( $elt2->sibling( 1, 'elt1'), 'sibling 1 (nok)');# test 584 nok( $elt2->sibling( -1, 'elt3'), 'sibling -1 (nok)');# test 585 ok( $elt2->in( 'doc'), 'in with condition');# test 586 ok( $elt2->in( $t->root), 'in with elt');# test 587 nok( $elt2->in( 'elt1'), 'in with condition (false)');# test 588 nok( $elt2->in( $t->root->last_child), 'in with elt (false)');# test 589 is( $elt2->prev_sibling( 'elt1[@att="val"]')->gi, 'elt1', '@att="val" condition');# test 590 nok( $elt2->prev_sibling( 'elt1[@att="val2"]'), '@att="val" condition (not found)');# test 591 is( $elt2->prev_sibling( 'elt1[@att=~ /val/]')->gi, 'elt1', '@att=~ /val/ condition');# test 592 nok( $elt2->prev_sibling( 'elt1[@att=~/val2/]'), '@att=~/val2/ condition (not found)');# test 593 } { if( $perl < 5.008) { skip( 2, "need perl 5.8 or above to perform these tests (you have $perl)"); } else { my $out=''; $open->( my $fh, ">", \$out); my $doc='texttext'; $t= XML::Twig->new( twig_roots=> { elt2 => 1 }, start_tag_handlers => { elt => sub { print $fh ''; } }, end_tag_handlers => { elt3 => sub { print $fh ''; } }, twig_print_outside_roots => $fh, keep_encoding => 1 ) ->parse( $doc); is( $out, 'texttext',# test 594 'twig_print_outside_roots, start/end_tag_handlers, keep_encoding'); close $fh; $out=''; $open->( $fh, ">", \$out); $t= XML::Twig->new( twig_roots=> { elt2 => 1 }, start_tag_handlers => { elt => sub { print $fh ''; } }, end_tag_handlers => { elt3 => sub { print $fh ''; } }, twig_print_outside_roots => $fh, ) ->parse( $doc); is( $out, 'texttext',# test 595 'twig_print_outside_roots and start_tag_handlers'); } } { my $t= XML::Twig->new->parse( 'text 1 text 2 text 3 '); my @a1= $t->get_xpath( '/doc/elt[@att="a1"]'); is( ids( @a1), 'elt-1:elt-2', 'xpath /doc/elt[@att="a1"]');# test 596 @a1= $t->get_xpath( '/doc/*[@att="a1"]'); is( ids( @a1), 'elt-1:elt-2', 'xpath /doc/*[@att="a1"]');# test 597 @a1= $t->get_xpath( '/doc//*[@att="a1"]'); is( ids( @a1), 'elt-1:elt-2', 'xpath /doc//*[@att="a1"]');# test 598 @a1= $t->get_xpath( '//*[@att="a1"]'); is( ids( @a1), 'elt-1:elt-2', 'xpath //*[@att="a1"]');# test 599 @a1= $t->get_xpath( '//elt[@att="a1"]'); is( ids( @a1), 'elt-1:elt-2', 'xpath //elt[@att="a1"]');# test 600 my @a2= $t->get_xpath( '//elt2[@id="elt2-4" and @att2="a2"]'); is( ids( @a2), 'elt2-4', 'xpath //elt2[@id="elt2-4" and @att2="a2"]');# test 601 @a2= $t->get_xpath( '//elt2[@id="toto" or @att2="a2"]'); is( ids( @a2), 'elt2-2:elt2-4:elt2-6', 'xpath //elt2[@id="toto" or @att2="a2"]');# test 602 my $a2= $t->get_xpath( '//elt2[@id="toto" or @att2="a2"]', 1); is( $a2->att( 'id'), 'elt2-4', 'xpath //elt2[@id="toto" or @att2="a2"], offset 1');# test 603 @a2= $t->get_xpath( \@a1, './elt2[@id="toto" or @att2="a2"]'); is( ids( @a2), 'elt2-2:elt2-4', 'xpath //elt2[@id="toto" or @att2="a2"] on @a1');# test 604 $a2= $t->findvalue( \@a1, './elt2[@id="toto" or @att2="a2"]'); is( $a2, 'text 1text 2', 'findvalue //elt2[@id="toto" or @att2="a2"] on @a1');# test 605 } { my $doc= qq{\n]>\ntoto &ent;\n}; my $t= XML::Twig->new( keep_encoding => 1)->parse( $doc); is( $t->sprint, $doc, 'keep_encoding with entity');# test 606 } # testing DTD parsing { my $doc= qq{\n\n]>\n}; my $t= XML::Twig->new->parse( $doc); is( $t->sprint, $doc, 'simple DTD');# test 607 } { my $doc= qq{\n\n\n]>\n}; my $t= XML::Twig->new->parse( $doc); is( $t->sprint, $doc, 'DTD 1 element and simple attlist');# test 608 } { my $doc=< ]> text DTD my $t= XML::Twig->new( ErrorContext => 1)->parse( $doc); is( $t->sprint, $doc, 'complex DTD');# test 609 is( join( ':', $t->model), 'doc:elt:elt2', 'model with no elt (all element in the dtd)');# test 610 } # testing do_not_output_DTD option { my $t= XML::Twig->new( no_prolog => 1) ->parse( ']>'); is( $t->sprint, '', 'no_prolog');# test 611 } # testing do_not_output_DTD option { my $t= XML::Twig->new( do_not_output_DTD => 1) ->parse( ']>'); is( $t->sprint, '', 'do_not_output_DTD option');# test 612 $t->purge; } # handlers on PIs { my $t= XML::Twig->new( pretty_print => 'none', twig_handlers => { '?t1' => sub { return ""; } }) ->parse( 'toto'); is( $t->sprint, 'toto', 'handler on pi t1, with comment');# test 613 } # handlers on PIs { my $t= XML::Twig->new( pretty_print => 'none', twig_handlers => { '?' => sub { return ""; } }) ->parse( 'toto'); is( $t->sprint, 'toto', 'handler on all pi, with comment');# test 614 } # creating an output encoding { if( $perl < 5.008) { skip( 1, "need perl 5.8 or above to perform these tests (you have $perl)"); } else { my $t= XML::Twig->new->parse( ''); $t->set_output_encoding( 'ISO-8859-1'); is( $t->sprint, qq{}, 'creating an output encoding');# test 615 } } # some calls that return false { my $root= XML::Twig->new->parse( '')->root; nok( $root->last_child_matches( 'toto'), 'last_child_matches (not)');# test 616 nok( $root->first_child_matches( 'toto'), 'first_child_matches(not)');# test 617 nok( $root->child_text( 1, 'toto'), 'child_text(not)');# test 618 nok( $root->child_trimmed_text( 1, 'toto'), 'child_trimmed_text(not)');# test 619 nok( $root->child_matches( 1, 'toto'), 'child_matches(not)');# test 620 nok( $root->prev_sibling_matches( 'toto'), 'prev_sibling_matches(not)');# test 621 nok( $root->prev_elt_text( 'toto'), 'prev_elt_text(not)');# test 622 nok( $root->sibling_text( 1, 'toto'), 'prev_elt_text(not)');# test 623 nok( $root->prev_elt_trimmed_text( 'toto'), 'prev_elt_trimmed_text(not)');# test 624 nok( $root->prev_elt_matches( 'toto'), 'prev_elt_matches(not)');# test 625 nok( $root->next_elt_trimmed_text( 'toto'), 'next_elt_trimmed_text(not)');# test 626 nok( $root->next_elt_matches( 'toto'), 'next_elt_matches(not)');# test 627 nok( $root->parent_text( 'toto'), 'parent_text(not)');# test 628 nok( $root->parent_trimmed_text( 'toto'), 'parent_trimmed_text(not)');# test 629 nok( $root->pcdata_xml_string, 'pcdata_xml_string of a non pcdata elt');# test 630 nok( $root->att_xml_string( 'foo'), 'att_xml_string of a non existing att');# test 631 } { my $doc=< text 1 text 2 text 1 text 2 END my $expected_doc=q{ text 1 text 2 text 1text 2}; my $expected_s1= q{ text 1 text 2 }; my $expected_s2= q{text 1text 2}; my $t=XML::Twig->new(pretty_print => 'none')->parse( $doc); is( $t->sprint, $expected_doc, 'doc with xml:space="preserve"');# test 632 is( $t->get_xpath( '//*[@id="s1"]', 0)->sprint, $expected_s1, 'sub element of an xml:space="preserve" element');# test 633 is( $t->get_xpath( '//*[@id="s2"]', 0)->sprint, $expected_s2, 'regular sub element');# test 634 } { my $e= XML::Twig::Elt->parse( ''); is( $e->xml_text, '', 'xml_text of an empty elt');# test 635 $e= XML::Twig::Elt->parse( 'toto')->first_child; is( $e->xml_text, 'toto', 'xml_text of a pcdata');# test 636 $e->set_content(); is( $e->xml_text, 'toto', 'empty set_content');# test 637 $e= XML::Twig::Elt->parse( '')->first_child; is( $e->xml_text, '', 'xml_text of a cdata');# test 638 } { my $doc= q{toto} . q{tata}; my $expected_keep= $doc; $expected_keep=~ s{toto}{foo}; $expected_keep=~ s{tata}{bar}; my $t= XML::Twig->new( map_xmlns => { uri1 => "ns_1", uri2 => "ns_2" }, keep_original_prefix => 1, twig_handlers => { 'ns_1:elt' => sub { $_->set_text( 'foo'); }, 'ns_2:elt' => sub { $_->set_text( 'bar'); }, } ) ->parse( $doc); is( $t->sprint, $expected_keep, "map_xmlns and keep_original_prefix");# test 639 $t= XML::Twig->new( map_xmlns => { uri1 => "ns_1", uri2 => "ns_2" }, twig_handlers => { 'ns_1:elt' => sub { $_->set_text( 'foo'); }, 'ns_2:elt' => sub { $_->set_text( 'bar'); }, } ) ->parse( $doc); (my $expected_remap= $expected_keep)=~ s{ns(?=\d)}{ns_}g; is( $t->sprint, $expected_remap, "map_xmlns");# test 640 } { my $doc= q{toto} . q{tata}; my $expected_keep= $doc; $expected_keep=~ s{toto}{foo}; $expected_keep=~ s{tata}{bar}; my $t= XML::Twig->new( map_xmlns => { uri1 => "ns_1", uri2 => "ns_2" }, keep_original_prefix => 1, twig_handlers => { 'ns_1:elt' => sub { $_->set_text( 'foo'); }, 'ns_2:elt' => sub { $_->set_text( 'bar'); }, } ) ->parse( $doc); is( $t->sprint, $expected_keep, "map_xmlns and keep_original_prefix");# test 641 $t= XML::Twig->new( map_xmlns => { uri1 => "ns_1", uri2 => "ns_2" }, twig_handlers => { 'ns_1:elt' => sub { $_->set_text( 'foo'); }, 'ns_2:elt' => sub { $_->set_text( 'bar'); }, } ) ->parse( $doc); (my $expected_remap= $expected_keep)=~ s{ns(?=\d)}{ns_}g; is( $t->sprint, $expected_remap, "map_xmlns");# test 642 } { my $doc= q{toto} . q{tata}; my $expected_keep= $doc; $expected_keep=~ s{toto}{foo}; $expected_keep=~ s{tata}{bar}; my $t= XML::Twig->new( map_xmlns => { uri1 => "ns_1", uri2 => "ns_2" }, keep_original_prefix => 1, twig_handlers => { 'ns_1:elt' => sub { $_->set_text( 'foo'); }, 'ns_2:elt' => sub { $_->set_text( 'bar'); }, } ) ->parse( $doc); is( $t->sprint, $expected_keep, "map_xmlns and keep_original_prefix with default ns");# test 643 $t= XML::Twig->new( map_xmlns => { uri1 => "ns_1", uri2 => "ns_2" }, twig_handlers => { 'ns_1:elt' => sub { $_->set_text( 'foo'); }, 'ns_2:elt' => sub { $_->set_text( 'bar'); }, } ) ->parse( $doc); (my $expected_remap= $expected_keep)=~ s{ns(?=\d)}{ns_}g; $expected_remap=~ s{(?sprint, $expected_remap, "map_xmlns with default ns");# test 644 } { my $doc= q{toto} . q{tatakaboom}; my $expected_keep= $doc; $expected_keep=~ s{toto}{foo}; $expected_keep=~ s{tata}{bar}; my $t= XML::Twig->new( map_xmlns => { uri1 => "ns_1", uri2 => "ns_2" }, keep_original_prefix => 1, twig_handlers => { 'ns_1:elt' => sub { $_->set_text( 'foo'); }, 'ns_2:elt' => sub { $_->set_text( 'bar'); }, } ) ->parse( $doc); is( $t->sprint, $expected_keep, "map_xmlns and keep_original_prefix with default ns");# test 645 $t= XML::Twig->new( map_xmlns => { uri1 => "ns_1", uri2 => "ns_2" }, twig_handlers => { 'ns_1:elt' => sub { $_->set_text( 'foo'); }, 'ns_2:elt' => sub { $_->set_text( 'bar'); }, } ) ->parse( $doc); (my $expected_remap= $expected_keep)=~ s{ns(?=\d)}{ns_}g; $expected_remap=~ s{(?sprint, $expected_remap, "map_xmlns with default ns");# test 646 } { my $t= XML::Twig->new->parse( ''); my $elt= $t->root->insert( elt => { att => undef}); $elt->insert( '#PCDATA'); is( $t->sprint => '', "undef text and att");# test 647 } exit 0; XML-Twig-3.52/t/xmlxpath_02descendant.t0000755000175000017500000000063212732215763020120 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 4); ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok($t); my @bbb = $t->findnodes('//BBB'); ok(@bbb, 5); my @subbbb = $t->findnodes('//DDD/BBB'); ok(@subbbb, 3); exit 0; __DATA__ XML-Twig-3.52/t/xmlxpath_06attrib_val.t0000755000175000017500000000102112732215763020134 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 5); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @nodes; @nodes = $t->findnodes( '//BBB[@id = "b1"]'); ok(@nodes, 1); @nodes = $t->findnodes( '//BBB[@name = "bbb"]'); ok(@nodes, 1); @nodes = $t->findnodes( '//BBB[normalize-space(@name) = "bbb"]'); ok(@nodes, 2); exit 0; __DATA__ XML-Twig-3.52/t/test2_2.exp0000644000175000017500000000232112732215763015526 0ustar mrodrigumrodrigu ]>
S1 I1S1 I2S1 TitleS1 P1S2 P2Note P1S1 para 3
S2 introS2 TitleS2 P1S2 P2S2 P3
Annex TitleAnnex P1Annex P2
XML-Twig-3.52/t/test_need_io_scalar.t0000755000175000017500000004050112732215763017706 0ustar mrodrigumrodrigu#!/usr/bin/perl -w # tests that require IO::String to run use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; #$|=1; my $DEBUG=0; use XML::Twig; BEGIN { eval "require IO::String"; if( $@) { print "1..1\nok 1\n"; warn "skipping, need IO::String\n"; exit; } else { import IO::String; } } print "1..1778\n"; { my $out1=''; my $fh1= IO::String->new( \$out1); my $out2=''; my $fh2= IO::String->new( \$out2); my $doc='mainignoredcompletelyinignoredcompletely'; my $out= select $fh1; my $t= XML::Twig->new( ignore_elts => { i => 'print' })->parse( $doc); $t->print( $fh2); select $out; is( $out1,'ignoredcompletelyignoredcompletely', 'ignored with print option'); is( $out2,'mainin', 'print after ignored_elts'); } { my $doc='mainignoredcompletelyinignoredcompletely'; my $t= XML::Twig->new( ignore_elts => { i => 'string' })->parse( $doc); is( $t->{twig_buffered_string} || '','ignoredcompletelyignoredcompletely', 'ignored with string option'); is( $t->sprint,'mainin', 'string after ignored_elts (to string)'); } { my $string=''; my $doc='mainignoredcompletelyinignoredcompletely'; my $t= XML::Twig->new( ignore_elts => { i => \$string })->parse( $doc); is( $string,'ignoredcompletelyignoredcompletely', 'ignored with string reference option'); is( $t->sprint,'mainin', 'string after ignored_elts (to string reference)'); } { # test autoflush my $out=''; my $fh= IO::String->new( \$out); my $doc= ""; my $t= XML::Twig->nparse( twig_handlers => { elt => sub { $_->flush( $fh) } }, $doc); is( $out, $doc, "autoflush, no args"); } { my $out=''; my $fh= IO::String->new( \$out); my $doc= ""; my $t= XML::Twig->nparse( twig_handlers => { elt => sub { $_->flush( $fh, empty_tags => "expand") } }, $doc); is( $out, "", "autoflush, no args, expand empty tags"); } { # test bug on comments after the root element RT #17064 my $out=''; my $fh= IO::String->new( \$out); my $doc= q{}; XML::Twig->nparse( $doc)->print( $fh); is( $out, $doc, 'comment after root element'); } { # more tests, with flush this time my $c= ''; my $pi= ''; my @simple_docs= ('', '', 'foo'); my $i=0; my @docs= map { $i++; (my $l= $_)=~ s{#}{$i}g; $i++; (my $t= $_)=~ s{#}{$i}g; map { ("$l$_", "$_$t", "$l$_$t") } @simple_docs; } ( $c, $pi, $c.$pi, $pi.$c, $c.$c, $pi.$pi, $c.$pi.$c, $pi.$c.$pi, $c.$pi.$c.$pi, $pi.$c.$pi.$c, $c.$c.$pi, $c.$pi.$pi) ; foreach my $doc (@docs) { foreach my $options ( { comments => "keep", pi => "keep" }, { comments => "process", pi => "keep" }, { comments => "keep", pi => "process" }, { comments => "process", pi => "process" }, ) { my $options_text= join( ', ', map { "$_ => $options->{$_}" } sort keys %$options); is( XML::Twig->nparse( %$options, $doc)->sprint, $doc, "sprint cpi $options_text $doc"); is( XML::Twig->nparse( %$options, keep_encoding => 1, $doc)->sprint, $doc, "sprint cpi keep_encoding $options_text $doc"); { my $out=''; my $fh= IO::String->new( \$out); XML::Twig->nparse( %$options, $doc)->flush( $fh); is( $out, $doc, "flush cpi $options_text $doc"); } { my $out=''; my $fh= IO::String->new( \$out); XML::Twig->nparse( keep_encoding => 1, %$options, $doc)->flush( $fh); is( $out, $doc, "flush cpi keep_encoding $options_text $doc"); } } } } { my $out=''; my $fh= IO::String->new( \$out); my $doc=q{foo}; my $t= XML::Twig->new( pretty_print => 'indented', empty_tags => 'expand', twig_handlers => { elt => sub { $_[0]->flush( $fh, pretty_print => 'none', empty_tags => 'html' ); }, }, ); $t->{twig_autoflush}=0; $t->parse( $doc); is( $out => q{foo}, 'flush with a pretty_print arg'); is( $t->sprint => qq{\n \n \n \n \n \n\n}, 'flush with a pretty_print arg (checking that option values are properly restored)' ); } { my $out=''; my $fh= IO::String->new( \$out); select $fh; my $doc=q{foo}; my $t= XML::Twig->new( pretty_print => 'indented', empty_tags => 'expand', twig_handlers => { elt => sub { $_[0]->flush( pretty_print => 'none', empty_tags => 'html' ); }, }, ); $t->{twig_autoflush}=0; $t->parse( $doc); select STDOUT; is( $out => q{foo}, 'flush with a pretty_print arg (default fh)'); is( $t->sprint => qq{\n \n \n \n \n \n\n}, 'flush with a pretty_print arg (checking that option values are properly restored) (default fh)' ); } { my $out=''; my $fh= IO::String->new( \$out); select $fh; my $doc=q{foo}; my $t= XML::Twig->new( pretty_print => 'indented', empty_tags => 'expand', twig_handlers => { elt => sub { $_[0]->flush_up_to( $_, pretty_print => 'none', empty_tags => 'html' ); }, }, ); $t->{twig_autoflush}=0; $t->parse( $doc); select STDOUT; is( $out => q{foo}, 'flush with a pretty_print arg (default fh)'); is( $t->sprint => qq{\n \n\n}, 'flush with a pretty_print arg (checking that option values are properly restored)' ); } { my $out=''; my $out2=''; my $out3=''; my $out4=''; my $fh= IO::String->new( \$out); my $fh2= IO::String->new( \$out2); my $fh3= IO::String->new( \$out3); my $fh4= IO::String->new( \$out4); my $t= XML::Twig->new( empty_tags => 'expand', pretty_print => 'none')->parse( ''); $t->print( $fh); is( $out, "", "empty_tags expand"); $t->print( $fh2, empty_tags => 'normal', pretty_print => 'indented' ); is( $out2, "\n \n\n", "print with args"); $t->print( $fh3); is( $out3, "", "print without args"); is( $t->sprint( empty_tags => 'normal'), "", "empty_tags normal"); $t->print( $fh4); is( $t->sprint( pretty_print => 'indented', empty_tags => 'normal'), "\n \n\n", "empty_tags expand"); $t->set_pretty_print( 'none'); $t->set_empty_tag_style( 'normal'); } { my $out=''; my $out2=''; my $fh= IO::String->new( \$out); my $fh2= IO::String->new( \$out2); my $t= XML::Twig->new( empty_tags => 'expand', pretty_print => 'none')->parse( ''); $t->root->print( $fh); is( $out, "", "empty_tags expand"); $t->root->print( $fh2, { pretty_print => 'indented' } ); is( $out2, "\n \n\n", "print elt indented"); $out=''; $fh= IO::String->new( \$out); $t->root->print( $fh); is( $out, "", "back to default"); $t->set_pretty_print( 'none'); $t->set_empty_tag_style( 'normal'); } { my $out=''; my $out2=''; my $fh= IO::String->new( \$out); my $fh2= IO::String->new( \$out2); my $t= XML::Twig->new( empty_tags => 'expand', pretty_print => 'none'); $t->parse( '')->flush( $fh); is( $out, "", "empty_tags expand"); $t->parse( '')->flush( $fh2); is( $t->sprint( empty_tags => 'normal'), "", "empty_tags normal"); $out=''; $t->parse( '')->flush( $fh); is( $t->sprint( pretty_print => 'indented', empty_tags => 'normal'), "\n \n\n", "empty_tags expand"); $t->set_pretty_print( 'none'); $t->set_empty_tag_style( 'normal'); } { my $out=''; my $fh= IO::String->new( \$out); my $doc= q{

p1

p2

}; my $t= XML::Twig->new( twig_handlers => { flush => sub { $_->flush( $fh) } } ); $t->{twig_autoflush}=0; $t->parse( $doc); is( $out, q{

p1

p2

}, "flush"); close $fh; $out=""; $fh= IO::String->new( \$out); $t= XML::Twig->new( twig_handlers => { flush => sub { $_[0]->flush_up_to( $_->prev_sibling, $fh) } } ); $t->{twig_autoflush}=0; $t->parse( $doc); is( $out, q{

p1

p2

}, "flush_up_to"); $t= XML::Twig->new( twig_handlers => { purge => sub { $_[0]->purge_up_to( $_->prev_sibling->prev_sibling, $fh) } } ) ->parse( q{

p1

sp 1

}); is( $t->sprint, q{

sp 1

}, "purge_up_to"); } { my $out=''; my $fh= IO::String->new( \$out); my $t= XML::Twig->new()->parse( q{]>toto}); $t->dtd_print( $fh); is( $out, "\n\n]>\n", "dtd_print"); close $fh; } { my $out=""; my $fh= IO::String->new( \$out); my $t= XML::Twig->new( twig_handlers => { stop => sub { print $fh "[X]"; $_->set_text( '[Y]'); $_[0]->flush( $fh); $_[0]->finish_print( $fh); } }); $t->{twig_autoflush}=0; $t->parse( q{beforefinish}); select STDOUT; is( $out, q{[X]before[Y]finish}, "finish_print"); } package test_handlers; sub new { bless { } } sub recognized_string { return 'recognized_string'; } sub original_string { return 'original_string'; } package main; { my $out=''; my $fh= IO::String->new( \$out); my $stdout= select $fh; XML::Twig::_twig_print_original_default( test_handlers->new); select $stdout; close $fh; is( $out, 'original_string', 'twig_print_original_default'); $out=''; $fh= IO::String->new( \$out); select $fh; XML::Twig::_twig_print( test_handlers->new); select $stdout; close $fh; is( $out, 'recognized_string', 'twig_print_default'); $out=''; $fh= IO::String->new( \$out); select $fh; XML::Twig::_twig_print_end_original( test_handlers->new); select $stdout; close $fh; is( $out, 'original_string', 'twig_print_end_original'); $out=''; $fh= IO::String->new( \$out); select $fh; XML::Twig::_twig_print( test_handlers->new); select $stdout; close $fh; is( $out, 'recognized_string', 'twig_print_end'); } XML::Twig::_twig_print_entity(); # does nothing! { my %ents= ( foo => '"toto"', pile => 'SYSTEM "file.bar" NDATA bar'); my %ent_text = hash_ent_text( %ents); my $ent_text = string_ent_text( %ents); my $doc= ""; my $t= XML::Twig->new->parse( $doc); is( normalize_xml( $t->entity_list->text), $ent_text, 'entity_list'); my @entities= $t->entity_list->list; is( scalar @entities, scalar keys %ents, 'entity_list'); foreach my $ent (@entities) { my $out=''; my $fh= IO::String->new( \$out); my $stdout= select $fh; $ent->print; close $fh; select $stdout; is( normalize_xml( $out), $ent_text{$ent->name}, "print $ent->{name}"); } my $out=''; my $fh= IO::String->new( \$out); my $stdout= select $fh; $t->entity_list->print; close $fh; select $stdout; is( normalize_xml( $out), $ent_text, 'print entity_list'); } { my( $out1, $out2, $out3); my $fh1= IO::String->new( \$out1); my $fh2= IO::String->new( \$out2); my $fh3= IO::String->new( \$out3); my $stdout= select $fh3; my $t= XML::Twig->new( twig_handlers => { e => sub { $_->print( $fh2); print $fh1 "X"; $_[0]->finish_print( $fh1); }, }, ) ->parse( 'texte

text

more text

foo

'); print 'should be in $out3'; select $stdout; is( $out1, 'Xmore text

foo

', 'finish_print'); is( $out2, 'e

text

', 'print to fh'); is( $out3, 'should be in $out3', 'restoring initial fh'); } { my $doc= 'tatat more'; my $out; my $fh= IO::String->new( \$out); my $t= XML::Twig->new( comments => 'process', pi => 'process')->parse( $doc); $t->flush( $fh); is( $out, $doc, 'flush with cdata'); } { my $out=''; my $fh= IO::String->new( \$out); my $doc='texttext'; my $t= XML::Twig->new( twig_roots=> { elt2 => 1 }, start_tag_handlers => { elt => sub { print $fh ''; } }, end_tag_handlers => { elt3 => sub { print $fh ''; } }, twig_print_outside_roots => $fh, keep_encoding => 1 ) ->parse( $doc); is( $out, 'texttext', 'twig_print_outside_roots, start/end_tag_handlers, keep_encoding'); close $fh; $out=''; $fh= IO::String->new( \$out); $t= XML::Twig->new( twig_roots=> { elt2 => 1 }, start_tag_handlers => { elt => sub { print $fh ''; } }, end_tag_handlers => { elt3 => sub { print $fh ''; } }, twig_print_outside_roots => $fh, ) ->parse( $doc); is( $out, 'texttext', 'twig_print_outside_roots and start_tag_handlers'); } { my $t= XML::Twig->new->parse( ''); eval( '$t->set_output_encoding( "ISO-8859-1");'); if( $@) { skip( 1 => "your system does not seem to support conversions to ISO-8859-1: $@\n"); } else { is( $t->sprint, qq{}, 'creating an output encoding' ); } } { my $out=''; my $fh= IO::String->new( \$out); select $fh; my $doc=' ]>'; my( $expected)= $doc=~ m{()}; XML::Twig->new->parse( $doc)->dtd_print; select STDOUT; is_like( $out, $expected, "dtd_print to STDOUT"); } { my $out=''; my $fh= IO::String->new( \$out); select $fh; my $doc=''; XML::Twig->new( twig_handlers => { elt1 => sub { $_[0]->finish_print; } })->parse( $doc); select STDOUT; is( $out, '', "finish_print to STDOUT"); } { my $out=''; my $fh= IO::String->new( \$out); select $fh; my $doc=''; XML::Twig->new( keep_encoding => 1, twig_handlers => { elt1 => sub { $_[0]->finish_print; } })->parse( $doc); select STDOUT; is( $out, '', "finish_print to STDOUT"); } exit 0; XML-Twig-3.52/t/test_keep_atts_order.t0000755000175000017500000000456412732215763020142 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; use XML::Twig; { if( eval 'require Tie::IxHash') { import Tie::IxHash; print "1..7\n"; } else { warn( "Tie::IxHash not available, option keep_atts_order not allowed\n"); print "1..1\nok 1\n"; exit 0; } my $nb_elt=10; my $doc= gen_doc( $nb_elt); my $result= XML::Twig->new( pretty_print => 'indented')->parse( $doc)->sprint; isnt( $result, $doc, "keep_atts_order => 0 (first try)"); $result= XML::Twig->new( keep_atts_order => 1, pretty_print => 'indented')->parse( $doc)->sprint; is( $result, $doc, "keep_atts_order => 1 (first try)"); $result= XML::Twig->new( pretty_print => 'indented')->parse( $doc)->sprint; isnt( $result, $doc, "keep_atts_order => 0 (second try)"); $result= XML::Twig->new( keep_atts_order => 1, pretty_print => 'indented')->parse( $doc)->sprint; is( $result, $doc, "keep_atts_order => 1 (second try)"); $result= XML::Twig->new( keep_atts_order => 1, keep_encoding => 1, pretty_print => 'indented') ->parse( $doc)->sprint; is( $result, $doc, "keep_atts_order => 1, keep_encoding => 1 (first time)"); $result= XML::Twig->new( keep_encoding => 1, pretty_print => 'indented'); $result= XML::Twig->new( keep_atts_order => 1, keep_encoding => 1, pretty_print => 'indented') ->parse( $doc)->sprint; is( $result, $doc, "keep_atts_order => 1, keep_encoding => 1 (second time)"); $result= XML::Twig->new( keep_encoding => 1, pretty_print => 'indented') ->parse( $doc)->sprint; isnt( $result, $doc, " keep_encoding => 1 (second time)"); }; exit 0; sub gen_doc { my( $nb_elt)= @_; my $doc= "\n"; foreach (1..$nb_elt) { $doc .= " $_ + 1 } (0..4) ; while( my( $att, $value)= each %atts) { $doc .= qq{ $att="$value"}; } $doc .= "/>\n"; } $doc .= "\n"; return $doc; } sub randomize { my @list= @_; my $n= @list; foreach (1..10) { my $i= int rand( $n); my $j= int rand( $n); ($list[$i], $list[$j])=($list[$j], $list[$i]) } return @list; } XML-Twig-3.52/t/test_3_45.t0000755000175000017500000000702312732215763015433 0ustar mrodrigumrodrigu#!/usr/bin/perl use strict; use warnings; use XML::Twig; use Test::More tests => 16; is( XML::Twig->new( keep_encoding => 1)->parse( q{})->sprint, q{}, "quote in att with keep_encoding"); # test CDATA sections in HTML escaping https://rt.cpan.org/Ticket/Display.html?id=86773 my $html = <<'EOF';
body
EOF # module => XML::Twig->new options my %html_conv= ( 'HTML::TreeBuilder' => {}, 'HTML::Tidy' => { use_tidy => 1 }, ); foreach my $module ( sort keys %html_conv) { SKIP: { eval "use $module"; skip "$module not available", 3 if $@ ; my $parser= XML::Twig->new( %{$html_conv{$module}}); my $xml = $parser->safe_parse_html($html); print $@ if $@; my @cdata = $xml->get_xpath('//#CDATA'); ok(@cdata == 1, "1 CDATA section found (using $module)"); ok(((index $xml->sprint, "//]]>") >= 0), "end of cdata ok in doc (using $module)"); #diag "\n", $xml->sprint, "\n"; my @elts = $xml->get_xpath('//script'); foreach my $el (@elts) { #diag $el->sprint; ok(((index $el->sprint, "//]]>") >= 0), "end of cdata ok in script element (using $module)"); } } } # test & in HTML (RT #86633) my $html_with_amp='

Marco&company

'; my $expected_body= '

Marco&company

'; SKIP: { eval "use HTML::Tidy"; skip "HTML::Tidy not available", 1 if $@ ; my $parsert = XML::Twig->new(); my $html_tidy = $parsert->safe_parse_html( { use_tidy => 1 }, "

Marco&company

"); diag $@ if $@; is( $html_tidy->first_elt( 'body')->sprint, $expected_body, "& in text, converting html with use_tidy"); } SKIP: { eval "use HTML::TreeBuilder"; skip "HTML::TreeBuilder not available", 1 if $@ ; my $parserh= XML::Twig->new(); my $html = $parserh->safe_parse_html("

Marco&company

"); diag $@ if $@; is( $html->first_elt( 'body')->sprint , $expected_body, "& in text, converting html with treebuilder"); } is( XML::Twig::_unescape_cdata( '<tag att="foo&bar&baz">>></tag>'), '>>', '_unescape_cdata'); SKIP: { skip "safe_print_to_file method does not work on Windows", 6 if $^O =~ m{win}i; # testing safe_print_to_file my $tmp= "safe_print_to_file.xml"; my $doc= "foo"; unlink( $tmp); # no check, it could not be there my $t1= XML::Twig->nparse( $doc)->safe_print_to_file( $tmp); ok( -f $tmp, "safe_print_to_file created document"); my $t2= XML::Twig->nparse( $tmp); is( $t2->sprint, $t1->sprint, "generated document identical to original document"); unlink( $tmp); my $e1= XML::Twig->parse( 'foobar')->first_elt( 'b')->safe_print_to_file( $tmp); ok( -f $tmp, "safe_print_to_file on elt created document"); $t2= XML::Twig->nparse( $tmp); is( $t2->sprint, 'bar', "generated sub-document identical to original sub-document"); unlink( $tmp); # failure modes eval { XML::Twig->nparse( $tmp); }; like( $@, qr/Couldn't open $tmp:/, 'parse a non-existent file'); my $non_existent="safe_non_existent_I_hope_01/tmp"; while( -f $non_existent) { $non_existent++; } # most likely unnecessary ;--) eval { $t1->safe_print_to_file( $non_existent); }; like( $@, qr/(does not exist|is not a directory)/, 'safe_print_to_file in non-existent dir'); } exit; XML-Twig-3.52/t/test_error_with_unicode_layer0000755000175000017500000000113012732215763021576 0ustar mrodrigumrodriguuse XML::Twig; use strict; use Config; my( $infile)= @ARGV; my $perl= used_perl(); open( FH, "$perl -p -e1 $infile |") or die $!; XML::Twig->nparse( \*FH); die "OK\n"; sub used_perl { my $perl; if( $^O eq 'VMS') { $perl= $Config{perlpath}; } # apparently $^X does not work on VMS else { $perl= $^X; } # but $Config{perlpath} does not work in 5.005 if ($^O ne 'VMS' && $Config{_exe} && $perl !~ m{$Config{_exe}$}i) { $perl .= $Config{_exe}; } $perl .= " -Iblib/lib"; if( $ENV{TEST_COVER}) { $perl .= " -MDevel::Cover"; } return $perl; } XML-Twig-3.52/t/test_expand_external_entities.xml0000644000175000017500000000015512732215763022377 0ustar mrodrigumrodrigu

&ent1;

&ent2;

more &ent1;

XML-Twig-3.52/t/pod.t0000755000175000017500000000047412732215763014507 0ustar mrodrigumrodrigu#!/usr/bin/perl use strict; use warnings; if( ! $ENV{TEST_AUTHOR} ) { print "1..1\nok 1\n"; warn "Author test. Set \$ENV{TEST_AUTHOR} to a true value to run.\n"; exit; } eval "use Test::Pod 1.00"; if( $@) { print "1..1\nok 1\n"; warn "skipping, Test::Pod required\n"; } else { all_pod_files_ok( ); } exit 0; XML-Twig-3.52/t/test3.t0000755000175000017500000000752512732215763014773 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; # This just tests a complete twig, no callbacks # additional tests for element creation/parse and # space policy # plus test for the is_pcdata method $|=1; use XML::Twig; my $i=0; my $failed=0; my $TMAX=23; # do not forget to update! print "1..$TMAX\n"; my $p1= XML::Twig::Elt->new( 'para', 'p1'); $p1->set_id( 'p1'); etest( $p1, 'para', 'p1', 'Element creation'); my $p2= XML::Twig::Elt->parse( 'para2'); etest( $p2, 'para', 'p2', 'Element parse'); my $s1= parse XML::Twig::Elt( '
title1para 3
'); etest( $s1, 'section', 's1', 'Element parse (complex)'); my $p3= $s1->first_child( 'para'); etest( $p3, 'para', 'p3', 'Element parse (sub-element)'); my $string= "\n

para

\n

\n
"; my $t1= new XML::Twig( DiscardSpacesIn => [ 'doc']); $t1->parse( $string); sttest( $t1->root, "

para

\n

", 'DiscardSpacesIn'); my $t2= new XML::Twig( DiscardSpacesIn => [ 'doc', 'p']); $t2->parse( $string); sttest( $t2->root, "

para

", 'DiscardSpacesIn'); my $t3= new XML::Twig( KeepSpaces =>1); $t3->parse( $string); sttest( $t3->root, $string, 'KeepSpaces'); my $t4= new XML::Twig( KeepSpacesIn =>[ 'p']); $t4->parse( $string); sttest( $t4->root, "

para

\n

", 'KeepSpacesIn'); my $p4= XML::Twig::Elt->parse( $string, KeepSpaces => 1); sttest( $p4, $string, 'KeepSpaces'); my $p5= XML::Twig::Elt->parse( $string, DiscardSpaces => 1); sttest( $p5, '

para

', "DiscardSpaces"); $p5= XML::Twig::Elt->parse( $string); sttest( $p5, '

para

', "DiscardSpaces (def)"); my $p6= XML::Twig::Elt->parse( $string, KeepSpacesIn => ['p']); sttest( $p6, "

para

\n

", "KeepSpacesIn 1"); my $p7= XML::Twig::Elt->parse( $string, KeepSpacesIn => [ 'doc', 'p']); sttest( $p7, "\n

para

\n

\n
", "KeepSpacesIn 2"); my $p8= XML::Twig::Elt->parse( $string, DiscardSpacesIn => ['doc']); sttest( $p8, "

para

\n

", "DiscardSpacesIn 1 "); my $p9= XML::Twig::Elt->parse( $string, DiscardSpacesIn => [ 'doc', 'p']); sttest( $p9, "

para

", "DiscardSpacesIn 2"); my $string2= "

para bold end of para

"; my $p10= XML::Twig::Elt->parse( $string2,); sttest( $p10, '

para bold end of para

', "mixed content"); my $string3= "\n

para

\n

\n

\n
"; my $p11= XML::Twig::Elt->parse( $string3, KeepSpaces => 1); sttest( $p4, $string, 'KeepSpaces'); my $p12= XML::Twig::Elt->parse( $string3, KeepSpacesIn => [ 'doc']); sttest( $p12, "\n

para

\n

\n
", 'KeepSpacesIn'); my $p13= XML::Twig::Elt->parse( $string3, KeepSpaces => 1); sttest( $p13, "\n

para

\n

\n

\n
", 'KeepSpaces'); my $p14= XML::Twig::Elt->parse( $string2); my $is_pcdata= $p14->is_pcdata; ok( $is_pcdata ? 0 : 1, "is_pcdata on a "); my $pcdata= $p14->first_child( PCDATA); $is_pcdata= $pcdata->is_pcdata; ok( $pcdata->is_pcdata, "is_pcdata on PCDATA"); my $erase_string='text 1 text 2 text 3 text 4'; my $er_t= new XML::Twig( TwigHandlers => { selt => sub { $_[1]->erase; } }); $er_t->parse( $erase_string); sttest( $er_t->root, 'text 1 text 2 text 3 text 4', "erase"); # test whether Twig packs strings my $br_pcdata= "line 1\nline 2\nline 3\n"; my $doc_br_pcdata= "$br_pcdata"; my $t_br_pcdata= new XML::Twig(); $t_br_pcdata->parse( $doc_br_pcdata); $pcdata= $t_br_pcdata->root->first_child->pcdata; stest( $pcdata, $br_pcdata, "multi-line pcdata"); exit 0; XML-Twig-3.52/t/test_pi_handler.t0000755000175000017500000000343312732215763017067 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use Carp; # test for the various conditions in navigation methods $|=1; use XML::Twig; if( $] < 5.008) { warn "skipped, not tested under perl < 5.8\n"; print "1..1\nok 1\n"; exit 0; } my $nb_tests=4; print "1..$nb_tests\n"; my $result; my $t= XML::Twig->new( pi => 'process', twig_handlers => { '?pi' => sub { $result .=$_->text; } }, ); $t->parse( q{}); my $expected= ''; if( $result eq $expected) { print "ok 1\n"; } else { print "not ok 1\n"; warn "expected: $expected\nfound : $result\n"; } $result=''; $t= XML::Twig->new( pi => 'process', twig_handlers => { '?pi' => sub { $result .=$_->text; } }, ); $t->parse( q{}); $expected= ''; if( $result eq $expected) { print "ok 2\n"; } else { print "not ok 2\n"; warn "expected: $expected\nfound : $result\n"; } $result=''; $t= XML::Twig->new( twig_handlers => { 'doc' => sub { $result= $_->{extra_data}; } },); $t->parse( q{}); $expected= ''; if( $result eq $expected) { print "ok 3\n"; } else { print "not ok 3\n"; warn "expected: $expected\nfound : $result\n"; } $result=''; $t= XML::Twig->new( pi => 'process', twig_roots => { '?pi' => sub { $result= $_->target . "/" . $_->data; }, elt => sub { }, }); $t->parse( q{}); $expected= 'pi/pi in doc '; if( $result eq $expected) { print "ok 4\n"; } else { print "not ok 4\n"; warn "expected: /$expected/\nfound : /$result/\n"; } exit 0; XML-Twig-3.52/t/test_class_selector.t0000755000175000017500000000314512732215763017767 0ustar mrodrigumrodrigu#!/usr/bin/perl use strict; use warnings; use XML::Twig; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,'t'); use tools; my @DATA; while( ) { chomp; my( $cond, $expected)= split /\s*=>\s*/; push @DATA, [$cond, $expected]; } my $TMAX= 20; print "1..$TMAX\n"; my $doc=q{e1e2e3}; my $doc_dot=q{wrong e1wrong e2wrong e3e1e2e3}; my $t= XML::Twig->parse( $doc); foreach my $test (@DATA) { my( $cond, $expected)= @$test; my $got= join '', map { $_->text } $t->root->children( $cond); is( $got, $expected, "navigation: $cond" ); } if( $] > 5.008) { foreach my $test (@DATA) { my( $cond, $expected)= @$test; my $got=''; XML::Twig->new( twig_handlers => { $cond => sub { $got.= $_->text } }, css_sel => 1, ) ->parse( $doc); is( $got, $expected, "handlers (css_sel enabled): $cond" ); } foreach my $test (@DATA) { my( $cond, $expected)= @$test; next if $cond !~ m{^e}; my $got=''; XML::Twig->new( twig_handlers => { $cond => sub { $got.= $_->text } },) ->parse( $doc_dot); is( $got, $expected, "handlers (css_sel NOT enabled): $cond" ); } } else { skip( 12, 'not tested under perl < 5.8'); } __DATA__ e.c1 => e1e2 e.c1[@a="v1"] => e2 e.c1[@a] => e2 e.c1[@a="v2"] => *.c1[@a="v1"] => e2 *.c1[@a="v2" or @a="v1"] => e2 .c1[@a="v1"] => e2 .c1[@a="v2" or @a="v1"] => e2 XML-Twig-3.52/t/test_drop_comments.t0000755000175000017500000000174112732215763017633 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; use XML::Twig; print "1..3\n"; my $xml = < value XML_TEST { my $twig1 = XML::Twig->new(comments => 'keep', keep_spaces => 1); $twig1->parse($xml); ok ($twig1->sprint() =~ //s, 'keep comments'); #print $twig1->sprint, "\n", '-'x80, "\n"; # keeps comments ok $twig1->dispose; } { my $twig2 = XML::Twig->new(comments => 'drop', keep_spaces => 1); $twig2->parse($xml); ok ($twig2->sprint() !~ //s, 'drop comments'); #print $twig2->sprint, "\n", '-'x80, "\n"; # drops comments ok $twig2->dispose; } { my $twig3 = XML::Twig->new(comments => 'keep', keep_spaces => 1); $twig3->parse($xml); ok ($twig3->sprint() =~ //s, 'keep comments'); #print $twig3->sprint, "\n", '-'x80, "\n"; # drops comments!! $twig3->dispose; } exit 0; XML-Twig-3.52/t/test_wrapped.t0000755000175000017500000000616612732215763016432 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; $|=1; my $DEBUG=0; use XML::Twig; my $TMAX=13; print "1..$TMAX\n"; unless( XML::Twig::_use( 'Text::Wrap')) { print "1..1\nok 1\n"; warn "skipping: Text::Wrap not available\n"; exit; } while( my $doc= get_doc()) { my $result= XML::Twig->nparse( pretty_print => 'wrapped', $doc)->sprint; my $expected= get_doc(); foreach ($result, $expected) { s{ }{.}g; } is( $result, $expected, ''); } XML::Twig::Elt->set_wrap(0); is( XML::Twig::Elt->set_wrap(1), 0, "set_wrap - 1"); is( XML::Twig::Elt->set_wrap(1), 1, "set_wrap - 2"); is( XML::Twig::Elt->set_wrap(0), 1, "set_wrap - 3"); is( XML::Twig::Elt->set_wrap(0), 0, "set_wrap - 4"); is( XML::Twig::Elt::set_wrap(1), 0, "set_wrap - 5"); is( XML::Twig::Elt::set_wrap(1), 1, "set_wrap - 6"); is( XML::Twig::Elt::set_wrap(0), 1, "set_wrap - 7"); is( XML::Twig::Elt::set_wrap(0), 0, "set_wrap - 8"); sub get_doc { local $/="\n\n"; my $doc= ; if( $doc) { $doc=~ s{\n\n}{\n}; $doc=~ s/\{([^}]*)\}/$1/eeg; } return $doc; } __DATA__ {"foo" x 40} foofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoo foofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoo {"foo" x 80} foofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoo foofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofo ofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoof oofoofoofoofoofoofoofoofoofoofoo
{"foo" x 40}
foofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoof oofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoofoo
{"foo " x 40} {"bar " x 40} foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar {"foo " x 40}{ "aaa" x 60}{ "foo "x20 } {"bar " x 40} foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaafoo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo foo bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar bar XML-Twig-3.52/t/xmlxpath_25scope.t0000755000175000017500000000064312732215763017130 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 4); use XML::Twig::XPath; ok(1); eval { # Removing the 'my' makes this work?!? my $t= XML::Twig::XPath->new->parse( ''); ok( $t); $t->findnodes( '/test'); ok(1); die "This should be caught\n"; }; if ($@) { ok(1); } else { ok(0); } exit 0; XML-Twig-3.52/t/test2_1.exp0000644000175000017500000000236412732215763015534 0ustar mrodrigumrodrigu ]>
S1 I1S1 I2S1 TitleS1 P1S2 P2Note P1S1 para 3
S2 introS2 TitleS2 P1S2 P2S2 P3
Annex TitleAnnex P1Annex P2
XML-Twig-3.52/t/test2_1.res0000644000175000017500000000232413015347616015523 0ustar mrodrigumrodrigu ]>
S1 I1S1 I2S1 TitleS1 P1S2 P2Note P1S1 para 3
S2 introS2 TitleS2 P1S2 P2S2 P3
Annex TitleAnnex P1Annex P2
XML-Twig-3.52/t/test_new_features_3_15.t0000755000175000017500000000106312732215763020175 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; # test designed to improve coverage of the module use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; #$|=1; my $DEBUG=0; use XML::Twig; my $TMAX=1; print "1..$TMAX\n"; { my $indented="\n \n\n"; (my $straight=$indented)=~ s{\s}{}g; is( XML::Twig->new( pretty_print => 'indented')->parse( $indented)->sprint, $indented, "pretty printed doc"); exit; is( XML::Twig->new()->parse( $indented)->sprint, $straight, "non pretty printed doc"); } XML-Twig-3.52/t/test_new_features_3_18.t0000755000175000017500000001126112732215763020201 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; use XML::Twig; my $DEBUG=0; print "1..44\n"; { # test tag regexp handler my @res; my $doc=q{}; my $handlers= { qr/^foo_/ => sub { push @res, $_->tag; }, foo_f2 => sub { push @res, uc $_->tag; 0 }, }; my $expected= 'foo_f1:FOO_F2:foo_f1:foo_f3'; XML::Twig->new( twig_handlers => $handlers)->parse( $doc); my $res= join( ':', @res); is( $res, $expected, "tag regexp handlers"); } { # test tag regexp handler with i modifier my @res; my $doc=q{}; my $handlers= { qr/^foo_/i => sub { push @res, $_->tag; }, foo_f2 => sub { push @res, uc $_->tag; 0 }, }; my $expected= 'foo_f1:FOO_F2:foo_f1:foo_f3:FOO_f4'; XML::Twig->new( twig_handlers => $handlers)->parse( $doc); my $res= join( ':', @res); is( $res, $expected, "tag regexp handlers"); } { # test tag regexp handler with all modifier my @res; my $doc=q{}; my $handlers= { qr/^foo_/xism => sub { push @res, $_->tag; }, foo_f2 => sub { push @res, uc $_->tag; 0 }, }; my $expected= 'foo_f1:FOO_F2:foo_f1:foo_f3:FOO_f4'; XML::Twig->new( twig_handlers => $handlers)->parse( $doc); my $res= join( ':', @res); is( $res, $expected, "tag regexp handlers"); } { # testing last_descendant my $t= XML::Twig->new->parse( ' t_e_3 t_e_1 t_e_2t_n ' ); my %exp2id= ( '' => 't_n', 'n' => 'n1', '#ELT' => 'n1', 'e' => 'e2', 'e[@id="e1"]' => 'e1', 'e2' => undef, ); foreach my $exp (sort keys %exp2id) { my $expected= $exp2id{$exp}; is( result( $t->last_elt( $exp)), $expected, "last_elt( $exp)"); is( result( $t->root->last_descendant( $exp)), $expected, "last_descendant( $exp)"); } # some more tests to check that we stay in te subtree and that we get the last descendant if it is itself is( result( $t->last_elt( 'e3')), 'e3', 'last_elt( e3)'); is( result( $t->root->last_descendant( 'e3')), 'e3', 'last_descendant( e3)'); is( result( $t->root->first_child( 'e3')->last_descendant( 'e3')), 'e3', 'last_descendant( e3) (on e3)'); is( result( $t->root->first_child( 'e3')->last_descendant()), 't_e_3', 'last_descendant() (on e3)'); is_undef( $t->root->last_child->last_descendant( 'e3'), 'last_descendant (no result)'); is( result( $t->root->first_child( 'e4')->last_descendant( 'e4')), 'e4', 'last_descendant( e4) (on e4)'); is( result( $t->root->first_child( 'e4')->last_descendant( )), 'e4', 'last_descendant( ) (on e4)'); sub result { my( $elt)= @_; return undef unless $elt; return $elt->id || $elt->text; } } {# testing trim my $expected; while( ) { chomp; next unless( m{\S}); if( s{^#}{}) { $expected= $_; } is( XML::Twig->new->parse( $_)->trim->root->sprint, $expected, "trimming '$_'"); } } { # testing children_trimmed_text my $t = XML::Twig->new; $t->parse(" hell foo o, \n world"); is( join( ':', $t->root->children_trimmed_text("e")), "hell:o, world" , "children_trimmed_text (list context)"); my $scalar= $t->root->children_trimmed_text("e"); is( $scalar, "hello, world" , "children_trimmed_text (scalar context)"); is( join( ':', $t->root->children_text("e")), " hell : o, \n world" , "children_text (list context)"); $scalar= $t->root->children_text("e"); is( $scalar, " hell o, \n world" , "children_text (scalar context)"); } __DATA__ #text1 text2 text1 text2 text1 text2 text1 text2 text1 text2 text1 text2 text1 text2 text1 text2 text1 text2 #text1 text2 text3 text1 text2 text3 #text1 text2 text3 text1 text2 text3 # #text hah! yep text hah! yep XML-Twig-3.52/t/tools.pm0000644000175000017500000002655412732215763015242 0ustar mrodrigumrodrigu# $Id: /xmltwig/trunk/t/tools.pm 21 2006-09-12T17:31:19.157455Z mrodrigu $ use strict; use Config; use Carp; use vars qw/$TDEBUG $TFATAL/; BEGIN { if( grep { m{-[f]*[dv][f]*} } @ARGV) { $TDEBUG=1; warn "debug!\n"; } if( grep { m{-[dv]*f[dv]*\b} } @ARGV) { $TFATAL= 1; warn "fatal!\n";} } { my $test_nb; BEGIN { $test_nb=0; } sub is { my( $got, $expected, $message) = @_; $test_nb++; if( ( !defined( $expected) && !defined( $got) ) || ($expected eq $got) ) { print "ok $test_nb"; print " $message" if( $TDEBUG); print "\n"; return 1; } else { print "not ok $test_nb\n"; if( length( $expected) > 20) { warn "$message:\nexpected: '$expected'\ngot : '$got'\n"; } else { warn "$message: expected '$expected', got '$got'\n"; } croak if $TFATAL; return 0; } } sub isnt { my( $got, $expected, $message) = @_; $test_nb++; if( $expected ne $got) { print "ok $test_nb"; print " $message" if( $TDEBUG); print "\n"; return 1; } else { print "not ok $test_nb\n"; if( length( $expected) > 20) { warn "$message:\ngot : '$got'\n"; } else { warn "$message: got '$got'\n"; } croak if $TFATAL; return 0; } } sub matches { my $got = shift; my $expected_regexp= shift; my $message = shift; $test_nb++; if( $got=~ /$expected_regexp/) { print "ok $test_nb"; print " $message" if( $TDEBUG); print "\n"; return 1; } else { print "not ok $test_nb\n"; warn "$message: expected to match /$expected_regexp/, got '$got'\n"; croak if $TFATAL; return 0; } } sub not_matches { my $got = shift; my $expected_regexp= shift; my $message = shift; $test_nb++; if( $got!~ /$expected_regexp/) { print "ok $test_nb"; print " $message" if( $TDEBUG); print "\n"; return 1; } else { print "not ok $test_nb\n"; warn "$message: expected to NOT match /$expected_regexp/, got '$got'\n"; croak if $TFATAL; return 0; } } sub ok { my $cond = shift; my $message=shift; $test_nb++; if( $cond) { print "ok $test_nb"; print " $message" if( $TDEBUG); print "\n"; return 1; } else { print "not ok $test_nb\n"; warn "$message: false\n"; croak if $TFATAL; return 0; } } sub nok { my $cond = shift; my $message=shift; $test_nb++; if( !$cond) { print "ok $test_nb"; print " $message" if( $TDEBUG); print "\n"; return 1; } else { print "not ok $test_nb\n"; warn "$message: true (should be false): '$cond'\n"; croak if $TFATAL; return 0; } } sub is_undef { my $cond = shift; my $message=shift; $test_nb++; if( ! defined( $cond)) { print "ok $test_nb"; print "$message" if( $TDEBUG); print "\n"; return 1; } else { print "not ok $test_nb\n"; warn "$message is defined: '$cond'\n"; croak if $TFATAL; return 0; } } my $devnull = File::Spec->devnull; sub sys_ok { my $message=pop; $test_nb++; my $status= system join " ", @_, "2>$devnull"; if( !$status) { print "ok $test_nb"; print " $message" if( $TDEBUG); print "\n"; } else { print "not ok $test_nb\n"; warn "$message: $!\n"; } } sub sys_nok { my $message=pop; $test_nb++; my $status= system join " ", @_, "2>$devnull"; if( $status) { print "ok $test_nb"; print " $message" if( $TDEBUG); print "\n"; } else { print "not ok $test_nb\n"; warn "$message: $!\n"; } } sub is_like { my( $got, $expected, $message) = @_; $message ||=''; $test_nb++; if( clean_sp( $expected) eq clean_sp( $got)) { print "ok $test_nb"; print " $message" if( $TDEBUG); print "\n"; return 1; } else { print "not ok $test_nb\n"; if( length( $expected) > 20) { warn "$message:\nexpected: '$expected'\ngot : '$got'\n"; } else { warn "$message: expected '$expected', got '$got'\n"; } warn "compact expected: ", clean_sp( $expected), "\n", "compact got: ", clean_sp( $got), "\n"; croak if $TFATAL; return 0; } } sub etest { my ($elt, $gi, $id, $message)= @_; $test_nb++; unless( $elt) { print "not ok $test_nb\n -- $message\n"; warn " -- no element returned"; return; } if( ($elt->tag eq $gi) && ($elt->att( 'id') eq $id)) { print "ok $test_nb\n"; return $elt; } print "not ok $test_nb\n -- $message\n"; warn " -- expecting ", $gi, " ", $id, "\n"; warn " -- found ", $elt->tag, " ", $elt->id, "\n"; return $elt; } # element text test sub ttest { my ($elt, $text, $message)= @_; $test_nb++; unless( $elt) { print "not ok $test_nb\n -- $message\n"; warn " -- no element returned "; return; } if( $elt->text eq $text) { print "ok $test_nb\n"; return $elt; } print "not ok $test_nb\n -- $message\n"; warn " expecting ", $text, "\n"; warn " found ", $elt->text, "\n"; return $elt; } # testing if the result is a strings sub stest { my ($result, $expected, $message)= @_; $result ||=''; $expected ||=''; $test_nb++; if( $result eq $expected) { print "ok $test_nb\n"; } else { print "not ok $test_nb\n -- $message\n"; warn " expecting ", $expected, "\n"; warn" found ", $result, "\n"; } } # element sprint test sub sttest { my ($elt, $text, $message)= @_; $test_nb++; unless( $elt) { print "not ok $test_nb\n -- $message\n"; warn " -- no element returned "; return; } if( $elt->sprint eq $text) { print "ok $test_nb\n"; return $elt; } print "not ok $test_nb\n -- $message\n"; warn " expecting ", $text, "\n"; warn " found ", $elt->sprint, "\n"; return $elt; } # testing if the result matches a pattern sub mtest { my ($result, $expected, $message)= @_; $test_nb++; if( $result=~ /$expected/) { print "ok $test_nb\n"; } else { print "not ok $test_nb\n -- $message\n"; warn " expecting ", $expected, "\n"; warn" found ", $result, "\n"; } } # test 2 files sub ftest { my ($result_file, $expected_file, $message)= @_; my $result_string= clean_sp( slurp( $result_file)); my $expected_string= clean_sp( slurp( $expected_file)); $test_nb++; if( $result_string eq $expected_string) { print "ok $test_nb\n"; } else { print "not ok $test_nb\n -- $message\n"; warn " expecting ", $expected_string, "\n"; warn " found ", $result_string, "\n"; } } sub slurp { my( $file)= @_; local undef $/; open( FH, "<$file") or die "cannot slurp '$file': $!\n"; my $content=; close FH; return $content; } sub spit { my( $file, $content)= @_; open( FH, ">$file") or die "cannot spit '$file': $!\n"; print FH $content; close FH; } sub stringifyh { my %h= @_; return '' unless @_; return join ':', map { "$_:$h{$_}"} sort keys %h; } sub stringify { return '' unless @_; return join ":", @_; } my %seen_message; sub skip { my( $nb_skip, $message)= @_; $message ||=''; unless( $seen_message{$message}) { warn "\n$message: skipping $nb_skip tests\n"; $seen_message{$message}++; } for my $test ( ($test_nb + 1) .. ($test_nb + $nb_skip)) { print "ok $test\n"; warn "skipping $test ($message)\n" if( $TDEBUG); } $test_nb= $test_nb + $nb_skip; return 1; } } sub tags { return join ':', map { $_->gi } @_ } sub ids { return join ':', map { $_->att( 'id') || '<' . $_->gi . ':no_id>' } @_ } sub id_list { my $list= join( "-", sort keys %{$_[0]->{twig_id_list}}); if( !defined $list) { $list= ''; } return $list; } sub id { my $elt= $_[0]->elt_id( $_[1]) or return "unknown"; return $elt->att( $_[0]->{twig_id}); } sub clean_sp { my $str= shift; $str=~ s{\s+}{}g; return $str; } sub normalize_xml { my $xml= shift; $xml=~ s{\n}{}g; $xml=~ s{'}{"}g; #' $xml=~ s{ />}{/>}g; return $xml; } sub xml_escape { my $string= shift; #$string=~ s{&}{&}g; $string=~ s{<}{<}g; $string=~ s{>}{>}g; $string=~ s{"}{"}g; #" $string=~ s{'}{'}g; #' return $string; } sub hash_ent_text { my %ents= @_; return map { $_ => "" } keys %ents; } sub string_ent_text { my %ents= @_; my %hash_ent_text= hash_ent_text( %ents); return join( '', map { $hash_ent_text{$_} } sort keys %hash_ent_text); } 1; sub _use { my( $module, $version)= @_; $version ||= 0; $version=~ s{^\s*(\d+\.\d+).*}{$1}; # trim version numbers like 2.42_01 if( eval "require $module") { import $module; no strict 'refs'; my $mversion= ${"${module}::VERSION"}; $mversion=~ s{^\s*(\d+\.\d+).*}{$1}; # trim version numbers like 2.42_01 if( $mversion >= $version ) { return 1; } else { croak if $TFATAL; return 0; } } } sub test_get_xpath { my( $t, $exp, $expected)= @_; is( ids( $t->get_xpath( $exp)), $expected, "$exp xpath exp"); } sub perl_io_layer_used { if( $] >= 5.008) { return eval '${^UNICODE} & 24'; } # in a eval to pass tests in 5.005 else { croak if $TFATAL; return 0; } } # slurp and discard locale errors sub slurp_error { my( $file)= @_; my $error= eval { slurp( $file); } || ''; $error=~ s{^\s$}{}mg; $error=~ s{^[^:]+: warning:.*$}{}mg; return $error; } sub used_perl { my $perl; if( $^O eq 'VMS') { $perl= $Config{perlpath}; } # apparently $^X does not work on VMS else { $perl= $^X; } # but $Config{perlpath} does not work in 5.005 if ($^O ne 'VMS' && $Config{_exe} && $perl !~ m{$Config{_exe}$}i) { $perl .= $Config{_exe}; } $perl .= " -Iblib/lib"; if( $ENV{TEST_COVER}) { $perl .= " -MDevel::Cover"; } return $perl; } # scrubs xhtml generated by tools from likely to change bits sub scrub_xhtml { my( $html)= @_; $html=~ s{]*>}{}; # scrub doctype $html=~ s{\s*xmlns="[^"]*"}{}; # scrup namespace declaration return $html; } __END__ =head1 SYNOPSYS use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use tools; XML-Twig-3.52/t/test_3_36.t0000755000175000017500000004060212732215763015433 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; $|=1; my $DEBUG=0; use XML::Twig; my $TMAX=67; print "1..$TMAX\n"; { my $doc=q{title 1title 2}; my $ids; XML::Twig->parse( twig_handlers => { 's[t]' => sub { $ids .= $_->id; } }, $doc); is( $ids, 's2s1', 's[t]'); } { my $string = q{power}; my $t=XML::Twig->parse( $string); my $root = $t->root(); my $copy = $root->copy(); is( $copy->sprint, $root->sprint, 'empty elements in a copy') } { my $doc=q{e1e2e3f1}; my $t=XML::Twig->parse( $doc); my $e1= $t->first_elt( 'e'); is( all_text( $e1->siblings), 'e2:e3:f1', 'siblings, all'); is( all_text( $e1->siblings( 'e')), 'e2:e3', 'siblings(e)'); is( all_text( $e1->siblings('f')), 'f1', 'siblings(f)'); my $e2= $e1->next_sibling( 'e'); is( all_text( $e2->siblings), 'e1:e3:f1', 'siblings (2cd elt), all'); is( all_text( $e2->siblings( 'e')), 'e1:e3', 'siblings(e) (2cd elt)'); is( all_text( $e2->siblings('f')), 'f1', 'siblings(f) (2cd elt)'); my $f= $e1->next_sibling( 'f'); is( all_text( $f->siblings), 'e1:e2:e3', 'siblings (f elt), all'); is( all_text( $f->siblings( 'e')), 'e1:e2:e3', 'siblings(e) (f elt)'); is( all_text( $f->siblings('f')), '', 'siblings(f) (f elt)'); } { my $doc= q{barbar2ff1}; my $t= XML::Twig->new( att_accessors => [ 'b', 'a' ], elt_accessors => [ 'x', 'e', 'f' ], field_accessors => [ 'f3', 'f1' ]) ->parse( $doc); my $d= $t->root; is( $d->e->a, 'foo', 'accessors (elt + att)'); is( $d->f->a, 'foo2', 'accessors (elt + att), on f'); is( $d->f1, 'ff1', 'field accessor'); eval { $t->elt_accessors( 'tag'); }; matches( $@, q{^attempt to redefine existing method tag using elt_accessors }, 'duplicate elt accessor'); eval { $t->field_accessors( 'tag'); }; matches( $@, q{^attempt to redefine existing method tag using field_accessors }, 'duplicate elt accessor'); $t->att_accessors( 'a2'); is( $d->f->a2, 'toto', 'accessors created after the parse'); $t->elt_accessors( 'f'); $t->att_accessors( 'a2'); is( $d->f->a2, 'toto', 'accessors created twice after the parse'); $t->field_accessors( 'f1'); is( $d->f1, 'ff1', 'field accessor (created twice)'); } { my $doc=q{foobarvaztoto}; my $t= XML::Twig->parse( $doc); $t->elt_id( 'i1')->set_outer_xml( 'boh'); $t->elt_id( 'i3')->set_outer_xml( 'duh'); is( $t->sprint, 'bohbarduh', 'set_outer_xml'); } { my $doc= q{}; my $t= XML::Twig->parse( $doc); $t->first_elt( 'e')->cut_children( 'g'); is( $t->sprint, q{}, "cut_children leaves some children"); } { if( $] >= 5.006) { my $t= XML::Twig->parse( q{}); $t->first_elt( 'e')->latt( 'a')= 'b'; is( $t->sprint, q{}, 'lvalued attribute (no attributes)'); $t->first_elt( 'e')->latt( 'c')= 'd'; is( $t->sprint, q{}, 'lvalued attribute (attributes)'); $t->first_elt( 'e')->latt( 'c')= ''; is( $t->sprint, q{}, 'lvalued attribute (modifying existing attributes)'); $t->root->lclass= 'foo'; is( $t->sprint, q{}, 'lvalued class (new class)'); $t->root->lclass=~ s{fo}{tot}; is( $t->sprint, q{}, 'lvalued class (modify class)'); $t= XML::Twig->parse( ''); $t->root->latt( 'a')++; is( $t->sprint, '', '++ on attribute'); } else { skip( 6 => "cannot use lvalued attributes with perl $]"); } } # used for all HTML parsing tests with HTML::Tidy my $DECL= qq{\n}; my $NS= 'xmlns="http://www.w3.org/1999/xhtml"'; { # testing set_inner_html if( !XML::Twig::_use( 'HTML::Tidy')) { skip( 4 => "need HTML::Tidy to use the use_tidy method method"); } elsif( !XML::Twig::_use( 'LWP')) { skip( 4 => "need LWP to use set_inner_html method"); } elsif( !XML::Twig::_use( 'HTML::TreeBuilder')) { skip( 4 => "need LWP to use set_inner_html method"); } else { my $doc= 'a titlepar 1

par 2
after the break'; my $t= XML::Twig->new( use_tidy => 1)->parse_html( $doc); my $inner= '

  • foo
  • bar
'; $t->first_elt( 'p')->set_inner_html( $inner); (my $expected= $t->sprint)=~ s{

.*

}{

$inner

}; is( $t->sprint, $expected, "set_inner_html"); $inner= q{2cd title}; $t->first_elt( 'head')->set_inner_html( $inner); $inner=~ s{>$}{/>}; $expected=~ s{.*}{$inner}; $expected=~ s{(]*)(/>)}{$1 $2}g; is( $t->sprint, $expected, "set_inner_html (in head)"); $inner= q{

just a p

}; $t->root->set_inner_html( $inner); $expected= qq{$DECL$inner}; is( $t->sprint, $expected, "set_inner_html (all doc)"); $inner= q{the content of the
body}; $t->first_elt( 'body')->set_inner_html( $inner); $expected= qq{$DECL$inner}; $expected=~ s{
}{
}g; is( $t->sprint, $expected, "set_inner_html (body)"); } } { if( !XML::Twig::_use( "File::Temp")) { skip( 5, "File::Temp not available"); } elsif( !XML::Twig::_use( "HTML::Tidy")) { skip( 5, "HTML::Tidy not available"); } elsif( !XML::Twig::_use( "LWP")) { skip( 5, "LWP not available"); } elsif( !XML::Twig::_use( "LWP::UserAgent")) { skip( 5, "LWP::UserAgent not available"); } else { # parsefile_html_inplace my $file= "test_3_36.html"; spit( $file, q{foo

this is it

>}); XML::Twig->new( use_tidy => 1, twig_handlers => { p => sub { $_->set_tag( 'h1')->flush; }}) ->parsefile_html_inplace( $file); matches( slurp( $file), qr/

/, "parsefile_html_inplace"); XML::Twig->new( use_tidy => 1, twig_handlers => { h1 => sub { $_->set_tag( 'blockquote')->flush; }}, error_context => 6) ->parsefile_html_inplace( $file, '.bak'); matches( slurp( $file), qr/
/, "parsefile_html_inplace (with backup, checking file)"); matches( slurp( "$file.bak"), qr/

/, "parsefile_html_inplace (with backup, checking backup)"); unlink( "$file.bak"); XML::Twig->new( use_tidy => 1, twig_handlers => { blockquote => sub { $_->set_tag( 'div')->flush; }}) ->parsefile_html_inplace( $file, 'bak_*'); matches( slurp( $file), qr/
/, "parsefile_html_inplace (with complex backup, checking file)"); matches( slurp( "bak_$file"), qr/
/, "parsefile_html_inplace (with complex backup, checking backup)"); unlink( "bak_$file"); unlink $file; } } { if( _use( 'HTML::Tidy')) { XML::Twig->set_pretty_print( 'none'); my $html=q{

Title

foo
bar

}; my $expected= qq{$DECL

Title

foo
\nbar

}; is( XML::Twig->new( use_tidy => 1 )->safe_parse_html( $html)->sprint, $expected, 'safe_parse_html'); my $html_file= "t/test_3_30.html"; spit( $html_file, $html); is( XML::Twig->new( use_tidy => 1 )->safe_parsefile_html( $html_file)->sprint, $expected, 'safe_parsefile_html'); if( _use( 'LWP')) { is( XML::Twig->new( use_tidy => 1 )->safe_parseurl_html( "file:$html_file")->sprint, $expected, 'safe_parseurl_html'); } else { skip( 1, "LWP not available, cannot test safe_parseurl_html"); } unlink $html_file; } else { skip( 3, "HTML::Tidy not available, cannot test safe_parse.*_html methods with the use_tidy option"); } } { # testing parse_html with use_tidy if( XML::Twig::_use( 'HTML::Tidy') && XML::Twig::_use( 'LWP::Simple') && XML::Twig::_use( 'LWP::UserAgent')) { my $html= q{Tt
t2

t3}; my $tidy= HTML::Tidy->new( { output_xhtml => 1, # duh! tidy_mark => 0, # do not add the "generated by tidy" comment numeric_entities => 1, char_encoding => 'utf8', bare => 1, clean => 1, doctype => 'transitional', fix_backslash => 1, merge_divs => 0, merge_spans => 0, sort_attributes => 'alpha', indent => 0, wrap => 0, break_before_br => 0 } ); $tidy->ignore( type =>1, type => 2); my $expected= $tidy->clean( $html); $expected=~ s{>new( use_tidy => 1)->parse_html( $html)->sprint, $expected, 'parse_html string using HTML::Tidy'); my $html_file= File::Spec->catfile( "t", "test_new_features_3_22.html"); spit( $html_file => $html); if( -f $html_file) { is_like( XML::Twig->new( use_tidy => 1)->parsefile_html( $html_file)->sprint, $expected, 'parsefile_html using HTML::Tidy'); open( HTML, "<$html_file") or die "cannot open HTML file '$html_file': $!"; is_like( XML::Twig->new( use_tidy => 1)->parse_html( \*HTML)->sprint, $expected, 'parse_html fh using HTML::Tidy'); } else { skip( 2, "could not write HTML file in t directory, check permissions"); } } else { skip( 3 => 'need HTML::Tidy and LWP to test parse_html with the use_tidy option'); } } { if( XML::Twig::_use( 'HTML::TreeBuilder')) { my $html_with_Amp= XML::Twig->new->parse_html( '&Amp;')->sprint; if( $HTML::TreeBuilder::VERSION <= 3.23) { is( $html_with_Amp, '&', '&Amp; used in html (fixed HTB < 4.00)'); } else { is( $html_with_Amp, '&Amp;', '&Amp; used in html (NOT fixed HTB > r.00)'); } is( XML::Twig->new->parse_html( '')->sprint, '', 'extra XML declaration in html' ); my $doc=q{

fooah

}; (my $expected= $doc)=~s{

}{

}g; is_like( XML::Twig->parse($doc)->sprint, $expected, 'CDATA and comments in html'); } else { skip( 3, 'need HTML::TreeBuilder for additional HTML tests'); } } { my $t= XML::Twig->parse( ''); $t->{twig_root}= undef; is( $t->first_elt, undef, 'first_elt on empty tree'); is( $t->last_elt, undef, 'last_elt on empty tree'); } { if( XML::Twig::_use( 'XML::XPathEngine') && XML::Twig::_use( 'XML::Twig::XPath')) { my $t= XML::Twig::XPath->new->parse( '

'); eval { $t->get_xpath( '//d[.//p]'); }; matches( $@, qr{the expression is a valid XPath statement, and you are using XML::Twig::XPath}, 'non XML::Twig xpath with get_xpath'); } else { skip( 1); } } { my $r= XML::Twig->parse( '')->root; is( $r->is_empty, 0, 'non empty element'); $r->cut_children( 'e'); is( $r->is_empty, 0, 'non empty element after cut_children'); $r->cut_children( 'e1'); is( $r->is_empty, 1, 'empty element after cut_children'); } { my $r= XML::Twig->parse( '')->root; is( $r->is_empty, 0, 'non empty element'); $r->cut_descendants( 'e'); is( $r->is_empty, 0, 'non empty element after cut_descendants'); $r->cut_descendants( 'e1'); is( $r->is_empty, 1, 'empty element after cut_descendants'); } { if( XML::Twig::_use( 'LWP::Simple')) { eval { XML::Twig->parse( 'file://not_there'); }; matches( $@, 'no element found', 'making xparse fail'); } else { skip( 1); } } { is( XML::Twig::Elt::_short_text( 'a', 0), 'a', 'shorten with no length'); } { is( XML::Twig->parse( comments => 'process', pi => 'process', pretty_print => 'indented', "" )->sprint, "\n \n \n \n \n \n \n \n \n\n", 'indenting pi and comments' ); } { XML::Twig::_set_debug_handler(3); XML::Twig->new( twig_handlers => { 'foo[@a="bar"]' => sub { $_->att( 'a')++; } }); my $expected=<<'EXPECTED'; parsing path 'foo[@a="bar"]' predicate is: '@a="bar"' predicate becomes: '$elt->{'a'} eq "bar"' perlfunc: no warnings; my( $stack)= @_; my @current_elts= (scalar @$stack); my @new_current_elts; my $elt; warn q{checking path 'foo[@a="bar"]' }; foreach my $current_elt (@current_elts) { next if( !$current_elt); $current_elt--; $elt= $stack->[$current_elt]; if( ($elt->{'##tag'} eq "foo") && $elt->{'a'} eq "bar") { push @new_current_elts, $current_elt;} } unless( @new_current_elts) { warn qq%fail at cond '($elt->{'##tag'} eq "foo") && $elt->{'a'} eq "bar"'%; return 0; } @current_elts= @new_current_elts; @new_current_elts=(); warn "handler for 'foo[@a="bar"]' triggered\n"; return q{foo[@a="bar"]}; last tag: 'foo', test_on_text: '0' score: anchored: 0 predicates: 3 steps: 1 type: 3 EXPECTED my $got= XML::Twig::_return_debug_handler(); $got=~ s{\\}{}g; $expected=~ s{\\}{}g; is( $got, $expected, 'handler content'); XML::Twig::_set_debug_handler( 0); } { my $t=XML::Twig->parse( elt_class => 'XML::Twig::Elt', ''); is( ref($t->root), 'XML::Twig::Elt', 'alternate class... as the default one!'); } { my( $triggered_bare, $triggered_foo); my $t= XML::Twig->new( twig_handlers => { 'e1[@#a]' => sub { $triggered_bare.=$_->id; }, 'e1[@#a="foo"]' => sub { $triggered_foo .=$_->id; }, e2 => sub { $_->parent->set_att( '#a', 1); }, e4 => sub { $_->parent->set_att( '#a', 'foo'); }, } ) ->parse( ''); is( $triggered_bare, 'e1.1e1.2', 'handler condition on bare private attribute'); is( $triggered_foo , 'e1.1', 'handler condition on valued private attribute'); } { my $t= XML::Twig->parse( ''); $t->root->remove_class( 'foo'); is( $t->root->class, '', 'empty class after remove_class'); my $e= $t->first_elt( 'e'); $e->remove_class( 'foo'); is( $e->class, 'bar baz', 'remove_class on non-existent class'); $e->remove_class( 'baz'); is( $e->class, 'bar', 'remove_class'); $e->remove_class( 'foo'); is( $e->class, 'bar', 'remove_class on non-existent class (again)'); $e->remove_class( 'bar'); is( $e->class, '', 'remove_class until no class is left'); } { if( XML::Twig::_use( 'Text::Wrap')) { my $out= "t/test_wrapped.xml"; my $out_fh; open( $out_fh, ">$out") or die "cannot create temp file $out: $!"; $Text::Wrap::columns=40; $Text::Wrap::columns=40; XML::Twig->parse( pretty_print => 'wrapped', '' . "foobarbaz " x 10 . '') ->print( $out_fh); close $out_fh; is( slurp( $out),qq{\n foobarbaz foobarbaz foobarbaz\n foobarbaz foobarbaz foobarbaz\n foobarbaz foobarbaz foobarbaz\n foobarbaz \n}, 'wrapped print' ); unlink $out; } else { skip( 1); } } sub all_text { return join ':' => map { $_->text } @_; } 1; XML-Twig-3.52/t/xmlxpath_16axisprec_sib.t0000755000175000017500000000160612732215763020472 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 7); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @nodes; @nodes = $t->findnodes( '/AAA/XXX/preceding-sibling::*'); ok(@nodes, 1); ok($nodes[0]->getName, "BBB"); @nodes = $t->findnodes( '//CCC/preceding-sibling::*'); ok(@nodes, 4); @nodes = $t->findnodes( '/AAA/CCC/preceding-sibling::*[1]'); ok($nodes[0]->getName, "XXX"); @nodes = $t->findnodes( '/AAA/CCC/preceding-sibling::*[2]'); ok($nodes[0]->getName, "BBB"); exit 0; __DATA__ XML-Twig-3.52/t/xmlxpath_22name_select.t0000755000175000017500000000064712732215763020277 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 4); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @nodes; @nodes = $t->findnodes( '//*[name() = /AAA/SELECT]'); ok(@nodes, 2); ok($nodes[0]->getName, "BBB"); exit 0; __DATA__ XML-Twig-3.52/t/test_3_32.t0000755000175000017500000000153212732215763015426 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; $|=1; my $DEBUG=0; use XML::Twig; my $TMAX=1; print "1..$TMAX\n"; if( $] >= 5.008) { # test non ascii letters at the beginning of an element name in a selector # can't use non ascii chars in script, so the tag name needs to come from the doc! my $doc=q{étésummerestate}; my $t= XML::Twig->parse( $doc); my $tag= $t->root->first_child( 'tag')->text; foreach ($t->root->children( 'elt')) { $_->set_tag( $tag); } is( $t->root->first_child( $tag)->text, 'summer', 'non ascii letter to start a name in a condition'); } else { skip( 1, "known bug in perl $]: tags starting with a non ascii letter cannot be used in expressions"); } exit; 1; XML-Twig-3.52/t/test2_2.dtd0000644000175000017500000000066112732215763015512 0ustar mrodrigumrodrigu XML-Twig-3.52/t/test_twig_roots.t0000755000175000017500000001725412732215763017170 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; #use diagnostics; use XML::Twig; $|=1; my $TMAX=12; # do not forget to update! print "1..$TMAX\n"; $/= "\n\n"; my $t= XML::Twig->new( twig_roots => { }, twig_print_outside_roots => \*RESULT, error_context => 1, ); test_twig( $t, 1); $t= XML::Twig->new( twig_roots => { elt2 => sub { } }, twig_print_outside_roots => \*RESULT, error_context => 1, ); test_twig( $t, 2); $t= XML::Twig->new( twig_roots => { elt3 => sub { } }, twig_print_outside_roots => \*RESULT, error_context => 1, ); test_twig( $t, 3); $t= XML::Twig->new( twig_roots => { }, twig_print_outside_roots => \*RESULT, error_context => 1, ); test_twig( $t, 4); $t= XML::Twig->new( twig_roots => { elt2 => sub { } }, twig_print_outside_roots => \*RESULT, error_context => 1, ); test_twig( $t, 5); $t= XML::Twig->new( twig_roots => { elt3 => sub { } }, twig_print_outside_roots => \*RESULT, error_context => 1, ); test_twig( $t, 6); $t= XML::Twig->new( twig_roots => { }, twig_print_outside_roots => \*RESULT, error_context => 1, ); test_twig( $t, 7); $t= XML::Twig->new( twig_roots => { elt2 => sub { } }, twig_print_outside_roots => \*RESULT, error_context => 1, ); test_twig( $t, 8); $t= XML::Twig->new( twig_roots => { elt3 => sub { } }, twig_print_outside_roots => \*RESULT, error_context => 1, ); test_twig( $t, 9); $t= XML::Twig->new( twig_roots => { elt => sub { print RESULT "elt handler called on ", $_->gi, "\n"; }, }, start_tag_handlers => { doc => sub { print RESULT "start tag handler called on ", $_->gi, "\n"; }, }, end_tag_handlers => { doc => sub { print RESULT "end tag handler called on $_[1]\n"; }, }, ); test_twig( $t, 10); # test with doc root as root $t= XML::Twig->new( twig_roots => { doc => sub { $_->print( \*RESULT); } }); test_twig( $t, 11); # test with elt as root $t= XML::Twig->new( twig_roots => { elt => sub { $_->print( \*RESULT); } }); test_twig( $t, 12); exit 0; sub test_twig { my( $t, $test_nb)= @_; my $doc= read_doc(); my $expected_result= read_expected_result(); my $result_file= "test_twig_roots.res1"; open( RESULT, ">$result_file") or die "cannot create $result_file: $!"; $t->parse( $doc); check_result( $result_file, $test_nb, $expected_result); close RESULT; } sub check_result { my( $result_file, $test_no, $expected_result)= @_; # now check result my $result= read_result( $result_file); if( $result eq $expected_result) { print "ok $test_no\n"; } else { print "not ok $test_no\n"; print STDERR "\ntest $test_no:\n", "expected: \n$expected_result\n", "real: \n$result\n"; } } { my $last_doc; my $buffered_result; sub read_doc { local $/="\n\n"; my $doc= ; # if the data starts with #doc then it's a doc, otherwise use the previous one if( $doc=~ /^\s*#\s*doc/) { $doc= clean_data( $doc); $last_doc= $doc; $buffered_result=''; return $doc; } else { $buffered_result= clean_data( $doc); return $last_doc; } } sub read_expected_result { if( $buffered_result) { return $buffered_result; } else { local $/="\n\n"; my $expected_result= ; $expected_result= clean_data( $expected_result); return $expected_result; } } } sub clean_data { my $data= shift; $data=~ s{^\s*#.*\n}{}m; # get rid of comments $data=~ s{\s*$}{}s; # remove trailing spaces (and \n) $data=~ s{(^|\n)\s*(\n|$)}{}g; # remove empty lines return $data; } sub read_result { my $file= shift; local $/="\n"; open( RESULT, "<$file") or die "cannot read $file: $!"; my @result= grep {m/\S/} ; my $result= join( '', @result); $result=~ s{(^|\n)\s*(\n|$)}{}g; # remove empty lines close RESULT; unlink $file; return $result; } __DATA__ # doc 1 text subelt text another elt text an other type of element text of subelt # expected_res 1 text subelt text another elt text an other type of element text of subelt # expected_res 2 text subelt text another elt text text of subelt # expected_res 3 text subelt text another elt text an other type of element # doc 2 text subelt text another elt text an other type of element text of subelt # expected_res 4 text subelt text another elt text an other type of element text of subelt # expected_res 5 text subelt text another elt text text of subelt # expected_res 6 text subelt text another elt text an other type of element # doc 3 text subelt text another elt text an other type of element text of subelt # expected_res 7 text subelt text another elt text an other type of element text of subelt # expected_res 8 text subelt text another elt text text of subelt # expected_res 9 text subelt text another elt text an other type of element # doc 4 # expected_res 10 start tag handler called on doc elt handler called on elt end tag handler called on doc # expected_res 11 # expected_res 12 XML-Twig-3.52/t/test2.t0000755000175000017500000000422312732215763014762 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; # This tests the doctype and DTD access functions $|=1; use XML::Twig; use Cwd; $0 =~ s!\\!/!g; my ($DIR,$PROG) = $0 =~ m=^(.*/)?([^/]+)$=; $DIR =~ s=/$== || chop($DIR = cwd()); chdir $DIR; my $i=0; my $failed=0; my $TMAX=15; # don't forget to update! print "1..$TMAX\n"; # test twig creation my $t= new XML::Twig(); ok( $t, 'twig creation'); # first test an internal DTD my $in_file= "test2_1.xml"; my $res_file= "test2_1.res"; my $exp_file= "test2_1.exp"; # test parse no dtd info required $t->parsefile( $in_file, ErrorContext=>2); ok( $t, 'parse'); open( RES, ">$res_file") or die "cannot open $res_file:$!"; $t->print( \*RES); close RES; ok( $res_file, $exp_file, "flush"); $res_file= 'test2_2.res'; $exp_file= 'test2_2.exp'; open( RES, ">$res_file") or die "cannot open $res_file:$!"; $t->print( \*RES, Update_DTD => 1); close RES; ok( $res_file, $exp_file, "flush"); $t= new XML::Twig(); ok( $t, 'twig creation'); $in_file= "test2_2.xml"; $res_file= "test2_3.res"; $exp_file= "test2_3.exp"; $t->parsefile( $in_file, ErrorContext=>2); ok( $t, 'parse'); open( RES, ">$res_file") or die "cannot open $res_file:$!"; my $e2=new XML::Twig::Entity( 'e2', 'entity2'); my $entity_list= $t->entity_list; $entity_list->add( $e2); my $e3=new XML::Twig::Entity( 'e3', undef, 'pic.jpeg', 'JPEG'); $entity_list= $t->entity_list; $entity_list->add( $e3); $t->print( \*RES, Update_DTD => 1); close RES; ok( $res_file, $exp_file, "flush"); my $dtd= $t->dtd; ok( !$dtd, 'dtd exits'); $t= new XML::Twig(LoadDTD=>1); ok( $t, 'twig creation'); $t->parsefile( $in_file, ErrorContext=>2, ); $dtd= $t->dtd; ok( $dtd, 'dtd not found'); my @model= sort keys %{$dtd->{model}}; stest( stringify( @model), 'doc:intro:note:para:section:title', 'element list'); stest( $t->model( 'title'), '(#PCDATA)', 'title model'); mtest( $t->model( 'section'), '\(intro\?,\s*title,\s*\(para|note\)+\)', 'section model'); stest( $t->dtd->{att}->{section}->{id}->{type}, 'ID', 'section id type'); stest( $t->dtd->{att}->{section}->{id}->{default}, '#IMPLIED', 'section id default'); exit 0; XML-Twig-3.52/t/test_attregexp_cond.t0000755000175000017500000000254512732215763017773 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use XML::Twig; $|=1; my $i=0; my $failed=0; my $TMAX=4; # do not forget to update! print "1..$TMAX\n"; $i++; print "ok $i\n"; # loading my $t= XML::Twig->new( twig_handlers => { 'elt[@att=~/^v/]' => sub { $i++; if( $_->att( 'ok') eq "ok") { print "ok $i\n"; } else { print "NOK $i\n"; # print STDERR "id: ", $_->att( 'id'), "\n"; } }, 'elt[@change=~/^now$/]' => sub { $_[0]->setTwigHandler( 'elt[@att=~/^new/]' => sub { $i++; if( $_->att( 'ok') eq "ok") { print "ok $i\n"; } else { print "NOK $i\n"; # print STDERR "id: ", $_->att( 'id'), "\n"; } }); }, }, ); $t->parse( \*DATA); exit 0; __DATA__ foo q XML-Twig-3.52/t/xmlxpath_15axisfol_sib.t0000755000175000017500000000113212732215763020312 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; use Test; plan( tests => 6); use XML::Twig::XPath; ok(1); my $t= XML::Twig::XPath->new->parse( \*DATA); ok( $t); my @nodes; @nodes = $t->findnodes( '/AAA/BBB/following-sibling::*'); ok(@nodes, 2); ok($nodes[1]->getName, "CCC"); # test document order @nodes = $t->findnodes( '//CCC/following-sibling::*'); ok(@nodes, 3); ok($nodes[1]->getName, "FFF"); exit 0; __DATA__ XML-Twig-3.52/t/test_3_39.t0000755000175000017500000000550312732215763015437 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use Carp; use File::Spec; use lib File::Spec->catdir(File::Spec->curdir,"t"); use tools; $|=1; my $DEBUG=0; use XML::Twig; my $TMAX=12; print "1..$TMAX\n"; { my $doc='foo bar fooo baz'; my $t= XML::Twig->parse( $doc); $t->root->split( '(fo+)', e => { att => '$1' } ); is( $t->sprint, 'foo bar fooo baz', 'split, with $1 on attribute value'); $t= XML::Twig->parse( $doc); $t->root->split( '(fo+)', e => { '$1' => 'v$1' } ); is( $t->sprint, 'foo bar fooo baz', 'split, with $1 on attribute name and value'); $t= XML::Twig->parse( $doc); $t->root->split( '(fo+)', '$1' ); is( $t->sprint, 'foo bar fooo baz', 'split, with $1 on tag name'); $t= XML::Twig->parse( $doc); $t->root->split( '(foo+)', '$1', '' ); is( $t->sprint, 'foo bar fooo baz', 'split, with $1 on tag name'); $t= XML::Twig->parse( $doc); $t->root->split( '(fo+)(.*?)(a[rz])', x => { class => 'f' }, '', a => { class => 'x' }); is( $t->sprint, 'foo bar fooo baz', 'split, checking that it works with non capturing grouping'); $t= XML::Twig->parse( $doc); $t->root->split( '(fo+)(.*?)(a[rz])', x => { class => '$1' }, '', a => { class => '$3' }); is( $t->sprint, 'foo bar fooo baz', 'split, with $1 and $3 on att value'); } { my $t= XML::Twig->parse( 'e1e2'); is( join( '-', $t->findvalues( '//e')), 'e1-e2', 'findvalues'); } { my $html='

boo

'; my $well_formed = qq{$html}; my $short_doctype = qq{$html}; my $t= XML::Twig->new->parse( $well_formed); is_like( $t->sprint, $well_formed, 'valid xhtml'); if( _use( 'HTML::TreeBuilder')) { my $th= XML::Twig->new->parse_html( $well_formed); is_like( $t->sprint, $well_formed, 'valid xhtml (parsed as html)'); my $t3= XML::Twig->new->parse_html( $short_doctype); is_like( $t3->sprint, $html, 'xhtml without SYSTEM in DOCTYPE (parsed as html, no DOCTYPE output)'); my $t4= XML::Twig->new( output_html_doctype => 1)->parse_html( $short_doctype); is_like( $t4->sprint, $well_formed, 'xhtml without SYSTEM in DOCTYPE (parsed as html, with proper DOCTYPE output)'); } else { skip( 3); } my $t2= XML::Twig->new->safe_parse( $short_doctype); nok( $t2, 'xhtml without SYSTEM in DOCTYPE'); } XML-Twig-3.52/t/test5.t0000755000175000017500000004731312732215763014774 0ustar mrodrigumrodrigu#!/usr/bin/perl -w use strict; use XML::Twig; $|=1; my $doc= ' p2 (/doc/p1/p2) p2 (/doc/p1/p3/p2) p2 (/doc/p2) p2 (/doc/p2) p2 (/doc/p2) p2 (/doc/p2) p2 (/doc/p3/p2) '; my $TMAX=80; # do not forget to update print "1..$TMAX\n"; my $t= new XML::Twig; $t->parse( $doc); my $elt1= $t->elt_id( 'elt1'); my $elt2= $t->elt_id( 'elt2'); my $elt3= $t->elt_id( 'elt3'); my $root= $t->root; # testing before and after my $res= $elt1->before( $elt2); if( $res) { print "ok 1\n"; } else { print "not ok 1\n"; } $res= $elt2->before( $elt3); if( $res) { print "ok 2\n"; } else { print "not ok 2\n"; } $res= $elt1->before( $elt3); if( $res) { print "ok 3\n"; } else { print "not ok 3\n"; } $res= $elt3->before( $elt2); unless( $res) { print "ok 4\n"; } else { print "not ok 4\n"; } $res= $elt1->after( $elt2); unless( $res) { print "ok 5\n"; } else { print "not ok 5\n"; } $res= $elt1->after( $elt3); unless( $res) { print "ok 6\n"; } else { print "not ok 6\n"; } $res= $elt3->after( $elt2); if( $res) { print "ok 7\n"; } else { print "not ok 7\n"; } $res= $elt1->before( $root); unless( $res) { print "ok 8\n"; } else { print "not ok 8\n"; } $res= $root->before( $elt1); if( $res) { print "ok 9\n"; } else { print "not ok 9\n"; } # testing path capabilities my $path= $elt1->path; my $exp_path= '/doc/elt1/elt2'; if( $path eq $exp_path) { print "ok 10\n"; } else { print "not ok 10\n"; print "$path instead\n"; warn "of $exp_path\n"; } $path= $elt2->path; $exp_path= '/doc/elt1/elt2/elt3'; if( $path eq $exp_path) { print "ok 11\n"; } else { print "not ok 11\n"; warn "$path instead of $exp_path\n"; } $path= $elt3->path; $exp_path= '/doc/elt1/elt2'; if( $path eq $exp_path) { print "ok 12\n"; } else { print "not ok 12\n"; warn "$path instead of $exp_path\n"; } $path= $root->path; $exp_path= '/doc'; if( $path eq $exp_path) { print "ok 13\n"; } else { print "not ok 13\n"; warn "$path instead of $exp_path\n"; } my $id1=''; my $exp_id1= 'p2_1'; my $id2=''; my $exp_id2= 'p2_3p2_4'; my $id3=''; my $exp_id3= 'p2_2p2_7'; my $id4=''; my $exp_id4= 'p2_5p2_6'; my $path_error=''; my $t2= new XML::Twig( TwigHandlers => { '/doc/p1/p2' => sub { $id1.= $_[1]->id; return; }, '/doc/p2' => sub { $id2.= $_[1]->id; return; }, 'p3/p2' => sub { $id3.= $_[1]->id; return; }, 'p2' => sub { $id4.= $_[1]->id; return; }, _all_ => sub { my( $t, $elt)= @_; my $gi= $elt->gi; my $tpath= $t->path( $gi); my $epath= $elt->path; unless( $tpath eq $epath) { $path_error.= " $tpath <> $epath\n"; } } } ); $t2->parse( $doc); if( $id1 eq $exp_id1) { print "ok 14\n"; } else { print "not ok 14\n"; warn "$id1 instead of $exp_id1\n"; } if( $id2 eq $exp_id2) { print "ok 15\n"; } else { print "not ok 15\n"; warn "$id2 instead of $exp_id2\n"; } if( $id3 eq $exp_id3) { print "ok 16\n"; } else { print "not ok 16\n"; warn "$id3 instead of $exp_id3\n"; } if( $id4 eq $exp_id4) { print "ok 17\n"; } else { print "not ok 17\n"; warn "$id4 instead of $exp_id4\n"; } unless( $path_error) { print "ok 18\n"; } else { print "not ok 18\n"; warn "$path_error\n"; } $id1=''; $exp_id1= 'p2_1'; my $t3= new XML::Twig( TwigRoots => { '/doc/p1/p2' => sub { $id1.= $_[1]->id; } } ); $t3->parse( $doc); if( $id1 eq $exp_id1) { print "ok 19\n"; } else { print "not ok 19\n"; warn "$id1 instead of $exp_id1\n"; } $id2=''; $exp_id2= 'p2_3p2_4'; $t3= new XML::Twig( TwigRoots => { '/doc/p2' => sub { $id2.= $_[1]->id;} } ); $t3->parse( $doc); if( $id2 eq $exp_id2) { print "ok 20\n"; } else { print "not ok 20\n"; warn "$id2 instead of $exp_id2\n"; } $id3=''; $exp_id3= 'p2_2p2_7'; $t3= new XML::Twig( TwigRoots => { 'p3/p2' => sub { $id3.= $_[1]->id;} } ); $t3->parse( $doc); if( $id3 eq $exp_id3) { print "ok 21\n"; } else { print "not ok 21\n"; warn "$id3 instead of $exp_id3\n"; } # test what happens to 0 in pcdata/cdata my $pcdata= '0'; my $cdata= ''; my $t4= new XML::Twig; $t4->parse( $pcdata); if( my $res= $t4->sprint eq $pcdata) { print "ok 22\n"; } else { print "not ok 22\n"; warn "sprint returns $res instead of $pcdata\n"; } $t4->parse( $pcdata); if( my $res= $t4->root->text eq '0') { print "ok 23\n"; } else { print "not ok 23\n"; warn "sprint returns $res instead of '0'\n"; } $t4->parse( $cdata); if( my $res= $t4->sprint eq $cdata) { print "ok 24\n"; } else { print "not ok 23\n"; warn "sprint returns $res instead of $cdata\n"; } $t4->parse( $cdata); if( my $res= $t4->root->text eq '0') { print "ok 25\n"; } else { print "not ok 25\n"; warn "sprint returns $res instead of '0'\n"; } my $test_inherit= ' '; my $t5= new XML::Twig; $t5->parse( $test_inherit); my $subelt= $t5->root->first_child->first_child; if( my $att= $subelt->att( 'att1') eq "subelt1") { print "ok 26\n"; } else { print "not ok 26\n"; warn "sprint returns $att instead of 'subelt1'\n"; } if( my $att= $subelt->inherit_att( 'att1') eq "subelt1") { print "ok 27\n"; } else { print "not ok 27\n"; warn "sprint returns $att instead of 'subelt1'\n"; } if( my $att= $subelt->inherit_att( 'att1', 'elt') eq "elt1") { print "ok 28\n"; } else { print "not ok 28 sprint returns $att instead of 'elt1'\n"; } if( my $att= $subelt->inherit_att( 'att1', 'elt', 'doc') eq "elt1") { print "ok 29\n"; } else { print "not ok 29\n"; warn "sprint returns $att instead of 'elt1'\n"; } if( my $att= $subelt->inherit_att( 'att1', "doc") eq "doc1") { print "ok 30\n"; } else { print "not ok 30\n"; warn "sprint returns $att instead of 'doc1'\n"; } if( my $att= $subelt->inherit_att( 'att3') eq "doc3") { print "ok 31\n"; } else { print "not ok 31\n"; warn "sprint returns $att instead of 'doc3'\n"; } if( my $att= $subelt->inherit_att( 'att3') eq "doc3") { print "ok 32\n"; } else { print "not ok 32\n"; warn "sprint returns $att instead of 'doc3'\n"; } if( my $att= $subelt->inherit_att( 'att_null') == 0) { print "ok 33\n"; } else { print "not ok 33\n"; warn "sprint returns $att instead of '0'\n"; } # test attribute paths my $test_att_path= ' '; my $res1=''; my $t6= new XML::Twig ( TwigHandlers => #'' (or VIM messes up colors) { 'elt[@id="elt1"]' => sub { $res1.= $_[1]->id} } ); $t6->parse( $test_att_path); if( $res1 eq 'elt1') { print "ok 34\n"; } else { print "not ok 34\n"; warn "returns $res1 instead of elt1\n"; } $res1=''; my $res2=''; $t6= new XML::Twig ( TwigHandlers => { 'elt[@id="elt1"]' => sub { $res1.= $_[1]->id}, 'elt[@att="val1"]' => sub { $res2.= $_[1]->id} }, ); $t6->parse( $test_att_path); if( $res1 eq 'elt1') { print "ok 35\n"; } else { print "not ok 35\n"; warn "returns $res1 instead of 'elt1'\n"; } if( $res2 eq 'elt1elt2') { print "ok 36\n"; } else { print "not ok 36\n"; warn "returns $res2 instead of 'elt1elt2'\n"; } my $doc_with_escaped_entities= q{<apos>''<apos><">}; my $exp_res1= q{<apos>''<apos><">}; my $exp_res2= q{''<">}; my $t7= new XML::Twig(); $t7->parse( $doc_with_escaped_entities); $res= $t7->sprint; if( $res eq $exp_res1) { print "ok 37\n"; } else { print "not ok 37\n"; warn "returns \n$res instead of \n$exp_res1\n"; } $t7= new XML::Twig( KeepEncoding => 1, NoExpand => 1); $t7->parse( $doc_with_escaped_entities); $res= $t7->sprint; if( $res eq $doc_with_escaped_entities) { print "ok 38\n"; } else { print "not ok 38\n"; warn "returns \n$res instead of \n$doc_with_escaped_entities\n"; } # test extra options for new my $elt= XML::Twig::Elt->new( 'p'); $res= $elt->sprint; my $exp_res= '

'; if( $res eq $exp_res) { print "ok 39\n"; } else { print "not ok 39\n"; warn "returns $res instead of $exp_res\n"; } $elt= XML::Twig::Elt->new( 'p', q{#EMPTY}); $res= $elt->sprint; $exp_res= '

'; if( $res eq $exp_res) { print "ok 40\n"; } else { print "not ok 40\n"; warn "returns $res instead of $exp_res\n"; } $elt= XML::Twig::Elt->new( 'p', { att => 'val'}); $res= $elt->sprint; $exp_res= '

'; if( $res eq $exp_res) { print "ok 41\n"; } else { print "not ok 41\n"; warn "returns $res instead of $exp_res\n"; } $elt= XML::Twig::Elt->new( 'p', { att => 'val'}, '#EMPTY'); $res= $elt->sprint; $exp_res= '

'; if( $res eq $exp_res) { print "ok 42\n"; } else { print "not ok 42\n"; warn "returns $res instead of $exp_res\n"; } $elt= XML::Twig::Elt->new( 'p', { att1 => 'val1', att2=> 'val2'}); $res= $elt->sprint; $exp_res= '

'; if( $res eq $exp_res) { print "ok 43\n"; } else { print "not ok 43\n"; warn "returns $res instead of $exp_res\n"; } $elt= XML::Twig::Elt->new( 'p', { att1 => 'val1', att2=>'val2'}, '#EMPTY'); $res= $elt->sprint; $exp_res= '

'; if( $res eq $exp_res) { print "ok 44\n"; } else { print "not ok 44\n"; warn "returns $res instead of $exp_res\n"; } $elt= XML::Twig::Elt->new( 'p', "content"); $res= $elt->sprint; $exp_res= '

content

'; if( $res eq $exp_res) { print "ok 45\n"; } else { print "not ok 45\n"; warn "returns $res instead of $exp_res\n"; } $elt= XML::Twig::Elt->new( 'p', { att1 => 'val1'}, "content"); $res= $elt->sprint; $exp_res= '

content

'; if( $res eq $exp_res) { print "ok 46\n"; } else { print "not ok 46\n"; warn "returns $res instead of $exp_res\n"; } $elt= XML::Twig::Elt->new( 'p', { att1 => 'val1', att2=>'val2'}, "content"); $res= $elt->sprint; $exp_res= '

content

'; if( $res eq $exp_res) { print "ok 47\n"; } else { print "not ok 47\n"; warn "returns $res instead of $exp_res\n"; } $elt= XML::Twig::Elt->new( 'p', { att1 => 'val1'}, "content", " more content"); $res= $elt->sprint; $exp_res= '

content more content

'; if( $res eq $exp_res) { print "ok 48\n"; } else { print "not ok 48\n"; warn "returns $res instead of $exp_res\n"; } my $sub1= XML::Twig::Elt->new( 'sub', '#EMPTY'); my $sub2= XML::Twig::Elt->new( 'sub', { att => 'val'}, '#EMPTY'); my $sub3= XML::Twig::Elt->new( 'sub', "sub3"); my $sub4= XML::Twig::Elt->new( 'sub', "sub4"); my $sub5= XML::Twig::Elt->new( 'sub', "sub5", $sub3, "sub5 again", $sub4); $elt= XML::Twig::Elt->new( 'p', { att1 => 'val1'}, $sub1, $sub2, $sub5); $res= $elt->sprint; $exp_res= '

'. 'sub5sub3sub5 againsub4

'; if( $res eq $exp_res) { print "ok 49\n"; } else { print "not ok 49\n"; warn "returns \n$res\n instead of \n$exp_res\n"; } $elt->set_empty_tag_style( 'html'); $res= $elt->sprint; $exp_res= '

'. 'sub5sub3sub5 againsub4

'; if( $res eq $exp_res) { print "ok 50\n"; } else { print "not ok 50\n"; warn "returns \n$res\n instead of \n$exp_res\n"; } $elt->set_empty_tag_style( 'expand'); $res= $elt->sprint; $exp_res= '

'. 'sub5sub3sub5 againsub4

'; if( $res eq $exp_res) { print "ok 51\n"; } else { print "not ok 51\n"; warn "returns \n$res\n instead of \n$exp_res\n"; } $elt->set_empty_tag_style( 'normal'); $res= $elt->sprint; $exp_res= '

'. 'sub5sub3sub5 againsub4

'; if( $res eq $exp_res) { print "ok 52\n"; } else { print "not ok 52\n"; warn "returns \n$res\n instead of \n$exp_res\n"; } my $new_elt= parse XML::Twig::Elt( $res); $res= $new_elt->sprint; $exp_res= '

'. 'sub5sub3sub5 againsub4

'; if( $res eq $exp_res) { print "ok 53\n"; } else { print "not ok 53\n"; warn "returns \n$res\n instead of \n$exp_res\n"; } $doc='text1root1text 2'; $res=''; $exp_res= 'text1'; $t= new XML::Twig( TwigHandlers => { 'elt[string()="text1"]' => \&display1, 'elt[@att="val1"]' => \&display1, }, ); $t->parse( $doc); sub display1 { $res .=$_[1]->sprint; return 0; } if( $res eq $exp_res) { print "ok 54\n"; } else { print "not ok 54\n"; warn "returns \n$res\n instead of \n$exp_res\n"; } $res=''; $exp_res= 'text1' x 2; $t= new XML::Twig( TwigHandlers => { 'elt[string()="text1"]' => \&display2, 'elt[@att="val1"]' => \&display2, }, ); $t->parse( $doc); sub display2 { $res .=$_[1]->sprint; } if( $res eq $exp_res) { print "ok 55\n"; } else { print "not ok 55\n"; warn "returns \n$res\n instead of \n$exp_res\n"; } $doc= ''; $t= new XML::Twig; $t->parse( $doc); $res= $t->first_elt->id; $exp_res= 'doc1'; if( $res eq $exp_res) { print "ok 56\n"; } else { print "not ok 56\n"; warn "returns \n$res\n instead of \n$exp_res\n"; } $res= $t->first_elt( 'doc')->id; $exp_res= 'doc1'; if( $res eq $exp_res) { print "ok 57\n"; } else { print "not ok 57\n"; warn "returns \n$res\n instead of \n$exp_res\n"; } $res= $t->first_elt( 'sub')->id; $exp_res= 'sub1'; if( $res eq $exp_res) { print "ok 58\n"; } else { print "not ok 58\n"; warn "returns \n$res\n instead of \n$exp_res\n"; } $sub1= $t->first_elt( 'sub'); $res= $sub1->next_elt( 'sub')->id; $exp_res= 'sub2'; if( $res eq $exp_res) { print "ok 59\n"; } else { print "not ok 59\n"; warn "returns \n$res\n instead of \n$exp_res\n"; } $sub1= $t->first_elt( 'sub'); $res= $sub1->next_elt( $sub1, 'sub'); unless( defined $res) { print "ok 60\n"; } else { print "not ok 60\n"; warn "should return undef, returned elt is " . $res->id; } $sub1= $t->first_elt( 'sub'); $sub2= $sub1->next_elt( 'sub'); $res= $sub2->next_elt( 'sub'); unless( defined $res) { print "ok 61\n"; } else { print "not ok 61\n"; warn "should return undef, returned elt is" . $res->id; } # test : (for name spaces) in elements $doc="p1

p

p2
"; $res=''; $exp_res='p1p2'; $t= new XML::Twig( TwigHandlers => { 'ns:p' => sub { $res .= $_[1]->text; } }); $t->parse( $doc); if( $res eq $exp_res) { print "ok 62\n"; } else { print "not ok 62\n"; warn "should return $exp_res, returned $res"; } $exp_res="p"; my $e_res= $t->get_xpath( '/doc/p', 0); $res= $e_res->text; if( $res eq $exp_res) { print "ok 63\n"; } else { print "not ok 63\n"; warn "should return $exp_res, returned $res"; } $exp_res='p1p2'; $res=''; foreach ($t->get_xpath( 'ns:p')) { $res .= $_->text; } if( $res eq $exp_res) { print "ok 64\n"; } else { print "not ok 64\n"; warn "should return $exp_res, returned $res"; } # test : (for name spaces) in attributes $doc='p1

p

p3

p2
'; $res=''; $exp_res='p1'; $t= new XML::Twig( TwigHandlers => { 'ns:p[@ns:a="a1"]' => sub { $res .= $_[1]->text; } }); $t->parse( $doc); if( $res eq $exp_res) { print "ok 65\n"; } else { print "not ok 65\n"; warn "should return $exp_res, returned $res"; } $res=''; $exp_res='p3'; foreach ($t->find_nodes( 'p[@a="a1"]')) { $res .= $_->text; } if( $res eq $exp_res) { print "ok 66\n"; } else { print "not ok 66\n"; warn "should return $exp_res, returned $res"; } $res=''; $exp_res='p1'; foreach ($t->find_nodes( 'ns:p[@ns:a="a1"]')) { $res .= $_->text; } if( $res eq $exp_res) { print "ok 67\n"; } else { print "not ok 67\n"; warn "should return $exp_res, returned $res"; } $res=''; $exp_res='p1p2'; foreach ($t->get_xpath( 'ns:p[@ns:a="a1" or @ns:a="a2"]')) { $res .= $_->text; } if( $res eq $exp_res) { print "ok 68\n"; } else { print "not ok 68\n"; warn "should return $exp_res, returned $res"; } $res=''; $exp_res='p'; foreach ($t->get_xpath( 'p[@b="a1" or @ns:a="a1"]')) { $res .= $_->text; } if( $res eq $exp_res) { print "ok 69\n"; } else { print "not ok 69\n"; warn "should return $exp_res, returned $res"; } $doc='

p1

p2

p3

p4

'; $res=''; $exp_res='p2p4'; $t= new XML::Twig( twig_handlers => { 'p[@a]' => sub { $res .= $_[1]->text; } }); $t->parse( $doc); if( $res eq $exp_res) { print "ok 70\n"; } else { print "not ok 70\n"; warn "should return $exp_res, returned $res"; } $res=''; foreach ($t->get_xpath( '//p[@a]')) { $res .= $_->text; } if( $res eq $exp_res) { print "ok 71\n"; } else { print "not ok 71\n"; warn "should return $exp_res, returned $res"; } $res=''; $exp_res='p1p2p4'; foreach ($t->get_xpath( '//p[@ns:a or @a ]')) { $res .= $_->text; } if( $res eq $exp_res) { print "ok 72\n"; } else { print "not ok 72\n"; warn "should return $exp_res, returned $res"; } $doc='

p1

p2

p3

p4

'; $res=''; $exp_res='p1p2p4'; $t= new XML::Twig(); $t->parse( $doc); $res .= $_->text foreach ($t->get_xpath( '//*[@a]')); if( $res eq $exp_res) { print "ok 73\n"; } else { print "not ok 73\n"; warn "should return $exp_res, returned $res"; } $res=''; $exp_res='p1p2'; $res .= $_->text foreach ($t->get_xpath( '*[@a="a1"]')); if( $res eq $exp_res) { print "ok 74\n"; } else { print "not ok 74\n"; warn "should return $exp_res, returned $res"; } $res=''; $exp_res='p1p2'; $res .= $_->text foreach ($t->get_xpath( '//*[@a="a1"]')); if( $res eq $exp_res) { print "ok 75\n"; } else { print "not ok 75\n"; warn "should return $exp_res, returned $res"; } $res=''; $exp_res='p1'; $res .= $_->text foreach ($t->get_xpath( 'p[string()= "p1"]')); if( $res eq $exp_res) { print "ok 76\n"; } else { print "not ok 76\n"; warn "should return $exp_res, returned $res"; } $doc='p1

p

p3

p2
'; $res=''; $exp_res='p1p'; $t= new XML::Twig( TwigHandlers => { '[@ns:a="a1"]' => sub { $res .= $_[1]->text; } }); $t->parse( $doc); if( $res eq $exp_res) { print "ok 77\n"; } else { print "not ok 77\n"; warn "should return $exp_res, returned $res"; } $res=''; $res2=''; $exp_res2='p2'; $t= new XML::Twig( TwigHandlers => { '[@ns:a="a1"]' => sub { $res .= $_[1]->text; }, '[@ns:a="a2"]' => sub { $res2 .= $_[1]->text; } }); $t->parse( $doc); if( $res eq $exp_res) { print "ok 78\n"; } else { print "not ok 78\n"; warn "should return $exp_res, returned $res"; } if( $res2 eq $exp_res2) { print "ok 79\n"; } else { print "not ok 79\n"; warn "should return $exp_res2, returned $res2"; } $elt= XML::Twig::Elt->new( 'p', { att => 'val', '#EMPTY' => 0 }); $res= $elt->sprint; $exp_res= '

'; if( $res eq $exp_res) { print "ok 80\n"; } else { print "not ok 80\n"; warn "returns $res instead of $exp_res\n"; } exit 0; XML-Twig-3.52/t/test_kwalitee.t0000755000175000017500000000067412732215763016573 0ustar mrodrigumrodrigu#!/usr/bin/perl use strict; use warnings; if( ! $ENV{TEST_AUTHOR} ) { print "1..1\nok 1\n"; warn "Author test. Set \$ENV{TEST_AUTHOR} to a true value to run.\n"; exit; } eval { require Test::More; Test::More->import(); }; if( $@) { print "1..1\nok 1\n"; warn "need test::More installed for this test\n"; exit; } eval { require Test::Kwalitee; Test::Kwalitee->import() }; plan( skip_all => 'Test::Kwalitee not installed; skipping' ) if $@; XML-Twig-3.52/t/xmlxpath_tools.pm0000644000175000017500000000057112732215763017156 0ustar mrodrigumrodriguuse strict; use Config; BEGIN { if( eval( 'require XML::Twig::XPath')) { import XML::Twig::XPath; } elsif( $@ =~ m{^cannot use XML::Twig::XPath}) { print "1..1\nok 1\n"; $@=~s{ at.*}{}s; warn "$@\n"; exit; } else { die $@; } } 1; __END__ =head1 SYNOPSYS use FindBin qw($Bin); BEGIN { unshift @INC, $Bin; } use xmlxpath_tools; XML-Twig-3.52/Twig.pm0000644000175000017500000166544713015347616014562 0ustar mrodrigumrodriguuse strict; use warnings; # > perl 5.5 # This is created in the caller's space # I realize (now!) that it's not clean, but it's been there for 10+ years... BEGIN { sub ::PCDATA { '#PCDATA' } ## no critic (Subroutines::ProhibitNestedSubs); sub ::CDATA { '#CDATA' } ## no critic (Subroutines::ProhibitNestedSubs); } use UNIVERSAL(); ## if a sub returns a scalar, it better not bloody disappear in list context ## no critic (Subroutines::ProhibitExplicitReturnUndef); my $perl_version; my $parser_version; ###################################################################### package XML::Twig; ###################################################################### require 5.004; use utf8; # > perl 5.5 use vars qw($VERSION @ISA %valid_option); use Carp; use File::Spec; use File::Basename; use Config; # to get perl's path name in case we need to know if perlio is available *isa= *UNIVERSAL::isa; # flag, set to true if the weaken sub is available use vars qw( $weakrefs); # flag set to true if the version of expat seems to be 1.95.2, which has annoying bugs # wrt doctype handling. This is global for performance reasons. my $expat_1_95_2=0; # a slight non-xml mod: # is allowed as a first character my $REG_TAG_FIRST_LETTER; #$REG_TAG_FIRST_LETTER= q{(?:[^\W\d]|[:#_])}; # < perl 5.6 - does not work for leading non-ascii letters $REG_TAG_FIRST_LETTER= q{(?:[[:alpha:]:#_])}; # >= perl 5.6 my $REG_TAG_LETTER= q{(?:[\w_.-]*)}; # a simple name (no colon) my $REG_NAME_TOKEN= qq{(?:$REG_TAG_FIRST_LETTER$REG_TAG_LETTER*)}; # a tag name, possibly including namespace my $REG_NAME= qq{(?:(?:$REG_NAME_TOKEN:)?$REG_NAME_TOKEN)}; # tag name (leading # allowed) # first line is for perl 5.005, second line for modern perl, that accept character classes my $REG_TAG_NAME=$REG_NAME; # name or wildcard (* or '') (leading # allowed) my $REG_NAME_W = qq{(?:$REG_NAME|[*])}; # class and ids are deliberately permissive my $REG_NTOKEN_FIRST_LETTER; #$REG_NTOKEN_FIRST_LETTER= q{(?:[^\W\d]|[:_])}; # < perl 5.6 - does not work for leading non-ascii letters $REG_NTOKEN_FIRST_LETTER= q{(?:[[:alpha:]:_])}; # >= perl 5.6 my $REG_NTOKEN_LETTER= q{(?:[\w_:.-]*)}; my $REG_NTOKEN= qq{(?:$REG_NTOKEN_FIRST_LETTER$REG_NTOKEN_LETTER*)}; my $REG_CLASS = $REG_NTOKEN; my $REG_ID = $REG_NTOKEN; # allow # (private elt) * . *. # *# my $REG_TAG_PART= qq{(?:$REG_NAME_W(?:[.]$REG_CLASS|[#]$REG_ID)?|[.]$REG_CLASS)}; my $REG_REGEXP = q{(?:/(?:[^\\/]|\\.)*/[eimsox]*)}; # regexp my $REG_MATCH = q{[!=]~}; # match (or not) my $REG_STRING = q{(?:"(?:[^\\"]|\\.)*"|'(?:[^\\']|\\.)*')}; # string (simple or double quoted) my $REG_NUMBER = q{(?:\d+(?:\.\d*)?|\.\d+)}; # number my $REG_VALUE = qq{(?:$REG_STRING|$REG_NUMBER)}; # value my $REG_OP = q{==|!=|>|<|>=|<=|eq|ne|lt|gt|le|ge|=}; # op my $REG_FUNCTION = q{(?:string|text)\(\s*\)}; my $REG_STRING_ARG = qq{(?:string|text)\\(\\s*$REG_NAME_W\\s*\\)}; my $REG_COMP = q{(?:>=|<=|!=|<|>|=)}; my $REG_TAG_IN_PREDICATE= $REG_NAME_W . q{(?=\s*(?i:and\b|or\b|\]|$))}; # keys in the context stack, chosen not to interfere with att names, even private (#-prefixed) ones my $ST_TAG = '##tag'; my $ST_ELT = '##elt'; my $ST_NS = '##ns' ; # used in the handler trigger code my $REG_NAKED_PREDICATE= qq{((?:"[^"]*"|'[^']*'|$REG_STRING_ARG|$REG_FUNCTION|\@$REG_NAME_W|$REG_MATCH\\s*$REG_REGEXP|[\\s\\d><=!()+.-]|(?i:and)|(?i:or)|$REG_TAG_IN_PREDICATE)*)}; my $REG_PREDICATE= qq{\\[$REG_NAKED_PREDICATE\\]}; # not all axis, only supported ones (in get_xpath) my @supported_axis= ( 'ancestor', 'ancestor-or-self', 'child', 'descendant', 'descendant-or-self', 'following', 'following-sibling', 'parent', 'preceding', 'preceding-sibling', 'self' ); my $REG_AXIS = "(?:" . join( '|', @supported_axis) .")"; # only used in the "xpath"engine (for get_xpath/findnodes) for now my $REG_PREDICATE_ALT = qr{\[(?:(?:string\(\s*\)|\@$REG_TAG_NAME)\s*$REG_MATCH\s*$REG_REGEXP\s*|[^\]]*)\]}; # used to convert XPath tests on strings to the perl equivalent my %PERL_ALPHA_TEST= ( '=' => ' eq ', '!=' => ' ne ', '>' => ' gt ', '>=' => ' ge ', '<' => ' lt ', '<=' => ' le '); my( $FB_HTMLCREF, $FB_XMLCREF); my $NO_WARNINGS= $perl_version >= 5.006 ? 'no warnings' : 'local $^W=0'; # default namespaces, both ways my %DEFAULT_NS= ( xml => "http://www.w3.org/XML/1998/namespace", xmlns => "http://www.w3.org/2000/xmlns/", ); my %DEFAULT_URI2NS= map { $DEFAULT_NS{$_} => $_ } keys %DEFAULT_NS; # constants my( $PCDATA, $CDATA, $PI, $COMMENT, $ENT, $ELT, $NOTATION, $TEXT, $ASIS, $EMPTY, $BUFSIZE); # used when an HTML doc only has a PUBLIC declaration, to generate the SYSTEM one # this should really be done by HTML::TreeBuilder, but as of HTML::TreeBuilder 4.2 it isn't # the various declarations are taken from http://en.wikipedia.org/wiki/Document_Type_Declaration my %HTML_DECL= ( "-//W3C//DTD HTML 4.0 Transitional//EN" => "http://www.w3.org/TR/REC-html40/loose.dtd", "-//W3C//DTD HTML 4.01//EN" => "http://www.w3.org/TR/html4/strict.dtd", "-//W3C//DTD HTML 4.01 Transitional//EN" => "http://www.w3.org/TR/html4/loose.dtd", "-//W3C//DTD HTML 4.01 Frameset//EN" => "http://www.w3.org/TR/html4/frameset.dtd", "-//W3C//DTD XHTML 1.0 Strict//EN" => "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd", "-//W3C//DTD XHTML 1.0 Transitional//EN" => "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", "-//W3C//DTD XHTML 1.0 Frameset//EN" => "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd", "-//W3C//DTD XHTML 1.1//EN" => "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", "-//W3C//DTD XHTML Basic 1.0//EN" => "http://www.w3.org/TR/xhtml-basic/xhtml-basic10.dtd", "-//W3C//DTD XHTML Basic 1.1//EN" => "http://www.w3.org/TR/xhtml-basic/xhtml-basic11.dtd", "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" => "http://www.wapforum.org/DTD/xhtml-mobile10.dtd", "-//WAPFORUM//DTD XHTML Mobile 1.1//EN" => "http://www.openmobilealliance.org/tech/DTD/xhtml-mobile11.dtd", "-//WAPFORUM//DTD XHTML Mobile 1.2//EN" => "http://www.openmobilealliance.org/tech/DTD/xhtml-mobile12.dtd", "-//W3C//DTD XHTML+RDFa 1.0//EN" => "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd", ); my $DEFAULT_HTML_TYPE= "-//W3C//DTD HTML 4.0 Transitional//EN"; my $SEP= qr/\s*(?:$|\|)/; BEGIN { $VERSION = '3.52'; use XML::Parser; my $needVersion = '2.23'; ($parser_version= $XML::Parser::VERSION)=~ s{_\d+}{}; # remove _ from version so numeric tests do not warn croak "need at least XML::Parser version $needVersion" unless $parser_version >= $needVersion; ($perl_version= $])=~ s{_\d+}{}; if( $perl_version >= 5.008) { eval "use Encode qw( :all)"; ## no critic ProhibitStringyEval $FB_XMLCREF = 0x0400; # Encode::FB_XMLCREF; $FB_HTMLCREF = 0x0200; # Encode::FB_HTMLCREF; } # test whether we can use weak references # set local empty signal handler to trap error messages { local $SIG{__DIE__}; if( eval( 'require Scalar::Util') && defined( \&Scalar::Util::weaken)) { import Scalar::Util( 'weaken'); $weakrefs= 1; } elsif( eval( 'require WeakRef')) { import WeakRef; $weakrefs= 1; } else { $weakrefs= 0; } } import XML::Twig::Elt; import XML::Twig::Entity; import XML::Twig::Entity_list; # used to store the gi's # should be set for each twig really, at least when there are several # the init ensures that special gi's are always the same # constants: element types $PCDATA = '#PCDATA'; $CDATA = '#CDATA'; $PI = '#PI'; $COMMENT = '#COMMENT'; $ENT = '#ENT'; $NOTATION = '#NOTATION'; # element classes $ELT = '#ELT'; $TEXT = '#TEXT'; # element properties $ASIS = '#ASIS'; $EMPTY = '#EMPTY'; # used in parseurl to set the buffer size to the same size as in XML::Parser::Expat $BUFSIZE = 32768; # gi => index %XML::Twig::gi2index=( '', 0, $PCDATA => 1, $CDATA => 2, $PI => 3, $COMMENT => 4, $ENT => 5); # list of gi's @XML::Twig::index2gi=( '', $PCDATA, $CDATA, $PI, $COMMENT, $ENT); # gi's under this value are special $XML::Twig::SPECIAL_GI= @XML::Twig::index2gi; %XML::Twig::base_ent= ( '>' => '>', '<' => '<', '&' => '&', "'" => ''', '"' => '"',); foreach my $c ( "\n", "\r", "\t") { $XML::Twig::base_ent{$c}= sprintf( "&#x%02x;", ord( $c)); } # now set some aliases *find_nodes = *get_xpath; # same as XML::XPath *findnodes = *get_xpath; # same as XML::LibXML *getElementsByTagName = *descendants; *descendants_or_self = *descendants; # valid in XML::Twig, not in XML::Twig::Elt *find_by_tag_name = *descendants; *getElementById = *elt_id; *getEltById = *elt_id; *toString = *sprint; *create_accessors = *att_accessors; } @ISA = qw(XML::Parser); # fake gi's used in twig_handlers and start_tag_handlers my $ALL = '_all_'; # the associated function is always called my $DEFAULT= '_default_'; # the function is called if no other handler has been # some defaults my $COMMENTS_DEFAULT= 'keep'; my $PI_DEFAULT = 'keep'; # handlers used in regular mode my %twig_handlers=( Start => \&_twig_start, End => \&_twig_end, Char => \&_twig_char, Entity => \&_twig_entity, Notation => \&_twig_notation, XMLDecl => \&_twig_xmldecl, Doctype => \&_twig_doctype, Element => \&_twig_element, Attlist => \&_twig_attlist, CdataStart => \&_twig_cdatastart, CdataEnd => \&_twig_cdataend, Proc => \&_twig_pi, Comment => \&_twig_comment, Default => \&_twig_default, ExternEnt => \&_twig_extern_ent, ); # handlers used when twig_roots is used and we are outside of the roots my %twig_handlers_roots= ( Start => \&_twig_start_check_roots, End => \&_twig_end_check_roots, Doctype => \&_twig_doctype, Char => undef, Entity => undef, XMLDecl => \&_twig_xmldecl, Element => undef, Attlist => undef, CdataStart => undef, CdataEnd => undef, Proc => undef, Comment => undef, Proc => \&_twig_pi_check_roots, Default => sub {}, # hack needed for XML::Parser 2.27 ExternEnt => \&_twig_extern_ent, ); # handlers used when twig_roots and print_outside_roots are used and we are # outside of the roots my %twig_handlers_roots_print_2_30= ( Start => \&_twig_start_check_roots, End => \&_twig_end_check_roots, Char => \&_twig_print, Entity => \&_twig_print_entity, ExternEnt => \&_twig_print_entity, DoctypeFin => \&_twig_doctype_fin_print, XMLDecl => sub { _twig_xmldecl( @_); _twig_print( @_); }, Doctype => \&_twig_print_doctype, # because recognized_string is broken here # Element => \&_twig_print, Attlist => \&_twig_print, CdataStart => \&_twig_print, CdataEnd => \&_twig_print, Proc => \&_twig_pi_check_roots, Comment => \&_twig_print, Default => \&_twig_print_check_doctype, ExternEnt => \&_twig_extern_ent, ); # handlers used when twig_roots, print_outside_roots and keep_encoding are used # and we are outside of the roots my %twig_handlers_roots_print_original_2_30= ( Start => \&_twig_start_check_roots, End => \&_twig_end_check_roots, Char => \&_twig_print_original, # I have no idea why I should not be using this handler! Entity => \&_twig_print_entity, ExternEnt => \&_twig_print_entity, DoctypeFin => \&_twig_doctype_fin_print, XMLDecl => sub { _twig_xmldecl( @_); _twig_print_original( @_) }, Doctype => \&_twig_print_original_doctype, # because original_string is broken here Element => \&_twig_print_original, Attlist => \&_twig_print_original, CdataStart => \&_twig_print_original, CdataEnd => \&_twig_print_original, Proc => \&_twig_pi_check_roots, Comment => \&_twig_print_original, Default => \&_twig_print_original_check_doctype, ); # handlers used when twig_roots and print_outside_roots are used and we are # outside of the roots my %twig_handlers_roots_print_2_27= ( Start => \&_twig_start_check_roots, End => \&_twig_end_check_roots, Char => \&_twig_print, # if the Entity handler is set then it prints the entity declaration # before the entire internal subset (including the declaration!) is output Entity => sub {}, XMLDecl => \&_twig_print, Doctype => \&_twig_print, CdataStart => \&_twig_print, CdataEnd => \&_twig_print, Proc => \&_twig_pi_check_roots, Comment => \&_twig_print, Default => \&_twig_print, ExternEnt => \&_twig_extern_ent, ); # handlers used when twig_roots, print_outside_roots and keep_encoding are used # and we are outside of the roots my %twig_handlers_roots_print_original_2_27= ( Start => \&_twig_start_check_roots, End => \&_twig_end_check_roots, Char => \&_twig_print_original, # for some reason original_string is wrong here # this can be a problem if the doctype includes non ascii characters XMLDecl => \&_twig_print, Doctype => \&_twig_print, # if the Entity handler is set then it prints the entity declaration # before the entire internal subset (including the declaration!) is output Entity => sub {}, #Element => undef, Attlist => undef, CdataStart => \&_twig_print_original, CdataEnd => \&_twig_print_original, Proc => \&_twig_pi_check_roots, Comment => \&_twig_print_original, Default => \&_twig_print, # _twig_print_original does not work ExternEnt => \&_twig_extern_ent, ); my %twig_handlers_roots_print= $parser_version > 2.27 ? %twig_handlers_roots_print_2_30 : %twig_handlers_roots_print_2_27; my %twig_handlers_roots_print_original= $parser_version > 2.27 ? %twig_handlers_roots_print_original_2_30 : %twig_handlers_roots_print_original_2_27; # handlers used when the finish_print method has been called my %twig_handlers_finish_print= ( Start => \&_twig_print, End => \&_twig_print, Char => \&_twig_print, Entity => \&_twig_print, XMLDecl => \&_twig_print, Doctype => \&_twig_print, Element => \&_twig_print, Attlist => \&_twig_print, CdataStart => \&_twig_print, CdataEnd => \&_twig_print, Proc => \&_twig_print, Comment => \&_twig_print, Default => \&_twig_print, ExternEnt => \&_twig_extern_ent, ); # handlers used when the finish_print method has been called and the keep_encoding # option is used my %twig_handlers_finish_print_original= ( Start => \&_twig_print_original, End => \&_twig_print_end_original, Char => \&_twig_print_original, Entity => \&_twig_print_original, XMLDecl => \&_twig_print_original, Doctype => \&_twig_print_original, Element => \&_twig_print_original, Attlist => \&_twig_print_original, CdataStart => \&_twig_print_original, CdataEnd => \&_twig_print_original, Proc => \&_twig_print_original, Comment => \&_twig_print_original, Default => \&_twig_print_original, ); # handlers used within ignored elements my %twig_handlers_ignore= ( Start => \&_twig_ignore_start, End => \&_twig_ignore_end, Char => undef, Entity => undef, XMLDecl => undef, Doctype => undef, Element => undef, Attlist => undef, CdataStart => undef, CdataEnd => undef, Proc => undef, Comment => undef, Default => undef, ExternEnt => undef, ); # those handlers are only used if the entities are NOT to be expanded my %twig_noexpand_handlers= ( ExternEnt => undef, Default => \&_twig_default ); my @saved_default_handler; my $ID= 'id'; # default value, set by the Id argument my $css_sel=0; # set through the css_sel option to allow .class selectors in triggers # all allowed options %valid_option= ( # XML::Twig options TwigHandlers => 1, Id => 1, TwigRoots => 1, TwigPrintOutsideRoots => 1, StartTagHandlers => 1, EndTagHandlers => 1, ForceEndTagHandlersUsage => 1, DoNotChainHandlers => 1, IgnoreElts => 1, Index => 1, AttAccessors => 1, EltAccessors => 1, FieldAccessors => 1, CharHandler => 1, TopDownHandlers => 1, KeepEncoding => 1, DoNotEscapeAmpInAtts => 1, ParseStartTag => 1, KeepAttsOrder => 1, LoadDTD => 1, DTDHandler => 1, DTDBase => 1, NoXxe => 1, DoNotOutputDTD => 1, NoProlog => 1, ExpandExternalEnts => 1, DiscardSpaces => 1, KeepSpaces => 1, DiscardAllSpaces => 1, DiscardSpacesIn => 1, KeepSpacesIn => 1, PrettyPrint => 1, EmptyTags => 1, EscapeGt => 1, Quote => 1, Comments => 1, Pi => 1, OutputFilter => 1, InputFilter => 1, OutputTextFilter => 1, OutputEncoding => 1, RemoveCdata => 1, EltClass => 1, MapXmlns => 1, KeepOriginalPrefix => 1, SkipMissingEnts => 1, # XML::Parser options ErrorContext => 1, ProtocolEncoding => 1, Namespaces => 1, NoExpand => 1, Stream_Delimiter => 1, ParseParamEnt => 1, NoLWP => 1, Non_Expat_Options => 1, Xmlns => 1, CssSel => 1, UseTidy => 1, TidyOptions => 1, OutputHtmlDoctype => 1, ); my $active_twig; # last active twig,for XML::Twig::s # predefined input and output filters use vars qw( %filter); %filter= ( html => \&html_encode, safe => \&safe_encode, safe_hex => \&safe_encode_hex, ); # trigger types (used to sort them) my ($LEVEL_TRIGGER, $REGEXP_TRIGGER, $XPATH_TRIGGER)=(1..3); sub new { my ($class, %args) = @_; my $handlers; # change all nice_perlish_names into nicePerlishNames %args= _normalize_args( %args); # check options unless( $args{MoreOptions}) { foreach my $arg (keys %args) { carp "invalid option $arg" unless $valid_option{$arg}; } } # a twig is really an XML::Parser # my $self= XML::Parser->new(%args); my $self; $self= XML::Parser->new(%args); bless $self, $class; $self->{_twig_context_stack}= []; # allow tag.class selectors in handler triggers $css_sel= $args{CssSel} || 0; if( exists $args{TwigHandlers}) { $handlers= $args{TwigHandlers}; $self->setTwigHandlers( $handlers); delete $args{TwigHandlers}; } # take care of twig-specific arguments if( exists $args{StartTagHandlers}) { $self->setStartTagHandlers( $args{StartTagHandlers}); delete $args{StartTagHandlers}; } if( exists $args{DoNotChainHandlers}) { $self->{twig_do_not_chain_handlers}= $args{DoNotChainHandlers}; } if( exists $args{IgnoreElts}) { # change array to hash so you can write ignore_elts => [ qw(foo bar baz)] if( isa( $args{IgnoreElts}, 'ARRAY')) { $args{IgnoreElts}= { map { $_ => 1 } @{$args{IgnoreElts}} }; } $self->setIgnoreEltsHandlers( $args{IgnoreElts}); delete $args{IgnoreElts}; } if( exists $args{Index}) { my $index= $args{Index}; # we really want a hash name => path, we turn an array into a hash if necessary if( ref( $index) eq 'ARRAY') { my %index= map { $_ => $_ } @$index; $index= \%index; } while( my( $name, $exp)= each %$index) { $self->setTwigHandler( $exp, sub { push @{$_[0]->{_twig_index}->{$name}}, $_; 1; }); } } $self->{twig_elt_class}= $args{EltClass} || 'XML::Twig::Elt'; if( defined( $args{EltClass}) && $args{EltClass} ne 'XML::Twig::Elt') { $self->{twig_alt_elt_class}=1; } if( exists( $args{EltClass})) { delete $args{EltClass}; } if( exists( $args{MapXmlns})) { $self->{twig_map_xmlns}= $args{MapXmlns}; $self->{Namespaces}=1; delete $args{MapXmlns}; } if( exists( $args{KeepOriginalPrefix})) { $self->{twig_keep_original_prefix}= $args{KeepOriginalPrefix}; delete $args{KeepOriginalPrefix}; } $self->{twig_dtd_handler}= $args{DTDHandler}; delete $args{DTDHandler}; if( $args{ExpandExternalEnts}) { $self->set_expand_external_entities( 1); $self->{twig_expand_external_ents}= $args{ExpandExternalEnts}; $self->{twig_read_external_dtd}= 1; # implied by ExpandExternalEnts if( $args{ExpandExternalEnts} == -1) { $self->{twig_extern_ent_nofail}= 1; $self->setHandlers( ExternEnt => \&_twig_extern_ent_nofail); } delete $args{LoadDTD}; delete $args{ExpandExternalEnts}; } else { $self->set_expand_external_entities( 0); } if( !$args{NoLWP} && ! _use( 'URI') && ! _use( 'URI::File') && ! _use( 'LWP')) { $self->{twig_ext_ent_handler}= \&XML::Parser::initial_ext_ent_handler } elsif( $args{NoXxe}) { $self->{twig_ext_ent_handler}= sub { my($xp, $base, $path) = @_; $xp->{ErrorMessage}.= "cannot use entities in document when the no_xxe option is on"; return undef; }; } else { $self->{twig_ext_ent_handler}= \&XML::Parser::file_ext_ent_handler } if( $args{DoNotEscapeAmpInAtts}) { $self->set_do_not_escape_amp_in_atts( 1); $self->{twig_do_not_escape_amp_in_atts}=1; } else { $self->set_do_not_escape_amp_in_atts( 0); $self->{twig_do_not_escape_amp_in_atts}=0; } # deal with TwigRoots argument, a hash of elements for which # subtrees will be built (and associated handlers) if( $args{TwigRoots}) { $self->setTwigRoots( $args{TwigRoots}); delete $args{TwigRoots}; } if( $args{EndTagHandlers}) { unless ($self->{twig_roots} || $args{ForceEndTagHandlersUsage}) { croak "you should not use EndTagHandlers without TwigRoots\n", "if you want to use it anyway, normally because you have ", "a start_tag_handlers that calls 'ignore' and you want to ", "call an ent_tag_handlers at the end of the element, then ", "pass 'force_end_tag_handlers_usage => 1' as an argument ", "to new"; } $self->setEndTagHandlers( $args{EndTagHandlers}); delete $args{EndTagHandlers}; } if( $args{TwigPrintOutsideRoots}) { croak "cannot use twig_print_outside_roots without twig_roots" unless( $self->{twig_roots}); # if the arg is a filehandle then store it if( _is_fh( $args{TwigPrintOutsideRoots}) ) { $self->{twig_output_fh}= $args{TwigPrintOutsideRoots}; } $self->{twig_default_print}= $args{TwigPrintOutsideRoots}; } # space policy if( $args{KeepSpaces}) { croak "cannot use both keep_spaces and discard_spaces" if( $args{DiscardSpaces}); croak "cannot use both keep_spaces and discard_all_spaces" if( $args{DiscardAllSpaces}); croak "cannot use both keep_spaces and keep_spaces_in" if( $args{KeepSpacesIn}); $self->{twig_keep_spaces}=1; delete $args{KeepSpaces}; } if( $args{DiscardSpaces}) { croak "cannot use both discard_spaces and keep_spaces_in" if( $args{KeepSpacesIn}); croak "cannot use both discard_spaces and discard_all_spaces" if( $args{DiscardAllSpaces}); croak "cannot use both discard_spaces and discard_spaces_in" if( $args{DiscardSpacesIn}); $self->{twig_discard_spaces}=1; delete $args{DiscardSpaces}; } if( $args{KeepSpacesIn}) { croak "cannot use both keep_spaces_in and discard_spaces_in" if( $args{DiscardSpacesIn}); croak "cannot use both keep_spaces_in and discard_all_spaces" if( $args{DiscardAllSpaces}); $self->{twig_discard_spaces}=1; $self->{twig_keep_spaces_in}={}; my @tags= @{$args{KeepSpacesIn}}; foreach my $tag (@tags) { $self->{twig_keep_spaces_in}->{$tag}=1; } delete $args{KeepSpacesIn}; } if( $args{DiscardAllSpaces}) { croak "cannot use both discard_all_spaces and discard_spaces_in" if( $args{DiscardSpacesIn}); $self->{twig_discard_all_spaces}=1; delete $args{DiscardAllSpaces}; } if( $args{DiscardSpacesIn}) { $self->{twig_keep_spaces}=1; $self->{twig_discard_spaces_in}={}; my @tags= @{$args{DiscardSpacesIn}}; foreach my $tag (@tags) { $self->{twig_discard_spaces_in}->{$tag}=1; } delete $args{DiscardSpacesIn}; } # discard spaces by default $self->{twig_discard_spaces}= 1 unless( $self->{twig_keep_spaces}); $args{Comments}||= $COMMENTS_DEFAULT; if( $args{Comments} eq 'drop') { $self->{twig_keep_comments}= 0; } elsif( $args{Comments} eq 'keep') { $self->{twig_keep_comments}= 1; } elsif( $args{Comments} eq 'process') { $self->{twig_process_comments}= 1; } else { croak "wrong value for comments argument: '$args{Comments}' (should be 'drop', 'keep' or 'process')"; } delete $args{Comments}; $args{Pi}||= $PI_DEFAULT; if( $args{Pi} eq 'drop') { $self->{twig_keep_pi}= 0; } elsif( $args{Pi} eq 'keep') { $self->{twig_keep_pi}= 1; } elsif( $args{Pi} eq 'process') { $self->{twig_process_pi}= 1; } else { croak "wrong value for pi argument: '$args{Pi}' (should be 'drop', 'keep' or 'process')"; } delete $args{Pi}; if( $args{KeepEncoding}) { # set it in XML::Twig::Elt so print functions know what to do $self->set_keep_encoding( 1); $self->{parse_start_tag}= $args{ParseStartTag} || \&_parse_start_tag; delete $args{ParseStartTag} if defined( $args{ParseStartTag}) ; delete $args{KeepEncoding}; } else { $self->set_keep_encoding( 0); if( $args{ParseStartTag}) { $self->{parse_start_tag}= $args{ParseStartTag}; } else { delete $self->{parse_start_tag}; } delete $args{ParseStartTag}; } if( $args{OutputFilter}) { $self->set_output_filter( $args{OutputFilter}); delete $args{OutputFilter}; } else { $self->set_output_filter( 0); } if( $args{RemoveCdata}) { $self->set_remove_cdata( $args{RemoveCdata}); delete $args{RemoveCdata}; } else { $self->set_remove_cdata( 0); } if( $args{OutputTextFilter}) { $self->set_output_text_filter( $args{OutputTextFilter}); delete $args{OutputTextFilter}; } else { $self->set_output_text_filter( 0); } if( $args{KeepAttsOrder}) { $self->{keep_atts_order}= $args{KeepAttsOrder}; if( _use( 'Tie::IxHash')) { $self->set_keep_atts_order( $self->{keep_atts_order}); } else { croak "Tie::IxHash not available, option keep_atts_order not allowed"; } } else { $self->set_keep_atts_order( 0); } if( $args{PrettyPrint}) { $self->set_pretty_print( $args{PrettyPrint}); } if( $args{EscapeGt}) { $self->escape_gt( $args{EscapeGt}); } if( $args{EmptyTags}) { $self->set_empty_tag_style( $args{EmptyTags}) } if( exists $args{Id}) { $ID= $args{Id}; delete $args{ID}; } if( $args{NoProlog}) { $self->{no_prolog}= 1; delete $args{NoProlog}; } if( $args{DoNotOutputDTD}) { $self->{no_dtd_output}= 1; delete $args{DoNotOutputDTD}; } if( $args{LoadDTD}) { $self->{twig_read_external_dtd}= 1; delete $args{LoadDTD}; } if( $args{CharHandler}) { $self->setCharHandler( $args{CharHandler}); delete $args{CharHandler}; } if( $args{InputFilter}) { $self->set_input_filter( $args{InputFilter}); delete $args{InputFilter}; } if( $args{NoExpand}) { $self->setHandlers( %twig_noexpand_handlers); $self->{twig_no_expand}=1; } if( my $output_encoding= $args{OutputEncoding}) { $self->set_output_encoding( $output_encoding); delete $args{OutputFilter}; } if( my $tdh= $args{TopDownHandlers}) { $self->{twig_tdh}=1; delete $args{TopDownHandlers}; } if( my $acc_a= $args{AttAccessors}) { $self->att_accessors( @$acc_a); } if( my $acc_e= $args{EltAccessors}) { $self->elt_accessors( isa( $acc_e, 'ARRAY') ? @$acc_e : $acc_e); } if( my $acc_f= $args{FieldAccessors}) { $self->field_accessors( isa( $acc_f, 'ARRAY') ? @$acc_f : $acc_f); } if( $args{UseTidy}) { $self->{use_tidy}= 1; } $self->{tidy_options}= $args{TidyOptions} || {}; if( $args{OutputHtmlDoctype}) { $self->{html_doctype}= 1; } $self->set_quote( $args{Quote} || 'double'); # set handlers if( $self->{twig_roots}) { if( $self->{twig_default_print}) { if( $self->{twig_keep_encoding}) { $self->setHandlers( %twig_handlers_roots_print_original); } else { $self->setHandlers( %twig_handlers_roots_print); } } else { $self->setHandlers( %twig_handlers_roots); } } else { $self->setHandlers( %twig_handlers); } # XML::Parser::Expat does not like these handler to be set. So in order to # use the various sets of handlers on XML::Parser or XML::Parser::Expat # objects when needed, these ones have to be set only once, here, at # XML::Parser level $self->setHandlers( Init => \&_twig_init, Final => \&_twig_final); $self->{twig_entity_list}= XML::Twig::Entity_list->new; $self->{twig_notation_list}= XML::Twig::Notation_list->new; $self->{twig_id}= $ID; $self->{twig_stored_spaces}=''; $self->{twig_autoflush}= 1; # auto flush by default $self->{twig}= $self; if( $weakrefs) { weaken( $self->{twig}); } return $self; } sub parse { my $t= shift; # if called as a class method, calls nparse, which creates the twig then parses it if( !ref( $t) || !isa( $t, 'XML::Twig')) { return $t->nparse( @_); } # requires 5.006 at least (or the ${^UNICODE} causes a problem) # > perl 5.5 # trap underlying bug in IO::Handle (see RT #17500) # > perl 5.5 # croak if perl 5.8+, -CD (or PERL_UNICODE set to D) and parsing a pipe # > perl 5.5 if( $perl_version>=5.008 && ${^UNICODE} && (${^UNICODE} & 24) && isa( $_[0], 'GLOB') && -p $_[0] ) # > perl 5.5 { croak "cannot parse the output of a pipe when perl is set to use the UTF8 perlIO layer\n" # > perl 5.5 . "set the environment variable PERL_UNICODE or use the -C option (see perldoc perlrun)\n" # > perl 5.5 . "not to include 'D'"; # > perl 5.5 } # > perl 5.5 $t= eval { $t->SUPER::parse( @_); }; if( !$t && $@=~m{(syntax error at line 1, column 0, byte 0|not well-formed \(invalid token\) at line 1, column 1, byte 1)} && -f $_[0] && ( ! ref( $_[0]) || ref( $_[0])) ne 'GLOB' # -f works on a filehandle, so this make sure $_[0] is a real file ) { croak "you seem to have used the parse method on a filename ($_[0]), you probably want parsefile instead"; } return _checked_parse_result( $t, $@); } sub parsefile { my $t= shift; if( -f $_[0] && ! -s $_[0]) { return _checked_parse_result( undef, "empty file '$_[0]'"); } $t= eval { $t->SUPER::parsefile( @_); }; return _checked_parse_result( $t, $@); } sub _checked_parse_result { my( $t, $returned)= @_; if( !$t) { if( isa( $returned, 'XML::Twig') && $returned->{twig_finish_now}) { $t= $returned; delete $t->{twig_finish_now}; return $t->_twig_final; } else { _croak( $returned, 0); } } $active_twig= $t; return $t; } sub active_twig { return $active_twig; } sub finish_now { my $t= shift; $t->{twig_finish_now}=1; # XML::Parser 2.43 changed xpcroak in a way that caused test failures for XML::Twig # the change was reverted in 2.44, but this is here to ensure that tests pass with 2.43 if( $XML::Parser::VERSION == 2.43) { no warnings; $t->parser->{twig_error}= $t; *XML::Parser::Expat::xpcroak= sub { die $_[0]->{twig_error}; }; die $t; } else { die $t; } } sub parsefile_inplace { shift->_parse_inplace( parsefile => @_); } sub parsefile_html_inplace { shift->_parse_inplace( parsefile_html => @_); } sub _parse_inplace { my( $t, $method, $file, $suffix)= @_; _use( 'File::Temp') || croak "need File::Temp to use inplace methods\n"; _use( 'File::Basename'); my $tmpdir= dirname( $file); my( $tmpfh, $tmpfile)= File::Temp::tempfile( DIR => $tmpdir); my $original_fh= select $tmpfh; # we can only use binmode :utf8 if perl was compiled with useperlio # might be a problem if keep_encoding used but the file is already in utf8 if( $perl_version > 5.006 && !$t->{twig_keep_encoding} && _use_perlio()) { binmode( $tmpfh, ":utf8" ); } $t->$method( $file); select $original_fh; close $tmpfh; my $mode= (stat( $file))[2] & oct(7777); chmod $mode, $tmpfile or croak "cannot change temp file mode to $mode: $!"; if( $suffix) { my $backup; if( $suffix=~ m{\*}) { ($backup = $suffix) =~ s/\*/$file/g; } else { $backup= $file . $suffix; } rename( $file, $backup) or croak "cannot backup initial file ($file) to $backup: $!"; } rename( $tmpfile, $file) or croak "cannot rename temp file ($tmpfile) to initial file ($file): $!"; return $t; } sub parseurl { my $t= shift; $t->_parseurl( 0, @_); } sub safe_parseurl { my $t= shift; $t->_parseurl( 1, @_); } sub safe_parsefile_html { my $t= shift; eval { $t->parsefile_html( @_); }; return $@ ? $t->_reset_twig_after_error : $t; } sub safe_parseurl_html { my $t= shift; _use( 'LWP::Simple') or croak "missing LWP::Simple"; eval { $t->parse_html( LWP::Simple::get( shift()), @_); } ; return $@ ? $t->_reset_twig_after_error : $t; } sub parseurl_html { my $t= shift; _use( 'LWP::Simple') or croak "missing LWP::Simple"; $t->parse_html( LWP::Simple::get( shift()), @_); } # uses eval to catch the parser's death sub safe_parse_html { my $t= shift; eval { $t->parse_html( @_); } ; return $@ ? $t->_reset_twig_after_error : $t; } sub parsefile_html { my $t= shift; my $file= shift; my $indent= $t->{ErrorContext} ? 1 : 0; $t->set_empty_tag_style( 'html'); my $html2xml= $t->{use_tidy} ? \&_tidy_html : \&_html2xml; my $options= $t->{use_tidy} ? $t->{tidy_options} || {} : { indent => $indent, html_doctype => $t->{html_doctype} }; $t->parse( $html2xml->( _slurp( $file), $options), @_); return $t; } sub parse_html { my $t= shift; my $options= ref $_[0] && ref $_[0] eq 'HASH' ? shift() : {}; my $use_tidy= exists $options->{use_tidy} ? $options->{use_tidy} : $t->{use_tidy}; my $content= shift; my $indent= $t->{ErrorContext} ? 1 : 0; $t->set_empty_tag_style( 'html'); my $html2xml= $use_tidy ? \&_tidy_html : \&_html2xml; my $conv_options= $use_tidy ? $t->{tidy_options} || {} : { indent => $indent, html_doctype => $t->{html_doctype} }; $t->parse( $html2xml->( isa( $content, 'GLOB') ? _slurp_fh( $content) : $content, $conv_options), @_); return $t; } sub xparse { my $t= shift; my $to_parse= $_[0]; if( isa( $to_parse, 'GLOB')) { $t->parse( @_); } elsif( $to_parse=~ m{^\s*<}) { $to_parse=~ m{_parse_as_xml_or_html( @_) : $t->parse( @_); } elsif( $to_parse=~ m{^\w+://.*\.html?$}) { _use( 'LWP::Simple') or croak "missing LWP::Simple"; $t->_parse_as_xml_or_html( LWP::Simple::get( shift()), @_); } elsif( $to_parse=~ m{^\w+://}) { _use( 'LWP::Simple') or croak "missing LWP::Simple"; my $doc= LWP::Simple::get( shift); if( ! defined $doc) { $doc=''; } my $xml_parse_ok= $t->safe_parse( $doc, @_); if( $xml_parse_ok) { return $xml_parse_ok; } else { my $diag= $@; if( $doc=~ m{parse_html( $doc, @_); } else { croak $diag; } } } elsif( $to_parse=~ m{\.html?$}) { my $content= _slurp( shift); $t->_parse_as_xml_or_html( $content, @_); } else { $t->parsefile( @_); } } sub _parse_as_xml_or_html { my $t= shift; if( _is_well_formed_xml( $_[0])) { $t->parse( @_) } else { my $html2xml= $t->{use_tidy} ? \&_tidy_html : \&_html2xml; my $options= $t->{use_tidy} ? $t->{tidy_options} || {} : { indent => 0, html_doctype => $t->{html_doctype} }; my $html= $html2xml->( $_[0], $options, @_); if( _is_well_formed_xml( $html)) { $t->parse( $html); } else { croak $@; } # can't really test this because HTML::Parser or HTML::Tidy may change how they deal with bas HTML between versions } } { my $parser; sub _is_well_formed_xml { $parser ||= XML::Parser->new; eval { $parser->parse( $_[0]); }; return $@ ? 0 : 1; } } sub nparse { my $class= shift; my $to_parse= pop; $class->new( @_)->xparse( $to_parse); } sub nparse_pp { shift()->nparse( pretty_print => 'indented', @_); } sub nparse_e { shift()->nparse( error_context => 1, @_); } sub nparse_ppe { shift()->nparse( pretty_print => 'indented', error_context => 1, @_); } sub _html2xml { my( $html, $options)= @_; _use( 'HTML::TreeBuilder', '3.13') or croak "cannot parse HTML: missing HTML::TreeBuilder v >= 3.13\n"; my $tree= HTML::TreeBuilder->new; $tree->ignore_ignorable_whitespace( 0); $tree->ignore_unknown( 0); $tree->no_space_compacting( 1); $tree->store_comments( 1); $tree->store_pis(1); $tree->parse( $html); $tree->eof; my $xml=''; if( $options->{html_doctype} && exists $tree->{_decl} ) { my $decl= $tree->{_decl}->as_XML; # first try to fix declarations that are missing the SYSTEM part $decl =~ s{^\s*} { my $system= $HTML_DECL{$2} || $HTML_DECL{$DEFAULT_HTML_TYPE}; qq{} }xe; # then check that the declaration looks OK (so it parses), if not remove it, # better to parse without the declaration than to die stupidly if( $decl =~ m{}x # PUBLIC then SYSTEM || $decl =~ m{}x # just SYSTEM ) { $xml= $decl; } } $xml.= _as_XML( $tree); _fix_xml( $tree, \$xml); if( $options->{indent}) { _indent_xhtml( \$xml); } $tree->delete; $xml=~ s{\s+$}{}s; # trim end return $xml; } sub _tidy_html { my( $html, $options)= @_; _use( 'HTML::Tidy') or croak "cannot cleanup HTML using HTML::Tidy (required by the use_tidy option): $@\n"; ; my $TIDY_DEFAULTS= { output_xhtml => 1, # duh! tidy_mark => 0, # do not add the "generated by tidy" comment numeric_entities => 1, char_encoding => 'utf8', bare => 1, clean => 1, doctype => 'transitional', fix_backslash => 1, merge_divs => 0, merge_spans => 0, sort_attributes => 'alpha', indent => 0, wrap => 0, break_before_br => 0, }; $options ||= {}; my $tidy_options= { %$TIDY_DEFAULTS, %$options}; my $tidy = HTML::Tidy->new( $tidy_options); $tidy->ignore( type => 1, type => 2 ); # 1 is TIDY_WARNING, 2 is TIDY_ERROR, not clean my $xml= $tidy->clean( $html ); return $xml; } { my %xml_parser_encoding; sub _fix_xml { my( $tree, $xml)= @_; # $xml is a ref to the xml string my $max_tries=5; my $add_decl; while( ! _check_xml( $xml) && $max_tries--) { # a couple of fixes for weird HTML::TreeBuilder errors if( $@=~ m{^\s*xml (or text )?declaration not at start of (external )?entity}i) { $$xml=~ s{<\?xml.*?\?>}{}g; #warn " fixed xml declaration in the wrong place\n"; } elsif( $@=~ m{undefined entity}) { $$xml=~ s{&(amp;)?Amp;}{&}g if $HTML::TreeBuilder::VERSION < 4.00; if( _use( 'HTML::Entities::Numbered')) { $$xml=name2hex_xml( $$xml); } $$xml=~ s{&(\w+);}{ my $ent= $1; if( $ent !~ m{^(amp|lt|gt|apos|quote)$}) { "&$ent;" } }eg; } elsif( $@=~ m{&Amp; used in html}) # if $Amp; is used instead of & then HTML::TreeBuilder's as_xml is tripped (old version) { $$xml=~ s{&(amp;)?Amp;}{&}g if $HTML::TreeBuilder::VERSION < 4.00; } elsif( $@=~ m{^\s*not well-formed \(invalid token\)}) { if( $HTML::TreeBuilder::VERSION < 4.00) { $$xml=~ s{&(amp;)?Amp;}{&}g; $$xml=~ s{(<[^>]* )(\d+=)"}{$1a$2"}g; # comes out as
, "fix the attribute } my $q= ')?}{}s; #warn " added decl (encoding $encoding)\n"; } else { $$xml=~ s{^(<\?xml.*?\?>)?}{}s; #warn " converting to utf8 from $encoding\n"; $$xml= _to_utf8( $encoding, $$xml); } } else { $$xml=~ s{^(<\?xml.*?\?>)?}{}s; #warn " converting to utf8 from $encoding\n"; $$xml= _to_utf8( $encoding, $$xml); } } } } # some versions of HTML::TreeBuilder escape CDATA sections $$xml=~ s{(<!\[CDATA\[.*?\]\]>)}{_unescape_cdata( $1)}eg; } sub _xml_parser_encodings { my @encodings=( 'iso-8859-1'); # this one is included by default, there is no map for it in @INC foreach my $inc (@INC) { push @encodings, map { basename( $_, '.enc') } glob( File::Spec->catdir( $inc => XML => Parser => Encodings => '*.enc')); } return map { $_ => 1 } @encodings; } } sub _unescape_cdata { my( $cdata)= @_; $cdata=~s{<}{<}g; $cdata=~s{>}{>}g; $cdata=~s{&}{&}g; return $cdata; } sub _as_XML { # fork of HTML::Element::as_XML, which is a little too buggy and inconsistent between versions for my liking my ($elt) = @_; my $xml= ''; my $empty_element_map = $elt->_empty_element_map; my ( $tag, $node, $start ); # per-iteration scratch $elt->traverse( sub { ( $node, $start ) = @_; if ( ref $node ) { # it's an element $tag = $node->{'_tag'}; if ($start) { # on the way in foreach my $att ( grep { ! m{^(_|/$)} } keys %$node ) { # fix attribute names instead of dying my $new_att= $att; if( $att=~ m{^\d}) { $new_att= "a$att"; } $new_att=~ s{[^\w\d:_-]}{}g; $new_att ||= 'a'; if( $new_att ne $att) { $node->{$new_att}= delete $node->{$att}; } } if ( $empty_element_map->{$tag} && (!@{ $node->{'_content'} || []}) ) { $xml.= $node->starttag_XML( undef, 1 ); } else { $xml.= $node->starttag_XML(undef); } } else { # on the way out unless ( $empty_element_map->{$tag} and !@{ $node->{'_content'} || [] } ) { $xml.= $node->endtag_XML(); } # otherwise it will have been an <... /> tag. } } elsif( $node=~ /)/s, $node) # chunks are CDATA sections or normal text { $xml.= $chunk =~ m{/>/g; $html =~ s/"/"/g; $html =~ s/'/'/g; return $html; } sub _check_xml { my( $xml)= @_; # $xml is a ref to the xml string my $ok= eval { XML::Parser->new->parse( $$xml); }; #if( $ok) { warn " parse OK\n"; } return $ok; } sub _encoding_from_meta { my( $tree)= @_; my $enc="iso-8859-1"; my @meta= $tree->find( 'meta'); foreach my $meta (@meta) { if( $meta->{'http-equiv'} && ($meta->{'http-equiv'} =~ m{^\s*content-type\s*}i) && $meta->{content} && ($meta->{content} =~ m{^\s*text/html\s*;\s*charset\s*=\s*(\S*)\s*}i) ) { $enc= lc $1; #warn " encoding from meta tag is '$enc'\n"; last; } } return $enc; } { sub _to_utf8 { my( $encoding, $string)= @_; local $SIG{__DIE__}; if( _use( 'Encode')) { Encode::from_to( $string, $encoding => 'utf8', 0x0400); } # 0x0400 is Encode::FB_XMLCREF elsif( _use( 'Text::Iconv')) { my $converter = eval { Text::Iconv->new( $encoding => "utf8") }; if( $converter) { $string= $converter->convert( $string); } } elsif( _use( 'Unicode::Map8') && _use( 'Unicode::String')) { my $map= Unicode::Map8->new( $encoding); $string= $map->tou( $string)->utf8; } $string=~ s{[\x00-\x08\x0B\x0C\x0E-\x1F]}{}g; # get rid of control chars, portable in 5.6 return $string; } } sub _indent_xhtml { my( $xhtml)= @_; # $xhtml is a ref my %block_tag= map { $_ => 1 } qw( html head meta title link script base body h1 h2 h3 h4 h5 h6 p br address blockquote pre ol ul li dd dl dt table tr td th tbody tfoot thead col colgroup caption div frame frameset hr ); my $level=0; $$xhtml=~ s{( (?:|[CDATA[.*?]]>)) # ignore comments and CDATA sections | <(\w+)((?:\s+\w+\s*=\s*(?:"[^"]*"|'[^']*'))*\s*/>) # empty tag | <(\w+) # start tag |}); my $nl= $4 eq 'html' ? '' : "\n"; "$nl$indent<$4"; } elsif( $5 && $block_tag{$5}) { $level--; " 1 } qw( xsl css); my $ss= $t->{twig_elt_class}->new( $PI); if( $text_type{$type}) { $ss->_set_pi( 'xml-stylesheet', qq{type="text/$type" href="$href"}); } else { croak "unsupported style sheet type '$type'"; } $t->_add_cpi_outside_of_root( leading_cpi => $ss); return $t; } { my %used; # module => 1 if require ok, 0 otherwise my %disallowed; # for testing, refuses to _use modules in this hash sub _disallow_use ## no critic (Subroutines::ProhibitNestedSubs); { my( @modules)= @_; $disallowed{$_}= 1 foreach (@modules); } sub _allow_use ## no critic (Subroutines::ProhibitNestedSubs); { my( @modules)= @_; $disallowed{$_}= 0 foreach (@modules); } sub _use ## no critic (Subroutines::ProhibitNestedSubs); { my( $module, $version)= @_; $version ||= 0; if( $disallowed{$module}) { return 0; } if( $used{$module}) { return 1; } if( eval "require $module") { import $module; $used{$module}= 1; # no critic ProhibitStringyEval if( $version) { ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; if( ${"${module}::VERSION"} >= $version ) { return 1; } else { return 0; } } else { return 1; } } else { $used{$module}= 0; return 0; } } } # used to solve the [n] predicates while avoiding getting the entire list # needs a prototype to accept passing bare blocks sub _first_n(&$@) ## no critic (Subroutines::ProhibitSubroutinePrototypes); { my $coderef= shift; my $n= shift; my $i=0; if( $n > 0) { foreach (@_) { if( &$coderef) { $i++; return $_ if( $i == $n); } } } elsif( $n < 0) { foreach (reverse @_) { if( &$coderef) { $i--; return $_ if( $i == $n); } } } else { croak "illegal position number 0"; } return undef; } sub _slurp_uri { my( $uri, $base)= @_; if( $uri=~ m{^\w+://}) { _use( 'LWP::Simple'); return LWP::Simple::get( $uri); } else { return _slurp( _based_filename( $uri, $base)); } } sub _based_filename { my( $filename, $base)= @_; # cf. XML/Parser.pm's file_ext_ent_handler if (defined($base) and not ($filename =~ m{^(?:[\\/]|\w+:)})) { my $newpath = $base; $newpath =~ s{[^\\/:]*$}{$filename}; $filename = $newpath; } return $filename; } sub _slurp { my( $filename)= @_; my $to_slurp; open( $to_slurp, "<$filename") or croak "cannot open '$filename': $!"; local $/= undef; my $content= <$to_slurp>; close $to_slurp; return $content; } sub _slurp_fh { my( $fh)= @_; local $/= undef; my $content= <$fh>; return $content; } # I should really add extra options to allow better configuration of the # LWP::UserAgent object # this method forks (except on VMS!) # - the child gets the data and copies it to the pipe, # - the parent reads the stream and sends it to XML::Parser # the data is cut it chunks the size of the XML::Parser::Expat buffer # the method returns the twig and the status sub _parseurl { my( $t, $safe, $url, $agent)= @_; _use( 'LWP') || croak "LWP not available, needed to use parseurl methods"; if( $^O ne 'VMS') { pipe( README, WRITEME) or croak "cannot create connected pipes: $!"; if( my $pid= fork) { # parent code: parse the incoming file close WRITEME; # no need to write my $result= $safe ? $t->safe_parse( \*README) : $t->parse( \*README); close README; return $@ ? 0 : $t; } else { # child close README; # no need to read local $|=1; $agent ||= LWP::UserAgent->new; my $request = HTTP::Request->new( GET => $url); # _pass_url_content is called with chunks of data the same size as # the XML::Parser buffer my $response = $agent->request( $request, sub { _pass_url_content( \*WRITEME, @_); }, $BUFSIZE); $response->is_success or croak "$url ", $response->message; close WRITEME; CORE::exit(); # CORE is there for mod_perl (which redefines exit) } } else { # VMS branch (hard to test!) local $|=1; $agent ||= LWP::UserAgent->new; my $request = HTTP::Request->new( GET => $url); my $response = $agent->request( $request); $response->is_success or croak "$url ", $response->message; my $result= $safe ? $t->safe_parse($response->content) : $t->parse($response->content); return $@ ? 0 : $t; } } # get the (hopefully!) XML data from the URL and sub _pass_url_content { my( $fh, $data, $response, $protocol)= @_; print {$fh} $data; } sub add_options { my %args= map { $_, 1 } @_; %args= _normalize_args( %args); foreach (keys %args) { $valid_option{$_}++; } } sub _pretty_print_styles { return XML::Twig::Elt::_pretty_print_styles(); } sub _twig_store_internal_dtd { # warn " in _twig_store_internal_dtd...\n"; # DEBUG handler my( $p, $string)= @_; my $t= $p->{twig}; if( $t->{twig_keep_encoding}) { $string= $p->original_string(); } $t->{twig_doctype}->{internal} .= $string; return; } sub _twig_stop_storing_internal_dtd { # warn " in _twig_stop_storing_internal_dtd...\n"; # DEBUG handler my $p= shift; if( @saved_default_handler && defined $saved_default_handler[1]) { $p->setHandlers( @saved_default_handler); } else { $p->setHandlers( Default => undef); } $p->{twig}->{twig_doctype}->{internal}=~ s{^\s*\[}{}; $p->{twig}->{twig_doctype}->{internal}=~ s{\]\s*$}{}; return; } sub _twig_doctype_fin_print { # warn " in _twig_doctype_fin_print...\n"; # DEBUG handler my( $p)= shift; if( $p->{twig}->{twig_doctype}->{has_internal} && !$expat_1_95_2) { print ' ]>'; } return; } sub _normalize_args { my %normalized_args; while( my $key= shift ) { $key= join '', map { ucfirst } split /_/, $key; #$key= "Twig".$key unless( substr( $key, 0, 4) eq 'Twig'); $normalized_args{$key}= shift ; } return %normalized_args; } sub _is_fh { return unless $_[0]; return $_[0] if( isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar')); } sub _set_handler { my( $handlers, $whole_path, $handler)= @_; my $H_SPECIAL = qr{($ALL|$DEFAULT|$COMMENT|$TEXT)}; my $H_PI = qr{(\?|$PI)\s*(([^\s]*)\s*)}; my $H_LEVEL = qr{level \s* \( \s* ([0-9]+) \s* \)}x; my $H_REGEXP = qr{\(\?([\^xism]*)(-[\^xism]*)?:(.*)\)}x; my $H_XPATH = qr{(/?/?$REG_TAG_PART? \s* ($REG_PREDICATE\s*)?)+}x; my $prev_handler; my $cpath= $whole_path; #warn "\$cpath: '$cpath\n"; while( $cpath && $cpath=~ s{^\s*($H_SPECIAL|$H_PI|$H_LEVEL|$H_REGEXP|$H_XPATH)\s*($|\|)}{}) { my $path= $1; #warn "\$cpath: '$cpath' - $path: '$path'\n"; $prev_handler ||= $handlers->{handlers}->{string}->{$path} || undef; # $prev_handler gets the first found handler _set_special_handler ( $handlers, $path, $handler, $prev_handler) || _set_pi_handler ( $handlers, $path, $handler, $prev_handler) || _set_level_handler ( $handlers, $path, $handler, $prev_handler) || _set_regexp_handler ( $handlers, $path, $handler, $prev_handler) || _set_xpath_handler ( $handlers, $path, $handler, $prev_handler) || croak "unrecognized expression in handler: '$whole_path'"; # this both takes care of the simple (gi) handlers and store # the handler code reference for other handlers $handlers->{handlers}->{string}->{$path}= $handler; } if( $cpath) { croak "unrecognized expression in handler: '$whole_path'"; } return $prev_handler; } sub _set_special_handler { my( $handlers, $path, $handler, $prev_handler)= @_; if( $path =~ m{^\s*($ALL|$DEFAULT|$COMMENT|$TEXT)\s*$}io ) { $handlers->{handlers}->{$1}= $handler; return 1; } else { return 0; } } sub _set_xpath_handler { my( $handlers, $path, $handler, $prev_handler)= @_; if( my $handler_data= _parse_xpath_handler( $path, $handler)) { _add_handler( $handlers, $handler_data, $path, $prev_handler); return 1; } else { return 0; } } sub _add_handler { my( $handlers, $handler_data, $path, $prev_handler)= @_; my $tag= $handler_data->{tag}; my @handlers= $handlers->{xpath_handler}->{$tag} ? @{$handlers->{xpath_handler}->{$tag}} : (); if( $prev_handler) { @handlers= grep { $_->{path} ne $path } @handlers; } push @handlers, $handler_data if( $handler_data->{handler}); if( @handlers > 1) { @handlers= sort { (($b->{score}->{type} || 0) <=> ($a->{score}->{type} || 0)) || (($b->{score}->{anchored} || 0) <=> ($a->{score}->{anchored} || 0)) || (($b->{score}->{steps} || 0) <=> ($a->{score}->{steps} || 0)) || (($b->{score}->{predicates} || 0) <=> ($a->{score}->{predicates} || 0)) || (($b->{score}->{tests} || 0) <=> ($a->{score}->{tests} || 0)) || ($a->{path} cmp $b->{path}) } @handlers; } $handlers->{xpath_handler}->{$tag}= \@handlers; } sub _set_pi_handler { my( $handlers, $path, $handler, $prev_handler)= @_; # PI conditions ( '?target' => \&handler or '?' => \&handler # or '#PItarget' => \&handler or '#PI' => \&handler) if( $path=~ /^\s*(?:\?|$PI)\s*(?:([^\s]*)\s*)$/) { my $target= $1 || ''; # update the path_handlers count, knowing that # either the previous or the new handler can be undef $handlers->{pi_handlers}->{$1}= $handler; return 1; } else { return 0; } } sub _set_level_handler { my( $handlers, $path, $handler, $prev_handler)= @_; if( $path =~ m{^ \s* level \s* \( \s* ([0-9]+) \s* \) \s* $}ox ) { my $level= $1; my $sub= sub { my( $stack)= @_; return( ($stack->[-1]->{$ST_TAG} !~ m{^#}) && (scalar @$stack == $level + 1) ) }; my $handler_data= { tag=> '*', score => { type => $LEVEL_TRIGGER}, trigger => $sub, path => $path, handler => $handler, test_on_text => 0 }; _add_handler( $handlers, $handler_data, $path, $prev_handler); return 1; } else { return 0; } } sub _set_regexp_handler { my( $handlers, $path, $handler, $prev_handler)= @_; # if the expression was a regexp it is now a string (it was stringified when it became a hash key) if( $path=~ m{^\(\?([\^xism]*)(?:-[\^xism]*)?:(.*)\)$}) { my $regexp= qr/(?$1:$2)/; # convert it back into a regexp my $sub= sub { my( $stack)= @_; return( $stack->[-1]->{$ST_TAG} =~ $regexp ) }; my $handler_data= { tag=> '*', score => { type => $REGEXP_TRIGGER} , trigger => $sub, path => $path, handler => $handler, test_on_text => 0 }; _add_handler( $handlers, $handler_data, $path, $prev_handler); return 1; } else { return 0; } } my $DEBUG_HANDLER= 0; # 0 or 1 (output the handler checking code) or 2 (super verbose) my $handler_string; # store the handler itself sub _set_debug_handler { $DEBUG_HANDLER= shift; } sub _warn_debug_handler { if( $DEBUG_HANDLER < 3) { warn @_; } else { $handler_string .= join( '', @_); } } sub _return_debug_handler { my $string= $handler_string; $handler_string=''; return $string; } sub _parse_xpath_handler { my( $xpath, $handler)= @_; my $xpath_original= $xpath; if( $DEBUG_HANDLER >=1) { _warn_debug_handler( "\n\nparsing path '$xpath'\n"); } my $path_to_check= $xpath; $path_to_check=~ s{/?/?$REG_TAG_PART?\s*(?:$REG_PREDICATE\s*)?}{}g; if( $DEBUG_HANDLER && $path_to_check=~ /\S/) { _warn_debug_handler( "left: $path_to_check\n"); } return if( $path_to_check=~ /\S/); (my $xpath_to_display= $xpath)=~ s{(["{}'\[\]\@\$])}{\\$1}g; my @xpath_steps; my $last_token_is_sep; while( $xpath=~ s{^\s* ( (//?) # separator | (?:$REG_TAG_PART\s*(?:$REG_PREDICATE\s*)?) # tag name and optional predicate | (?:$REG_PREDICATE) # just a predicate ) } {}x ) { # check that we have alternating separators and steps if( $2) # found a separator { if( $last_token_is_sep) { return 0; } # 2 separators in a row $last_token_is_sep= 1; } else { if( defined( $last_token_is_sep) && !$last_token_is_sep) { return 0; } # 2 steps in a row $last_token_is_sep= 0; } push @xpath_steps, $1; } if( $last_token_is_sep) { return 0; } # expression cannot end with a separator my $i=-1; my $perlfunc= _join_n( $NO_WARNINGS . ';', q|my( $stack)= @_; |, q|my @current_elts= (scalar @$stack); |, q|my @new_current_elts; |, q|my $elt; |, ($DEBUG_HANDLER >= 1) && (qq#warn q{checking path '$xpath_to_display'\n};#), ); my $last_tag=''; my $anchored= $xpath_original=~ m{^\s*/(?!/)} ? 1 : 0; my $score={ type => $XPATH_TRIGGER, anchored => $anchored }; my $flag= { test_on_text => 0 }; my $sep='/'; # '/' or '//' while( my $xpath_step= pop @xpath_steps) { my( $tag, $predicate)= $xpath_step =~ m{^($REG_TAG_PART)?(?:\[(.*)\])?\s*$}; $score->{steps}++; $tag||='*'; my $warn_empty_stack= $DEBUG_HANDLER >= 2 ? qq{warn "return with empty stack\\n";} : ''; if( $predicate) { if( $DEBUG_HANDLER >= 2) { _warn_debug_handler( "predicate is: '$predicate'\n"); } # changes $predicate (from an XPath expression to a Perl one) if( $predicate=~ m{^\s*$REG_NUMBER\s*$}) { croak "position selector [$predicate] not supported on twig_handlers"; } _parse_predicate_in_handler( $predicate, $flag, $score); if( $DEBUG_HANDLER >= 2) { _warn_debug_handler( "predicate becomes: '$predicate'\n"); } } my $tag_cond= _tag_cond( $tag); my $cond= join( " && ", grep { $_ } $tag_cond, $predicate) || 1; if( $css_sel && $tag=~ m{\.}) { $tag=~s{\.[^.]*$}{}; $tag ||='*'; } $tag=~ s{(.)#.+$}{$1}; $last_tag ||= $tag; if( $sep eq '/') { $perlfunc .= sprintf( _join_n( q#foreach my $current_elt (@current_elts) #, q# { next if( !$current_elt); #, q# $current_elt--; #, q# $elt= $stack->[$current_elt]; #, q# if( %s) { push @new_current_elts, $current_elt;} #, q# } #, ), $cond ); } elsif( $sep eq '//') { $perlfunc .= sprintf( _join_n( q#foreach my $current_elt (@current_elts) #, q# { next if( !$current_elt); #, q# $current_elt--; #, q# my $candidate= $current_elt; #, q# while( $candidate >=0) #, q# { $elt= $stack->[$candidate]; #, q# if( %s) { push @new_current_elts, $candidate;} #, q# $candidate--; #, q# } #, q# } #, ), $cond ); } my $warn= $DEBUG_HANDLER >= 2 ? _join_n( qq#warn qq%fail at cond '$cond'%;#) : ''; $perlfunc .= sprintf( _join_n( q#unless( @new_current_elts) { %s return 0; } #, q#@current_elts= @new_current_elts; #, q#@new_current_elts=(); #, ), $warn ); $sep= pop @xpath_steps; } if( $anchored) # there should be a better way, but this works { my $warn= $DEBUG_HANDLER >= 2 ? _join_n( qq#warn qq{fail, stack not empty};#) : ''; $perlfunc .= sprintf( _join_n( q#if( ! grep { $_ == 0 } @current_elts) { %s return 0;}#), $warn); } $perlfunc.= qq{warn "handler for '$xpath_to_display' triggered\\n";\n} if( $DEBUG_HANDLER >=2); $perlfunc.= qq{return q{$xpath_original};\n}; _warn_debug_handler( "\nperlfunc:\n$perlfunc\n") if( $DEBUG_HANDLER>=1); my $s= eval "sub { $perlfunc }"; if( $@) { croak "wrong handler condition '$xpath' ($@);" } _warn_debug_handler( "last tag: '$last_tag', test_on_text: '$flag->{test_on_text}'\n") if( $DEBUG_HANDLER >=1); _warn_debug_handler( "score: ", join( ' ', map { "$_: $score->{$_}" } sort keys %$score), "\n") if( $DEBUG_HANDLER >=1); return { tag=> $last_tag, score => $score, trigger => $s, path => $xpath_original, handler => $handler, test_on_text => $flag->{test_on_text} }; } sub _join_n { return join( "\n", @_, ''); } # the "tag" part can be , . or # (where tag can be *, or start with # for hidden tags) sub _tag_cond { my( $full_tag)= @_; my( $tag, $class, $id); if( $full_tag=~ m{^(.+)#(.+)$}) { ($tag, $id)= ($1, $2); } # # else { ( $tag, $class)= $css_sel ? $full_tag=~ m{^(.*?)(?:\.([^.]*))?$} : ($full_tag, undef); } my $tag_cond = $tag && $tag ne '*' ? qq#(\$elt->{'$ST_TAG'} eq "$tag")# : ''; my $id_cond = defined $id ? qq#(\$elt->{id} eq "$id")# : ''; my $class_cond = defined $class ? qq#(\$elt->{class}=~ m{(^| )$class( |\$)})# : ''; my $full_cond= join( ' && ', grep { $_ } ( $tag_cond, $class_cond, $id_cond)); return $full_cond; } # input: the predicate ($_[0]) which will be changed in place # flags, a hashref with various flags (like test_on_text) # the score sub _parse_predicate_in_handler { my( $flag, $score)= @_[1..2]; $_[0]=~ s{( ($REG_STRING) # strings |\@($REG_TAG_NAME)(\s* $REG_MATCH \s* $REG_REGEXP) # @att and regexp |\@($REG_TAG_NAME)(?=\s*(?:[><=!])) # @att followed by a comparison operator |\@($REG_TAG_NAME) # @att (not followed by a comparison operator) |=~|!~ # matching operators |([><]=?|=|!=)(?=\s*[\d+-]) # test before a number |([><]=?|=|!=) # test, other cases |($REG_FUNCTION) # no arg functions # this bit is a mess, but it is the only solution with this half-baked parser |(string\(\s*$REG_NAME\s*\)\s*$REG_MATCH\s*$REG_REGEXP) # string( child)=~ /regexp/ |(string\(\s*$REG_NAME\s*\)\s*$REG_COMP\s*$REG_STRING) # string( child) = "value" (or other test) |(string\(\s*$REG_NAME\s*\)\s*$REG_COMP\s*$REG_NUMBER) # string( child) = nb (or other test) |(and|or) # |($REG_NAME(?=\s*(and|or|$))) # nested tag name (needs to be after all other unquoted strings) |($REG_TAG_IN_PREDICATE) # nested tag name (needs to be after all other unquoted strings) )} { my( $token, $str, $att_re_name, $att_re_regexp, $att, $bare_att, $num_test, $alpha_test, $func, $str_regexp, $str_test_alpha, $str_test_num, $and_or, $tag) = ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14); $score->{predicates}++; # store tests on text (they are not always allowed) if( $func || $str_regexp || $str_test_num || $str_test_alpha ) { $flag->{test_on_text}= 1; } if( defined $str) { $token } elsif( $tag) { qq{(\$elt->{'$ST_ELT'} && \$elt->{'$ST_ELT'}->has_child( '$tag'))} } elsif( $att) { $att=~ m{^#} ? qq{ (\$elt->{'$ST_ELT'} && \$elt->{'$ST_ELT'}->{att}->{'$att'})} : qq{\$elt->{'$att'}} } elsif( $att_re_name) { $att_re_name=~ m{^#} ? qq{ (\$elt->{'$ST_ELT'} && \$elt->{'$ST_ELT'}->{att}->{'$att_re_name'}$att_re_regexp)} : qq{\$elt->{'$att_re_name'}$att_re_regexp} } # for some reason Devel::Cover flags the following lines as not tested. They are though. elsif( $bare_att) { $bare_att=~ m{^#} ? qq{(\$elt->{'$ST_ELT'} && defined(\$elt->{'$ST_ELT'}->{att}->{'$bare_att'}))} : qq{defined( \$elt->{'$bare_att'})} } elsif( $num_test && ($num_test eq '=') ) { "==" } # others tests are unchanged elsif( $alpha_test) { $PERL_ALPHA_TEST{$alpha_test} } elsif( $func && $func=~ m{^string}) { "\$elt->{'$ST_ELT'}->text"; } elsif( $str_regexp && $str_regexp =~ m{string\(\s*($REG_TAG_NAME)\s*\)\s*($REG_MATCH)\s*($REG_REGEXP)}) { "defined( _first_n { \$_->text $2 $3 } 1, \$elt->{'$ST_ELT'}->_children( '$1'))"; } elsif( $str_test_alpha && $str_test_alpha =~ m{string\(\s*($REG_TAG_NAME)\s*\)\s*($REG_COMP)\s*($REG_STRING)}) { my( $tag, $op, $str)= ($1, $2, $3); $str=~ s{(?<=.)'(?=.)}{\\'}g; # escape a quote within the string $str=~ s{^"}{'}; $str=~ s{"$}{'}; "defined( _first_n { \$_->text $PERL_ALPHA_TEST{$op} $str } 1, \$elt->{'$ST_ELT'}->children( '$tag'))"; } elsif( $str_test_num && $str_test_num =~ m{string\(\s*($REG_TAG_NAME)\s*\)\s*($REG_COMP)\s*($REG_NUMBER)}) { my $test= ($2 eq '=') ? '==' : $2; "defined( _first_n { \$_->text $test $3 } 1, \$elt->{'$ST_ELT'}->children( '$1'))"; } elsif( $and_or) { $score->{tests}++; $and_or eq 'and' ? '&&' : '||' ; } else { $token; } }gexs; } sub setCharHandler { my( $t, $handler)= @_; $t->{twig_char_handler}= $handler; } sub _reset_handlers { my $handlers= shift; delete $handlers->{handlers}; delete $handlers->{path_handlers}; delete $handlers->{subpath_handlers}; $handlers->{attcond_handlers_exp}=[] if( $handlers->{attcond_handlers}); delete $handlers->{attcond_handlers}; } sub _set_handlers { my $handlers= shift || return; my $set_handlers= {}; foreach my $path (keys %{$handlers}) { _set_handler( $set_handlers, $path, $handlers->{$path}); } return $set_handlers; } sub setTwigHandler { my( $t, $path, $handler)= @_; $t->{twig_handlers} ||={}; return _set_handler( $t->{twig_handlers}, $path, $handler); } sub setTwigHandlers { my( $t, $handlers)= @_; my $previous_handlers= $t->{twig_handlers} || undef; _reset_handlers( $t->{twig_handlers}); $t->{twig_handlers}= _set_handlers( $handlers); return $previous_handlers; } sub setStartTagHandler { my( $t, $path, $handler)= @_; $t->{twig_starttag_handlers}||={}; return _set_handler( $t->{twig_starttag_handlers}, $path, $handler); } sub setStartTagHandlers { my( $t, $handlers)= @_; my $previous_handlers= $t->{twig_starttag_handlers} || undef; _reset_handlers( $t->{twig_starttag_handlers}); $t->{twig_starttag_handlers}= _set_handlers( $handlers); return $previous_handlers; } sub setIgnoreEltsHandler { my( $t, $path, $action)= @_; $t->{twig_ignore_elts_handlers}||={}; return _set_handler( $t->{twig_ignore_elts_handlers}, $path, $action ); } sub setIgnoreEltsHandlers { my( $t, $handlers)= @_; my $previous_handlers= $t->{twig_ignore_elts_handlers}; _reset_handlers( $t->{twig_ignore_elts_handlers}); $t->{twig_ignore_elts_handlers}= _set_handlers( $handlers); return $previous_handlers; } sub setEndTagHandler { my( $t, $path, $handler)= @_; $t->{twig_endtag_handlers}||={}; return _set_handler( $t->{twig_endtag_handlers}, $path,$handler); } sub setEndTagHandlers { my( $t, $handlers)= @_; my $previous_handlers= $t->{twig_endtag_handlers}; _reset_handlers( $t->{twig_endtag_handlers}); $t->{twig_endtag_handlers}= _set_handlers( $handlers); return $previous_handlers; } # a little more complex: set the twig_handlers only if a code ref is given sub setTwigRoots { my( $t, $handlers)= @_; my $previous_roots= $t->{twig_roots}; _reset_handlers($t->{twig_roots}); $t->{twig_roots}= _set_handlers( $handlers); _check_illegal_twig_roots_handlers( $t->{twig_roots}); foreach my $path (keys %{$handlers}) { $t->{twig_handlers}||= {}; _set_handler( $t->{twig_handlers}, $path, $handlers->{$path}) if( ref($handlers->{$path}) && isa( $handlers->{$path}, 'CODE')); } return $previous_roots; } sub _check_illegal_twig_roots_handlers { my( $handlers)= @_; foreach my $tag_handlers (values %{$handlers->{xpath_handler}}) { foreach my $handler_data (@$tag_handlers) { if( my $type= $handler_data->{test_on_text}) { croak "string() condition not supported on twig_roots option"; } } } return; } # just store the reference to the expat object in the twig sub _twig_init { # warn " in _twig_init...\n"; # DEBUG handler my $p= shift; my $t=$p->{twig}; if( $t->{twig_parsing} ) { croak "cannot reuse a twig that is already parsing"; } $t->{twig_parsing}=1; $t->{twig_parser}= $p; if( $weakrefs) { weaken( $t->{twig_parser}); } # in case they had been created by a previous parse delete $t->{twig_dtd}; delete $t->{twig_doctype}; delete $t->{twig_xmldecl}; delete $t->{twig_root}; # if needed set the output filehandle $t->_set_fh_to_twig_output_fh(); return; } # uses eval to catch the parser's death sub safe_parse { my $t= shift; eval { $t->parse( @_); } ; return $@ ? $t->_reset_twig_after_error : $t; } sub safe_parsefile { my $t= shift; eval { $t->parsefile( @_); } ; return $@ ? $t->_reset_twig_after_error : $t; } # restore a twig in a proper state so it can be reused for a new parse sub _reset_twig { my $t= shift; $t->{twig_parsing}= 0; delete $t->{twig_current}; delete $t->{extra_data}; delete $t->{twig_dtd}; delete $t->{twig_in_pcdata}; delete $t->{twig_in_cdata}; delete $t->{twig_stored_space}; delete $t->{twig_entity_list}; $t->root->delete if( $t->root); delete $t->{twig_root}; return $t; } sub _reset_twig_after_error { my $t= shift; $t->_reset_twig; return undef; } sub _add_or_discard_stored_spaces { my $t= shift; $t->{twig_right_after_root}=0; #XX my $current= $t->{twig_current} or return; # ugly hack, with ignore on, twig_current can disappear return unless length $t->{twig_stored_spaces}; my $current_gi= $XML::Twig::index2gi[$current->{'gi'}]; if( ! $t->{twig_discard_all_spaces}) { if( ! defined( $t->{twig_space_policy}->{$current_gi})) { $t->{twig_space_policy}->{$current_gi}= _space_policy( $t, $current_gi); } if( $t->{twig_space_policy}->{$current_gi} || ($t->{twig_stored_spaces}!~ m{\n}) || $t->{twig_preserve_space}) { _insert_pcdata( $t, $t->{twig_stored_spaces} ); } } $t->{twig_stored_spaces}=''; return; } # the default twig handlers, which build the tree sub _twig_start { # warn " in _twig_start...\n"; # DEBUG handler #foreach my $s (@_) { next if ref $s; warn "$s: ", is_utf8( $s) ? "has flag" : "FLAG NOT SET"; } # YYY my ($p, $gi, @att)= @_; my $t=$p->{twig}; # empty the stored pcdata (space stored in case they are really part of # a pcdata element) or stored it if the space policy dictates so # create a pcdata element with the spaces if need be _add_or_discard_stored_spaces( $t); my $parent= $t->{twig_current}; # if we were parsing PCDATA then we exit the pcdata if( $t->{twig_in_pcdata}) { $t->{twig_in_pcdata}= 0; delete $parent->{'twig_current'}; $parent= $parent->{parent}; } # if we choose to keep the encoding then we need to parse the tag if( my $func = $t->{parse_start_tag}) { ($gi, @att)= &$func($p->original_string); } elsif( $t->{twig_entities_in_attribute}) { ($gi,@att)= _parse_start_tag( $p->recognized_string); $t->{twig_entities_in_attribute}=0; } # if we are using an external DTD, we need to fill the default attributes if( $t->{twig_read_external_dtd}) { _fill_default_atts( $t, $gi, \@att); } # filter the input data if need be if( my $filter= $t->{twig_input_filter}) { $gi= $filter->( $gi); foreach my $att (@att) { $att= $filter->($att); } } my $ns_decl; if( $t->{twig_map_xmlns}) { $ns_decl= _replace_ns( $t, \$gi, \@att); } my $elt= $t->{twig_elt_class}->new( $gi); $elt->set_atts( @att); # now we can store the tag and atts my $context= { $ST_TAG => $gi, $ST_ELT => $elt, @att}; $context->{$ST_NS}= $ns_decl if $ns_decl; if( $weakrefs) { weaken( $context->{$ST_ELT}); } push @{$t->{_twig_context_stack}}, $context; delete $parent->{'twig_current'} if( $parent); $t->{twig_current}= $elt; $elt->{'twig_current'}=1; if( $parent) { my $prev_sibling= $parent->{last_child}; if( $prev_sibling) { $prev_sibling->{next_sibling}= $elt; $elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; } $elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; unless( $parent->{first_child}) { $parent->{first_child}= $elt; } delete $parent->{empty}; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; } else { # processing root $t->set_root( $elt); # call dtd handler if need be $t->{twig_dtd_handler}->($t, $t->{twig_dtd}) if( defined $t->{twig_dtd_handler}); # set this so we can catch external entities # (the handler was modified during DTD processing) if( $t->{twig_default_print}) { $p->setHandlers( Default => \&_twig_print); } elsif( $t->{twig_roots}) { $p->setHandlers( Default => sub { return }); } else { $p->setHandlers( Default => \&_twig_default); } } $elt->{empty}= $p->recognized_string=~ m{/\s*>$}s ? 1 : 0; $elt->{extra_data}= $t->{extra_data} if( $t->{extra_data}); $t->{extra_data}=''; # if the element is ID-ed then store that info my $id= $elt->{'att'}->{$ID}; if( defined $id) { $t->{twig_id_list}->{$id}= $elt; if( $weakrefs) { weaken( $t->{twig_id_list}->{$id}); } } # call user handler if need be if( $t->{twig_starttag_handlers}) { # call all appropriate handlers my @handlers= _handler( $t, $t->{twig_starttag_handlers}, $gi); local $_= $elt; foreach my $handler ( @handlers) { $handler->($t, $elt) || last; } # call _all_ handler if needed if( my $all= $t->{twig_starttag_handlers}->{handlers}->{$ALL}) { $all->($t, $elt); } } # check if the tag is in the list of tags to be ignored if( $t->{twig_ignore_elts_handlers}) { my @handlers= _handler( $t, $t->{twig_ignore_elts_handlers}, $gi); # only the first handler counts, it contains the action (discard/print/string) if( @handlers) { my $action= shift @handlers; $t->ignore( $elt, $action); } } if( $elt->{'att'}->{'xml:space'} && ( $elt->{'att'}->{'xml:space'} eq 'preserve')) { $t->{twig_preserve_space}++; } return; } sub _replace_ns { my( $t, $gi, $atts)= @_; my $decls; foreach my $new_prefix ( $t->parser->new_ns_prefixes) { my $uri= $t->parser->expand_ns_prefix( $new_prefix); # replace the prefix if it is mapped $decls->{$new_prefix}= $uri; if( !$t->{twig_keep_original_prefix} && (my $mapped_prefix= $t->{twig_map_xmlns}->{$uri})) { $new_prefix= $mapped_prefix; } # now put the namespace declaration back in the element if( $new_prefix eq '#default') { push @$atts, "xmlns" => $uri; } else { push @$atts, "xmlns:$new_prefix" => $uri; } } if( $t->{twig_keep_original_prefix}) { # things become more complex: we need to find the original prefix # and store both prefixes my $ns_info= $t->_ns_info( $$gi); my $map_att; if( $ns_info->{mapped_prefix}) { $$gi= "$ns_info->{mapped_prefix}:$$gi"; $map_att->{$ns_info->{mapped_prefix}}= $ns_info->{prefix}; } my $att_name=1; foreach( @$atts) { if( $att_name) { my $ns_info= $t->_ns_info( $_); if( $ns_info->{mapped_prefix}) { $_= "$ns_info->{mapped_prefix}:$_"; $map_att->{$ns_info->{mapped_prefix}}= $ns_info->{prefix}; } $att_name=0; } else { $att_name=1; } } push @$atts, '#original_gi', $map_att if( $map_att); } else { $$gi= $t->_replace_prefix( $$gi); my $att_name=1; foreach( @$atts) { if( $att_name) { $_= $t->_replace_prefix( $_); $att_name=0; } else { $att_name=1; } } } return $decls; } # extract prefix, local_name, uri, mapped_prefix from a name # will only work if called from a start or end tag handler sub _ns_info { my( $t, $name)= @_; my $ns_info={}; my $p= $t->parser; $ns_info->{uri}= $p->namespace( $name); return $ns_info unless( $ns_info->{uri}); $ns_info->{prefix}= _a_proper_ns_prefix( $p, $ns_info->{uri}); $ns_info->{mapped_prefix}= $t->{twig_map_xmlns}->{$ns_info->{uri}} || $ns_info->{prefix}; return $ns_info; } sub _a_proper_ns_prefix { my( $p, $uri)= @_; foreach my $prefix ($p->current_ns_prefixes) { if( $p->expand_ns_prefix( $prefix) eq $uri) { return $prefix; } } return; } # returns the uri bound to a prefix in the original document # only works in a handler # can be used to deal with xsi:type attributes sub original_uri { my( $t, $prefix)= @_; my $ST_NS = '##ns' ; foreach my $ns (map { $_->{$ST_NS} if $_->{$ST_NS} } reverse @{$t->{_twig_context_stack}}) { return $ns->{$prefix} || next; } return; } sub _fill_default_atts { my( $t, $gi, $atts)= @_; my $dtd= $t->{twig_dtd}; my $attlist= $dtd->{att}->{$gi}; my %value= @$atts; foreach my $att (keys %$attlist) { if( !exists( $value{$att}) && exists( $attlist->{$att}->{default}) && ( $attlist->{$att}->{default} ne '#IMPLIED') ) { # the quotes are included in the default, so we need to remove them my $default_value= substr( $attlist->{$att}->{default}, 1, -1); push @$atts, $att, $default_value; } } return; } # the default function to parse a start tag (in keep_encoding mode) # can be overridden with the parse_start_tag method # only works for 1-byte character sets sub _parse_start_tag { my $string= shift; my( $gi, @atts); # get the gi (between < and the first space, / or > character) #if( $string=~ s{^<\s*([^\s>/]*)[\s>/]*}{}s) if( $string=~ s{^<\s*($REG_TAG_NAME)\s*[\s>/]}{}s) { $gi= $1; } else { croak "error parsing tag '$string'"; } while( $string=~ s{^([^\s=]*)\s*=\s*(["'])(.*?)\2\s*}{}s) { push @atts, $1, $3; } return $gi, @atts; } sub set_root { my( $t, $elt)= @_; $t->{twig_root}= $elt; if( $elt) { $elt->{twig}= $t; if( $weakrefs) { weaken( $elt->{twig}); } } return $t; } sub _twig_end { # warn " in _twig_end...\n"; # DEBUG handler my ($p, $gi) = @_; my $t=$p->{twig}; if( $t->{twig_in_pcdata} && (my $text_handler= $t->{TwigHandlers}->{$TEXT}) ) { local $_= $t->{twig_current}; $text_handler->( $t, $_) if $_; } if( $t->{twig_map_xmlns}) { $gi= $t->_replace_prefix( $gi); } _add_or_discard_stored_spaces( $t); # the new twig_current is the parent my $elt= $t->{twig_current}; delete $elt->{'twig_current'}; # if we were parsing PCDATA then we exit the pcdata too if( $t->{twig_in_pcdata}) { $t->{twig_in_pcdata}= 0; $elt= $elt->{parent} if($elt->{parent}); delete $elt->{'twig_current'}; } # parent is the new current element my $parent= $elt->{parent}; $t->{twig_current}= $parent; if( $parent) { $parent->{'twig_current'}=1; # twig_to_be_normalized if( $parent->{twig_to_be_normalized}) { $parent->normalize; $parent->{twig_to_be_normalized}=0; } } if( $t->{extra_data}) { $elt->_set_extra_data_before_end_tag( $t->{extra_data}); $t->{extra_data}=''; } if( $t->{twig_handlers}) { # look for handlers my @handlers= _handler( $t, $t->{twig_handlers}, $gi); if( $t->{twig_tdh}) { if( @handlers) { push @{$t->{twig_handlers_to_trigger}}, [ $elt, \@handlers ]; } if( my $all= $t->{twig_handlers}->{handlers}->{$ALL}) { push @{$t->{twig_handlers_to_trigger}}, [ $elt, [$all] ]; } } else { local $_= $elt; # so we can use $_ in the handlers foreach my $handler ( @handlers) { $handler->($t, $elt) || last; } # call _all_ handler if needed my $all= $t->{twig_handlers}->{handlers}->{$ALL}; if( $all) { $all->($t, $elt); } if( @handlers || $all) { $t->{twig_right_after_root}=0; } } } # if twig_roots is set for the element then set appropriate handler if( $t->{twig_root_depth} and ($p->depth == $t->{twig_root_depth}) ) { if( $t->{twig_default_print}) { # select the proper fh (and store the currently selected one) $t->_set_fh_to_twig_output_fh(); if( !$p->depth==1) { $t->{twig_right_after_root}=1; } #XX if( $t->{twig_keep_encoding}) { $p->setHandlers( %twig_handlers_roots_print_original); } else { $p->setHandlers( %twig_handlers_roots_print); } } else { $p->setHandlers( %twig_handlers_roots); } } if( $elt->{'att'}->{'xml:space'} && ( $elt->{'att'}->{'xml:space'} eq 'preserve')) { $t->{twig_preserve_space}--; } pop @{$t->{_twig_context_stack}}; return; } sub _trigger_tdh { my( $t)= @_; if( @{$t->{twig_handlers_to_trigger}}) { my @handlers_to_trigger_now= sort { $a->[0]->cmp( $b->[0]) } @{$t->{twig_handlers_to_trigger}}; foreach my $elt_handlers (@handlers_to_trigger_now) { my( $handled_elt, $handlers_to_trigger)= @$elt_handlers; foreach my $handler ( @$handlers_to_trigger) { local $_= $handled_elt; $handler->($t, $handled_elt) || last; } } } return; } # return the list of handler that can be activated for an element # (either of CODE ref's or 1's for twig_roots) sub _handler { my( $t, $handlers, $gi)= @_; my @found_handlers=(); my $found_handler; foreach my $handler ( map { @$_ } grep { $_ } $handlers->{xpath_handler}->{$gi}, $handlers->{xpath_handler}->{'*'}) { my $trigger= $handler->{trigger}; if( my $found_path= $trigger->( $t->{_twig_context_stack})) { my $found_handler= $handler->{handler}; push @found_handlers, $found_handler; } } # if no handler found call default handler if defined if( !@found_handlers && defined $handlers->{handlers}->{$DEFAULT}) { push @found_handlers, $handlers->{handlers}->{$DEFAULT}; } if( @found_handlers and $t->{twig_do_not_chain_handlers}) { @found_handlers= ($found_handlers[0]); } return @found_handlers; # empty if no handler found } sub _replace_prefix { my( $t, $name)= @_; my $p= $t->parser; my $uri= $p->namespace( $name); # try to get the namespace from default if none is found (for attributes) # this should probably be an option if( !$uri and( $name!~/^xml/)) { $uri= $p->expand_ns_prefix( '#default'); } if( $uri) { if (my $mapped_prefix= $t->{twig_map_xmlns}->{$uri} || $DEFAULT_URI2NS{$uri}) { return "$mapped_prefix:$name"; } else { my $prefix= _a_proper_ns_prefix( $p, $uri); if( $prefix eq '#default') { $prefix=''; } return $prefix ? "$prefix:$name" : $name; } } else { return $name; } } sub _twig_char { # warn " in _twig_char...\n"; # DEBUG handler my ($p, $string)= @_; my $t=$p->{twig}; if( $t->{twig_keep_encoding}) { if( !$t->{twig_in_cdata}) { $string= $p->original_string(); } else { use bytes; # > perl 5.5 if( length( $string) < 1024) { $string= $p->original_string(); } else { #warn "dodgy case"; # TODO original_string does not hold the entire string, but $string is wrong # I believe due to a bug in XML::Parser # for now, we use the original string, even if it means that it's been converted to utf8 } } } if( $t->{twig_input_filter}) { $string= $t->{twig_input_filter}->( $string); } if( $t->{twig_char_handler}) { $string= $t->{twig_char_handler}->( $string); } my $elt= $t->{twig_current}; if( $t->{twig_in_cdata}) { # text is the continuation of a previously created cdata $elt->{cdata}.= $t->{twig_stored_spaces} . $string; } elsif( $t->{twig_in_pcdata}) { # text is the continuation of a previously created pcdata if( $t->{extra_data}) { $elt->_push_extra_data_in_pcdata( $t->{extra_data}, length( $elt->{pcdata})); $t->{extra_data}=''; } $elt->{pcdata}.= $string; } else { # text is just space, which might be discarded later if( $string=~/\A\s*\Z/s) { if( $t->{extra_data}) { # we got extra data (comment, pi), lets add the spaces to it $t->{extra_data} .= $string; } else { # no extra data, just store the spaces $t->{twig_stored_spaces}.= $string; } } else { my $new_elt= _insert_pcdata( $t, $t->{twig_stored_spaces}.$string); delete $elt->{'twig_current'}; $new_elt->{'twig_current'}=1; $t->{twig_current}= $new_elt; $t->{twig_in_pcdata}=1; if( $t->{extra_data}) { $new_elt->_push_extra_data_in_pcdata( $t->{extra_data}, 0); $t->{extra_data}=''; } } } return; } sub _twig_cdatastart { # warn " in _twig_cdatastart...\n"; # DEBUG handler my $p= shift; my $t=$p->{twig}; $t->{twig_in_cdata}=1; my $cdata= $t->{twig_elt_class}->new( $CDATA); my $twig_current= $t->{twig_current}; if( $t->{twig_in_pcdata}) { # create the node as a sibling of the PCDATA $cdata->{prev_sibling}=$twig_current; if( $XML::Twig::weakrefs) { weaken( $cdata->{prev_sibling});} ; $twig_current->{next_sibling}= $cdata; my $parent= $twig_current->{parent}; $cdata->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $cdata->{parent});} ; delete $parent->{empty}; $parent->{last_child}=$cdata; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; $t->{twig_in_pcdata}=0; } else { # we have to create a PCDATA element if we need to store spaces if( $t->_space_policy($XML::Twig::index2gi[$twig_current->{'gi'}]) && $t->{twig_stored_spaces}) { _insert_pcdata( $t, $t->{twig_stored_spaces}); } $t->{twig_stored_spaces}=''; # create the node as a child of the current element $cdata->{parent}=$twig_current; if( $XML::Twig::weakrefs) { weaken( $cdata->{parent});} ; if( my $prev_sibling= $twig_current->{last_child}) { $cdata->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $cdata->{prev_sibling});} ; $prev_sibling->{next_sibling}= $cdata; } else { $twig_current->{first_child}= $cdata; } delete $twig_current->{empty}; $twig_current->{last_child}=$cdata; if( $XML::Twig::weakrefs) { weaken( $twig_current->{last_child});} ; } delete $twig_current->{'twig_current'}; $t->{twig_current}= $cdata; $cdata->{'twig_current'}=1; if( $t->{extra_data}) { $cdata->set_extra_data( $t->{extra_data}); $t->{extra_data}='' }; return; } sub _twig_cdataend { # warn " in _twig_cdataend...\n"; # DEBUG handler my $p= shift; my $t=$p->{twig}; $t->{twig_in_cdata}=0; my $elt= $t->{twig_current}; delete $elt->{'twig_current'}; my $cdata= $elt->{cdata}; $elt->{cdata}= $cdata; push @{$t->{_twig_context_stack}}, { $ST_TAG => $CDATA }; if( $t->{twig_handlers}) { # look for handlers my @handlers= _handler( $t, $t->{twig_handlers}, $CDATA); local $_= $elt; # so we can use $_ in the handlers foreach my $handler ( @handlers) { $handler->($t, $elt) || last; } } pop @{$t->{_twig_context_stack}}; $elt= $elt->{parent}; $t->{twig_current}= $elt; $elt->{'twig_current'}=1; $t->{twig_long_cdata}=0; return; } sub _pi_elt_handlers { my( $t, $pi)= @_; my $pi_handlers= $t->{twig_handlers}->{pi_handlers} || return; foreach my $handler ( $pi_handlers->{$pi->{target}}, $pi_handlers->{''}) { if( $handler) { local $_= $pi; $handler->( $t, $pi) || last; } } } sub _pi_text_handler { my( $t, $target, $data)= @_; if( my $handler= $t->{twig_handlers}->{pi_handlers}->{$target}) { return $handler->( $t, $target, $data); } if( my $handler= $t->{twig_handlers}->{pi_handlers}->{''}) { return $handler->( $t, $target, $data); } return defined( $data) && $data ne '' ? "" : "" ; } sub _comment_elt_handler { my( $t, $comment)= @_; if( my $handler= $t->{twig_handlers}->{handlers}->{$COMMENT}) { local $_= $comment; $handler->($t, $comment); } } sub _comment_text_handler { my( $t, $comment)= @_; if( my $handler= $t->{twig_handlers}->{handlers}->{$COMMENT}) { $comment= $handler->($t, $comment); if( !defined $comment || $comment eq '') { return ''; } } return ""; } sub _twig_comment { # warn " in _twig_comment...\n"; # DEBUG handler my( $p, $comment_text)= @_; my $t=$p->{twig}; if( $t->{twig_keep_encoding}) { $comment_text= substr( $p->original_string(), 4, -3); } $t->_twig_pi_comment( $p, $COMMENT, $t->{twig_keep_comments}, $t->{twig_process_comments}, '_set_comment', '_comment_elt_handler', '_comment_text_handler', $comment_text ); return; } sub _twig_pi { # warn " in _twig_pi...\n"; # DEBUG handler my( $p, $target, $data)= @_; my $t=$p->{twig}; if( $t->{twig_keep_encoding}) { my $pi_text= substr( $p->original_string(), 2, -2); ($target, $data)= split( /\s+/, $pi_text, 2); } $t->_twig_pi_comment( $p, $PI, $t->{twig_keep_pi}, $t->{twig_process_pi}, '_set_pi', '_pi_elt_handlers', '_pi_text_handler', $target, $data ); return; } sub _twig_pi_comment { my( $t, $p, $type, $keep, $process, $set, $elt_handler, $text_handler, @parser_args)= @_; if( $t->{twig_input_filter}) { foreach my $arg (@parser_args) { $arg= $t->{twig_input_filter}->( $arg); } } # if pi/comments are to be kept then we piggyback them to the current element if( $keep) { # first add spaces if( $t->{twig_stored_spaces}) { $t->{extra_data}.= $t->{twig_stored_spaces}; $t->{twig_stored_spaces}= ''; } my $extra_data= $t->$text_handler( @parser_args); $t->{extra_data}.= $extra_data; } elsif( $process) { my $twig_current= $t->{twig_current}; # defined unless we are outside of the root my $elt= $t->{twig_elt_class}->new( $type); $elt->$set( @parser_args); if( $t->{extra_data}) { $elt->set_extra_data( $t->{extra_data}); $t->{extra_data}=''; } unless( $t->root) { $t->_add_cpi_outside_of_root( leading_cpi => $elt); } elsif( $t->{twig_in_pcdata}) { # create the node as a sibling of the PCDATA $elt->paste_after( $twig_current); $t->{twig_in_pcdata}=0; } elsif( $twig_current) { # we have to create a PCDATA element if we need to store spaces if( $t->_space_policy($XML::Twig::index2gi[$twig_current->{'gi'}]) && $t->{twig_stored_spaces}) { _insert_pcdata( $t, $t->{twig_stored_spaces}); } $t->{twig_stored_spaces}=''; # create the node as a child of the current element $elt->paste_last_child( $twig_current); } else { $t->_add_cpi_outside_of_root( trailing_cpi => $elt); } if( $twig_current) { delete $twig_current->{'twig_current'}; my $parent= $elt->{parent}; $t->{twig_current}= $parent; $parent->{'twig_current'}=1; } $t->$elt_handler( $elt); } } # add a comment or pi before the first element sub _add_cpi_outside_of_root { my($t, $type, $elt)= @_; # $type is 'leading_cpi' or 'trailing_cpi' $t->{$type} ||= $t->{twig_elt_class}->new( '#CPI'); # create the node as a child of the current element $elt->paste_last_child( $t->{$type}); return $t; } sub _twig_final { # warn " in _twig_final...\n"; # DEBUG handler my $p= shift; my $t= $p->isa( 'XML::Twig') ? $p : $p->{twig}; # store trailing data if( $t->{extra_data}) { $t->{trailing_cpi_text} = $t->{extra_data}; $t->{extra_data}=''; } $t->{trailing_spaces}= $t->{twig_stored_spaces} || ''; my $s= $t->{twig_stored_spaces}; $s=~s{\n}{\\n}g; if( $t->{twig_stored_spaces}) { my $s= $t->{twig_stored_spaces}; } # restore the selected filehandle if needed $t->_set_fh_to_selected_fh(); $t->_trigger_tdh if( $t->{twig_tdh}); select $t->{twig_original_selected_fh} if($t->{twig_original_selected_fh}); # probably dodgy if( exists $t->{twig_autoflush_data}) { my @args; push @args, $t->{twig_autoflush_data}->{fh} if( $t->{twig_autoflush_data}->{fh}); push @args, @{$t->{twig_autoflush_data}->{args}} if( $t->{twig_autoflush_data}->{args}); $t->flush( @args); delete $t->{twig_autoflush_data}; $t->root->delete if $t->root; } # tries to clean-up (probably not very well at the moment) #undef $p->{twig}; undef $t->{twig_parser}; delete $t->{twig_parsing}; @{$t}{ qw( twig_parser twig_parsing _twig_context_stack twig_current) }=(); return $t; } sub _insert_pcdata { my( $t, $string)= @_; # create a new PCDATA element my $parent= $t->{twig_current}; # always defined my $elt; if( exists $t->{twig_alt_elt_class}) { $elt= $t->{twig_elt_class}->new( $PCDATA); $elt->{pcdata}= $string; } else { $elt= bless( { gi => $XML::Twig::gi2index{$PCDATA}, pcdata => $string }, 'XML::Twig::Elt'); } my $prev_sibling= $parent->{last_child}; if( $prev_sibling) { $prev_sibling->{next_sibling}= $elt; $elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; } else { $parent->{first_child}= $elt; } $elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; delete $parent->{empty}; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; $t->{twig_stored_spaces}=''; return $elt; } sub _space_policy { my( $t, $gi)= @_; my $policy; $policy=0 if( $t->{twig_discard_spaces}); $policy=1 if( $t->{twig_keep_spaces}); $policy=1 if( $t->{twig_keep_spaces_in} && $t->{twig_keep_spaces_in}->{$gi}); $policy=0 if( $t->{twig_discard_spaces_in} && $t->{twig_discard_spaces_in}->{$gi}); return $policy; } sub _twig_entity { # warn " in _twig_entity...\n"; # DEBUG handler my( $p, $name, $val, $sysid, $pubid, $ndata, $param)= @_; my $t=$p->{twig}; #{ no warnings; my $base= $p->base; warn "_twig_entity called: expand: '$t->{twig_expand_external_ents}', base: '$base', name: '$name', val: '$val', sysid: '$sysid', pubid: '$pubid', ndata: '$ndata', param: '$param'\n";} my $missing_entity=0; if( $sysid) { if($ndata) { if( ! -f _based_filename( $sysid, $p->base)) { $missing_entity= 1; } } else { if( $t->{twig_expand_external_ents}) { $val= eval { _slurp_uri( $sysid, $p->base) }; if( ! defined $val) { if( $t->{twig_extern_ent_nofail}) { $missing_entity= 1; } else { _croak( "cannot load SYSTEM entity '$name' from '$sysid': $@", 3); } } } } } my $ent=XML::Twig::Entity->new( $name, $val, $sysid, $pubid, $ndata, $param); if( $missing_entity) { $t->{twig_missing_system_entities}->{$name}= $ent; } my $entity_list= $t->entity_list; if( $entity_list) { $entity_list->add( $ent); } if( $parser_version > 2.27) { # this is really ugly, but with some versions of XML::Parser the value # of the entity is not properly returned by the default handler my $ent_decl= $ent->text; if( $t->{twig_keep_encoding}) { if( defined $ent->{val} && ($ent_decl !~ /["']/)) { my $val= $ent->{val}; $ent_decl .= $val =~ /"/ ? qq{'$val' } : qq{"$val" }; } # for my solaris box (perl 5.6.1, XML::Parser 2.31, expat?) $t->{twig_doctype}->{internal}=~ s{{twig_doctype}->{internal} .= $ent_decl unless( $t->{twig_doctype}->{internal}=~ m{{twig}; my $notation = XML::Twig::Notation->new( $name, $base, $sysid, $pubid ); my $notation_list = $t->notation_list(); if( $notation_list ) { $notation_list->add( $notation ); } # internal should get the recognized_string, but XML::Parser does not provide it # so we need to re-create it ( $notation->text) and stick it there. $t->{twig_doctype}->{internal} .= $notation->text; return; } sub _twig_extern_ent { # warn " in _twig_extern_ent...I (", $_[0]->original_string, ")\n"; # DEBUG handler my( $p, $base, $sysid, $pubid)= @_; my $t= $p->{twig}; if( $t->{twig_no_expand}) { my $ent_name= $t->{twig_keep_encoding} ? $p->original_string : $p->recognized_string; _twig_insert_ent( $t, $ent_name); return ''; } my $ent_content= eval { $t->{twig_ext_ent_handler}->( $p, $base, $sysid) }; if( ! defined $ent_content) { my $ent_name = $p->recognized_string; my $file = _based_filename( $sysid, $base); my $error_message= "cannot expand $ent_name - cannot load '$file'"; if( $t->{twig_extern_ent_nofail}) { return ""; } else { _croak( $error_message); } } return $ent_content; } # I use this so I can change the $Carp::CarpLevel (which determines how many call frames to skip when reporting an error) sub _croak { my( $message, $level)= @_; $Carp::CarpLevel= $level || 0; croak $message; } sub _twig_xmldecl { # warn " in _twig_xmldecl...\n"; # DEBUG handler my $p= shift; my $t=$p->{twig}; $t->{twig_xmldecl}||={}; # could have been set by set_output_encoding $t->{twig_xmldecl}->{version}= shift; $t->{twig_xmldecl}->{encoding}= shift; $t->{twig_xmldecl}->{standalone}= shift; return; } sub _twig_doctype { # warn " in _twig_doctype...\n"; # DEBUG handler my( $p, $name, $sysid, $pub, $internal)= @_; my $t=$p->{twig}; $t->{twig_doctype}||= {}; # create $t->{twig_doctype}->{name}= $name; # always there $t->{twig_doctype}->{sysid}= $sysid; # $t->{twig_doctype}->{pub}= $pub; # # now let's try to cope with XML::Parser 2.28 and above if( $parser_version > 2.27) { @saved_default_handler= $p->setHandlers( Default => \&_twig_store_internal_dtd, Entity => \&_twig_entity, ); $p->setHandlers( DoctypeFin => \&_twig_stop_storing_internal_dtd); $t->{twig_doctype}->{internal}=''; } else # for XML::Parser before 2.28 { $internal||=''; $internal=~ s{^\s*\[}{}; $internal=~ s{]\s*$}{}; $t->{twig_doctype}->{internal}=$internal; } # now check if we want to get the DTD info if( $t->{twig_read_external_dtd} && $sysid) { # let's build a fake document with an internal DTD if( $t->{DTDBase}) { _use( 'File::Spec'); $sysid=File::Spec->catfile($t->{DTDBase}, $sysid); } my $dtd= _slurp_uri( $sysid); # if the DTD includes an XML declaration, it needs to be moved before the DOCTYPE bit if( $dtd=~ s{^(\s*<\?xml(\s+\w+\s*=\s*("[^"]*"|'[^']*'))*\s*\?>)}{}) { $dtd= "$1<$name/>"; } else { $dtd= "<$name/>"; } $t->save_global_state(); # save the globals (they will be reset by the following new) my $t_dtd= XML::Twig->new( load_DTD => 1, ParseParamEnt => 1, error_context => $t->{ErrorContext} || 0); # create a temp twig $t_dtd->parse( $dtd); # parse it $t->{twig_dtd}= $t_dtd->{twig_dtd}; # grab the dtd info #$t->{twig_dtd_is_external}=1; $t->entity_list->_add_list( $t_dtd->entity_list) if( $t_dtd->entity_list); # grab the entity info $t->notation_list->_add_list( $t_dtd->notation_list) if( $t_dtd->notation_list); # grab the notation info $t->restore_global_state(); } return; } sub _twig_element { # warn " in _twig_element...\n"; # DEBUG handler my( $p, $name, $model)= @_; my $t=$p->{twig}; $t->{twig_dtd}||= {}; # may create the dtd $t->{twig_dtd}->{model}||= {}; # may create the model hash $t->{twig_dtd}->{elt_list}||= []; # ordered list of elements push @{$t->{twig_dtd}->{elt_list}}, $name; # store the elt $t->{twig_dtd}->{model}->{$name}= $model; # store the model if( ($parser_version > 2.27) && ($t->{twig_doctype}->{internal}=~ m{(^|>)\s*$}) ) { my $text= $XML::Twig::Elt::keep_encoding ? $p->original_string : $p->recognized_string; unless( $text) { # this version of XML::Parser does not return the text in the *_string method # we need to rebuild it $text= ""; } $t->{twig_doctype}->{internal} .= $text; } return; } sub _twig_attlist { # warn " in _twig_attlist...\n"; # DEBUG handler my( $p, $gi, $att, $type, $default, $fixed)= @_; #warn "in attlist: gi: '$gi', att: '$att', type: '$type', default: '$default', fixed: '$fixed'\n"; my $t=$p->{twig}; $t->{twig_dtd}||= {}; # create dtd if need be $t->{twig_dtd}->{$gi}||= {}; # create elt if need be #$t->{twig_dtd}->{$gi}->{att}||= {}; # create att if need be if( ($parser_version > 2.27) && ($t->{twig_doctype}->{internal}=~ m{(^|>)\s*$}) ) { my $text= $XML::Twig::Elt::keep_encoding ? $p->original_string : $p->recognized_string; unless( $text) { # this version of XML::Parser does not return the text in the *_string method # we need to rebuild it my $att_decl="$att $type"; $att_decl .= " #FIXED" if( $fixed); $att_decl .= " $default" if( defined $default); # 2 cases: there is already an attlist on that element or not if( $t->{twig_dtd}->{att}->{$gi}) { # there is already an attlist, add to it $t->{twig_doctype}->{internal}=~ s{(} { "$1$2\n" . ' ' x length( $1) . "$att_decl\n>"}es; } else { # create the attlist $t->{twig_doctype}->{internal}.= "" } } } $t->{twig_dtd}->{att}->{$gi}->{$att}= {} ; $t->{twig_dtd}->{att}->{$gi}->{$att}->{type}= $type; $t->{twig_dtd}->{att}->{$gi}->{$att}->{default}= $default if( defined $default); $t->{twig_dtd}->{att}->{$gi}->{$att}->{fixed}= $fixed; return; } sub _twig_default { # warn " in _twig_default...\n"; # DEBUG handler my( $p, $string)= @_; my $t= $p->{twig}; # we need to process the data in 2 cases: entity, or spaces after the closing tag # after the closing tag (no twig_current and root has been created) if( ! $t->{twig_current} && $t->{twig_root} && $string=~ m{^\s+$}m) { $t->{twig_stored_spaces} .= $string; } # process only if we have an entity if( $string=~ m{^&([^;]*);$}) { # the entity has to be pure pcdata, or we have a problem if( ($p->original_string=~ m{^<}) && ($p->original_string=~ m{>$}) ) { # string is a tag, entity is in an attribute $t->{twig_entities_in_attribute}=1 if( $t->{twig_do_not_escape_amp_in_atts}); } else { my $ent; if( $t->{twig_keep_encoding}) { _twig_char( $p, $string); $ent= substr( $string, 1, -1); } else { $ent= _twig_insert_ent( $t, $string); } return $ent; } } } sub _twig_insert_ent { my( $t, $string)=@_; my $twig_current= $t->{twig_current}; my $ent= $t->{twig_elt_class}->new( $ENT); $ent->{ent}= $string; _add_or_discard_stored_spaces( $t); if( $t->{twig_in_pcdata}) { # create the node as a sibling of the #PCDATA $ent->{prev_sibling}=$twig_current; if( $XML::Twig::weakrefs) { weaken( $ent->{prev_sibling});} ; $twig_current->{next_sibling}= $ent; my $parent= $twig_current->{parent}; $ent->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $ent->{parent});} ; delete $parent->{empty}; $parent->{last_child}=$ent; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; # the twig_current is now the parent delete $twig_current->{'twig_current'}; $t->{twig_current}= $parent; # we left pcdata $t->{twig_in_pcdata}=0; } else { # create the node as a child of the current element $ent->{parent}=$twig_current; if( $XML::Twig::weakrefs) { weaken( $ent->{parent});} ; if( my $prev_sibling= $twig_current->{last_child}) { $ent->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $ent->{prev_sibling});} ; $prev_sibling->{next_sibling}= $ent; } else { if( $twig_current) { $twig_current->{first_child}= $ent; } } if( $twig_current) { delete $twig_current->{empty}; $twig_current->{last_child}=$ent; if( $XML::Twig::weakrefs) { weaken( $twig_current->{last_child});} ; } } # meant to trigger entity handler, does not seem to be activated at this time #if( my $handler= $t->{twig_handlers}->{gi}->{$ENT}) # { local $_= $ent; $handler->( $t, $ent); } return $ent; } sub parser { return $_[0]->{twig_parser}; } # returns the declaration text (or a default one) sub xmldecl { my $t= shift; return '' unless( $t->{twig_xmldecl} || $t->{output_encoding}); my $decl_string; my $decl= $t->{twig_xmldecl}; if( $decl) { my $version= $decl->{version}; $decl_string= q{{output_encoding}) # or come from the document (in $decl->{encoding}) if( $t->{output_encoding}) { my $encoding= $t->{output_encoding}; $decl_string .= qq{ encoding="$encoding"}; } elsif( $decl->{encoding}) { my $encoding= $decl->{encoding}; $decl_string .= qq{ encoding="$encoding"}; } if( defined( $decl->{standalone})) { $decl_string .= q{ standalone="}; $decl_string .= $decl->{standalone} ? "yes" : "no"; $decl_string .= q{"}; } $decl_string .= "?>\n"; } else { my $encoding= $t->{output_encoding}; $decl_string= qq{}; } my $output_filter= XML::Twig::Elt::output_filter(); return $output_filter ? $output_filter->( $decl_string) : $decl_string; } sub set_doctype { my( $t, $name, $system, $public, $internal)= @_; $t->{twig_doctype}= {} unless defined $t->{twig_doctype}; my $doctype= $t->{twig_doctype}; $doctype->{name} = $name if( defined $name); $doctype->{sysid} = $system if( defined $system); $doctype->{pub} = $public if( defined $public); $doctype->{internal} = $internal if( defined $internal); } sub doctype_name { my $t= shift; my $doctype= $t->{twig_doctype} or return ''; return $doctype->{name} || ''; } sub system_id { my $t= shift; my $doctype= $t->{twig_doctype} or return ''; return $doctype->{sysid} || ''; } sub public_id { my $t= shift; my $doctype= $t->{twig_doctype} or return ''; return $doctype->{pub} || ''; } sub internal_subset { my $t= shift; my $doctype= $t->{twig_doctype} or return ''; return $doctype->{internal} || ''; } # return the dtd object sub dtd { my $t= shift; return $t->{twig_dtd}; } # return an element model, or the list of element models sub model { my $t= shift; my $elt= shift; return $t->dtd->{model}->{$elt} if( $elt); return (sort keys %{$t->dtd->{model}}); } # return the entity_list object sub entity_list { my $t= shift; return $t->{twig_entity_list}; } # return the list of entity names sub entity_names { my $t= shift; return $t->entity_list->entity_names; } # return the entity object sub entity { my $t= shift; my $entity_name= shift; return $t->entity_list->ent( $entity_name); } # return the notation_list object sub notation_list { my $t= shift; return $t->{twig_notation_list}; } # return the list of notation names sub notation_names { my $t= shift; return $t->notation_list->notation_names; } # return the notation object sub notation { my $t= shift; my $notation_name= shift; return $t->notation_list->notation( $notation_name); } sub print_prolog { my $t= shift; my $fh= isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar') ? shift : $t->{twig_output_fh} || select() || \*STDOUT; ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; print {$fh} $t->prolog( @_); } sub prolog { my $t= shift; if( $t->{no_prolog}){ return ''; } return $t->{no_prolog} ? '' : defined $t->{no_dtd_output} ? $t->xmldecl : $t->xmldecl . $t->doctype( @_); } sub doctype { my $t= shift; my %args= _normalize_args( @_); my $update_dtd = $args{UpdateDTD} || ''; my $doctype_text=''; my $doctype= $t->{twig_doctype}; if( $doctype) { $doctype_text .= qq{{name}} if( $doctype->{name}); $doctype_text .= qq{ PUBLIC "$doctype->{pub}"} if( $doctype->{pub}); $doctype_text .= qq{ SYSTEM} if( $doctype->{sysid} && !$doctype->{pub}); $doctype_text .= qq{ "$doctype->{sysid}"} if( $doctype->{sysid}); } if( $update_dtd) { if( $doctype) { my $internal=$doctype->{internal}; # awful hack, but at least it works a little better that what was there before if( $internal) { # remove entity and notation declarations (they will be re-generated from the updated entity list) $internal=~ s{]*) >\s*}{}xg; $internal=~ s{\s*}{}sxg; $internal=~ s{^\n}{}; } $internal .= $t->entity_list->text ||'' if( $t->entity_list); $internal .= $t->notation_list->text ||'' if( $t->notation_list); if( $internal) { $doctype_text .= "[\n$internal]>\n"; } } elsif( !$t->{'twig_dtd'} && ( keys %{$t->entity_list} || keys %{$t->notation_list} ) ) { $doctype_text .= "root->gi . " [\n" . $t->entity_list->text . $t->notation_list->text . "\n]>";} else { $doctype_text= $t->{twig_dtd}; $doctype_text .= $t->dtd_text; } } elsif( $doctype) { if( my $internal= $doctype->{internal}) { # add opening and closing brackets if not already there # plus some spaces and newlines for a nice formating # I test it here because I can't remember which version of # XML::Parser need it or not, nor guess which one will in the # future, so this about the best I can do $internal=~ s{^\s*(\[\s*)?}{ [\n}; $internal=~ s{\s*(\]\s*(>\s*)?)?\s*$}{\n]>\n}; # XML::Parser does not include the NOTATION declarations in the DTD # at least in the current version. So put them back #if( $t->notation_list && $internal !~ m{\n)$}{ "\n" . $t->notation_list->text . $1}es; } $doctype_text .= $internal; } } if( $doctype_text) { # terrible hack, as I can't figure out in which case the darn prolog # should get an extra > (depends on XML::Parser and expat versions) $doctype_text=~ s/(>\s*)*$/>\n/; # if($doctype_text); my $output_filter= XML::Twig::Elt::output_filter(); return $output_filter ? $output_filter->( $doctype_text) : $doctype_text; } else { return $doctype_text; } } sub _leading_cpi { my $t= shift; my $leading_cpi= $t->{leading_cpi} || return ''; return $leading_cpi->sprint( 1); } sub _trailing_cpi { my $t= shift; my $trailing_cpi= $t->{trailing_cpi} || return ''; return $trailing_cpi->sprint( 1); } sub _trailing_cpi_text { my $t= shift; return $t->{trailing_cpi_text} || ''; } sub print_to_file { my( $t, $filename)= (shift, shift); my $out_fh; # open( $out_fh, ">$filename") or _croak( "cannot create file $filename: $!"); # < perl 5.8 my $mode= $t->{twig_keep_encoding} && ! _use_perlio() ? '>' : '>:utf8'; # >= perl 5.8 open( $out_fh, $mode, $filename) or _croak( "cannot create file $filename: $!"); # >= perl 5.8 $t->print( $out_fh, @_); close $out_fh; return $t; } # probably only works on *nix (at least the chmod bit) # first print to a temporary file, then rename that file to the desired file name, then change permissions # to the original file permissions (or to the current umask) sub safe_print_to_file { my( $t, $filename)= (shift, shift); my $perm= -f $filename ? (stat $filename)[2] & 07777 : ~umask() ; XML::Twig::_use( 'File::Temp') || croak "need File::Temp to use safe_print_to_file\n"; my $tmpdir= dirname( $filename); my( $fh, $tmpfilename) = File::Temp::tempfile( DIR => $tmpdir); $t->print_to_file( $tmpfilename, @_); rename( $tmpfilename, $filename) or unlink $tmpfilename && _croak( "cannot move temporary file to $filename: $!"); chmod $perm, $filename; return $t; } sub print { my $t= shift; my $fh= isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar') ? shift : undef; my %args= _normalize_args( @_); my $old_select = defined $fh ? select $fh : undef; my $old_pretty = defined ($args{PrettyPrint}) ? $t->set_pretty_print( $args{PrettyPrint}) : undef; my $old_empty_tag = defined ($args{EmptyTags}) ? $t->set_empty_tag_style( $args{EmptyTags}) : undef; #if( !$t->{encoding} || lc( $t->{encoding}) eq 'utf-8') { my $out= $fh || \*STDOUT; binmode $out, ':utf8'; } if( $perl_version > 5.006 && ! $t->{twig_keep_encoding} && _use_perlio() ) { binmode( $fh || \*STDOUT, ":utf8" ); } print $t->prolog( %args) . $t->_leading_cpi( %args); $t->{twig_root}->print; print $t->_trailing_cpi # trailing comments and pi's (elements, in 'process' mode) . $t->_trailing_cpi_text # trailing comments and pi's (in 'keep' mode) . ( ($t->{twig_keep_spaces}||'') && ($t->{trailing_spaces} || '')) ; $t->set_pretty_print( $old_pretty) if( defined $old_pretty); $t->set_empty_tag_style( $old_empty_tag) if( defined $old_empty_tag); if( $fh) { select $old_select; } return $t; } sub flush { my $t= shift; $t->_trigger_tdh if $t->{twig_tdh}; return if( $t->{twig_completely_flushed}); my $fh= isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar') ? shift : undef; my $old_select= defined $fh ? select $fh : undef; my $up_to= ref $_[0] ? shift : undef; my %args= _normalize_args( @_); my $old_pretty; if( defined $args{PrettyPrint}) { $old_pretty= $t->set_pretty_print( $args{PrettyPrint}); delete $args{PrettyPrint}; } my $old_empty_tag_style; if( $args{EmptyTags}) { $old_empty_tag_style= $t->set_empty_tag_style( $args{EmptyTags}); delete $args{EmptyTags}; } # the "real" last element processed, as _twig_end has closed it my $last_elt; my $flush_trailing_data=0; if( $up_to) { $last_elt= $up_to; } elsif( $t->{twig_current}) { $last_elt= $t->{twig_current}->{last_child}; } else { $last_elt= $t->{twig_root}; $flush_trailing_data=1; $t->{twig_completely_flushed}=1; } # flush the DTD unless it has ready flushed (ie root has been flushed) my $elt= $t->{twig_root}; unless( $elt->{'flushed'}) { # store flush info so we can auto-flush later if( $t->{twig_autoflush}) { $t->{twig_autoflush_data}={}; $t->{twig_autoflush_data}->{fh} = $fh if( $fh); $t->{twig_autoflush_data}->{args} = \@_ if( @_); } $t->print_prolog( %args); print $t->_leading_cpi; } while( $elt) { my $next_elt; if( $last_elt && $last_elt->in( $elt)) { unless( $elt->{'flushed'}) { # just output the front tag print $elt->start_tag(); $elt->{'flushed'}=1; } $next_elt= $elt->{first_child}; } else { # an element before the last one or the last one, $next_elt= $elt->{next_sibling}; $elt->_flush(); $elt->delete; last if( $last_elt && ($elt == $last_elt)); } $elt= $next_elt; } if( $flush_trailing_data) { print $t->_trailing_cpi # trailing comments and pi's (elements, in 'process' mode) , $t->_trailing_cpi_text # trailing comments and pi's (in 'keep' mode) } select $old_select if( defined $old_select); $t->set_pretty_print( $old_pretty) if( defined $old_pretty); $t->set_empty_tag_style( $old_empty_tag_style) if( defined $old_empty_tag_style); if( my $ids= $t->{twig_id_list}) { while( my ($id, $elt)= each %$ids) { if( ! defined $elt) { delete $t->{twig_id_list}->{$id} } } } return $t; } # flushes up to an element # this method just reorders the arguments and calls flush sub flush_up_to { my $t= shift; my $up_to= shift; if( isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar')) { my $fh= shift; $t->flush( $fh, $up_to, @_); } else { $t->flush( $up_to, @_); } return $t; } # same as print except the entire document text is returned as a string sub sprint { my $t= shift; my %args= _normalize_args( @_); my $old_pretty; if( defined $args{PrettyPrint}) { $old_pretty= $t->set_pretty_print( $args{PrettyPrint}); delete $args{PrettyPrint}; } my $old_empty_tag_style; if( defined $args{EmptyTags}) { $old_empty_tag_style= $t->set_empty_tag_style( $args{EmptyTags}); delete $args{EmptyTags}; } my $string= $t->prolog( %args) # xml declaration and doctype . $t->_leading_cpi( %args) # leading comments and pi's in 'process' mode . ( ($t->{twig_root} && $t->{twig_root}->sprint) || '') . $t->_trailing_cpi # trailing comments and pi's (elements, in 'process' mode) . $t->_trailing_cpi_text # trailing comments and pi's (in 'keep' mode) ; if( $t->{twig_keep_spaces} && $t->{trailing_spaces}) { $string .= $t->{trailing_spaces}; } $t->set_pretty_print( $old_pretty) if( defined $old_pretty); $t->set_empty_tag_style( $old_empty_tag_style) if( defined $old_empty_tag_style); return $string; } # this method discards useless elements in a tree # it does the same thing as a flush except it does not print it # the second argument is an element, the last purged element # (this argument is usually set through the purge_up_to method) sub purge { my $t= shift; my $up_to= shift; $t->_trigger_tdh if $t->{twig_tdh}; # the "real" last element processed, as _twig_end has closed it my $last_elt; if( $up_to) { $last_elt= $up_to; } elsif( $t->{twig_current}) { $last_elt= $t->{twig_current}->{last_child}; } else { $last_elt= $t->{twig_root}; } my $elt= $t->{twig_root}; while( $elt) { my $next_elt; if( $last_elt && $last_elt->in( $elt)) { $elt->{'flushed'}=1; $next_elt= $elt->{first_child}; } else { # an element before the last one or the last one, $next_elt= $elt->{next_sibling}; $elt->delete; last if( $last_elt && ($elt == $last_elt) ); } $elt= $next_elt; } if( my $ids= $t->{twig_id_list}) { while( my ($id, $elt)= each %$ids) { if( ! defined $elt) { delete $t->{twig_id_list}->{$id} } } } return $t; } # flushes up to an element. This method just calls purge sub purge_up_to { my $t= shift; return $t->purge( @_); } sub root { return $_[0]->{twig_root}; } sub normalize { return $_[0]->root->normalize; } # create accessor methods on attribute names { my %accessor; # memorize accessor names so re-creating them won't trigger an error sub att_accessors { my $twig_or_class= shift; my $elt_class= ref $twig_or_class ? $twig_or_class->{twig_elt_class} : 'XML::Twig::Elt' ; ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; foreach my $att (@_) { _croak( "attempt to redefine existing method $att using att_accessors") if( $elt_class->can( $att) && !$accessor{$att}); if( !$accessor{$att}) { *{"$elt_class\::$att"}= sub :lvalue # > perl 5.5 { my $elt= shift; if( @_) { $elt->{att}->{$att}= $_[0]; } $elt->{att}->{$att}; }; $accessor{$att}=1; } } return $twig_or_class; } } { my %accessor; # memorize accessor names so re-creating them won't trigger an error sub elt_accessors { my $twig_or_class= shift; my $elt_class= ref $twig_or_class ? $twig_or_class->{twig_elt_class} : 'XML::Twig::Elt' ; # if arg is a hash ref, it's exp => name, otherwise it's a list of tags my %exp_to_alias= ref( $_[0]) && isa( $_[0], 'HASH') ? %{$_[0]} : map { $_ => $_ } @_; ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; while( my( $alias, $exp)= each %exp_to_alias ) { if( $elt_class->can( $alias) && !$accessor{$alias}) { _croak( "attempt to redefine existing method $alias using elt_accessors"); } if( !$accessor{$alias}) { *{"$elt_class\::$alias"}= sub { my $elt= shift; return wantarray ? $elt->children( $exp) : $elt->first_child( $exp); }; $accessor{$alias}=1; } } return $twig_or_class; } } { my %accessor; # memorize accessor names so re-creating them won't trigger an error sub field_accessors { my $twig_or_class= shift; my $elt_class= ref $twig_or_class ? $twig_or_class->{twig_elt_class} : 'XML::Twig::Elt' ; my %exp_to_alias= ref( $_[0]) && isa( $_[0], 'HASH') ? %{$_[0]} : map { $_ => $_ } @_; ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; while( my( $alias, $exp)= each %exp_to_alias ) { if( $elt_class->can( $alias) && !$accessor{$alias}) { _croak( "attempt to redefine existing method $exp using field_accessors"); } if( !$accessor{$alias}) { *{"$elt_class\::$alias"}= sub { my $elt= shift; $elt->field( $exp) }; $accessor{$alias}=1; } } return $twig_or_class; } } sub first_elt { my( $t, $cond)= @_; my $root= $t->root || return undef; return $root if( $root->passes( $cond)); return $root->next_elt( $cond); } sub last_elt { my( $t, $cond)= @_; my $root= $t->root || return undef; return $root->last_descendant( $cond); } sub next_n_elt { my( $t, $offset, $cond)= @_; $offset -- if( $t->root->matches( $cond) ); return $t->root->next_n_elt( $offset, $cond); } sub get_xpath { my $twig= shift; if( isa( $_[0], 'ARRAY')) { my $elt_array= shift; return _unique_elts( map { $_->get_xpath( @_) } @$elt_array); } else { return $twig->root->get_xpath( @_); } } # get a list of elts and return a sorted list of unique elts sub _unique_elts { my @sorted= sort { $a ->cmp( $b) } @_; my @unique; while( my $current= shift @sorted) { push @unique, $current unless( @unique && ($unique[-1] == $current)); } return @unique; } sub findvalue { my $twig= shift; if( isa( $_[0], 'ARRAY')) { my $elt_array= shift; return join( '', map { $_->findvalue( @_) } @$elt_array); } else { return $twig->root->findvalue( @_); } } sub findvalues { my $twig= shift; if( isa( $_[0], 'ARRAY')) { my $elt_array= shift; return map { $_->findvalues( @_) } @$elt_array; } else { return $twig->root->findvalues( @_); } } sub set_id_seed { my $t= shift; XML::Twig::Elt->set_id_seed( @_); return $t; } # return an array ref to an index, or undef sub index { my( $twig, $name, $index)= @_; return defined( $index) ? $twig->{_twig_index}->{$name}->[$index] : $twig->{_twig_index}->{$name}; } # return a list with just the root # if a condition is given then return an empty list unless the root matches sub children { my( $t, $cond)= @_; my $root= $t->root; unless( $cond && !($root->passes( $cond)) ) { return ($root); } else { return (); } } sub _children { return ($_[0]->root); } # weird, but here for completude # used to solve (non-sensical) /doc[1] XPath queries sub child { my $t= shift; my $nb= shift; return ($t->children( @_))[$nb]; } sub descendants { my( $t, $cond)= @_; my $root= $t->root; if( $root->passes( $cond) ) { return ($root, $root->descendants( $cond)); } else { return ( $root->descendants( $cond)); } } sub simplify { my $t= shift; $t->root->simplify( @_); } sub subs_text { my $t= shift; $t->root->subs_text( @_); } sub trim { my $t= shift; $t->root->trim( @_); } sub set_keep_encoding { my( $t, $keep)= @_; $t->{twig_keep_encoding}= $keep; $t->{NoExpand}= $keep; return XML::Twig::Elt::set_keep_encoding( $keep); } sub set_expand_external_entities { return XML::Twig::Elt::set_expand_external_entities( @_); } sub escape_gt { my $t= shift; $t->{twig_escape_gt}= 1; return XML::Twig::Elt::escape_gt( @_); } sub do_not_escape_gt { my $t= shift; $t->{twig_escape_gt}= 0; return XML::Twig::Elt::do_not_escape_gt( @_); } sub elt_id { return $_[0]->{twig_id_list}->{$_[1]}; } # change it in ALL twigs at the moment sub change_gi { my( $twig, $old_gi, $new_gi)= @_; my $index; return unless($index= $XML::Twig::gi2index{$old_gi}); $XML::Twig::index2gi[$index]= $new_gi; delete $XML::Twig::gi2index{$old_gi}; $XML::Twig::gi2index{$new_gi}= $index; return $twig; } # builds the DTD from the stored (possibly updated) data sub dtd_text { my $t= shift; my $dtd= $t->{twig_dtd}; my $doctype= $t->{twig_doctype} or return ''; my $string= "{name}; $string .= " [\n"; foreach my $gi (@{$dtd->{elt_list}}) { $string.= "{model}->{$gi}.">\n" ; if( $dtd->{att}->{$gi}) { my $attlist= $dtd->{att}->{$gi}; $string.= "{$att}->{fixed}) { $string.= " $att $attlist->{$att}->{type} #FIXED $attlist->{$att}->{default}"; } else { $string.= " $att $attlist->{$att}->{type} $attlist->{$att}->{default}"; } $string.= "\n"; } $string.= ">\n"; } } $string.= $t->entity_list->text if( $t->entity_list); $string.= "\n]>\n"; return $string; } # prints the DTD from the stored (possibly updated) data sub dtd_print { my $t= shift; my $fh= isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar') ? shift : undef; if( $fh) { print $fh $t->dtd_text; } else { print $t->dtd_text; } return $t; } # build the subs that call directly expat BEGIN { my @expat_methods= qw( depth in_element within_element context current_line current_column current_byte recognized_string original_string xpcroak xpcarp base current_element element_index xml_escape position_in_context); foreach my $method (@expat_methods) { ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; *{$method}= sub { my $t= shift; _croak( "calling $method after parsing is finished") unless( $t->{twig_parsing}); return $t->{twig_parser}->$method(@_); }; } } sub path { my( $t, $gi)= @_; if( $t->{twig_map_xmlns}) { return "/" . join( "/", map { $t->_replace_prefix( $_)} ($t->{twig_parser}->context, $gi)); } else { return "/" . join( "/", ($t->{twig_parser}->context, $gi)); } } sub finish { my $t= shift; return $t->{twig_parser}->finish; } # just finish the parse by printing the rest of the document sub finish_print { my( $t, $fh)= @_; my $old_fh; unless( defined $fh) { $t->_set_fh_to_twig_output_fh(); } elsif( defined $fh) { $old_fh= select $fh; $t->{twig_original_selected_fh}= $old_fh if( $old_fh); } my $p=$t->{twig_parser}; if( $t->{twig_keep_encoding}) { $p->setHandlers( %twig_handlers_finish_print); } else { $p->setHandlers( %twig_handlers_finish_print_original); } return $t; } sub set_remove_cdata { return XML::Twig::Elt::set_remove_cdata( @_); } sub output_filter { return XML::Twig::Elt::output_filter( @_); } sub set_output_filter { return XML::Twig::Elt::set_output_filter( @_); } sub output_text_filter { return XML::Twig::Elt::output_text_filter( @_); } sub set_output_text_filter { return XML::Twig::Elt::set_output_text_filter( @_); } sub set_input_filter { my( $t, $input_filter)= @_; my $old_filter= $t->{twig_input_filter}; if( !$input_filter || isa( $input_filter, 'CODE') ) { $t->{twig_input_filter}= $input_filter; } elsif( $input_filter eq 'latin1') { $t->{twig_input_filter}= latin1(); } elsif( $filter{$input_filter}) { $t->{twig_input_filter}= $filter{$input_filter}; } else { _croak( "invalid input filter: $input_filter"); } return $old_filter; } sub set_empty_tag_style { return XML::Twig::Elt::set_empty_tag_style( @_); } sub set_pretty_print { return XML::Twig::Elt::set_pretty_print( @_); } sub set_quote { return XML::Twig::Elt::set_quote( @_); } sub set_indent { return XML::Twig::Elt::set_indent( @_); } sub set_keep_atts_order { shift; return XML::Twig::Elt::set_keep_atts_order( @_); } sub keep_atts_order { return XML::Twig::Elt::keep_atts_order( @_); } sub set_do_not_escape_amp_in_atts { return XML::Twig::Elt::set_do_not_escape_amp_in_atts( @_); } # save and restore package globals (the ones in XML::Twig::Elt) # should probably return the XML::Twig object itself, but instead # returns the state (as a hashref) for backward compatibility sub save_global_state { my $t= shift; return $t->{twig_saved_state}= XML::Twig::Elt::global_state(); } sub restore_global_state { my $t= shift; XML::Twig::Elt::set_global_state( $t->{twig_saved_state}); } sub global_state { return XML::Twig::Elt::global_state(); } sub set_global_state { return XML::Twig::Elt::set_global_state( $_[1]); } sub dispose { my $t= shift; $t->DESTROY; return; } sub DESTROY { my $t= shift; if( $t->{twig_root} && isa( $t->{twig_root}, 'XML::Twig::Elt')) { $t->{twig_root}->delete } # added to break circular references undef $t->{twig}; undef $t->{twig_root}->{twig} if( $t->{twig_root}); undef $t->{twig_parser}; undef %$t;# prevents memory leaks (especially when using mod_perl) undef $t; } # return true if perl was compiled using perlio # if perl is not available return true, these days perlio should be used sub _use_perlio { my $perl= _this_perl(); return $perl ? grep /useperlio=define/, `$perl -V` : 1; } # returns the parth to the perl executable (if available) sub _this_perl { # straight from perlvar my $secure_perl_path= $Config{perlpath}; if ($^O ne 'VMS') { $secure_perl_path .= $Config{_exe} unless $secure_perl_path =~ m/$Config{_exe}$/i; } if( ! -f $secure_perl_path) { $secure_perl_path= ''; } # when perl is not available (PDK) return $secure_perl_path; } # # non standard handlers # # kludge: expat 1.95.2 calls both Default AND Doctype handlers # so if the default handler finds 'recognized_string(); if( $string eq 'setHandlers( Default => undef); $p->setHandlers( Entity => undef); $expat_1_95_2=1; } else { print $string; } return; } sub _twig_print { # warn " in _twig_print...\n"; # DEBUG handler my $p= shift; if( $expat_1_95_2 && ($p->recognized_string eq '[') && !$p->{twig}->{expat_1_95_2_seen_bracket}) { # otherwise the opening square bracket of the doctype gets printed twice $p->{twig}->{expat_1_95_2_seen_bracket}=1; } else { if( $p->{twig}->{twig_right_after_root}) { my $s= $p->recognized_string(); print $s if $s=~ m{\S}; } else { print $p->recognized_string(); } } return; } # recognized_string does not seem to work for entities, go figure! # so this handler is used to print them anyway sub _twig_print_entity { # warn " in _twig_print_entity...\n"; # DEBUG handler my $p= shift; XML::Twig::Entity->new( @_)->print; } # kludge: expat 1.95.2 calls both Default AND Doctype handlers # so if the default handler finds 'original_string(); if( $string eq 'setHandlers( Default => undef); $p->setHandlers( Entity => undef); $expat_1_95_2=1; } else { print $string; } return; } sub _twig_print_original { # warn " in _twig_print_original...\n"; # DEBUG handler my $p= shift; print $p->original_string(); return; } sub _twig_print_original_doctype { # warn " in _twig_print_original_doctype...\n"; # DEBUG handler my( $p, $name, $sysid, $pubid, $internal)= @_; if( $name) { # with recent versions of XML::Parser original_string does not work, # hence we need to rebuild the doctype declaration my $doctype=''; $doctype .= qq{} unless( $internal || $expat_1_95_2); $p->{twig}->{twig_doctype}->{has_internal}=$internal; print $doctype; } $p->setHandlers( Default => \&_twig_print_original); return; } sub _twig_print_doctype { # warn " in _twig_print_doctype...\n"; # DEBUG handler my( $p, $name, $sysid, $pubid, $internal)= @_; if( $name) { # with recent versions of XML::Parser original_string does not work, # hence we need to rebuild the doctype declaration my $doctype=''; $doctype .= qq{} unless( $internal || $expat_1_95_2); $p->{twig}->{twig_doctype}->{has_internal}=$internal; print $doctype; } $p->setHandlers( Default => \&_twig_print); return; } sub _twig_print_original_default { # warn " in _twig_print_original_default...\n"; # DEBUG handler my $p= shift; print $p->original_string(); return; } # account for the case where the element is empty sub _twig_print_end_original { # warn " in _twig_print_end_original...\n"; # DEBUG handler my $p= shift; print $p->original_string(); return; } sub _twig_start_check_roots { # warn " in _twig_start_check_roots...\n"; # DEBUG handler my $p= shift; my $gi= shift; my $t= $p->{twig}; my $fh= $t->{twig_output_fh} || select() || \*STDOUT; my $ns_decl; unless( $p->depth == 0) { if( $t->{twig_map_xmlns}) { $ns_decl= _replace_ns( $t, \$gi, \@_); } } my $context= { $ST_TAG => $gi, @_}; $context->{$ST_NS}= $ns_decl if $ns_decl; push @{$t->{_twig_context_stack}}, $context; my %att= @_; if( _handler( $t, $t->{twig_roots}, $gi)) { $p->setHandlers( %twig_handlers); # restore regular handlers $t->{twig_root_depth}= $p->depth; pop @{$t->{_twig_context_stack}}; # will be pushed back in _twig_start _twig_start( $p, $gi, @_); return; } # $tag will always be true if it needs to be printed (the tag string is never empty) my $tag= $t->{twig_default_print} ? $t->{twig_keep_encoding} ? $p->original_string : $p->recognized_string : ''; if( $p->depth == 0) { ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; print {$fh} $tag if( $tag); pop @{$t->{_twig_context_stack}}; # will be pushed back in _twig_start _twig_start( $p, $gi, @_); $t->root->{'flushed'}=1; # or the root start tag gets output the first time we flush } elsif( $t->{twig_starttag_handlers}) { # look for start tag handlers my @handlers= _handler( $t, $t->{twig_starttag_handlers}, $gi); my $last_handler_res; foreach my $handler ( @handlers) { $last_handler_res= $handler->($t, $gi, %att); last unless $last_handler_res; } ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; print {$fh} $tag if( $tag && (!@handlers || $last_handler_res)); } else { ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; print {$fh} $tag if( $tag); } return; } sub _twig_end_check_roots { # warn " in _twig_end_check_roots...\n"; # DEBUG handler my( $p, $gi, %att)= @_; my $t= $p->{twig}; # $tag can be empty (), hence the undef and the tests for defined my $tag= $t->{twig_default_print} ? $t->{twig_keep_encoding} ? $p->original_string : $p->recognized_string : undef; my $fh= $t->{twig_output_fh} || select() || \*STDOUT; if( $t->{twig_endtag_handlers}) { # look for end tag handlers my @handlers= _handler( $t, $t->{twig_endtag_handlers}, $gi); my $last_handler_res=1; foreach my $handler ( @handlers) { $last_handler_res= $handler->($t, $gi) || last; } #if( ! $last_handler_res) # { pop @{$t->{_twig_context_stack}}; warn "tested"; # return; # } } { ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; print {$fh} $tag if( defined $tag); } if( $p->depth == 0) { _twig_end( $p, $gi); $t->root->{end_tag_flushed}=1; } pop @{$t->{_twig_context_stack}}; return; } sub _twig_pi_check_roots { # warn " in _twig_pi_check_roots...\n"; # DEBUG handler my( $p, $target, $data)= @_; my $t= $p->{twig}; my $pi= $t->{twig_default_print} ? $t->{twig_keep_encoding} ? $p->original_string : $p->recognized_string : undef; my $fh= $t->{twig_output_fh} || select() || \*STDOUT; if( my $handler= $t->{twig_handlers}->{pi_handlers}->{$target} || $t->{twig_handlers}->{pi_handlers}->{''} ) { # if handler is called on pi, then it needs to be processed as a regular node my @flags= qw( twig_process_pi twig_keep_pi); my @save= @{$t}{@flags}; # save pi related flags @{$t}{@flags}= (1, 0); # override them, pi needs to be processed _twig_pi( @_); # call handler on the pi @{$t}{@flags}= @save;; # restore flag } else { ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; print {$fh} $pi if( defined( $pi)); } return; } sub _output_ignored { my( $t, $p)= @_; my $action= $t->{twig_ignore_action}; my $get_string= $t->{twig_keep_encoding} ? 'original_string' : 'recognized_string'; if( $action eq 'print' ) { print $p->$get_string; } else { my $string_ref; if( $action eq 'string') { $string_ref= \$t->{twig_buffered_string}; } elsif( ref( $action) && ref( $action) eq 'SCALAR') { $string_ref= $action; } else { _croak( "wrong ignore action: $action"); } $$string_ref .= $p->$get_string; } } sub _twig_ignore_start { # warn " in _twig_ignore_start...\n"; # DEBUG handler my( $p, $gi)= @_; my $t= $p->{twig}; $t->{twig_ignore_level}++; my $action= $t->{twig_ignore_action}; $t->_output_ignored( $p) unless $action eq 'discard'; return; } sub _twig_ignore_end { # warn " in _twig_ignore_end...\n"; # DEBUG handler my( $p, $gi)= @_; my $t= $p->{twig}; my $action= $t->{twig_ignore_action}; $t->_output_ignored( $p) unless $action eq 'discard'; $t->{twig_ignore_level}--; if( ! $t->{twig_ignore_level}) { $t->{twig_current} = $t->{twig_ignore_elt}; $t->{twig_current}->{'twig_current'}=1; $t->{twig_ignore_elt}->cut; # there could possibly be a memory leak here (delete would avoid it, # but could also delete elements that should not be deleted) # restore the saved stack to the current level splice( @{$t->{_twig_context_stack}}, $p->depth+ 1 ); #warn "stack: ", _dump_stack( $t->{_twig_context_stack}), "\n"; $p->setHandlers( @{$t->{twig_saved_handlers}}); # test for handlers if( $t->{twig_endtag_handlers}) { # look for end tag handlers my @handlers= _handler( $t, $t->{twig_endtag_handlers}, $gi); my $last_handler_res=1; foreach my $handler ( @handlers) { $last_handler_res= $handler->($t, $gi) || last; } } pop @{$t->{_twig_context_stack}}; }; return; } #sub _dump_stack { my( $stack)= @_; return join( ":", map { $_->{$ST_TAG} } @$stack); } sub ignore { my( $t, $elt, $action)= @_; my $current= $t->{twig_current}; if( ! ($elt && ref( $elt) && isa( $elt, 'XML::Twig::Elt'))) { $elt= $current; } #warn "ignore: current = ", $current->tag, ", elt = ", $elt->tag, ")\n"; # we need the ($elt == $current->{last_child}) test because the current element is set to the # parent _before_ handlers are called (and I can't figure out how to fix this) unless( ($elt == $current) || ($current->{last_child} && ($elt == $current->{last_child})) || $current->in( $elt)) { _croak( "element to be ignored must be ancestor of current element"); } $t->{twig_ignore_level}= $current == $elt ? 1 : $t->_level_in_stack( $current) - $t->_level_in_stack($elt) + 1; #warn "twig_ignore_level: $t->{twig_ignore_level} (current: ", $current->tag, ", elt: ", $elt->tag, ")\n"; $t->{twig_ignore_elt} = $elt; # save it, so we can delete it later $action ||= 'discard'; if( !($action eq 'print' || $action eq 'string' || ( ref( $action) && ref( $action) eq 'SCALAR'))) { $action= 'discard'; } $t->{twig_ignore_action}= $action; my $p= $t->{twig_parser}; my @saved_handlers= $p->setHandlers( %twig_handlers_ignore); # set handlers my $get_string= $t->{twig_keep_encoding} ? 'original_string' : 'recognized_string'; my $default_handler; if( $action ne 'discard') { if( $action eq 'print') { $p->setHandlers( Default => sub { print $_[0]->$get_string; }); } else { my $string_ref; if( $action eq 'string') { if( ! exists $t->{twig_buffered_string}) { $t->{twig_buffered_string}=''; } $string_ref= \$t->{twig_buffered_string}; } elsif( ref( $action) && ref( $action) eq 'SCALAR') { $string_ref= $action; } $p->setHandlers( Default => sub { $$string_ref .= $_[0]->$get_string; }); } $t->_output_ignored( $p, $action); } $t->{twig_saved_handlers}= \@saved_handlers; # save current handlers } sub _level_in_stack { my( $t, $elt)= @_; my $level=1; foreach my $elt_in_stack ( @{$t->{_twig_context_stack}} ) { if( $elt_in_stack->{$ST_ELT} && ($elt == $elt_in_stack->{$ST_ELT})) { return $level } $level++; } } # select $t->{twig_output_fh} and store the current selected fh sub _set_fh_to_twig_output_fh { my $t= shift; my $output_fh= $t->{twig_output_fh}; if( $output_fh && !$t->{twig_output_fh_selected}) { # there is an output fh $t->{twig_selected_fh}= select(); # store the currently selected fh $t->{twig_output_fh_selected}=1; select $output_fh; # select the output fh for the twig } } # select the fh that was stored in $t->{twig_selected_fh} # (before $t->{twig_output_fh} was selected) sub _set_fh_to_selected_fh { my $t= shift; return unless( $t->{twig_output_fh}); my $selected_fh= $t->{twig_selected_fh}; $t->{twig_output_fh_selected}=0; select $selected_fh; return; } sub encoding { return $_[0]->{twig_xmldecl}->{encoding} if( $_[0]->{twig_xmldecl}); } sub set_encoding { my( $t, $encoding)= @_; $t->{twig_xmldecl} ||={}; $t->set_xml_version( "1.0") unless( $t->xml_version); $t->{twig_xmldecl}->{encoding}= $encoding; return $t; } sub output_encoding { return $_[0]->{output_encoding}; } sub set_output_encoding { my( $t, $encoding)= @_; my $output_filter= $t->output_filter || ''; if( ($encoding && $encoding !~ m{^utf-?8$}i) || $t->{twig_keep_encoding} || $output_filter) { $t->set_output_filter( _encoding_filter( $encoding || '')); } $t->{output_encoding}= $encoding; return $t; } sub xml_version { return $_[0]->{twig_xmldecl}->{version} if( $_[0]->{twig_xmldecl}); } sub set_xml_version { my( $t, $version)= @_; $t->{twig_xmldecl} ||={}; $t->{twig_xmldecl}->{version}= $version; return $t; } sub standalone { return $_[0]->{twig_xmldecl}->{standalone} if( $_[0]->{twig_xmldecl}); } sub set_standalone { my( $t, $standalone)= @_; $t->{twig_xmldecl} ||={}; $t->set_xml_version( "1.0") unless( $t->xml_version); $t->{twig_xmldecl}->{standalone}= $standalone; return $t; } # SAX methods sub toSAX1 { _croak( "cannot use toSAX1 while parsing (use flush_toSAX1)") if (defined $_[0]->{twig_parser}); shift(@_)->_toSAX(@_, \&XML::Twig::Elt::_start_tag_data_SAX1, \&XML::Twig::Elt::_end_tag_data_SAX1 ); } sub toSAX2 { _croak( "cannot use toSAX2 while parsing (use flush_toSAX2)") if (defined $_[0]->{twig_parser}); shift(@_)->_toSAX(@_, \&XML::Twig::Elt::_start_tag_data_SAX2, \&XML::Twig::Elt::_end_tag_data_SAX2 ); } sub _toSAX { my( $t, $handler, $start_tag_data, $end_tag_data) = @_; if( my $start_document = $handler->can( 'start_document')) { $start_document->( $handler); } $t->_prolog_toSAX( $handler); if( $t->root) { $t->root->_toSAX( $handler, $start_tag_data, $end_tag_data) ; } if( my $end_document = $handler->can( 'end_document')) { $end_document->( $handler); } } sub flush_toSAX1 { shift(@_)->_flush_toSAX(@_, \&XML::Twig::Elt::_start_tag_data_SAX1, \&XML::Twig::Elt::_end_tag_data_SAX1 ); } sub flush_toSAX2 { shift(@_)->_flush_toSAX(@_, \&XML::Twig::Elt::_start_tag_data_SAX2, \&XML::Twig::Elt::_end_tag_data_SAX2 ); } sub _flush_toSAX { my( $t, $handler, $start_tag_data, $end_tag_data)= @_; # the "real" last element processed, as _twig_end has closed it my $last_elt; if( $t->{twig_current}) { $last_elt= $t->{twig_current}->{last_child}; } else { $last_elt= $t->{twig_root}; } my $elt= $t->{twig_root}; unless( $elt->{'flushed'}) { # init unless already done (ie root has been flushed) if( my $start_document = $handler->can( 'start_document')) { $start_document->( $handler); } # flush the DTD $t->_prolog_toSAX( $handler) } while( $elt) { my $next_elt; if( $last_elt && $last_elt->in( $elt)) { unless( $elt->{'flushed'}) { # just output the front tag if( my $start_element = $handler->can( 'start_element')) { if( my $tag_data= $start_tag_data->( $elt)) { $start_element->( $handler, $tag_data); } } $elt->{'flushed'}=1; } $next_elt= $elt->{first_child}; } else { # an element before the last one or the last one, $next_elt= $elt->{next_sibling}; $elt->_toSAX( $handler, $start_tag_data, $end_tag_data); $elt->delete; last if( $last_elt && ($elt == $last_elt)); } $elt= $next_elt; } if( !$t->{twig_parsing}) { if( my $end_document = $handler->can( 'end_document')) { $end_document->( $handler); } } } sub _prolog_toSAX { my( $t, $handler)= @_; $t->_xmldecl_toSAX( $handler); $t->_DTD_toSAX( $handler); } sub _xmldecl_toSAX { my( $t, $handler)= @_; my $decl= $t->{twig_xmldecl}; my $data= { Version => $decl->{version}, Encoding => $decl->{encoding}, Standalone => $decl->{standalone}, }; if( my $xml_decl= $handler->can( 'xml_decl')) { $xml_decl->( $handler, $data); } } sub _DTD_toSAX { my( $t, $handler)= @_; my $doctype= $t->{twig_doctype}; return unless( $doctype); my $data= { Name => $doctype->{name}, PublicId => $doctype->{pub}, SystemId => $doctype->{sysid}, }; if( my $start_dtd= $handler->can( 'start_dtd')) { $start_dtd->( $handler, $data); } # I should call code to export the internal subset here if( my $end_dtd= $handler->can( 'end_dtd')) { $end_dtd->( $handler); } } # input/output filters sub latin1 { local $SIG{__DIE__}; if( _use( 'Encode')) { return encode_convert( 'ISO-8859-15'); } elsif( _use( 'Text::Iconv')) { return iconv_convert( 'ISO-8859-15'); } elsif( _use( 'Unicode::Map8') && _use( 'Unicode::String')) { return unicode_convert( 'ISO-8859-15'); } else { return \®exp2latin1; } } sub _encoding_filter { { local $SIG{__DIE__}; my $encoding= $_[1] || $_[0]; if( _use( 'Encode')) { my $sub= encode_convert( $encoding); return $sub; } elsif( _use( 'Text::Iconv')) { return iconv_convert( $encoding); } elsif( _use( 'Unicode::Map8') && _use( 'Unicode::String')) { return unicode_convert( $encoding); } } _croak( "Encode, Text::Iconv or Unicode::Map8 and Unicode::String need to be installed in order to use encoding options"); } # shamelessly lifted from XML::TyePYX (works only with XML::Parse 2.27) sub regexp2latin1 { my $text=shift; $text=~s{([\xc0-\xc3])(.)}{ my $hi = ord($1); my $lo = ord($2); chr((($hi & 0x03) <<6) | ($lo & 0x3F)) }ge; return $text; } sub html_encode { _use( 'HTML::Entities') or croak "cannot use html_encode: missing HTML::Entities"; return HTML::Entities::encode_entities($_[0] ); } sub safe_encode { my $str= shift; if( $perl_version < 5.008) { # the no utf8 makes the regexp work in 5.6 no utf8; # = perl 5.6 $str =~ s{([\xC0-\xDF].|[\xE0-\xEF]..|[\xF0-\xFF]...)} {_XmlUtf8Decode($1)}egs; } else { $str= encode( ascii => $str, $FB_HTMLCREF); } return $str; } sub safe_encode_hex { my $str= shift; if( $perl_version < 5.008) { # the no utf8 makes the regexp work in 5.6 no utf8; # = perl 5.6 $str =~ s{([\xC0-\xDF].|[\xE0-\xEF]..|[\xF0-\xFF]...)} {_XmlUtf8Decode($1, 1)}egs; } else { $str= encode( ascii => $str, $FB_XMLCREF); } return $str; } # this one shamelessly lifted from XML::DOM # does NOT work on 5.8.0 sub _XmlUtf8Decode { my ($str, $hex) = @_; my $len = length ($str); my $n; if ($len == 2) { my @n = unpack "C2", $str; $n = (($n[0] & 0x3f) << 6) + ($n[1] & 0x3f); } elsif ($len == 3) { my @n = unpack "C3", $str; $n = (($n[0] & 0x1f) << 12) + (($n[1] & 0x3f) << 6) + ($n[2] & 0x3f); } elsif ($len == 4) { my @n = unpack "C4", $str; $n = (($n[0] & 0x0f) << 18) + (($n[1] & 0x3f) << 12) + (($n[2] & 0x3f) << 6) + ($n[3] & 0x3f); } elsif ($len == 1) # just to be complete... { $n = ord ($str); } else { croak "bad value [$str] for _XmlUtf8Decode"; } my $char= $hex ? sprintf ("&#x%x;", $n) : "&#$n;"; return $char; } sub unicode_convert { my $enc= $_[1] ? $_[1] : $_[0]; # so the method can be called on the twig or directly _use( 'Unicode::Map8') or croak "Unicode::Map8 not available, needed for encoding filter: $!"; _use( 'Unicode::String') or croak "Unicode::String not available, needed for encoding filter: $!"; import Unicode::String qw(utf8); my $sub= eval qq{ { $NO_WARNINGS; my \$cnv; BEGIN { \$cnv= Unicode::Map8->new(\$enc) or croak "Can't create converter to \$enc"; } sub { return \$cnv->to8 (utf8(\$_[0])->ucs2); } } }; unless( $sub) { croak $@; } return $sub; } sub iconv_convert { my $enc= $_[1] ? $_[1] : $_[0]; # so the method can be called on the twig or directly _use( 'Text::Iconv') or croak "Text::Iconv not available, needed for encoding filter: $!"; my $sub= eval qq{ { $NO_WARNINGS; my \$cnv; BEGIN { \$cnv = Text::Iconv->new( 'utf8', \$enc) or croak "Can't create iconv converter to \$enc"; } sub { return \$cnv->convert( \$_[0]); } } }; unless( $sub) { if( $@=~ m{^Unsupported conversion: Invalid argument}) { croak "Unsupported encoding: $enc"; } else { croak $@; } } return $sub; } sub encode_convert { my $enc= $_[1] ? $_[1] : $_[0]; # so the method can be called on the twig or directly my $sub= eval qq{sub { $NO_WARNINGS; return encode( "$enc", \$_[0]); } }; croak "can't create Encode-based filter: $@" unless( $sub); return $sub; } # XML::XPath compatibility sub getRootNode { return $_[0]; } sub getParentNode { return undef; } sub getChildNodes { my @children= ($_[0]->root); return wantarray ? @children : \@children; } sub _weakrefs { return $weakrefs; } sub _set_weakrefs { $weakrefs=shift() || 0; XML::Twig::Elt::set_destroy()if ! $weakrefs; } # for testing purposes sub _dump { my $t= shift; my $dump=''; $dump="document\n"; # should dump twig level data here if( $t->root) { $dump .= $t->root->_dump( @_); } return $dump; } 1; ###################################################################### package XML::Twig::Entity_list; ###################################################################### *isa= *UNIVERSAL::isa; sub new { my $class = shift; my $self={ entities => {}, updated => 0}; bless $self, $class; return $self; } sub add_new_ent { my $ent_list= shift; my $ent= XML::Twig::Entity->new( @_); $ent_list->add( $ent); return $ent_list; } sub _add_list { my( $ent_list, $to_add)= @_; my $ents_to_add= $to_add->{entities}; return $ent_list unless( $ents_to_add && %$ents_to_add); @{$ent_list->{entities}}{keys %$ents_to_add}= values %$ents_to_add; $ent_list->{updated}=1; return $ent_list; } sub add { my( $ent_list, $ent)= @_; $ent_list->{entities}->{$ent->{name}}= $ent; $ent_list->{updated}=1; return $ent_list; } sub ent { my( $ent_list, $ent_name)= @_; return $ent_list->{entities}->{$ent_name}; } # can be called with an entity or with an entity name sub delete { my $ent_list= shift; if( isa( ref $_[0], 'XML::Twig::Entity')) { # the second arg is an entity my $ent= shift; delete $ent_list->{entities}->{$ent->{name}}; } else { # the second arg was not entity, must be a string then my $name= shift; delete $ent_list->{entities}->{$name}; } $ent_list->{updated}=1; return $ent_list; } sub print { my ($ent_list, $fh)= @_; my $old_select= defined $fh ? select $fh : undef; foreach my $ent_name ( sort keys %{$ent_list->{entities}}) { my $ent= $ent_list->{entities}->{$ent_name}; # we have to test what the entity is or un-defined entities can creep in if( isa( $ent, 'XML::Twig::Entity')) { $ent->print(); } } select $old_select if( defined $old_select); return $ent_list; } sub text { my ($ent_list)= @_; return join "\n", map { $ent_list->{entities}->{$_}->text} sort keys %{$ent_list->{entities}}; } # return the list of entity names sub entity_names { my $ent_list= shift; return (sort keys %{$ent_list->{entities}}) ; } sub list { my ($ent_list)= @_; return map { $ent_list->{entities}->{$_} } sort keys %{$ent_list->{entities}}; } 1; ###################################################################### package XML::Twig::Entity; ###################################################################### #*isa= *UNIVERSAL::isa; sub new { my( $class, $name, $val, $sysid, $pubid, $ndata, $param)= @_; $class= ref( $class) || $class; my $self={}; $self->{name} = $name; $self->{val} = $val if( defined $val ); $self->{sysid} = $sysid if( defined $sysid); $self->{pubid} = $pubid if( defined $pubid); $self->{ndata} = $ndata if( defined $ndata); $self->{param} = $param if( defined $param); bless $self, $class; return $self; } sub name { return $_[0]->{name}; } sub val { return $_[0]->{val}; } sub sysid { return defined( $_[0]->{sysid}) ? $_[0]->{sysid} : ''; } sub pubid { return defined( $_[0]->{pubid}) ? $_[0]->{pubid} : ''; } sub ndata { return defined( $_[0]->{ndata}) ? $_[0]->{ndata} : ''; } sub param { return defined( $_[0]->{param}) ? $_[0]->{param} : ''; } sub print { my ($ent, $fh)= @_; my $text= $ent->text; if( $fh) { print $fh $text . "\n"; } else { print $text . "\n"; } } sub sprint { my ($ent)= @_; return $ent->text; } sub text { my ($ent)= @_; #warn "text called: '", $ent->_dump, "'\n"; return '' if( !$ent->{name}); my @tokens; push @tokens, '{param}); push @tokens, $ent->{name}; if( defined $ent->{val} && !defined( $ent->{sysid}) && !defined($ent->{pubid}) ) { push @tokens, _quoted_val( $ent->{val}); } elsif( defined $ent->{sysid}) { push @tokens, 'PUBLIC', _quoted_val( $ent->{pubid}) if( $ent->{pubid}); push @tokens, 'SYSTEM' unless( $ent->{pubid}); push @tokens, _quoted_val( $ent->{sysid}); push @tokens, 'NDATA', $ent->{ndata} if( $ent->{ndata}); } return join( ' ', @tokens) . '>'; } sub _quoted_val { my $q= $_[0]=~ m{"} ? q{'} : q{"}; return qq{$q$_[0]$q}; } sub _dump { my( $ent)= @_; return join( " - ", map { "$_ => '$ent->{$_}'" } grep { defined $ent->{$_} } sort keys %$ent); } 1; ###################################################################### package XML::Twig::Notation_list; ###################################################################### *isa= *UNIVERSAL::isa; sub new { my $class = shift; my $self={ notations => {}, updated => 0}; bless $self, $class; return $self; } sub add_new_notation { my $notation_list= shift; my $notation= XML::Twig::Notation->new( @_); $notation_list->add( $notation); return $notation_list; } sub _add_list { my( $notation_list, $to_add)= @_; my $notations_to_add= $to_add->{notations}; return $notation_list unless( $notations_to_add && %$notations_to_add); @{$notation_list->{notations}}{keys %$notations_to_add}= values %$notations_to_add; $notation_list->{updated}=1; return $notation_list; } sub add { my( $notation_list, $notation)= @_; $notation_list->{notations}->{$notation->{name}}= $notation; $notation_list->{updated}=1; return $notation_list; } sub notation { my( $notation_list, $notation_name)= @_; return $notation_list->{notations}->{$notation_name}; } # can be called with an notation or with an notation name sub delete { my $notation_list= shift; if( isa( ref $_[0], 'XML::Twig::Notation')) { # the second arg is an notation my $notation= shift; delete $notation_list->{notations}->{$notation->{name}}; } else { # the second arg was not notation, must be a string then my $name= shift; delete $notation_list->{notations}->{$name}; } $notation_list->{updated}=1; return $notation_list; } sub print { my ($notation_list, $fh)= @_; my $old_select= defined $fh ? select $fh : undef; foreach my $notation_name ( sort keys %{$notation_list->{notations}}) { my $notation= $notation_list->{notations}->{$notation_name}; # we have to test what the notation is or un-defined notations can creep in if( isa( $notation, 'XML::Twig::Notation')) { $notation->print(); } } select $old_select if( defined $old_select); return $notation_list; } sub text { my ($notation_list)= @_; return join "\n", map { $notation_list->{notations}->{$_}->text} sort keys %{$notation_list->{notations}}; } # return the list of notation names sub notation_names { my $notation_list= shift; return (sort keys %{$notation_list->{notations}}) ; } sub list { my ($notation_list)= @_; return map { $notation_list->{notations}->{$_} } sort keys %{$notation_list->{notations}}; } 1; ###################################################################### package XML::Twig::Notation; ###################################################################### #*isa= *UNIVERSAL::isa; BEGIN { *sprint= *text; } sub new { my( $class, $name, $base, $sysid, $pubid)= @_; $class= ref( $class) || $class; my $self={}; $self->{name} = $name; $self->{base} = $base if( defined $base ); $self->{sysid} = $sysid if( defined $sysid); $self->{pubid} = $pubid if( defined $pubid); bless $self, $class; return $self; } sub name { return $_[0]->{name}; } sub base { return $_[0]->{base}; } sub sysid { return $_[0]->{sysid}; } sub pubid { return $_[0]->{pubid}; } sub print { my ($notation, $fh)= @_; my $text= $notation->text; if( $fh) { print $fh $text . "\n"; } else { print $text . "\n"; } } sub text { my ($notation)= @_; return '' if( !$notation->{name}); my @tokens; push @tokens, '{name}; push @tokens, ( 'PUBLIC', _quoted_val( $notation->{pubid} ) ) if $notation->{pubid}; push @tokens, ( 'SYSTEM') if ! $notation->{pubid} && $notation->{sysid}; push @tokens, (_quoted_val( $notation->{sysid}) ) if $notation->{sysid}; return join( ' ', @tokens) . '>'; } sub _quoted_val { my $q= $_[0]=~ m{"} ? q{'} : q{"}; return qq{$q$_[0]$q}; } sub _dump { my( $notation)= @_; return join( " - ", map { "$_ => '$notation->{$_}'" } grep { defined $notation->{$_} } sort keys %$notation); } 1; ###################################################################### package XML::Twig::Elt; ###################################################################### use Carp; *isa= *UNIVERSAL::isa; my $CDATA_START = ""; my $PI_START = ""; my $COMMENT_START = ""; my $XMLNS_URI = 'http://www.w3.org/2000/xmlns/'; BEGIN { # set some aliases for methods *tag = *gi; *name = *gi; *set_tag = *set_gi; *set_name = *set_gi; *find_nodes = *get_xpath; # as in XML::DOM *findnodes = *get_xpath; # as in XML::LibXML *field = *first_child_text; *trimmed_field = *first_child_trimmed_text; *is_field = *contains_only_text; *is = *passes; *matches = *passes; *has_child = *first_child; *has_children = *first_child; *all_children_pass = *all_children_are; *all_children_match= *all_children_are; *getElementsByTagName= *descendants; *find_by_tag_name= *descendants_or_self; *unwrap = *erase; *inner_xml = *xml_string; *outer_xml = *sprint; *add_class = *add_to_class; *first_child_is = *first_child_matches; *last_child_is = *last_child_matches; *next_sibling_is = *next_sibling_matches; *prev_sibling_is = *prev_sibling_matches; *next_elt_is = *next_elt_matches; *prev_elt_is = *prev_elt_matches; *parent_is = *parent_matches; *child_is = *child_matches; *inherited_att = *inherit_att; *sort_children_by_value= *sort_children_on_value; *has_atts= *att_nb; # imports from XML::Twig *_is_fh= *XML::Twig::_is_fh; # XML::XPath compatibility *string_value = *text; *toString = *sprint; *getName = *gi; *getRootNode = *twig; *getNextSibling = *_next_sibling; *getPreviousSibling = *_prev_sibling; *isElementNode = *is_elt; *isTextNode = *is_text; *isPI = *is_pi; *isPINode = *is_pi; *isProcessingInstructionNode= *is_pi; *isComment = *is_comment; *isCommentNode = *is_comment; *getTarget = *target; *getFirstChild = *_first_child; *getLastChild = *_last_child; # try using weak references # test whether we can use weak references { local $SIG{__DIE__}; if( eval 'require Scalar::Util' && defined( &Scalar::Util::weaken) ) { import Scalar::Util qw(weaken); } elsif( eval 'require WeakRef') { import WeakRef; } } } # can be called as XML::Twig::Elt->new( [[$gi, $atts, [@content]]) # - gi is an optional gi given to the element # - $atts is a hashref to attributes for the element # - @content is an optional list of text and elements that will # be inserted under the element sub new { my $class= shift; $class= ref $class || $class; my $elt = {}; bless ($elt, $class); return $elt unless @_; if( @_ == 1 && $_[0]=~ m{^\s*<}) { return $class->parse( @_); } # if a gi is passed then use it my $gi= shift; $elt->{gi}=$XML::Twig::gi2index{$gi} or $elt->set_gi( $gi); my $atts= ref $_[0] eq 'HASH' ? shift : undef; if( $atts && defined $atts->{$CDATA}) { delete $atts->{$CDATA}; my $cdata= $class->new( $CDATA => @_); return $class->new( $gi, $atts, $cdata); } if( $gi eq $PCDATA) { if( grep { ref $_ } @_) { croak "element $PCDATA can only be created from text"; } $elt->{pcdata}= join '', @_; } elsif( $gi eq $ENT) { $elt->{ent}= shift; } elsif( $gi eq $CDATA) { if( grep { ref $_ } @_) { croak "element $CDATA can only be created from text"; } $elt->{cdata}= join '', @_; } elsif( $gi eq $COMMENT) { if( grep { ref $_ } @_) { croak "element $COMMENT can only be created from text"; } $elt->{comment}= join '', @_; } elsif( $gi eq $PI) { if( grep { ref $_ } @_) { croak "element $PI can only be created from text"; } $elt->_set_pi( shift, join '', @_); } else { # the rest of the arguments are the content of the element if( @_) { $elt->set_content( @_); } else { $elt->{empty}= 1; } } if( $atts) { # the attribute hash can be used to pass the asis status if( defined $atts->{$ASIS}) { $elt->set_asis( $atts->{$ASIS} ); delete $atts->{$ASIS}; } if( defined $atts->{$EMPTY}) { $elt->{empty}= $atts->{$EMPTY}; delete $atts->{$EMPTY}; } if( keys %$atts) { $elt->set_atts( $atts); } $elt->_set_id( $atts->{$ID}) if( $atts->{$ID}); } return $elt; } # optimized version of $elt->new( PCDATA, $text); sub _new_pcdata { my $class= $_[0]; $class= ref $class || $class; my $elt = {}; bless $elt, $class; $elt->{gi}=$XML::Twig::gi2index{$PCDATA} or $elt->set_gi( $PCDATA); $elt->{pcdata}= $_[1]; return $elt; } # this function creates an XM:::Twig::Elt from a string # it is quite clumsy at the moment, as it just creates a # new twig then returns its root # there might also be memory leaks there # additional arguments are passed to new XML::Twig sub parse { my $class= shift; if( ref( $class)) { $class= ref( $class); } my $string= shift; my %args= @_; my $t= XML::Twig->new(%args); $t->parse( $string); my $elt= $t->root; # clean-up the node delete $elt->{twig}; # get rid of the twig data delete $elt->{twig_current}; # better get rid of this too if( $t->{twig_id_list}) { $elt->{twig_id_list}= $t->{twig_id_list}; } $elt->cut; undef $t->{twig_root}; return $elt; } sub set_inner_xml { my( $elt, $xml, @args)= @_; my $new_elt= $elt->parse( "$xml", @args); $elt->cut_children; $new_elt->paste_first_child( $elt); $new_elt->erase; return $elt; } sub set_outer_xml { my( $elt, $xml, @args)= @_; my $new_elt= $elt->parse( "$xml", @args); $elt->cut_children; $new_elt->replace( $elt); $new_elt->erase; return $new_elt; } sub set_inner_html { my( $elt, $html)= @_; my $t= XML::Twig->new->parse_html( "$html"); my $new_elt= $t->root; if( $elt->tag eq 'head') { $new_elt->first_child( 'head')->unwrap; $new_elt->first_child( 'body')->cut; } elsif( $elt->tag ne 'html') { $new_elt->first_child( 'head')->cut; $new_elt->first_child( 'body')->unwrap; } $new_elt->cut; $elt->cut_children; $new_elt->paste_first_child( $elt); $new_elt->erase; return $elt; } sub set_gi { my ($elt, $gi)= @_; unless( defined $XML::Twig::gi2index{$gi}) { # new gi, create entries in %gi2index and @index2gi push @XML::Twig::index2gi, $gi; $XML::Twig::gi2index{$gi}= $#XML::Twig::index2gi; } $elt->{gi}= $XML::Twig::gi2index{$gi}; return $elt; } sub gi { return $XML::Twig::index2gi[$_[0]->{gi}]; } sub local_name { my $elt= shift; return _local_name( $XML::Twig::index2gi[$elt->{'gi'}]); } sub ns_prefix { my $elt= shift; return _ns_prefix( $XML::Twig::index2gi[$elt->{'gi'}]); } # namespace prefix for any qname (can be used for elements or attributes) sub _ns_prefix { my $qname= shift; if( $qname=~ m{^([^:]*):}) { return $1; } else { return( ''); } # should it be '' ? } # local name for any qname (can be used for elements or attributes) sub _local_name { my $qname= shift; (my $local= $qname)=~ s{^[^:]*:}{}; return $local; } #sub get_namespace sub namespace ## no critic (Subroutines::ProhibitNestedSubs); { my $elt= shift; my $prefix= defined $_[0] ? shift() : $elt->ns_prefix; my $ns_att= $prefix ? "xmlns:$prefix" : "xmlns"; my $expanded= $DEFAULT_NS{$prefix} || $elt->_inherit_att_through_cut( $ns_att) || ''; return $expanded; } sub declare_missing_ns ## no critic (Subroutines::ProhibitNestedSubs); { my $root= shift; my %missing_prefix; my $map= $root->_current_ns_prefix_map; foreach my $prefix (keys %$map) { my $prefix_att= $prefix eq '#default' ? 'xmlns' : "xmlns:$prefix"; if( ! $root->{'att'}->{$prefix_att}) { $root->set_att( $prefix_att => $map->{$prefix}); } } return $root; } sub _current_ns_prefix_map { my( $elt)= shift; my $map; while( $elt) { foreach my $att ($elt->att_names) { my $prefix= $att eq 'xmlns' ? '#default' : $att=~ m{^xmlns:(.*)$} ? $1 : next ; if( ! exists $map->{$prefix}) { $map->{$prefix}= $elt->{'att'}->{$att}; } } $elt= $elt->{parent} || ($elt->{former} && $elt->{former}->{parent}); } return $map; } sub set_ns_decl { my( $elt, $uri, $prefix)= @_; my $ns_att= $prefix ? "xmlns:$prefix" : 'xmlns'; $elt->set_att( $ns_att => $uri); return $elt; } sub set_ns_as_default { my( $root, $uri)= @_; my @ns_decl_to_remove; foreach my $elt ($root->descendants_or_self) { if( $elt->_ns_prefix && $elt->namespace eq $uri) { $elt->set_tag( $elt->local_name); } # store any namespace declaration for that uri foreach my $ns_decl (grep { $_=~ m{xmlns(:|$)} && $elt->{'att'}->{$_} eq $uri } $elt->att_names) { push @ns_decl_to_remove, [$elt, $ns_decl]; } } $root->set_ns_decl( $uri); # now remove the ns declarations (if done earlier then descendants of an element with the ns declaration # are not considered being in the namespace foreach my $ns_decl_to_remove ( @ns_decl_to_remove) { my( $elt, $ns_decl)= @$ns_decl_to_remove; $elt->del_att( $ns_decl); } return $root; } # return #ELT for an element and #PCDATA... for others sub get_type { my $gi_nb= $_[0]->{gi}; # the number, not the string return $ELT if( $gi_nb >= $XML::Twig::SPECIAL_GI); return $_[0]->gi; } # return the gi if it's a "real" element, 0 otherwise sub is_elt { if( $_[0]->{gi} >= $XML::Twig::SPECIAL_GI) { return $_[0]->gi; } else { return 0; } } sub is_pcdata { my $elt= shift; return (exists $elt->{'pcdata'}); } sub is_cdata { my $elt= shift; return (exists $elt->{'cdata'}); } sub is_pi { my $elt= shift; return (exists $elt->{'target'}); } sub is_comment { my $elt= shift; return (exists $elt->{'comment'}); } sub is_ent { my $elt= shift; return (exists $elt->{ent} || $elt->{ent_name}); } sub is_text { my $elt= shift; return (exists( $elt->{'pcdata'}) || (exists $elt->{'cdata'})); } sub is_empty { return $_[0]->{empty} || 0; } sub set_empty { $_[0]->{empty}= defined( $_[1]) ? $_[1] : 1; return $_[0]; } sub set_not_empty { delete $_[0]->{empty} if( $_[0]->{'empty'}); return $_[0]; } sub set_asis { my $elt=shift; foreach my $descendant ($elt, $elt->_descendants ) { $descendant->{asis}= 1; if( (exists $descendant->{'cdata'})) { $descendant->{gi}=$XML::Twig::gi2index{$PCDATA} or $descendant->set_gi( $PCDATA); $descendant->{pcdata}= $descendant->{cdata}; } } return $elt; } sub set_not_asis { my $elt=shift; foreach my $descendant ($elt, $elt->descendants) { delete $descendant->{asis} if $descendant->{asis};} return $elt; } sub is_asis { return $_[0]->{asis}; } sub closed { my $elt= shift; my $t= $elt->twig || return; my $curr_elt= $t->{twig_current}; return 1 unless( $curr_elt); return $curr_elt->in( $elt); } sub set_pcdata { my( $elt, $pcdata)= @_; if( $elt->{extra_data_in_pcdata}) { _try_moving_extra_data( $elt, $pcdata); } $elt->{pcdata}= $pcdata; return $elt; } sub _extra_data_in_pcdata { return $_[0]->{extra_data_in_pcdata}; } sub _set_extra_data_in_pcdata { $_[0]->{extra_data_in_pcdata}= $_[1]; return $_[0]; } sub _del_extra_data_in_pcdata { delete $_[0]->{extra_data_in_pcdata}; return $_[0]; } sub _unshift_extra_data_in_pcdata { my $e= shift; $e->{extra_data_in_pcdata}||=[]; unshift @{$e->{extra_data_in_pcdata}}, { text => shift(), offset => shift() }; } sub _push_extra_data_in_pcdata { my $e= shift; $e->{extra_data_in_pcdata}||=[]; push @{$e->{extra_data_in_pcdata}}, { text => shift(), offset => shift() }; } sub _extra_data_before_end_tag { return $_[0]->{extra_data_before_end_tag} || ''; } sub _set_extra_data_before_end_tag { $_[0]->{extra_data_before_end_tag}= $_[1]; return $_[0]} sub _del_extra_data_before_end_tag { delete $_[0]->{extra_data_before_end_tag}; return $_[0]} sub _prefix_extra_data_before_end_tag { my( $elt, $data)= @_; if($elt->{extra_data_before_end_tag}) { $elt->{extra_data_before_end_tag}= $data . $elt->{extra_data_before_end_tag}; } else { $elt->{extra_data_before_end_tag}= $data; } return $elt; } # internal, in cases where we know there is no extra_data (inlined anyway!) sub _set_pcdata { $_[0]->{pcdata}= $_[1]; } # try to figure out if we can keep the extra_data around sub _try_moving_extra_data { my( $elt, $modified)=@_; my $initial= $elt->{pcdata}; my $cpis= $elt->{extra_data_in_pcdata}; if( (my $offset= index( $modified, $initial)) != -1) { # text has been added foreach (@$cpis) { $_->{offset}+= $offset; } } elsif( ($offset= index( $initial, $modified)) != -1) { # text has been cut my $len= length( $modified); foreach my $cpi (@$cpis) { $cpi->{offset} -= $offset; } $elt->_set_extra_data_in_pcdata( [ grep { $_->{offset} >= 0 && $_->{offset} < $len } @$cpis ]); } else { _match_extra_data_words( $elt, $initial, $modified) || _match_extra_data_chars( $elt, $initial, $modified) || $elt->_del_extra_data_in_pcdata; } } sub _match_extra_data_words { my( $elt, $initial, $modified)= @_; my @initial= split /\b/, $initial; my @modified= split /\b/, $modified; return _match_extra_data( $elt, length( $initial), \@initial, \@modified); } sub _match_extra_data_chars { my( $elt, $initial, $modified)= @_; my @initial= split //, $initial; my @modified= split //, $modified; return _match_extra_data( $elt, length( $initial), \@initial, \@modified); } sub _match_extra_data { my( $elt, $length, $initial, $modified)= @_; my $cpis= $elt->{extra_data_in_pcdata}; if( @$initial <= @$modified) { my( $ok, $positions, $offsets)= _pos_offset( $initial, $modified); if( $ok) { my $offset=0; my $pos= shift @$positions; foreach my $cpi (@$cpis) { while( $cpi->{offset} >= $pos) { $offset= shift @$offsets; $pos= shift @$positions || $length +1; } $cpi->{offset} += $offset; } return 1; } } else { my( $ok, $positions, $offsets)= _pos_offset( $modified, $initial); if( $ok) { #print STDERR "pos: ", join( ':', @$positions), "\n", # "offset: ", join( ':', @$offsets), "\n"; my $offset=0; my $pos= shift @$positions; my $prev_pos= 0; foreach my $cpi (@$cpis) { while( $cpi->{offset} >= $pos) { $offset= shift @$offsets; $prev_pos= $pos; $pos= shift @$positions || $length +1; } $cpi->{offset} -= $offset; if( $cpi->{offset} < $prev_pos) { delete $cpi->{text}; } } $elt->_set_extra_data_in_pcdata( [ grep { exists $_->{text} } @$cpis ]); return 1; } } return 0; } sub _pos_offset { my( $short, $long)= @_; my( @pos, @offset); my( $s_length, $l_length)=(0,0); while (@$short) { my $s_word= shift @$short; my $l_word= shift @$long; if( $s_word ne $l_word) { while( @$long && $s_word ne $l_word) { $l_length += length( $l_word); $l_word= shift @$long; } if( !@$long && $s_word ne $l_word) { return 0; } push @pos, $s_length; push @offset, $l_length - $s_length; } my $length= length( $s_word); $s_length += $length; $l_length += $length; } return( 1, \@pos, \@offset); } sub append_pcdata { $_[0]->{'pcdata'}.= $_[1]; return $_[0]; } sub pcdata { return $_[0]->{pcdata}; } sub append_extra_data { $_[0]->{extra_data}.= $_[1]; return $_[0]; } sub set_extra_data { $_[0]->{extra_data}= $_[1]; return $_[0]; } sub extra_data { return $_[0]->{extra_data} || ''; } sub set_target { my( $elt, $target)= @_; $elt->{target}= $target; return $elt; } sub target { return $_[0]->{target}; } sub set_data { $_[0]->{'data'}= $_[1]; return $_[0]; } sub data { return $_[0]->{data}; } sub set_pi { my $elt= shift; unless( $elt->{gi} == $XML::Twig::gi2index{$PI}) { $elt->cut_children; $elt->{gi}=$XML::Twig::gi2index{$PI} or $elt->set_gi( $PI); } return $elt->_set_pi( @_); } sub _set_pi { $_[0]->set_target( $_[1]); $_[0]->{data}= $_[2]; return $_[0]; } sub pi_string { my $string= $PI_START . $_[0]->{target}; my $data= $_[0]->{data}; if( defined( $data) && $data ne '') { $string .= " $data"; } $string .= $PI_END ; return $string; } sub set_comment { my $elt= shift; unless( $elt->{gi} == $XML::Twig::gi2index{$COMMENT}) { $elt->cut_children; $elt->{gi}=$XML::Twig::gi2index{$COMMENT} or $elt->set_gi( $COMMENT); } $elt->{comment}= $_[0]; return $elt; } sub _set_comment { $_[0]->{comment}= $_[1]; return $_[0]; } sub comment { return $_[0]->{comment}; } sub comment_string { return $COMMENT_START . _comment_escaped_string( $_[0]->{comment}) . $COMMENT_END; } # comments cannot start or end with sub _comment_escaped_string { my( $c)= @_; $c=~ s{^-}{ -}; $c=~ s{-$}{- }; $c=~ s{--}{- -}g; return $c; } sub set_ent { $_[0]->{ent}= $_[1]; return $_[0]; } sub ent { return $_[0]->{ent}; } sub ent_name { return substr( $_[0]->{ent}, 1, -1);} sub set_cdata { my $elt= shift; unless( $elt->{gi} == $XML::Twig::gi2index{$CDATA}) { $elt->cut_children; $elt->insert_new_elt( first_child => $CDATA, @_); return $elt; } $elt->{cdata}= $_[0]; return $_[0]; } sub _set_cdata { $_[0]->{cdata}= $_[1]; return $_[0]; } sub append_cdata { $_[0]->{cdata}.= $_[1]; return $_[0]; } sub cdata { return $_[0]->{cdata}; } sub contains_only_text { my $elt= shift; return 0 unless $elt->is_elt; foreach my $child ($elt->_children) { return 0 if $child->is_elt; } return $elt; } sub contains_only { my( $elt, $exp)= @_; my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }; foreach my $child (@children) { return 0 unless $child->is( $exp); } return @children || 1; } sub contains_a_single { my( $elt, $exp)= @_; my $child= $elt->{first_child} or return 0; return 0 unless $child->passes( $exp); return 0 if( $child->{next_sibling}); return $child; } sub root { my $elt= shift; while( $elt->{parent}) { $elt= $elt->{parent}; } return $elt; } sub _root_through_cut { my $elt= shift; while( $elt->{parent} || ($elt->{former} && $elt->{former}->{parent})) { $elt= $elt->{parent} || ($elt->{former} && $elt->{former}->{parent}); } return $elt; } sub twig { my $elt= shift; my $root= $elt->root; return $root->{twig}; } sub _twig_through_cut { my $elt= shift; my $root= $elt->_root_through_cut; return $root->{twig}; } # used for navigation # returns undef or the element, depending on whether $elt passes $cond # $cond can be # - empty: the element passes the condition # - ELT ('#ELT'): the element passes the condition if it is a "real" element # - TEXT ('#TEXT'): the element passes if it is a CDATA or PCDATA element # - a string with an XPath condition (only a subset of XPath is actually # supported). # - a regexp: the element passes if its gi matches the regexp # - a code ref: the element passes if the code, applied on the element, # returns true my %cond_cache; # expression => coderef sub reset_cond_cache { %cond_cache=(); } { sub _install_cond { my $cond= shift; my $test; my $init=''; my $original_cond= $cond; my $not= ($cond=~ s{^\s*!}{}) ? '!' : ''; if( ref $cond eq 'CODE') { return $cond; } if( ref $cond eq 'Regexp') { $test = qq{(\$_[0]->gi=~ /$cond/)}; } else { my @tests; while( $cond) { # the condition is a string if( $cond=~ s{$ELT$SEP}{}) { push @tests, qq{\$_[0]->is_elt}; } elsif( $cond=~ s{$TEXT$SEP}{}) { push @tests, qq{\$_[0]->is_text}; } elsif( $cond=~ s{^\s*($REG_TAG_PART)$SEP}{}) { push @tests, _gi_test( $1); } elsif( $cond=~ s{^\s*($REG_REGEXP)$SEP}{}) { # /regexp/ push @tests, qq{ \$_[0]->gi=~ $1 }; } elsif( $cond=~ s{^\s*($REG_TAG_PART)?\s* # $1 \[\s*(-?)\s*(\d+)\s*\] # [$2] $SEP}{}xo ) { my( $gi, $neg, $index)= ($1, $2, $3); my $siblings= $neg ? q{$_[0]->_next_siblings} : q{$_[0]->_prev_siblings}; if( $gi && ($gi ne '*')) #{ $test= qq{((\$_[0]->gi eq "$gi") && (scalar( grep { \$_->gi eq "$gi" } $siblings) + 1 == $index))}; } { push @tests, _and( _gi_test( $gi), qq{ (scalar( grep { \$_->gi eq "$gi" } $siblings) + 1 == $index)}); } else { push @tests, qq{(scalar( $siblings) + 1 == $index)}; } } elsif( $cond=~ s{^\s*($REG_TAG_PART?)\s*($REG_PREDICATE)$SEP}{}) { my( $gi, $predicate)= ( $1, $2); push @tests, _and( _gi_test( $gi), _parse_predicate_in_step( $predicate)); } elsif( $cond=~ s{^\s*($REG_NAKED_PREDICATE)$SEP}{}) { push @tests, _parse_predicate_in_step( $1); } else { croak "wrong navigation condition '$original_cond' ($@)"; } } $test= @tests > 1 ? '(' . join( '||', map { "($_)" } @tests) . ')' : $tests[0]; } #warn "init: '$init' - test: '$test'\n"; my $sub= qq{sub { $NO_WARNINGS; $init; return $not($test) ? \$_[0] : undef; } }; my $s= eval $sub; #warn "cond: $cond\n$sub\n"; if( $@) { croak "wrong navigation condition '$original_cond' ($@);" } return $s; } sub _gi_test { my( $full_gi)= @_; # optimize if the gi exists, including the case where the gi includes a dot my $index= $XML::Twig::gi2index{$full_gi}; if( $index) { return qq{\$_[0]->{gi} == $index}; } my( $gi, $class, $id)= $full_gi=~ m{^(.*?)(?:[.]([^.]*)|[#](.*))?$}; my $gi_test=''; if( $gi && $gi ne '*' ) { # 2 options, depending on whether the gi exists in gi2index # start optimization my $index= $XML::Twig::gi2index{$gi}; if( $index) { # the gi exists, use its index as a faster shortcut $gi_test = qq{\$_[0]->{gi} == $index}; } else # end optimization { # it does not exist (but might be created later), compare the strings $gi_test = qq{ \$_[0]->gi eq "$gi"}; } } else { $gi_test= 1; } my $class_test=''; #warn "class: '$class'"; if( $class) { $class_test = qq{ defined( \$_[0]->{att}->{class}) && \$_[0]->{att}->{class}=~ m{\\b$class\\b} }; } my $id_test=''; #warn "id: '$id'"; if( $id) { $id_test = qq{ defined( \$_[0]->{att}->{$ID}) && \$_[0]->{att}->{$ID} eq '$id' }; } #warn "gi_test: '$gi_test' - class_test: '$class_test' returning ", _and( $gi_test, $class_test); return _and( $gi_test, $class_test, $id_test); } # input: the original predicate sub _parse_predicate_in_step { my $cond= shift; my %PERL_ALPHA_TEST= ( '=' => ' eq ', '!=' => ' ne ', '>' => ' gt ', '>=' => ' ge ', '<' => ' lt ', '<=' => ' le '); $cond=~ s{^\s*\[\s*}{}; $cond=~ s{\s*\]\s*$}{}; $cond=~ s{( ($REG_STRING|$REG_REGEXP) # strings or regexps |\@($REG_TAG_NAME)(?=\s*(?:[><=!]|!~|=~)) # @att (followed by a comparison operator) |\@($REG_TAG_NAME) # @att (not followed by a comparison operator) |=~|!~ # matching operators |([><]=?|=|!=)(?=\s*[\d+-]) # test before a number |([><]=?|=|!=) # test, other cases |($REG_FUNCTION) # no arg functions # this bit is a mess, but it is the only solution with this half-baked parser |((?:string|text)\(\s*$REG_TAG_NAME\s*\)\s*$REG_MATCH\s*$REG_REGEXP) # string( child) =~ /regexp/ |((?:string|text)\(\s*$REG_TAG_NAME\s*\)\s*!?=\s*$REG_VALUE) # string( child) = "value" (or !=) |((?:string|text)\(\s*$REG_TAG_NAME\s*\)\s*[<>]=?\s*$REG_VALUE) # string( child) > "value" |(and|or) )} { my( $token, $string, $att, $bare_att, $num_test, $alpha_test, $func, $string_regexp, $string_eq, $string_test, $and_or) = ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11); if( defined $string) { $token } elsif( $att) { "( \$_[0]->{att} && exists( \$_[0]->{att}->{'$att'}) && \$_[0]->{att}->{'$att'})"; } elsif( $bare_att) { "(\$_[0]->{att} && defined( \$_[0]->{att}->{'$bare_att'}))"; } elsif( $num_test && ($num_test eq '=') ) { "==" } # others tests are unchanged elsif( $alpha_test) { $PERL_ALPHA_TEST{$alpha_test} } elsif( $func && $func=~ m{^(?:string|text)}) { "\$_[0]->text"; } elsif( $string_regexp && $string_regexp =~ m{(?:string|text)\(\s*($REG_TAG_NAME)\s*\)\s*($REG_MATCH)\s*($REG_REGEXP)}) { "(XML::Twig::_first_n { (\$_->gi eq '$1') && (\$_->text $2 $3) } 1, \$_[0]->_children)"; } elsif( $string_eq && $string_eq =~ m{(?:string|text)\(\s*($REG_TAG_NAME)\s*\)\s*(!?=)\s*($REG_VALUE)}) {"(XML::Twig::_first_n { (\$_->gi eq '$1') && (\$_->text $PERL_ALPHA_TEST{$2} $3) } 1, \$_[0]->_children)"; } elsif( $string_test && $string_test =~ m{(?:string|text)\(\s*($REG_TAG_NAME)\s*\)\s*([<>]=?)\s*($REG_VALUE)}) { "(XML::Twig::_first_n { (\$_->gi eq '$1') && (\$_->text $2 $3) } 1, \$_[0]->_children)"; } elsif( $and_or) { $and_or eq 'and' ? '&&' : '||' ; } else { $token; } }gexs; return "($cond)"; } sub _op { my $op= shift; if( $op eq '=') { $op= 'eq'; } elsif( $op eq '!=') { $op= 'ne'; } return $op; } sub passes { my( $elt, $cond)= @_; return $elt unless $cond; my $sub= ($cond_cache{$cond} ||= _install_cond( $cond)); return $sub->( $elt); } } sub set_parent { $_[0]->{parent}= $_[1]; if( $XML::Twig::weakrefs) { weaken( $_[0]->{parent}); } } sub parent { my $elt= shift; my $cond= shift || return $elt->{parent}; do { $elt= $elt->{parent} || return; } until ( $elt->passes( $cond)); return $elt; } sub set_first_child { $_[0]->{'first_child'}= $_[1]; } sub first_child { my $elt= shift; my $cond= shift || return $elt->{first_child}; my $child= $elt->{first_child}; my $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond)); while( $child && !$test_cond->( $child)) { $child= $child->{next_sibling}; } return $child; } sub _first_child { return $_[0]->{first_child}; } sub _last_child { return $_[0]->{last_child}; } sub _next_sibling { return $_[0]->{next_sibling}; } sub _prev_sibling { return $_[0]->{prev_sibling}; } sub _parent { return $_[0]->{parent}; } sub _next_siblings { my $elt= shift; my @siblings; while( $elt= $elt->{next_sibling}) { push @siblings, $elt; } return @siblings; } sub _prev_siblings { my $elt= shift; my @siblings; while( $elt= $elt->{prev_sibling}) { push @siblings, $elt; } return @siblings; } # sets a field # arguments $record, $cond, @content sub set_field { my $record = shift; my $cond = shift; my $child= $record->first_child( $cond); if( $child) { $child->set_content( @_); } else { if( $cond=~ m{^\s*($REG_TAG_NAME)}) { my $gi= $1; $child= $record->insert_new_elt( last_child => $gi, @_); } else { croak "can't create a field name from $cond"; } } return $child; } sub set_last_child { $_[0]->{'last_child'}= $_[1]; delete $_->[0]->{empty}; if( $XML::Twig::weakrefs) { weaken( $_[0]->{'last_child'}); } } sub last_child { my $elt= shift; my $cond= shift || return $elt->{last_child}; my $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond)); my $child= $elt->{last_child}; while( $child && !$test_cond->( $child) ) { $child= $child->{prev_sibling}; } return $child } sub set_prev_sibling { $_[0]->{'prev_sibling'}= $_[1]; if( $XML::Twig::weakrefs) { weaken( $_[0]->{'prev_sibling'}); } } sub prev_sibling { my $elt= shift; my $cond= shift || return $elt->{prev_sibling}; my $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond)); my $sibling= $elt->{prev_sibling}; while( $sibling && !$test_cond->( $sibling) ) { $sibling= $sibling->{prev_sibling}; } return $sibling; } sub set_next_sibling { $_[0]->{'next_sibling'}= $_[1]; } sub next_sibling { my $elt= shift; my $cond= shift || return $elt->{next_sibling}; my $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond)); my $sibling= $elt->{next_sibling}; while( $sibling && !$test_cond->( $sibling) ) { $sibling= $sibling->{next_sibling}; } return $sibling; } # methods dealing with the class attribute, convenient if you work with xhtml sub class { $_[0]->{att}->{class}; } # lvalue version of class. separate from class to avoid problem like RT# sub lclass :lvalue # > perl 5.5 { $_[0]->{att}->{class}; } sub set_class { my( $elt, $class)= @_; $elt->set_att( class => $class); } # adds a class to an element sub add_to_class { my( $elt, $new_class)= @_; return $elt unless $new_class; my $class= $elt->class; my %class= $class ? map { $_ => 1 } split /\s+/, $class : (); $class{$new_class}= 1; $elt->set_class( join( ' ', sort keys %class)); } sub remove_class { my( $elt, $class_to_remove)= @_; return $elt unless $class_to_remove; my $class= $elt->class; my %class= $class ? map { $_ => 1 } split /\s+/, $class : (); delete $class{$class_to_remove}; $elt->set_class( join( ' ', sort keys %class)); } sub att_to_class { my( $elt, $att)= @_; $elt->set_class( $elt->{'att'}->{$att}); } sub add_att_to_class { my( $elt, $att)= @_; $elt->add_to_class( $elt->{'att'}->{$att}); } sub move_att_to_class { my( $elt, $att)= @_; $elt->add_to_class( $elt->{'att'}->{$att}); $elt->del_att( $att); } sub tag_to_class { my( $elt)= @_; $elt->set_class( $elt->tag); } sub add_tag_to_class { my( $elt)= @_; $elt->add_to_class( $elt->tag); } sub set_tag_class { my( $elt, $new_tag)= @_; $elt->add_tag_to_class; $elt->set_tag( $new_tag); } sub tag_to_span { my( $elt)= @_; $elt->set_class( $elt->tag) unless( $elt->tag eq 'span' && $elt->class); # set class to span unless it would mean replacing it with span $elt->set_tag( 'span'); } sub tag_to_div { my( $elt)= @_; $elt->set_class( $elt->tag) unless( $elt->tag eq 'div' && $elt->class); # set class to div unless it would mean replacing it with div $elt->set_tag( 'div'); } sub in_class { my( $elt, $class)= @_; my $elt_class= $elt->class; return unless( defined $elt_class); return $elt->class=~ m{(?:^|\s)\Q$class\E(?:\s|$)} ? $elt : 0; } # get or set all attributes # argument can be a hash or a hashref sub set_atts { my $elt= shift; my %atts; tie %atts, 'Tie::IxHash' if( keep_atts_order()); %atts= @_ == 1 ? %{$_[0]} : @_; $elt->{att}= \%atts; if( exists $atts{$ID}) { $elt->_set_id( $atts{$ID}); } return $elt; } sub atts { return $_[0]->{att}; } sub att_names { return (sort keys %{$_[0]->{att}}); } sub del_atts { $_[0]->{att}={}; return $_[0]; } # get or set a single attribute (set works for several atts) sub set_att { my $elt= shift; if( $_[0] && ref( $_[0]) && !$_[1]) { croak "improper call to set_att, usage is \$elt->set_att( att1 => 'val1', att2 => 'val2',...)"; } unless( $elt->{att}) { $elt->{att}={}; tie %{$elt->{att}}, 'Tie::IxHash' if( keep_atts_order()); } while(@_) { my( $att, $val)= (shift, shift); $elt->{att}->{$att}= $val; if( $att eq $ID) { $elt->_set_id( $val); } } return $elt; } sub att { $_[0]->{att}->{$_[1]}; } # lvalue version of att. separate from class to avoid problem like RT# sub latt :lvalue # > perl 5.5 { $_[0]->{att}->{$_[1]}; } sub del_att { my $elt= shift; while( @_) { delete $elt->{'att'}->{shift()}; } return $elt; } sub att_exists { return exists $_[0]->{att}->{$_[1]}; } # delete an attribute from all descendants of an element sub strip_att { my( $elt, $att)= @_; $_->del_att( $att) foreach ($elt->descendants_or_self( qq{*[\@$att]})); return $elt; } sub change_att_name { my( $elt, $old_name, $new_name)= @_; my $value= $elt->{'att'}->{$old_name}; return $elt unless( defined $value); $elt->del_att( $old_name) ->set_att( $new_name => $value); return $elt; } sub lc_attnames { my $elt= shift; foreach my $att ($elt->att_names) { if( $att ne lc $att) { $elt->change_att_name( $att, lc $att); } } return $elt; } sub set_twig_current { $_[0]->{twig_current}=1; } sub del_twig_current { delete $_[0]->{twig_current}; } # get or set the id attribute sub set_id { my( $elt, $id)= @_; $elt->del_id() if( exists $elt->{att}->{$ID}); $elt->set_att($ID, $id); $elt->_set_id( $id); return $elt; } # only set id, does not update the attribute value sub _set_id { my( $elt, $id)= @_; my $t= $elt->twig || $elt; $t->{twig_id_list}->{$id}= $elt; if( $XML::Twig::weakrefs) { weaken( $t->{twig_id_list}->{$id}); } return $elt; } sub id { return $_[0]->{att}->{$ID}; } # methods used to add ids to elements that don't have one BEGIN { my $id_nb = "0001"; my $id_seed = "twig_id_"; sub set_id_seed ## no critic (Subroutines::ProhibitNestedSubs); { $id_seed= $_[1]; $id_nb=1; } sub add_id ## no critic (Subroutines::ProhibitNestedSubs); { my $elt= shift; if( defined $elt->{'att'}->{$ID}) { return $elt->{'att'}->{$ID}; } else { my $id= $_[0] && ref( $_[0]) && isa( $_[0], 'CODE') ? $_[0]->( $elt) : $id_seed . $id_nb++; $elt->set_id( $id); return $id; } } } # delete the id attribute and remove the element from the id list sub del_id { my $elt= shift; if( ! exists $elt->{att}->{$ID}) { return $elt }; my $id= $elt->{att}->{$ID}; delete $elt->{att}->{$ID}; my $t= shift || $elt->twig; unless( $t) { return $elt; } if( exists $t->{twig_id_list}->{$id}) { delete $t->{twig_id_list}->{$id}; } return $elt; } # return the list of children sub children { my $elt= shift; my @children; my $child= $elt->first_child( @_); while( $child) { push @children, $child; $child= $child->next_sibling( @_); } return @children; } sub _children { my $elt= shift; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } return @children; } sub children_copy { my $elt= shift; my @children; my $child= $elt->first_child( @_); while( $child) { push @children, $child->copy; $child= $child->next_sibling( @_); } return @children; } sub children_count { my $elt= shift; my $cond= shift; my $count=0; my $child= $elt->{first_child}; while( $child) { $count++ if( $child->passes( $cond)); $child= $child->{next_sibling}; } return $count; } sub children_text { my $elt= shift; return wantarray() ? map { $_->text} $elt->children( @_) : join( '', map { $_->text} $elt->children( @_) ) ; } sub children_trimmed_text { my $elt= shift; return wantarray() ? map { $_->trimmed_text} $elt->children( @_) : join( '', map { $_->trimmed_text} $elt->children( @_) ) ; } sub all_children_are { my( $parent, $cond)= @_; foreach my $child ($parent->_children) { return 0 unless( $child->passes( $cond)); } return $parent; } sub ancestors { my( $elt, $cond)= @_; my @ancestors; while( $elt->{parent}) { $elt= $elt->{parent}; push @ancestors, $elt if( $elt->passes( $cond)); } return @ancestors; } sub ancestors_or_self { my( $elt, $cond)= @_; my @ancestors; while( $elt) { push @ancestors, $elt if( $elt->passes( $cond)); $elt= $elt->{parent}; } return @ancestors; } sub _ancestors { my( $elt, $include_self)= @_; my @ancestors= $include_self ? ($elt) : (); while( $elt= $elt->{parent}) { push @ancestors, $elt; } return @ancestors; } sub inherit_att { my $elt= shift; my $att= shift; my %tags= map { ($_, 1) } @_; do { if( (defined $elt->{'att'}->{$att}) && ( !%tags || $tags{$XML::Twig::index2gi[$elt->{'gi'}]}) ) { return $elt->{'att'}->{$att}; } } while( $elt= $elt->{parent}); return undef; } sub _inherit_att_through_cut { my $elt= shift; my $att= shift; my %tags= map { ($_, 1) } @_; do { if( (defined $elt->{'att'}->{$att}) && ( !%tags || $tags{$XML::Twig::index2gi[$elt->{'gi'}]}) ) { return $elt->{'att'}->{$att}; } } while( $elt= $elt->{parent} || ($elt->{former} && $elt->{former}->{parent})); return undef; } sub current_ns_prefixes { my $elt= shift; my %prefix; $prefix{''}=1 if( $elt->namespace( '')); while( $elt) { my @ns= grep { !m{^xml} } map { m{^([^:]+):} } ($XML::Twig::index2gi[$elt->{'gi'}], $elt->att_names); $prefix{$_}=1 foreach (@ns); $elt= $elt->{parent}; } return (sort keys %prefix); } # kinda counter-intuitive actually: # the next element is found by looking for the next open tag after from the # current one, which is the first child, if it exists, or the next sibling # or the first next sibling of an ancestor # optional arguments are: # - $subtree_root: a reference to an element, when the next element is not # within $subtree_root anymore then next_elt returns undef # - $cond: a condition, next_elt returns the next element matching the condition sub next_elt { my $elt= shift; my $subtree_root= 0; $subtree_root= shift if( ref( $_[0]) && isa( $_[0], 'XML::Twig::Elt')); my $cond= shift; my $next_elt; my $ind; # optimization my $test_cond; if( $cond) # optimization { unless( defined( $ind= $XML::Twig::gi2index{$cond}) ) # optimization { $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond)); } # optimization } # optimization do { if( $next_elt= $elt->{first_child}) { # simplest case: the elt has a child } elsif( $next_elt= $elt->{next_sibling}) { # no child but a next sibling (just check we stay within the subtree) # case where elt is subtree_root, is empty and has a sibling return undef if( $subtree_root && ($elt == $subtree_root)); } else { # case where the element has no child and no next sibling: # get the first next sibling of an ancestor, checking subtree_root # case where elt is subtree_root, is empty and has no sibling return undef if( $subtree_root && ($elt == $subtree_root)); $next_elt= $elt->{parent} || return undef; until( $next_elt->{next_sibling}) { return undef if( $subtree_root && ($subtree_root == $next_elt)); $next_elt= $next_elt->{parent} || return undef; } return undef if( $subtree_root && ($subtree_root == $next_elt)); $next_elt= $next_elt->{next_sibling}; } $elt= $next_elt; # just in case we need to loop } until( ! defined $elt || ! defined $cond || (defined $ind && ($elt->{gi} eq $ind)) # optimization || (defined $test_cond && ($test_cond->( $elt))) ); return $elt; } # return the next_elt within the element # just call next_elt with the element as first and second argument sub first_descendant { return $_[0]->next_elt( @_); } # get the last descendant, # then return the element found or call prev_elt with the condition sub last_descendant { my( $elt, $cond)= @_; my $last_descendant= $elt->_last_descendant; if( !$cond || $last_descendant->matches( $cond)) { return $last_descendant; } else { return $last_descendant->prev_elt( $elt, $cond); } } # no argument allowed here, just go down the last_child recursively sub _last_descendant { my $elt= shift; while( my $child= $elt->{last_child}) { $elt= $child; } return $elt; } # counter-intuitive too: # the previous element is found by looking # for the first open tag backwards from the current one # it's the last descendant of the previous sibling # if it exists, otherwise it's simply the parent sub prev_elt { my $elt= shift; my $subtree_root= 0; if( defined $_[0] and (ref( $_[0]) && isa( $_[0], 'XML::Twig::Elt'))) { $subtree_root= shift ; return undef if( $elt == $subtree_root); } my $cond= shift; # get prev elt my $prev_elt; do { return undef if( $elt == $subtree_root); if( $prev_elt= $elt->{prev_sibling}) { while( $prev_elt->{last_child}) { $prev_elt= $prev_elt->{last_child}; } } else { $prev_elt= $elt->{parent} || return undef; } $elt= $prev_elt; # in case we need to loop } until( $elt->passes( $cond)); return $elt; } sub _following_elt { my( $elt)= @_; while( $elt && !$elt->{next_sibling}) { $elt= $elt->{parent}; } return $elt ? $elt->{next_sibling} : undef; } sub following_elt { my( $elt, $cond)= @_; $elt= $elt->_following_elt || return undef; return $elt if( !$cond || $elt->matches( $cond)); return $elt->next_elt( $cond); } sub following_elts { my( $elt, $cond)= @_; if( !$cond) { undef $cond; } my $following= $elt->following_elt( $cond); if( $following) { my @followings= $following; while( $following= $following->next_elt( $cond)) { push @followings, $following; } return( @followings); } else { return (); } } sub _preceding_elt { my( $elt)= @_; while( $elt && !$elt->{prev_sibling}) { $elt= $elt->{parent}; } return $elt ? $elt->{prev_sibling}->_last_descendant : undef; } sub preceding_elt { my( $elt, $cond)= @_; $elt= $elt->_preceding_elt || return undef; return $elt if( !$cond || $elt->matches( $cond)); return $elt->prev_elt( $cond); } sub preceding_elts { my( $elt, $cond)= @_; if( !$cond) { undef $cond; } my $preceding= $elt->preceding_elt( $cond); if( $preceding) { my @precedings= $preceding; while( $preceding= $preceding->prev_elt( $cond)) { push @precedings, $preceding; } return( @precedings); } else { return (); } } # used in get_xpath sub _self { my( $elt, $cond)= @_; return $cond ? $elt->matches( $cond) : $elt; } sub next_n_elt { my $elt= shift; my $offset= shift || return undef; foreach (1..$offset) { $elt= $elt->next_elt( @_) || return undef; } return $elt; } # checks whether $elt is included in $ancestor, returns 1 in that case sub in { my ($elt, $ancestor)= @_; if( ref( $ancestor) && isa( $ancestor, 'XML::Twig::Elt')) { # element while( $elt= $elt->{parent}) { return $elt if( $elt == $ancestor); } } else { # condition while( $elt= $elt->{parent}) { return $elt if( $elt->matches( $ancestor)); } } return 0; } sub first_child_text { my $elt= shift; my $dest=$elt->first_child(@_) or return ''; return $dest->text; } sub fields { my $elt= shift; return map { $elt->field( $_) } @_; } sub first_child_trimmed_text { my $elt= shift; my $dest=$elt->first_child(@_) or return ''; return $dest->trimmed_text; } sub first_child_matches { my $elt= shift; my $dest= $elt->{first_child} or return undef; return $dest->passes( @_); } sub last_child_text { my $elt= shift; my $dest=$elt->last_child(@_) or return ''; return $dest->text; } sub last_child_trimmed_text { my $elt= shift; my $dest=$elt->last_child(@_) or return ''; return $dest->trimmed_text; } sub last_child_matches { my $elt= shift; my $dest= $elt->{last_child} or return undef; return $dest->passes( @_); } sub child_text { my $elt= shift; my $dest=$elt->child(@_) or return ''; return $dest->text; } sub child_trimmed_text { my $elt= shift; my $dest=$elt->child(@_) or return ''; return $dest->trimmed_text; } sub child_matches { my $elt= shift; my $nb= shift; my $dest= $elt->child( $nb) or return undef; return $dest->passes( @_); } sub prev_sibling_text { my $elt= shift; my $dest= $elt->_prev_sibling(@_) or return ''; return $dest->text; } sub prev_sibling_trimmed_text { my $elt= shift; my $dest= $elt->_prev_sibling(@_) or return ''; return $dest->trimmed_text; } sub prev_sibling_matches { my $elt= shift; my $dest= $elt->{prev_sibling} or return undef; return $dest->passes( @_); } sub next_sibling_text { my $elt= shift; my $dest= $elt->next_sibling(@_) or return ''; return $dest->text; } sub next_sibling_trimmed_text { my $elt= shift; my $dest= $elt->next_sibling(@_) or return ''; return $dest->trimmed_text; } sub next_sibling_matches { my $elt= shift; my $dest= $elt->{next_sibling} or return undef; return $dest->passes( @_); } sub prev_elt_text { my $elt= shift; my $dest= $elt->prev_elt(@_) or return ''; return $dest->text; } sub prev_elt_trimmed_text { my $elt= shift; my $dest= $elt->prev_elt(@_) or return ''; return $dest->trimmed_text; } sub prev_elt_matches { my $elt= shift; my $dest= $elt->prev_elt or return undef; return $dest->passes( @_); } sub next_elt_text { my $elt= shift; my $dest= $elt->next_elt(@_) or return ''; return $dest->text; } sub next_elt_trimmed_text { my $elt= shift; my $dest= $elt->next_elt(@_) or return ''; return $dest->trimmed_text; } sub next_elt_matches { my $elt= shift; my $dest= $elt->next_elt or return undef; return $dest->passes( @_); } sub parent_text { my $elt= shift; my $dest= $elt->parent(@_) or return ''; return $dest->text; } sub parent_trimmed_text { my $elt= shift; my $dest= $elt->parent(@_) or return ''; return $dest->trimmed_text; } sub parent_matches { my $elt= shift; my $dest= $elt->{parent} or return undef; return $dest->passes( @_); } sub is_first_child { my $elt= shift; my $parent= $elt->{parent} or return 0; my $first_child= $parent->first_child( @_) or return 0; return ($first_child == $elt) ? $elt : 0; } sub is_last_child { my $elt= shift; my $parent= $elt->{parent} or return 0; my $last_child= $parent->last_child( @_) or return 0; return ($last_child == $elt) ? $elt : 0; } # returns the depth level of the element # if 2 parameter are used then counts the 2cd element name in the # ancestors list sub level { my( $elt, $cond)= @_; my $level=0; my $name=shift || ''; while( $elt= $elt->{parent}) { $level++ if( !$cond || $elt->matches( $cond)); } return $level; } # checks whether $elt has an ancestor that satisfies $cond, returns the ancestor sub in_context { my ($elt, $cond, $level)= @_; $level= -1 unless( $level) ; # $level-- will never hit 0 while( $level) { $elt= $elt->{parent} or return 0; if( $elt->matches( $cond)) { return $elt; } $level--; } return 0; } sub _descendants { my( $subtree_root, $include_self)= @_; my @descendants= $include_self ? ($subtree_root) : (); my $elt= $subtree_root; my $next_elt; MAIN: while( 1) { if( $next_elt= $elt->{first_child}) { # simplest case: the elt has a child } elsif( $next_elt= $elt->{next_sibling}) { # no child but a next sibling (just check we stay within the subtree) # case where elt is subtree_root, is empty and has a sibling last MAIN if( $elt == $subtree_root); } else { # case where the element has no child and no next sibling: # get the first next sibling of an ancestor, checking subtree_root # case where elt is subtree_root, is empty and has no sibling last MAIN if( $elt == $subtree_root); # backtrack until we find a parent with a next sibling $next_elt= $elt->{parent} || last; until( $next_elt->{next_sibling}) { last MAIN if( $subtree_root == $next_elt); $next_elt= $next_elt->{parent} || last MAIN; } last MAIN if( $subtree_root == $next_elt); $next_elt= $next_elt->{next_sibling}; } $elt= $next_elt || last MAIN; push @descendants, $elt; } return @descendants; } sub descendants { my( $subtree_root, $cond)= @_; my @descendants=(); my $elt= $subtree_root; # this branch is pure optimization for speed: if $cond is a gi replace it # by the index of the gi and loop here # start optimization my $ind; if( !$cond || ( defined ( $ind= $XML::Twig::gi2index{$cond})) ) { my $next_elt; while( 1) { if( $next_elt= $elt->{first_child}) { # simplest case: the elt has a child } elsif( $next_elt= $elt->{next_sibling}) { # no child but a next sibling (just check we stay within the subtree) # case where elt is subtree_root, is empty and has a sibling last if( $subtree_root && ($elt == $subtree_root)); } else { # case where the element has no child and no next sibling: # get the first next sibling of an ancestor, checking subtree_root # case where elt is subtree_root, is empty and has no sibling last if( $subtree_root && ($elt == $subtree_root)); # backtrack until we find a parent with a next sibling $next_elt= $elt->{parent} || last undef; until( $next_elt->{next_sibling}) { last if( $subtree_root && ($subtree_root == $next_elt)); $next_elt= $next_elt->{parent} || last; } last if( $subtree_root && ($subtree_root == $next_elt)); $next_elt= $next_elt->{next_sibling}; } $elt= $next_elt || last; push @descendants, $elt if( !$cond || ($elt->{gi} eq $ind)); } } else # end optimization { # branch for a complex condition: use the regular (slow but simple) way while( $elt= $elt->next_elt( $subtree_root, $cond)) { push @descendants, $elt; } } return @descendants; } sub descendants_or_self { my( $elt, $cond)= @_; my @descendants= $elt->passes( $cond) ? ($elt) : (); push @descendants, $elt->descendants( $cond); return @descendants; } sub sibling { my $elt= shift; my $nb= shift; if( $nb > 0) { foreach( 1..$nb) { $elt= $elt->next_sibling( @_) or return undef; } } elsif( $nb < 0) { foreach( 1..(-$nb)) { $elt= $elt->prev_sibling( @_) or return undef; } } else # $nb == 0 { return $elt->passes( $_[0]); } return $elt; } sub sibling_text { my $elt= sibling( @_); return $elt ? $elt->text : undef; } sub child { my $elt= shift; my $nb= shift; if( $nb >= 0) { $elt= $elt->first_child( @_) or return undef; foreach( 1..$nb) { $elt= $elt->next_sibling( @_) or return undef; } } else { $elt= $elt->last_child( @_) or return undef; foreach( 2..(-$nb)) { $elt= $elt->prev_sibling( @_) or return undef; } } return $elt; } sub prev_siblings { my $elt= shift; my @siblings=(); while( $elt= $elt->prev_sibling( @_)) { unshift @siblings, $elt; } return @siblings; } sub siblings { my $elt= shift; return grep { $_ ne $elt } $elt->{parent}->children( @_); } sub pos { my $elt= shift; return 0 if ($_[0] && !$elt->matches( @_)); my $pos=1; $pos++ while( $elt= $elt->prev_sibling( @_)); return $pos; } sub next_siblings { my $elt= shift; my @siblings=(); while( $elt= $elt->next_sibling( @_)) { push @siblings, $elt; } return @siblings; } # used by get_xpath: parses the xpath expression and generates a sub that performs the # search { my %axis2method; BEGIN { %axis2method= ( child => 'children', descendant => 'descendants', 'descendant-or-self' => 'descendants_or_self', parent => 'parent_is', ancestor => 'ancestors', 'ancestor-or-self' => 'ancestors_or_self', 'following-sibling' => 'next_siblings', 'preceding-sibling' => 'prev_siblings', following => 'following_elts', preceding => 'preceding_elts', self => '_self', ); } sub _install_xpath { my( $xpath_exp, $type)= @_; my $original_exp= $xpath_exp; my $sub= 'my $elt= shift; my @results;'; # grab the root if expression starts with a / if( $xpath_exp=~ s{^/}{}) { $sub .= '@results= ($elt->twig) || croak "cannot use an XPath query starting with a / on a node not attached to a whole twig";'; } elsif( $xpath_exp=~ s{^\./}{}) { $sub .= '@results= ($elt);'; } else { $sub .= '@results= ($elt);'; } #warn "xpath_exp= '$xpath_exp'\n"; while( $xpath_exp && $xpath_exp=~s{^\s*(/?) # the xxx=~/regexp/ is a pain as it includes / (\s*(?:(?:($REG_AXIS)::)?(\*|$REG_TAG_PART|\.\.|\.)\s*)?($REG_PREDICATE_ALT*) ) (/|$)}{}xo) { my( $wildcard, $sub_exp, $axis, $gi, $predicates)= ($1, $2, $3, $4, $5); if( $axis && ! $gi) { _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp"); } # grab a parent if( $sub_exp eq '..') { _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp") if( $wildcard); $sub .= '@results= map { $_->{parent}} @results;'; } # test the element itself elsif( $sub_exp=~ m{^\.(.*)$}s) { $sub .= "\@results= grep { \$_->matches( q{$1}) } \@results;" } # grab children else { if( !$axis) { $axis= $wildcard ? 'descendant' : 'child'; } if( !$gi or $gi eq '*') { $gi=''; } my $function; # "special" predicates, that return just one element if( $predicates && ($predicates =~ m{^\s*\[\s*((-\s*)?\d+)\s*\]\s*$})) { # [] my $offset= $1; $offset-- if( $offset > 0); $function= $axis eq 'descendant' ? "next_n_elt( $offset, '$gi')" : $axis eq 'child' ? "child( $offset, '$gi')" : _croak_and_doublecheck_xpath( $original_exp, "error [$1] not supported along axis '$axis'") ; $sub .= "\@results= grep { \$_ } map { \$_->$function } \@results;" } elsif( $predicates && ($predicates =~ m{^\s*\[\s*last\s*\(\s*\)\s*\]\s*$}) ) { # last() _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp, usage of // and last() not supported") if( $wildcard); $sub .= "\@results= map { \$_->last_child( '$gi') } \@results;"; } else { # follow the axis #warn "axis: '$axis' - method: '$axis2method{$axis}' - gi: '$gi'\n"; my $follow_axis= " \$_->$axis2method{$axis}( '$gi')"; my $step= $follow_axis; # now filter using the predicate while( $predicates=~ s{^\s*($REG_PREDICATE_ALT)\s*}{}o) { my $pred= $1; $pred=~ s{^\s*\[\s*}{}; $pred=~ s{\s*\]\s*$}{}; my $test=""; my $pos; if( $pred=~ m{^(-?\s*\d+)$}) { my $pos= $1; if( $step=~ m{^\s*grep(.*) (\$_->\w+\(\s*'[^']*'\s*\))}) { $step= "XML::Twig::_first_n $1 $pos, $2"; } else { if( $pos > 0) { $pos--; } $step= "($step)[$pos]"; } #warn "number predicate '$pos' - generated step '$step'\n"; } else { my $syntax_error=0; do { if( $pred =~ s{^string\(\s*\)\s*=\s*($REG_STRING)\s*}{}o) # string()="string" pred { $test .= "\$_->text eq $1"; } elsif( $pred =~ s{^string\(\s*\)\s*!=\s*($REG_STRING)\s*}{}o) # string()!="string" pred { $test .= "\$_->text ne $1"; } if( $pred =~ s{^string\(\s*\)\s*=\s*($REG_NUMBER)\s*}{}o) # string()= pred { $test .= "\$_->text eq $1"; } elsif( $pred =~ s{^string\(\s*\)\s*!=\s*($REG_NUMBER)\s*}{}o) # string()!= pred { $test .= "\$_->text ne $1"; } elsif( $pred =~ s{^string\(\s*\)\s*(>|<|>=|<=)\s*($REG_NUMBER)\s*}{}o) # string()!= pred { $test .= "\$_->text $1 $2"; } elsif( $pred =~ s{^string\(\s*\)\s*($REG_MATCH)\s*($REG_REGEXP)\s*}{}o) # string()=~/regex/ pred { my( $match, $regexp)= ($1, $2); $test .= "\$_->text $match $regexp"; } elsif( $pred =~ s{^string\(\s*\)\s*}{}o) # string() pred { $test .= "\$_->text"; } elsif( $pred=~ s{^@($REG_TAG_NAME)\s*($REG_OP)\s*($REG_STRING|$REG_NUMBER)}{}o) # @att="val" pred { my( $att, $oper, $val)= ($1, _op( $2), $3); $test .= qq{((defined \$_->{'att'}->{"$att"}) && (\$_->{'att'}->{"$att"} $oper $val))}; } elsif( $pred =~ s{^@($REG_TAG_NAME)\s*($REG_MATCH)\s*($REG_REGEXP)\s*}{}o) # @att=~/regex/ pred XXX { my( $att, $match, $regexp)= ($1, $2, $3); $test .= qq{((defined \$_->{'att'}->{"$att"}) && (\$_->{'att'}->{"$att"} $match $regexp))};; } elsif( $pred=~ s{^@($REG_TAG_NAME)\s*}{}o) # @att pred { $test .= qq{(defined \$_->{'att'}->{"$1"})}; } elsif( $pred=~ s{^\s*(?:not|!)\s*@($REG_TAG_NAME)\s*}{}o) # not @att pred { $test .= qq{((\$_->is_elt) && (not defined \$_->{'att'}->{"$1"}))}; } elsif( $pred=~ s{^\s*([()])}{}) # ( or ) (just add to the test) { $test .= qq{$1}; } elsif( $pred=~ s{^\s*(and|or)\s*}{}) { $test .= lc " $1 "; } else { $syntax_error=1; } } while( !$syntax_error && $pred); _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp at $pred") if( $pred); $step= " grep { $test } $step "; } } #warn "step: '$step'"; $sub .= "\@results= grep defined, map { $step } \@results;"; } } } if( $xpath_exp) { _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp around $xpath_exp"); } $sub .= q{return XML::Twig::_unique_elts( @results); }; #warn "generated: '$sub'\n"; my $s= eval "sub { $NO_WARNINGS; $sub }"; if( $@) { _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp ($@);") } return( $s); } } sub _croak_and_doublecheck_xpath { my $xpath_expression= shift; my $mess= join( "\n", @_); if( $XML::Twig::XPath::VERSION || 0) { my $check_twig= XML::Twig::XPath->new; if( eval { $check_twig->{twig_xp}->_parse( $xpath_expression) }) { $mess .= "\nthe expression is a valid XPath statement, and you are using XML::Twig::XPath, but" . "\nyou are using either 'find_nodes' or 'get_xpath' where the method you likely wanted" . "\nto use is 'findnodes', which is the only one that uses the full XPath engine\n"; } } croak $mess; } { # extremely elaborate caching mechanism my %xpath; # xpath_expression => subroutine_code; sub get_xpath { my( $elt, $xpath_exp, $offset)= @_; my $sub= ($xpath{$xpath_exp} ||= _install_xpath( $xpath_exp)); return $sub->( $elt) unless( defined $offset); my @res= $sub->( $elt); return $res[$offset]; } } sub findvalues { my $elt= shift; return map { $_->text } $elt->get_xpath( @_); } sub findvalue { my $elt= shift; return join '', map { $_->text } $elt->get_xpath( @_); } # XML::XPath compatibility sub getElementById { return $_[0]->twig->elt_id( $_[1]); } sub getChildNodes { my @children= do { my $elt= $_[0]; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }; return wantarray ? @children : \@children; } sub _flushed { return $_[0]->{flushed}; } sub _set_flushed { $_[0]->{flushed}=1; } sub _del_flushed { delete $_[0]->{flushed}; } sub cut { my $elt= shift; my( $parent, $prev_sibling, $next_sibling); $parent= $elt->{parent}; if( ! $parent && $elt->is_elt) { # are we cutting the root? my $t= $elt->{twig}; if( $t && ! $t->{twig_parsing}) { delete $t->{twig_root}; delete $elt->{twig}; return $elt; } # cutt`ing the root else { return; } # cutting an orphan, returning $elt would break backward compatibility } # save the old links, that'll make it easier for some loops foreach my $link ( qw(parent prev_sibling next_sibling) ) { $elt->{former}->{$link}= $elt->{$link}; if( $XML::Twig::weakrefs) { weaken( $elt->{former}->{$link}); } } # if we cut the current element then its parent becomes the current elt if( $elt->{twig_current}) { my $twig_current= $elt->{parent}; $elt->twig->{twig_current}= $twig_current; $twig_current->{'twig_current'}=1; delete $elt->{'twig_current'}; } if( $parent->{first_child} && $parent->{first_child} == $elt) { $parent->{first_child}= $elt->{next_sibling}; # cutting can make the parent empty if( ! $parent->{first_child}) { $parent->{empty}= 1; } } if( $parent->{last_child} && $parent->{last_child} == $elt) { delete $parent->{empty}; $parent->{last_child}=$elt->{prev_sibling}; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; } if( $prev_sibling= $elt->{prev_sibling}) { $prev_sibling->{next_sibling}= $elt->{next_sibling}; } if( $next_sibling= $elt->{next_sibling}) { $next_sibling->{prev_sibling}=$elt->{prev_sibling}; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ; } $elt->{parent}=undef; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; $elt->{prev_sibling}=undef; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; $elt->{next_sibling}= undef; # merge 2 (now) consecutive text nodes if they are of the same type # (type can be PCDATA or CDATA) if( $prev_sibling && $next_sibling && $prev_sibling->is_text && ( $XML::Twig::index2gi[$prev_sibling->{'gi'}] eq $XML::Twig::index2gi[$next_sibling->{'gi'}])) { $prev_sibling->merge_text( $next_sibling); } return $elt; } sub former_next_sibling { return $_[0]->{former}->{next_sibling}; } sub former_prev_sibling { return $_[0]->{former}->{prev_sibling}; } sub former_parent { return $_[0]->{former}->{parent}; } sub cut_children { my( $elt, $exp)= @_; my @children= $elt->children( $exp); foreach (@children) { $_->cut; } if( ! $elt->has_children) { $elt->{empty}= 1; } return @children; } sub cut_descendants { my( $elt, $exp)= @_; my @descendants= $elt->descendants( $exp); foreach ($elt->descendants( $exp)) { $_->cut; } if( ! $elt->has_children) { $elt->{empty}= 1; } return @descendants; } sub erase { my $elt= shift; #you cannot erase the current element if( $elt->{twig_current}) { croak "trying to erase an element before it has been completely parsed"; } if( my $parent= $elt->{parent}) { # normal case $elt->_move_extra_data_after_erase; my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }; if( @children) { # elt has children, move them up # the first child may need to be merged with a previous text my $first_child= shift @children; $first_child->move( before => $elt); my $prev= $first_child->{prev_sibling}; if( $prev && $prev->is_text && ($XML::Twig::index2gi[$first_child->{'gi'}] eq $XML::Twig::index2gi[$prev->{'gi'}]) ) { $prev->merge_text( $first_child); } # move the rest of the children foreach my $child (@children) { $child->move( before => $elt); } # now the elt had no child, delete it $elt->delete; # now see if we need to merge the last child with the next element my $last_child= $children[-1] || $first_child; # if no last child, then it's also the first child my $next= $last_child->{next_sibling}; if( $next && $next->is_text && ($XML::Twig::index2gi[$last_child->{'gi'}] eq $XML::Twig::index2gi[$next->{'gi'}]) ) { $last_child->merge_text( $next); } # if parsing and have now a PCDATA text, mark so we can normalize later on if need be if( $parent->{twig_current} && $last_child->is_text) { $parent->{twig_to_be_normalized}=1; } } else { # no children, just cut the elt $elt->delete; } } else { # trying to erase the root (of a twig or of a cut/new element) my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }; unless( @children == 1) { croak "can only erase an element with no parent if it has a single child"; } $elt->_move_extra_data_after_erase; my $child= shift @children; $child->{parent}=undef; if( $XML::Twig::weakrefs) { weaken( $child->{parent});} ; my $twig= $elt->twig; $twig->set_root( $child); } return $elt; } sub _move_extra_data_after_erase { my( $elt)= @_; # extra_data if( my $extra_data= $elt->{extra_data}) { my $target= $elt->{first_child} || $elt->{next_sibling}; if( $target) { if( $target->is( $ELT)) { $target->set_extra_data( $extra_data . ($target->extra_data || '')); } elsif( $target->is( $TEXT)) { $target->_unshift_extra_data_in_pcdata( $extra_data, 0); } # TO CHECK } else { my $parent= $elt->{parent}; # always exists or the erase cannot be performed $parent->_prefix_extra_data_before_end_tag( $extra_data); } } # extra_data_before_end_tag if( my $extra_data= $elt->{extra_data_before_end_tag}) { if( my $target= $elt->{next_sibling}) { if( $target->is( $ELT)) { $target->set_extra_data( $extra_data . ($target->extra_data || '')); } elsif( $target->is( $TEXT)) { $target->_unshift_extra_data_in_pcdata( $extra_data, 0); } } elsif( my $parent= $elt->{parent}) { $parent->_prefix_extra_data_before_end_tag( $extra_data); } } return $elt; } BEGIN { my %method= ( before => \&paste_before, after => \&paste_after, first_child => \&paste_first_child, last_child => \&paste_last_child, within => \&paste_within, ); # paste elt somewhere around ref # pos can be first_child (default), last_child, before, after or within sub paste ## no critic (Subroutines::ProhibitNestedSubs); { my $elt= shift; if( $elt->{parent}) { croak "cannot paste an element that belongs to a tree"; } my $pos; my $ref; if( ref $_[0]) { $pos= 'first_child'; croak "wrong argument order in paste, should be $_[1] first" if($_[1]); } else { $pos= shift; } if( my $method= $method{$pos}) { unless( ref( $_[0]) && isa( $_[0], 'XML::Twig::Elt')) { if( ! defined( $_[0])) { croak "missing target in paste"; } elsif( ! ref( $_[0])) { croak "wrong target type in paste (not a reference), should be XML::Twig::Elt or a subclass"; } else { my $ref= ref $_[0]; croak "wrong target type in paste: '$ref', should be XML::Twig::Elt or a subclass"; } } $ref= $_[0]; # check here so error message lists the caller file/line if( !$ref->{parent} && ($pos=~ m{^(before|after)$}) && !(exists $elt->{'target'}) && !(exists $elt->{'comment'})) { croak "cannot paste $1 root"; } $elt->$method( @_); } else { croak "tried to paste in wrong position '$pos', allowed positions " . " are 'first_child', 'last_child', 'before', 'after' and " . "'within'"; } if( (my $ids= $elt->{twig_id_list}) && (my $t= $ref->twig) ) { $t->{twig_id_list}||={}; foreach my $id (keys %$ids) { $t->{twig_id_list}->{$id}= $ids->{$id}; if( $XML::Twig::weakrefs) { weaken( $t->{twig_id_list}->{$id}); } } } return $elt; } sub paste_before { my( $elt, $ref)= @_; my( $parent, $prev_sibling, $next_sibling ); # trying to paste before an orphan (root or detached wlt) unless( $ref->{parent}) { if( my $t= $ref->twig) { if( (exists $elt->{'comment'}) || (exists $elt->{'target'})) # we can still do this { $t->_add_cpi_outside_of_root( leading_cpi => $elt); return; } else { croak "cannot paste before root"; } } else { croak "cannot paste before an orphan element"; } } $parent= $ref->{parent}; $prev_sibling= $ref->{prev_sibling}; $next_sibling= $ref; $elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; if( $parent->{first_child} == $ref) { $parent->{first_child}= $elt; } if( $prev_sibling) { $prev_sibling->{next_sibling}= $elt; } $elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; $next_sibling->{prev_sibling}=$elt; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ; $elt->{next_sibling}= $ref; return $elt; } sub paste_after { my( $elt, $ref)= @_; my( $parent, $prev_sibling, $next_sibling ); # trying to paste after an orphan (root or detached wlt) unless( $ref->{parent}) { if( my $t= $ref->twig) { if( (exists $elt->{'comment'}) || (exists $elt->{'target'})) # we can still do this { $t->_add_cpi_outside_of_root( trailing_cpi => $elt); return; } else { croak "cannot paste after root"; } } else { croak "cannot paste after an orphan element"; } } $parent= $ref->{parent}; $prev_sibling= $ref; $next_sibling= $ref->{next_sibling}; $elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; if( $parent->{last_child}== $ref) { delete $parent->{empty}; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; } $prev_sibling->{next_sibling}= $elt; $elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; if( $next_sibling) { $next_sibling->{prev_sibling}=$elt; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ; } $elt->{next_sibling}= $next_sibling; return $elt; } sub paste_first_child { my( $elt, $ref)= @_; my( $parent, $prev_sibling, $next_sibling ); $parent= $ref; $next_sibling= $ref->{first_child}; $elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; $parent->{first_child}= $elt; unless( $parent->{last_child}) { delete $parent->{empty}; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; } $elt->{prev_sibling}=undef; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; if( $next_sibling) { $next_sibling->{prev_sibling}=$elt; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ; } $elt->{next_sibling}= $next_sibling; return $elt; } sub paste_last_child { my( $elt, $ref)= @_; my( $parent, $prev_sibling, $next_sibling ); $parent= $ref; $prev_sibling= $ref->{last_child}; $elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; delete $parent->{empty}; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; unless( $parent->{first_child}) { $parent->{first_child}= $elt; } $elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; if( $prev_sibling) { $prev_sibling->{next_sibling}= $elt; } $elt->{next_sibling}= undef; return $elt; } sub paste_within { my( $elt, $ref, $offset)= @_; my $text= $ref->is_text ? $ref : $ref->next_elt( $TEXT, $ref); my $new= $text->split_at( $offset); $elt->paste_before( $new); return $elt; } } # load an element into a structure similar to XML::Simple's sub simplify { my $elt= shift; # normalize option names my %options= @_; %options= map { my ($key, $val)= ($_, $options{$_}); $key=~ s{(\w)([A-Z])}{$1_\L$2}g; $key => $val } keys %options; # check options my @allowed_options= qw( keyattr forcearray noattr content_key var var_regexp variables var_attr group_tags forcecontent normalise_space normalize_space ); my %allowed_options= map { $_ => 1 } @allowed_options; foreach my $option (keys %options) { carp "invalid option $option\n" unless( $allowed_options{$option}); } $options{normalise_space} ||= $options{normalize_space} || 0; $options{content_key} ||= 'content'; if( $options{content_key}=~ m{^-}) { # need to remove the - and to activate extra folding $options{content_key}=~ s{^-}{}; $options{extra_folding}= 1; } else { $options{extra_folding}= 0; } $options{forcearray} ||=0; if( isa( $options{forcearray}, 'ARRAY')) { my %forcearray_tags= map { $_ => 1 } @{$options{forcearray}}; $options{forcearray_tags}= \%forcearray_tags; $options{forcearray}= 0; } $options{keyattr} ||= ['name', 'key', 'id']; if( ref $options{keyattr} eq 'ARRAY') { foreach my $keyattr (@{$options{keyattr}}) { my( $prefix, $att)= ($keyattr=~ m{^([+-])?(.*)}); $prefix ||= ''; $options{key_for_all}->{$att}= 1; $options{remove_key_for_all}->{$att}=1 unless( $prefix eq '+'); $options{prefix_key_for_all}->{$att}=1 if( $prefix eq '-'); } } elsif( ref $options{keyattr} eq 'HASH') { while( my( $elt, $keyattr)= each %{$options{keyattr}}) { my( $prefix, $att)= ($keyattr=~ m{^([+-])?(.*)}); $prefix ||=''; $options{key_for_elt}->{$elt}= $att; $options{remove_key_for_elt}->{"$elt#$att"}=1 unless( $prefix); $options{prefix_key_for_elt}->{"$elt#$att"}=1 if( $prefix eq '-'); } } $options{var}||= $options{var_attr}; # for compat with XML::Simple if( $options{var}) { $options{var_values}= {}; } else { $options{var}=''; } if( $options{variables}) { $options{var}||= 1; $options{var_values}= $options{variables}; } if( $options{var_regexp} and !$options{var}) { warn "var option not used, var_regexp option ignored\n"; } $options{var_regexp} ||= '\$\{?(\w+)\}?'; $elt->_simplify( \%options); } sub _simplify { my( $elt, $options)= @_; my $data; my $gi= $XML::Twig::index2gi[$elt->{'gi'}]; my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }; my %atts= $options->{noattr} || !$elt->{att} ? () : %{$elt->{att}}; my $nb_atts= keys %atts; my $nb_children= $elt->children_count + $nb_atts; my %nb_children; foreach (@children) { $nb_children{$_->tag}++; } foreach (keys %atts) { $nb_children{$_}++; } my $arrays; # tag => array where elements are stored # store children foreach my $child (@children) { if( $child->is_text) { # generate with a content key my $text= $elt->_text_with_vars( $options); if( $options->{normalise_space} >= 2) { $text= _normalize_space( $text); } if( $options->{force_content} || $nb_atts || (scalar @children > 1) ) { $data->{$options->{content_key}}= $text; } else { $data= $text; } } else { # element with sub-elements my $child_gi= $XML::Twig::index2gi[$child->{'gi'}]; my $child_data= $child->_simplify( $options); # first see if we need to simplify further the child data # simplify because of grouped tags if( my $grouped_tag= $options->{group_tags}->{$child_gi}) { # check that the child data is a hash with a single field unless( (ref( $child_data) eq 'HASH') && (keys %$child_data == 1) && defined ( my $grouped_child_data= $child_data->{$grouped_tag}) ) { croak "error in grouped tag $child_gi"; } else { $child_data= $grouped_child_data; } } # simplify because of extra folding if( $options->{extra_folding}) { if( (ref( $child_data) eq 'HASH') && (keys %$child_data == 1) && defined( my $content= $child_data->{$options->{content_key}}) ) { $child_data= $content; } } if( my $keyatt= $child->_key_attr( $options)) { # simplify element with key my $key= $child->{'att'}->{$keyatt}; if( $options->{normalise_space} >= 1) { $key= _normalize_space( $key); } $data->{$child_gi}->{$key}= $child_data; } elsif( $options->{forcearray} || $options->{forcearray_tags}->{$child_gi} || ( $nb_children{$child_gi} > 1) ) { # simplify element to store in an array if( defined $child_data && $child_data ne "" ) { $data->{$child_gi} ||= []; push @{$data->{$child_gi}}, $child_data; } else { $data->{$child_gi}= [{}]; } } else { # simplify element to store as a hash field $data->{$child_gi}=$child_data; $data->{$child_gi}= defined $child_data && $child_data ne "" ? $child_data : {}; } } } # store atts # TODO: deal with att that already have an element by that name foreach my $att (keys %atts) { # do not store if the att is a key that needs to be removed if( $options->{remove_key_for_all}->{$att} || $options->{remove_key_for_elt}->{"$gi#$att"} ) { next; } my $att_text= $options->{var} ? _replace_vars_in_text( $atts{$att}, $options) : $atts{$att} ; if( $options->{normalise_space} >= 2) { $att_text= _normalize_space( $att_text); } if( $options->{prefix_key_for_all}->{$att} || $options->{prefix_key_for_elt}->{"$gi#$att"} ) { # prefix the att $data->{"-$att"}= $att_text; } else { # normal case $data->{$att}= $att_text; } } return $data; } sub _key_attr { my( $elt, $options)=@_; return if( $options->{noattr}); if( $options->{key_for_all}) { foreach my $att ($elt->att_names) { if( $options->{key_for_all}->{$att}) { return $att; } } } elsif( $options->{key_for_elt}) { if( my $key_for_elt= $options->{key_for_elt}->{$XML::Twig::index2gi[$elt->{'gi'}]} ) { return $key_for_elt if( defined( $elt->{'att'}->{$key_for_elt})); } } return; } sub _text_with_vars { my( $elt, $options)= @_; my $text; if( $options->{var}) { $text= _replace_vars_in_text( $elt->text, $options); $elt->_store_var( $options); } else { $text= $elt->text; } return $text; } sub _normalize_space { my $text= shift; $text=~ s{\s+}{ }sg; $text=~ s{^\s}{}; $text=~ s{\s$}{}; return $text; } sub att_nb { return 0 unless( my $atts= $_[0]->{att}); return scalar keys %$atts; } sub has_no_atts { return 1 unless( my $atts= $_[0]->{att}); return scalar keys %$atts ? 0 : 1; } sub _replace_vars_in_text { my( $text, $options)= @_; $text=~ s{($options->{var_regexp})} { if( defined( my $value= $options->{var_values}->{$2})) { $value } else { warn "unknown variable $2\n"; $1 } }gex; return $text; } sub _store_var { my( $elt, $options)= @_; if( defined (my $var_name= $elt->{'att'}->{$options->{var}})) { $options->{var_values}->{$var_name}= $elt->text; } } # split a text element at a given offset sub split_at { my( $elt, $offset)= @_; my $text_elt= $elt->is_text ? $elt : $elt->first_child( $TEXT) || return ''; my $string= $text_elt->text; my $left_string= substr( $string, 0, $offset); my $right_string= substr( $string, $offset); $text_elt->{pcdata}= (delete $text_elt->{empty} || 1) && $left_string; my $new_elt= $elt->new( $XML::Twig::index2gi[$elt->{'gi'}], $right_string); $new_elt->paste( after => $elt); return $new_elt; } # split an element or its text descendants into several, in place # all elements (new and untouched) are returned sub split { my $elt= shift; my @text_chunks; my @result; if( $elt->is_text) { @text_chunks= ($elt); } else { @text_chunks= $elt->descendants( $TEXT); } foreach my $text_chunk (@text_chunks) { push @result, $text_chunk->_split( 1, @_); } return @result; } # split an element or its text descendants into several, in place # created elements (those which match the regexp) are returned sub mark { my $elt= shift; my @text_chunks; my @result; if( $elt->is_text) { @text_chunks= ($elt); } else { @text_chunks= $elt->descendants( $TEXT); } foreach my $text_chunk (@text_chunks) { push @result, $text_chunk->_split( 0, @_); } return @result; } # split a single text element # return_all defines what is returned: if it is true # only returns the elements created by matches in the split regexp # otherwise all elements (new and untouched) are returned { sub _split { my $elt= shift; my $return_all= shift; my $regexp= shift; my @tags; while( @_) { my $tag= shift(); if( ref $_[0]) { push @tags, { tag => $tag, atts => shift }; } else { push @tags, { tag => $tag }; } } unless( @tags) { @tags= { tag => $elt->{parent}->gi }; } my @result; # the returned list of elements my $text= $elt->text; my $gi= $XML::Twig::index2gi[$elt->{'gi'}]; # 2 uses: if split matches then the first substring reuses $elt # once a split has occurred then the last match needs to be put in # a new element my $previous_match= 0; while( my( $pre_match, @matches)= $text=~ /^(.*?)$regexp(.*)$/gcs) { $text= pop @matches; if( $previous_match) { # match, not the first one, create a new text ($gi) element _utf8_ify( $pre_match) if( $] < 5.010); $elt= $elt->insert_new_elt( after => $gi, $pre_match); push @result, $elt if( $return_all); } else { # first match in $elt, re-use $elt for the first sub-string _utf8_ify( $pre_match) if( $] < 5.010); $elt->set_text( $pre_match); $previous_match++; # store the fact that there was a match push @result, $elt if( $return_all); } # now deal with matches captured in the regexp if( @matches) { # match, with capture my $i=0; foreach my $match (@matches) { # create new element, text is the match _utf8_ify( $match) if( $] < 5.010); my $tag = _repl_match( $tags[$i]->{tag}, @matches) || '#PCDATA'; my $atts = \%{$tags[$i]->{atts}} || {}; my %atts= map { _repl_match( $_, @matches) => _repl_match( $atts->{$_}, @matches) } keys %$atts; $elt= $elt->insert_new_elt( after => $tag, \%atts, $match); push @result, $elt; $i= ($i + 1) % @tags; } } else { # match, no captures my $tag = $tags[0]->{tag}; my $atts = \%{$tags[0]->{atts}} || {}; $elt= $elt->insert_new_elt( after => $tag, $atts); push @result, $elt; } } if( $previous_match && $text) { # there was at least 1 match, and there is text left after the match $elt= $elt->insert_new_elt( after => $gi, $text); } push @result, $elt if( $return_all); return @result; # return all elements } sub _repl_match { my( $val, @matches)= @_; $val=~ s{\$(\d+)}{$matches[$1-1]}g; return $val; } # evil hack needed as sometimes my $encode_is_loaded=0; # so we only load Encode once sub _utf8_ify { if( $perl_version >= 5.008 and $perl_version < 5.010 and !_keep_encoding()) { unless( $encode_is_loaded) { require Encode; import Encode; $encode_is_loaded++; } Encode::_utf8_on( $_[0]); # the flag should be set but is not } } } { my %replace_sub; # cache for complex expressions (expression => sub) sub subs_text { my( $elt, $regexp, $replace)= @_; my $replacement_string; my $is_string= _is_string( $replace); my @parents; foreach my $text_elt ($elt->descendants_or_self( $TEXT)) { if( $is_string) { my $text= $text_elt->text; $text=~ s{$regexp}{ _replace_var( $replace, $1, $2, $3, $4, $5, $6, $7, $8, $9)}egx; $text_elt->set_text( $text); } else { no utf8; # = perl 5.6 my $replace_sub= ( $replace_sub{$replace} ||= _install_replace_sub( $replace)); my $text= $text_elt->text; my $pos=0; # used to skip text that was previously matched my $found_hit; while( my( $pre_match_string, $match_string, @var)= ($text=~ m{(.*?)($regexp)}sg)) { $found_hit=1; my $match_start = length( $pre_match_string); my $match = $match_start ? $text_elt->split_at( $match_start + $pos) : $text_elt; my $match_length = length( $match_string); my $post_match = $match->split_at( $match_length); $replace_sub->( $match, @var); # go to next $text_elt= $post_match; $text= $post_match->text; if( $found_hit) { push @parents, $text_elt->{parent} unless $parents[-1] && $parents[-1]== $text_elt->{parent}; } } } } foreach my $parent (@parents) { $parent->normalize; } return $elt; } sub _is_string { return ($_[0]=~ m{&e[ln]t}) ? 0: 1 } sub _replace_var { my( $string, @var)= @_; unshift @var, undef; $string=~ s{\$(\d)}{$var[$1]}g; return $string; } sub _install_replace_sub { my $replace_exp= shift; my @item= split m{(&e[ln]t\s*\([^)]*\))}, $replace_exp; my $sub= q{ my( $match, @var)= @_; my $new; my $last_inserted=$match;}; my( $gi, $exp); foreach my $item (@item) { next if ! length $item; if( $item=~ m{^&elt\s*\(([^)]*)\)}) { $exp= $1; } elsif( $item=~ m{^&ent\s*\(\s*([^\s)]*)\s*\)}) { $exp= " '#ENT' => $1"; } else { $exp= qq{ '#PCDATA' => "$item"}; } $exp=~ s{\$(\d)}{my $i= $1-1; "\$var[$i]"}eg; # replace references to matches $sub.= qq{ \$new= \$match->new( $exp); }; $sub .= q{ $new->paste( after => $last_inserted); $last_inserted=$new;}; } $sub .= q{ $match->delete; }; #$sub=~ s/;/;\n/g; warn "subs: $sub"; my $coderef= eval "sub { $NO_WARNINGS; $sub }"; if( $@) { croak( "invalid replacement expression $replace_exp: ",$@); } return $coderef; } } sub merge_text { my( $e1, $e2)= @_; croak "invalid merge: can only merge 2 elements" unless( isa( $e2, 'XML::Twig::Elt')); croak "invalid merge: can only merge 2 text elements" unless( $e1->is_text && $e2->is_text && ($e1->gi eq $e2->gi)); my $t1_length= length( $e1->text); $e1->set_text( $e1->text . $e2->text); if( my $extra_data_in_pcdata= $e2->_extra_data_in_pcdata) { foreach my $data (@$extra_data_in_pcdata) { $e1->_push_extra_data_in_pcdata( $data->{text}, $data->{offset} + $t1_length); } } $e2->delete; return $e1; } sub merge { my( $e1, $e2)= @_; my @e2_children= $e2->_children; if( $e1->_last_child && $e1->_last_child->is_pcdata && @e2_children && $e2_children[0]->is_pcdata ) { my $t1_length= length( $e1->_last_child->{pcdata}); my $child1= $e1->_last_child; my $child2= shift @e2_children; $child1->{pcdata} .= $child2->{pcdata}; my $extra_data= $e1->_extra_data_before_end_tag . $e2->extra_data; if( $extra_data) { $e1->_del_extra_data_before_end_tag; $child1->_push_extra_data_in_pcdata( $extra_data, $t1_length); } if( my $extra_data_in_pcdata= $child2->_extra_data_in_pcdata) { foreach my $data (@$extra_data_in_pcdata) { $child1->_push_extra_data_in_pcdata( $data->{text}, $data->{offset} + $t1_length); } } if( my $extra_data_before_end_tag= $e2->_extra_data_before_end_tag) { $e1->_set_extra_data_before_end_tag( $extra_data_before_end_tag); } } foreach my $e (@e2_children) { $e->move( last_child => $e1); } $e2->delete; return $e1; } # recursively copy an element and returns the copy (can be huge and long) sub copy { my $elt= shift; my $copy= $elt->new( $XML::Twig::index2gi[$elt->{'gi'}]); if( $elt->extra_data) { $copy->set_extra_data( $elt->extra_data); } if( $elt->{extra_data_before_end_tag}) { $copy->_set_extra_data_before_end_tag( $elt->{extra_data_before_end_tag}); } if( $elt->is_asis) { $copy->set_asis; } if( (exists $elt->{'pcdata'})) { $copy->{pcdata}= (delete $copy->{empty} || 1) && $elt->{pcdata}; if( $elt->{extra_data_in_pcdata}) { $copy->_set_extra_data_in_pcdata( $elt->{extra_data_in_pcdata}); } } elsif( (exists $elt->{'cdata'})) { $copy->{cdata}= $elt->{cdata}; if( $elt->{extra_data_in_pcdata}) { $copy->_set_extra_data_in_pcdata( $elt->{extra_data_in_pcdata}); } } elsif( (exists $elt->{'target'})) { $copy->_set_pi( $elt->{target}, $elt->{data}); } elsif( (exists $elt->{'comment'})) { $copy->{comment}= $elt->{comment}; } elsif( (exists $elt->{'ent'})) { $copy->{ent}= $elt->{ent}; } else { my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }; if( my $atts= $elt->{att}) { my %atts; tie %atts, 'Tie::IxHash' if (keep_atts_order()); %atts= %{$atts}; # we want to do a real copy of the attributes $copy->set_atts( \%atts); } foreach my $child (@children) { my $child_copy= $child->copy; $child_copy->paste( 'last_child', $copy); } } # save links to the original location, which can be convenient and is used for namespace resolution foreach my $link ( qw(parent prev_sibling next_sibling) ) { $copy->{former}->{$link}= $elt->{$link}; if( $XML::Twig::weakrefs) { weaken( $copy->{former}->{$link}); } } $copy->{empty}= $elt->{'empty'}; return $copy; } sub delete { my $elt= shift; $elt->cut; $elt->DESTROY unless $XML::Twig::weakrefs; return undef; } sub __destroy { my $elt= shift; return if( $XML::Twig::weakrefs); my $t= shift || $elt->twig; # optional argument, passed in recursive calls foreach( @{[$elt->_children]}) { $_->DESTROY( $t); } # the id reference needs to be destroyed # lots of tests to avoid warnings during the cleanup phase $elt->del_id( $t) if( $ID && $t && defined( $elt->{att}) && exists( $elt->{att}->{$ID})); if( $elt->{former}) { foreach (keys %{$elt->{former}}) { delete $elt->{former}->{$_}; } delete $elt->{former}; } foreach (qw( keys %$elt)) { delete $elt->{$_}; } undef $elt; } BEGIN { sub set_destroy { if( $XML::Twig::weakrefs) { undef *DESTROY } else { *DESTROY= *__destroy; } } set_destroy(); } # ignores the element sub ignore { my $elt= shift; my $t= $elt->twig; $t->ignore( $elt, @_); } BEGIN { my $pretty = 0; my $quote = '"'; my $INDENT = ' '; my $empty_tag_style = 0; my $remove_cdata = 0; my $keep_encoding = 0; my $expand_external_entities = 0; my $keep_atts_order = 0; my $do_not_escape_amp_in_atts = 0; my $WRAP = '80'; my $REPLACED_ENTS = qq{&<}; my ($NSGMLS, $NICE, $INDENTED, $INDENTEDCT, $INDENTEDC, $WRAPPED, $RECORD1, $RECORD2, $INDENTEDA)= (1..9); my %KEEP_TEXT_TAG_ON_ONE_LINE= map { $_ => 1 } ( $INDENTED, $INDENTEDCT, $INDENTEDC, $INDENTEDA, $WRAPPED); my %WRAPPED = map { $_ => 1 } ( $WRAPPED, $INDENTEDA, $INDENTEDC); my %pretty_print_style= ( none => 0, # no added \n nsgmls => $NSGMLS, # nsgmls-style, \n in tags # below this line styles are UNSAFE (the generated XML can be well-formed but invalid) nice => $NICE, # \n after open/close tags except when the # element starts with text indented => $INDENTED, # nice plus idented indented_close_tag => $INDENTEDCT, # nice plus idented indented_c => $INDENTEDC, # slightly more compact than indented (closing # tags are on the same line) wrapped => $WRAPPED, # text is wrapped at column record_c => $RECORD1, # for record-like data (compact) record => $RECORD2, # for record-like data (not so compact) indented_a => $INDENTEDA, # nice, indented, and with attributes on separate # lines as the nsgmls style, as well as wrapped # lines - to make the xml friendly to line-oriented tools cvs => $INDENTEDA, # alias for indented_a ); my ($HTML, $EXPAND)= (1..2); my %empty_tag_style= ( normal => 0, # html => $HTML, # xhtml => $HTML, # expand => $EXPAND, # ); my %quote_style= ( double => '"', single => "'", # smart => "smart", ); my $xml_space_preserve; # set when an element includes xml:space="preserve" my $output_filter; # filters the entire output (including < and >) my $output_text_filter; # filters only the text part (tag names, attributes, pcdata) my $replaced_ents= $REPLACED_ENTS; # returns those pesky "global" variables so you can switch between twigs sub global_state ## no critic (Subroutines::ProhibitNestedSubs); { return { pretty => $pretty, quote => $quote, indent => $INDENT, empty_tag_style => $empty_tag_style, remove_cdata => $remove_cdata, keep_encoding => $keep_encoding, expand_external_entities => $expand_external_entities, output_filter => $output_filter, output_text_filter => $output_text_filter, keep_atts_order => $keep_atts_order, do_not_escape_amp_in_atts => $do_not_escape_amp_in_atts, wrap => $WRAP, replaced_ents => $replaced_ents, }; } # restores the global variables sub set_global_state { my $state= shift; $pretty = $state->{pretty}; $quote = $state->{quote}; $INDENT = $state->{indent}; $empty_tag_style = $state->{empty_tag_style}; $remove_cdata = $state->{remove_cdata}; $keep_encoding = $state->{keep_encoding}; $expand_external_entities = $state->{expand_external_entities}; $output_filter = $state->{output_filter}; $output_text_filter = $state->{output_text_filter}; $keep_atts_order = $state->{keep_atts_order}; $do_not_escape_amp_in_atts = $state->{do_not_escape_amp_in_atts}; $WRAP = $state->{wrap}; $replaced_ents = $state->{replaced_ents}, } # sets global state to defaults sub init_global_state { set_global_state( { pretty => 0, quote => '"', indent => $INDENT, empty_tag_style => 0, remove_cdata => 0, keep_encoding => 0, expand_external_entities => 0, output_filter => undef, output_text_filter => undef, keep_atts_order => undef, do_not_escape_amp_in_atts => 0, wrap => $WRAP, replaced_ents => $REPLACED_ENTS, }); } # set the pretty_print style (in $pretty) and returns the old one # can be called from outside the package with 2 arguments (elt, style) # or from inside with only one argument (style) # the style can be either a string (one of the keys of %pretty_print_style # or a number (presumably an old value saved) sub set_pretty_print { my $style= lc( defined $_[1] ? $_[1] : $_[0]); # so we cover both cases my $old_pretty= $pretty; if( $style=~ /^\d+$/) { croak "invalid pretty print style $style" unless( $style < keys %pretty_print_style); $pretty= $style; } else { croak "invalid pretty print style '$style'" unless( exists $pretty_print_style{$style}); $pretty= $pretty_print_style{$style}; } if( $WRAPPED{$pretty} ) { XML::Twig::_use( 'Text::Wrap') or croak( "Text::Wrap not available, cannot use style $style"); } return $old_pretty; } sub _pretty_print { return $pretty; } # set the empty tag style (in $empty_tag_style) and returns the old one # can be called from outside the package with 2 arguments (elt, style) # or from inside with only one argument (style) # the style can be either a string (one of the keys of %empty_tag_style # or a number (presumably an old value saved) sub set_empty_tag_style { my $style= lc( defined $_[1] ? $_[1] : $_[0]); # so we cover both cases my $old_style= $empty_tag_style; if( $style=~ /^\d+$/) { croak "invalid empty tag style $style" unless( $style < keys %empty_tag_style); $empty_tag_style= $style; } else { croak "invalid empty tag style '$style'" unless( exists $empty_tag_style{$style}); $empty_tag_style= $empty_tag_style{$style}; } return $old_style; } sub _pretty_print_styles { return (sort { $pretty_print_style{$a} <=> $pretty_print_style{$b} || $a cmp $b } keys %pretty_print_style); } sub set_quote { my $style= $_[1] || $_[0]; my $old_quote= $quote; croak "invalid quote '$style'" unless( exists $quote_style{$style}); $quote= $quote_style{$style}; return $old_quote; } sub set_remove_cdata { my $new_value= defined $_[1] ? $_[1] : $_[0]; my $old_value= $remove_cdata; $remove_cdata= $new_value; return $old_value; } sub set_indent { my $new_value= defined $_[1] ? $_[1] : $_[0]; my $old_value= $INDENT; $INDENT= $new_value; return $old_value; } sub set_wrap { my $new_value= defined $_[1] ? $_[1] : $_[0]; my $old_value= $WRAP; $WRAP= $new_value; return $old_value; } sub set_keep_encoding { my $new_value= defined $_[1] ? $_[1] : $_[0]; my $old_value= $keep_encoding; $keep_encoding= $new_value; return $old_value; } sub set_replaced_ents { my $new_value= defined $_[1] ? $_[1] : $_[0]; my $old_value= $replaced_ents; $replaced_ents= $new_value; return $old_value; } sub do_not_escape_gt { my $old_value= $replaced_ents; $replaced_ents= q{&<}; # & needs to be first return $old_value; } sub escape_gt { my $old_value= $replaced_ents; $replaced_ents= qq{&<>}; # & needs to be first return $old_value; } sub _keep_encoding { return $keep_encoding; } # so I can use elsewhere in the module sub set_do_not_escape_amp_in_atts { my $new_value= defined $_[1] ? $_[1] : $_[0]; my $old_value= $do_not_escape_amp_in_atts; $do_not_escape_amp_in_atts= $new_value; return $old_value; } sub output_filter { return $output_filter; } sub output_text_filter { return $output_text_filter; } sub set_output_filter { my $new_value= defined $_[1] ? $_[1] : $_[0]; # can be called in object/non-object mode # if called in object mode with no argument, the filter is undefined if( isa( $new_value, 'XML::Twig::Elt') || isa( $new_value, 'XML::Twig')) { undef $new_value; } my $old_value= $output_filter; if( !$new_value || isa( $new_value, 'CODE') ) { $output_filter= $new_value; } elsif( $new_value eq 'latin1') { $output_filter= XML::Twig::latin1(); } elsif( $XML::Twig::filter{$new_value}) { $output_filter= $XML::Twig::filter{$new_value}; } else { croak "invalid output filter '$new_value'"; } return $old_value; } sub set_output_text_filter { my $new_value= defined $_[1] ? $_[1] : $_[0]; # can be called in object/non-object mode # if called in object mode with no argument, the filter is undefined if( isa( $new_value, 'XML::Twig::Elt') || isa( $new_value, 'XML::Twig')) { undef $new_value; } my $old_value= $output_text_filter; if( !$new_value || isa( $new_value, 'CODE') ) { $output_text_filter= $new_value; } elsif( $new_value eq 'latin1') { $output_text_filter= XML::Twig::latin1(); } elsif( $XML::Twig::filter{$new_value}) { $output_text_filter= $XML::Twig::filter{$new_value}; } else { croak "invalid output text filter '$new_value'"; } return $old_value; } sub set_expand_external_entities { my $new_value= defined $_[1] ? $_[1] : $_[0]; my $old_value= $expand_external_entities; $expand_external_entities= $new_value; return $old_value; } sub set_keep_atts_order { my $new_value= defined $_[1] ? $_[1] : $_[0]; my $old_value= $keep_atts_order; $keep_atts_order= $new_value; return $old_value; } sub keep_atts_order { return $keep_atts_order; } # so I can use elsewhere in the module my %html_empty_elt; BEGIN { %html_empty_elt= map { $_ => 1} qw( base meta link hr br param img area input col); } sub start_tag { my( $elt, $option)= @_; return if( $elt->{gi} < $XML::Twig::SPECIAL_GI); my $extra_data= $elt->{extra_data} || ''; my $gi= $XML::Twig::index2gi[$elt->{'gi'}]; my $att= $elt->{att}; # should be $elt->{att}, optimized into a pure hash look-up my $ns_map= $att ? $att->{'#original_gi'} : ''; if( $ns_map) { $gi= _restore_original_prefix( $ns_map, $gi); } $gi=~ s{^#default:}{}; # remove default prefix if( $output_text_filter) { $gi= $output_text_filter->( $gi); } # get the attribute and their values my $att_sep = $pretty==$NSGMLS ? "\n" : $pretty==$INDENTEDA ? "\n" . $INDENT x ($elt->level+1) . ' ' : ' ' ; my $replace_in_att_value= $replaced_ents . "$quote\t\r\n"; if( $option->{escape_gt} && $replaced_ents !~ m{>}) { $replace_in_att_value.= '>'; } my $tag; my @att_names= grep { !( $_=~ m{^#(?!default:)} ) } $keep_atts_order ? keys %{$att} : sort keys %{$att}; if( @att_names) { my $atts= join $att_sep, map { my $output_att_name= $ns_map ? _restore_original_prefix( $ns_map, $_) : $_; if( $output_text_filter) { $output_att_name= $output_text_filter->( $output_att_name); } $output_att_name . '=' . $quote . _att_xml_string( $att->{$_}, $replace_in_att_value) . $quote } @att_names ; if( $pretty==$INDENTEDA && @att_names == 1) { $att_sep= ' '; } $tag= "<$gi$att_sep$atts"; } else { $tag= "<$gi"; } $tag .= "\n" if($pretty==$NSGMLS); # force empty if suitable HTML tag, otherwise use the value from the input tree if( ($empty_tag_style eq $HTML) && !$elt->{first_child} && !$elt->{extra_data_before_end_tag} && $html_empty_elt{$gi}) { $elt->{empty}= 1; } my $empty= defined $elt->{empty} ? $elt->{empty} : $elt->{first_child} ? 0 : 1; $tag .= (!$elt->{empty} || $elt->{extra_data_before_end_tag}) ? '>' # element has content : (($empty_tag_style eq $HTML) && $html_empty_elt{$gi}) ? ' />' # html empty element # cvs-friendly format : ( $pretty == $INDENTEDA && @att_names > 1) ? "\n" . $INDENT x $elt->level . "/>" : ( $pretty == $INDENTEDA && @att_names == 1) ? " />" : $empty_tag_style ? ">{'gi'}] . ">" # $empty_tag_style is $HTML or $EXPAND : '/>' ; if( ( (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 1) eq '#') && (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 9) ne '#default:') )) { $tag= ''; } #warn "TRACE: ", $tag,": ", Encode::is_utf8( $tag) ? "has flag" : "FLAG NOT SET"; unless( $pretty) { return defined( $extra_data) ? $extra_data . $tag : $tag; } my $prefix=''; my $return=''; # '' or \n is to be printed before the tag my $indent=0; # number of indents before the tag if( $pretty==$RECORD1) { my $level= $elt->level; $return= "\n" if( $level < 2); $indent= 1 if( $level == 1); } elsif( $pretty==$RECORD2) { $return= "\n"; $indent= $elt->level; } elsif( $pretty==$NICE) { my $parent= $elt->{parent}; unless( !$parent || $parent->{contains_text}) { $return= "\n"; } $elt->{contains_text}= 1 if( ($parent && $parent->{contains_text}) || $elt->contains_text); } elsif( $KEEP_TEXT_TAG_ON_ONE_LINE{$pretty}) { my $parent= $elt->{parent}; unless( !$parent || $parent->{contains_text}) { $return= "\n"; $indent= $elt->level; } $elt->{contains_text}= 1 if( ($parent && $parent->{contains_text}) || $elt->contains_text); } if( $return || $indent) { # check for elements in which spaces should be kept my $t= $elt->twig; return $extra_data . $tag if( $xml_space_preserve); if( $t && $t->{twig_keep_spaces_in}) { foreach my $ancestor ($elt->ancestors) { return $extra_data . $tag if( $t->{twig_keep_spaces_in}->{$XML::Twig::index2gi[$ancestor->{'gi'}]}) } } $prefix= $INDENT x $indent; if( $extra_data) { $extra_data=~ s{\s+$}{}; $extra_data=~ s{^\s+}{}; $extra_data= $prefix . $extra_data . $return; } } return $return . $extra_data . $prefix . $tag; } sub end_tag { my $elt= shift; return '' if( ($elt->{gi}<$XML::Twig::SPECIAL_GI) || ($elt->{'empty'} && !$elt->{extra_data_before_end_tag}) ); my $tag= "<"; my $gi= $XML::Twig::index2gi[$elt->{'gi'}]; if( my $map= $elt->{'att'}->{'#original_gi'}) { $gi= _restore_original_prefix( $map, $gi); } $gi=~ s{^#default:}{}; # remove default prefix if( $output_text_filter) { $gi= $output_text_filter->( $XML::Twig::index2gi[$elt->{'gi'}]); } $tag .= "/$gi>"; $tag = ($elt->{extra_data_before_end_tag} || '') . $tag; if( ( (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 1) eq '#') && (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 9) ne '#default:') )) { $tag= ''; } return $tag unless $pretty; my $prefix=''; my $return=0; # 1 if a \n is to be printed before the tag my $indent=0; # number of indents before the tag if( $pretty==$RECORD1) { $return= 1 if( $elt->level == 0); } elsif( $pretty==$RECORD2) { unless( $elt->contains_text) { $return= 1 ; $indent= $elt->level; } } elsif( $pretty==$NICE) { my $parent= $elt->{parent}; if( ( ($parent && !$parent->{contains_text}) || !$parent ) && ( !$elt->{contains_text} && ($elt->{has_flushed_child} || $elt->{first_child}) ) ) { $return= 1; } } elsif( $KEEP_TEXT_TAG_ON_ONE_LINE{$pretty}) { my $parent= $elt->{parent}; if( ( ($parent && !$parent->{contains_text}) || !$parent ) && ( !$elt->{contains_text} && ($elt->{has_flushed_child} || $elt->{first_child}) ) ) { $return= 1; $indent= $elt->level; } } if( $return || $indent) { # check for elements in which spaces should be kept my $t= $elt->twig; return $tag if( $xml_space_preserve); if( $t && $t->{twig_keep_spaces_in}) { foreach my $ancestor ($elt, $elt->ancestors) { return $tag if( $t->{twig_keep_spaces_in}->{$XML::Twig::index2gi[$ancestor->{'gi'}]}) } } if( $return) { $prefix= ($pretty== $INDENTEDCT) ? "\n$INDENT" : "\n"; } $prefix.= $INDENT x $indent; } # add a \n at the end of the document (after the root element) $tag .= "\n" unless( $elt->{parent}); return $prefix . $tag; } sub _restore_original_prefix { my( $map, $name)= @_; my $prefix= _ns_prefix( $name); if( my $original_prefix= $map->{$prefix}) { if( $original_prefix eq '#default') { $name=~ s{^$prefix:}{}; } else { $name=~ s{^$prefix(?=:)}{$original_prefix}; } } return $name; } # buffer used to hold the text to print/sprint, to avoid passing it back and forth between methods my @sprint; # $elt is an element to print # $fh is an optional filehandle to print to # $pretty is an optional value, if true a \n is printed after the < of the # opening tag sub print { my $elt= shift; my $fh= isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar') ? shift : undef; my $old_select= defined $fh ? select $fh : undef; print $elt->sprint( @_); select $old_select if( defined $old_select); } # those next 2 methods need to be refactored, they are copies of the same methods in XML::Twig sub print_to_file { my( $elt, $filename)= (shift, shift); my $out_fh; # open( $out_fh, ">$filename") or _croak( "cannot create file $filename: $!"); # < perl 5.8 my $mode= $keep_encoding ? '>' : '>:utf8'; # >= perl 5.8 open( $out_fh, $mode, $filename) or _croak( "cannot create file $filename: $!"); # >= perl 5.8 $elt->print( $out_fh, @_); close $out_fh; return $elt; } # probably only works on *nix (at least the chmod bit) # first print to a temporary file, then rename that file to the desired file name, then change permissions # to the original file permissions (or to the current umask) sub safe_print_to_file { my( $elt, $filename)= (shift, shift); my $perm= -f $filename ? (stat $filename)[2] & 07777 : ~umask() ; XML::Twig::_use( 'File::Temp') || croak "need File::Temp to use safe_print_to_file\n"; XML::Twig::_use( 'File::Basename') || croak "need File::Basename to use safe_print_to_file\n"; my $tmpdir= File::Basename::dirname( $filename); my( $fh, $tmpfilename) = File::Temp::tempfile( DIR => $tmpdir); $elt->print_to_file( $tmpfilename, @_); rename( $tmpfilename, $filename) or unlink $tmpfilename && _croak( "cannot move temporary file to $filename: $!"); chmod $perm, $filename; return $elt; } # same as print but does not output the start tag if the element # is marked as flushed sub flush { my $elt= shift; my $up_to= $_[0] && isa( $_[0], 'XML::Twig::Elt') ? shift : $elt; $elt->twig->flush_up_to( $up_to, @_); } sub purge { my $elt= shift; my $up_to= $_[0] && isa( $_[0], 'XML::Twig::Elt') ? shift : $elt; $elt->twig->purge_up_to( $up_to, @_); } sub _flush { my $elt= shift; my $pretty; my $fh= isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar') ? shift : undef; my $old_select= defined $fh ? select $fh : undef; my $old_pretty= defined ($pretty= shift) ? set_pretty_print( $pretty) : undef; $xml_space_preserve= 1 if( ($elt->inherit_att( 'xml:space') || '') eq 'preserve'); $elt->__flush(); $xml_space_preserve= 0; select $old_select if( defined $old_select); set_pretty_print( $old_pretty) if( defined $old_pretty); } sub __flush { my $elt= shift; if( $elt->{gi} >= $XML::Twig::SPECIAL_GI) { my $preserve= ($elt->{'att'}->{'xml:space'} || '') eq 'preserve'; $xml_space_preserve++ if $preserve; unless( $elt->{'flushed'}) { print $elt->start_tag(); } # flush the children my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }; foreach my $child (@children) { $child->_flush( $pretty); $child->{'flushed'}=1; } if( ! $elt->{end_tag_flushed}) { print $elt->end_tag; $elt->{end_tag_flushed}=1; $elt->{'flushed'}=1; } $xml_space_preserve-- if $preserve; # used for pretty printing if( my $parent= $elt->{parent}) { $parent->{has_flushed_child}= 1; } } else # text or special element { my $text; if( (exists $elt->{'pcdata'})) { $text= $elt->pcdata_xml_string; if( my $parent= $elt->{parent}) { $parent->{contains_text}= 1; } } elsif( (exists $elt->{'cdata'})) { $text= $elt->cdata_string; if( my $parent= $elt->{parent}) { $parent->{contains_text}= 1; } } elsif( (exists $elt->{'target'})) { $text= $elt->pi_string; } elsif( (exists $elt->{'comment'})) { $text= $elt->comment_string; } elsif( (exists $elt->{'ent'})) { $text= $elt->ent_string; } print $output_filter ? $output_filter->( $text) : $text; } } sub xml_text { my( $elt, @options)= @_; if( @options && grep { lc( $_) eq 'no_recurse' } @options) { return $elt->xml_text_only; } my $string=''; if( ($elt->{gi} >= $XML::Twig::SPECIAL_GI) ) { # sprint the children my $child= $elt->{first_child} || ''; while( $child) { $string.= $child->xml_text; } continue { $child= $child->{next_sibling}; } } elsif( (exists $elt->{'pcdata'})) { $string .= $output_filter ? $output_filter->($elt->pcdata_xml_string) : $elt->pcdata_xml_string; } elsif( (exists $elt->{'cdata'})) { $string .= $output_filter ? $output_filter->($elt->cdata_string) : $elt->cdata_string; } elsif( (exists $elt->{'ent'})) { $string .= $elt->ent_string; } return $string; } sub xml_text_only { return join '', map { $_->xml_text if( $_->is_text || (exists $_->{'ent'})) } $_[0]->_children; } # same as print but except... it does not print but rather returns the string # if the second parameter is set then only the content is returned, not the # start and end tags of the element (but the tags of the included elements are # returned) sub sprint { my $elt= shift; my( $old_pretty, $old_empty_tag_style); if( $_[0]) { if( isa( $_[0], 'HASH')) { # "proper way, using a hashref for options my %args= XML::Twig::_normalize_args( %{shift()}); if( defined $args{PrettyPrint}) { $old_pretty = set_pretty_print( $args{PrettyPrint}); } if( defined $args{EmptyTags}) { $old_empty_tag_style = set_empty_tag_style( $args{EmptyTags}); } } else { # "old" way, just using the option name my @other_opt; foreach my $opt (@_) { if( exists $pretty_print_style{$opt}) { $old_pretty = set_pretty_print( $opt); } elsif( exists $empty_tag_style{$opt}) { $old_empty_tag_style = set_empty_tag_style( $opt); } else { push @other_opt, $opt; } } @_= @other_opt; } } $xml_space_preserve= 1 if( ($elt->inherit_att( 'xml:space') || '') eq 'preserve'); @sprint=(); $elt->_sprint( @_); my $sprint= join( '', @sprint); if( $output_filter) { $sprint= $output_filter->( $sprint); } if( ( ($pretty== $WRAPPED) || ($pretty==$INDENTEDC)) && !$xml_space_preserve) { $sprint= _wrap_text( $sprint); } $xml_space_preserve= 0; if( defined $old_pretty) { set_pretty_print( $old_pretty); } if( defined $old_empty_tag_style) { set_empty_tag_style( $old_empty_tag_style); } return $sprint; } sub _wrap_text { my( $string)= @_; my $wrapped; foreach my $line (split /\n/, $string) { my( $initial_indent)= $line=~ m{^(\s*)}; my $wrapped_line= Text::Wrap::wrap( '', $initial_indent . $INDENT, $line) . "\n"; # fix glitch with Text::wrap when the first line is long and does not include spaces # the first line ends up being too short by 2 chars, but we'll have to live with it! $wrapped_line=~ s{^ +\n }{}s; # this prefix needs to be removed $wrapped .= $wrapped_line; } return $wrapped; } sub _sprint { my $elt= shift; my $no_tag= shift || 0; # in case there's some comments or PI's piggybacking if( $elt->{gi} >= $XML::Twig::SPECIAL_GI) { my $preserve= ($elt->{'att'}->{'xml:space'} || '') eq 'preserve'; $xml_space_preserve++ if $preserve; push @sprint, $elt->start_tag unless( $no_tag); # sprint the children my $child= $elt->{first_child}; while( $child) { $child->_sprint; $child= $child->{next_sibling}; } push @sprint, $elt->end_tag unless( $no_tag); $xml_space_preserve-- if $preserve; } else { push @sprint, $elt->{extra_data} if( $elt->{extra_data}) ; if( (exists $elt->{'pcdata'})) { push @sprint, $elt->pcdata_xml_string; } elsif( (exists $elt->{'cdata'})) { push @sprint, $elt->cdata_string; } elsif( (exists $elt->{'target'})) { if( ($pretty >= $INDENTED) && !$elt->{parent}->{contains_text}) { push @sprint, "\n" . $INDENT x $elt->level; } push @sprint, $elt->pi_string; } elsif( (exists $elt->{'comment'})) { if( ($pretty >= $INDENTED) && !$elt->{parent}->{contains_text}) { push @sprint, "\n" . $INDENT x $elt->level; } push @sprint, $elt->comment_string; } elsif( (exists $elt->{'ent'})) { push @sprint, $elt->ent_string; } } return; } # just a shortcut to $elt->sprint( 1) sub xml_string { my $elt= shift; isa( $_[0], 'HASH') ? $elt->sprint( shift(), 1) : $elt->sprint( 1); } sub pcdata_xml_string { my $elt= shift; if( defined( my $string= $elt->{pcdata}) ) { if( ! $elt->{extra_data_in_pcdata}) { $string=~ s/([$replaced_ents])/$XML::Twig::base_ent{$1}/g unless( !$replaced_ents || $keep_encoding || $elt->{asis}); $string=~ s{\Q]]>}{]]>}g; } else { _gen_mark( $string); # used by _(un)?protect_extra_data foreach my $data (reverse @{$elt->{extra_data_in_pcdata}}) { my $substr= substr( $string, $data->{offset}); if( $keep_encoding || $elt->{asis}) { substr( $string, $data->{offset}, 0, $data->{text}); } else { substr( $string, $data->{offset}, 0, _protect_extra_data( $data->{text})); } } unless( $keep_encoding || $elt->{asis}) { $string=~ s{([$replaced_ents])}{$XML::Twig::base_ent{$1}}g ; $string=~ s{\Q]]>}{]]>}g; _unprotect_extra_data( $string); } } return $output_text_filter ? $output_text_filter->( $string) : $string; } else { return ''; } } { my $mark; my( %char2ent, %ent2char); BEGIN { %char2ent= ( '<' => 'lt', '&' => 'amp', '>' => 'gt'); %ent2char= map { $char2ent{$_} => $_ } keys %char2ent; } # generate a unique mark (a string) not found in the string, # used to mark < and & in the extra data sub _gen_mark { $mark="AAAA"; $mark++ while( index( $_[0], $mark) > -1); return $mark; } sub _protect_extra_data { my( $extra_data)= @_; $extra_data=~ s{([<&>])}{:$mark:$char2ent{$1}:}g; return $extra_data; } sub _unprotect_extra_data { $_[0]=~ s{:$mark:(\w+):}{$ent2char{$1}}g; } } sub cdata_string { my $cdata= $_[0]->{cdata}; unless( defined $cdata) { return ''; } if( $remove_cdata) { $cdata=~ s/([$replaced_ents])/$XML::Twig::base_ent{$1}/g; } else { $cdata= $CDATA_START . $cdata . $CDATA_END; } return $cdata; } sub att_xml_string { my $elt= shift; my $att= shift; my $replace= $replaced_ents . "$quote\n\r\t"; if($_[0] && $_[0]->{escape_gt} && ($replace!~ m{>}) ) { $replace .='>'; } if( defined (my $string= $elt->{att}->{$att})) { return _att_xml_string( $string, $replace); } else { return ''; } } # escaped xml string for an attribute value sub _att_xml_string { my( $string, $escape)= @_; if( !defined( $string)) { return ''; } if( $keep_encoding) { $string=~ s{$quote}{$XML::Twig::base_ent{$quote}}g; } else { if( $do_not_escape_amp_in_atts) { $escape=~ s{^.}{}; # seems like the most backward compatible way to remove & from the list $string=~ s{([$escape])}{$XML::Twig::base_ent{$1}}g; $string=~ s{&(?!(\w+|#\d+|[xX][0-9a-fA-F]+);)}{&}g; # dodgy: escape & that do not start an entity } else { $string=~ s{([$escape])}{$XML::Twig::base_ent{$1}}g; $string=~ s{\Q]]>}{]]>}g; } } return $output_text_filter ? $output_text_filter->( $string) : $string; } sub ent_string { my $ent= shift; my $ent_text= $ent->{ent}; my( $t, $el, $ent_string); if( $expand_external_entities && ($t= $ent->twig) && ($el= $t->entity_list) && ($ent_string= $el->{entities}->{$ent->ent_name}->{val}) ) { return $ent_string; } else { return $ent_text; } } # returns just the text, no tags, for an element sub text { my( $elt, @options)= @_; if( @options && grep { lc( $_) eq 'no_recurse' } @options) { return $elt->text_only; } my $sep = (@options && grep { lc( $_) eq 'sep' } @options) ? ' ' : ''; my $string; if( (exists $elt->{'pcdata'})) { return $elt->{pcdata} . $sep; } elsif( (exists $elt->{'cdata'})) { return $elt->{cdata} . $sep; } elsif( (exists $elt->{'target'})) { return $elt->pi_string . $sep; } elsif( (exists $elt->{'comment'})) { return $elt->{comment} . $sep; } elsif( (exists $elt->{'ent'})) { return $elt->{ent} . $sep ; } my $child= $elt->{first_child} ||''; while( $child) { my $child_text= $child->text( @options); $string.= defined( $child_text) ? $sep . $child_text : ''; } continue { $child= $child->{next_sibling}; } unless( defined $string) { $string=''; } return $output_text_filter ? $output_text_filter->( $string) : $string; } sub text_only { return join '', map { $_->text if( $_->is_text || (exists $_->{'ent'})) } $_[0]->_children; } sub trimmed_text { my $elt= shift; my $text= $elt->text( @_); $text=~ s{\s+}{ }sg; $text=~ s{^\s*}{}; $text=~ s{\s*$}{}; return $text; } sub trim { my( $elt)= @_; my $pcdata= $elt->first_descendant( $TEXT); (my $pcdata_text= $pcdata->text)=~ s{^\s+}{}s; $pcdata->set_text( $pcdata_text); $pcdata= $elt->last_descendant( $TEXT); ($pcdata_text= $pcdata->text)=~ s{\s+$}{}; $pcdata->set_text( $pcdata_text); foreach my $pcdata ($elt->descendants( $TEXT)) { ($pcdata_text= $pcdata->text)=~ s{\s+}{ }g; $pcdata->set_text( $pcdata_text); } return $elt; } # remove cdata sections (turns them into regular pcdata) in an element sub remove_cdata { my $elt= shift; foreach my $cdata ($elt->descendants_or_self( $CDATA)) { if( $keep_encoding) { my $data= $cdata->{cdata}; $data=~ s{([&<"'])}{$XML::Twig::base_ent{$1}}g; $cdata->{pcdata}= (delete $cdata->{empty} || 1) && $data; } else { $cdata->{pcdata}= (delete $cdata->{empty} || 1) && $cdata->{cdata}; } $cdata->{gi}=$XML::Twig::gi2index{$PCDATA} or $cdata->set_gi( $PCDATA); undef $cdata->{cdata}; } } sub _is_private { return _is_private_name( $_[0]->gi); } sub _is_private_name { return $_[0]=~ m{^#(?!default:)}; } } # end of block containing package globals ($pretty_print, $quotes, keep_encoding...) # merges consecutive #PCDATAs in am element sub normalize { my( $elt)= @_; my @descendants= $elt->descendants( $PCDATA); while( my $desc= shift @descendants) { if( ! length $desc->{pcdata}) { $desc->delete; next; } while( @descendants && $desc->{next_sibling} && $desc->{next_sibling}== $descendants[0]) { my $to_merge= shift @descendants; $desc->merge_text( $to_merge); } } return $elt; } # SAX export methods sub toSAX1 { _toSAX(@_, \&_start_tag_data_SAX1, \&_end_tag_data_SAX1); } sub toSAX2 { _toSAX(@_, \&_start_tag_data_SAX2, \&_end_tag_data_SAX2); } sub _toSAX { my( $elt, $handler, $start_tag_data, $end_tag_data)= @_; if( $elt->{gi} >= $XML::Twig::SPECIAL_GI) { my $data= $start_tag_data->( $elt); _start_prefix_mapping( $elt, $handler, $data); if( $data && (my $start_element = $handler->can( 'start_element'))) { unless( $elt->{'flushed'}) { $start_element->( $handler, $data); } } foreach my $child ($elt->_children) { $child->_toSAX( $handler, $start_tag_data, $end_tag_data); } if( (my $data= $end_tag_data->( $elt)) && (my $end_element = $handler->can( 'end_element')) ) { $end_element->( $handler, $data); } _end_prefix_mapping( $elt, $handler); } else # text or special element { if( (exists $elt->{'pcdata'}) && (my $characters= $handler->can( 'characters'))) { $characters->( $handler, { Data => $elt->{pcdata} }); } elsif( (exists $elt->{'cdata'})) { if( my $start_cdata= $handler->can( 'start_cdata')) { $start_cdata->( $handler); } if( my $characters= $handler->can( 'characters')) { $characters->( $handler, {Data => $elt->{cdata} }); } if( my $end_cdata= $handler->can( 'end_cdata')) { $end_cdata->( $handler); } } elsif( ((exists $elt->{'target'})) && (my $pi= $handler->can( 'processing_instruction'))) { $pi->( $handler, { Target =>$elt->{target}, Data => $elt->{data} }); } elsif( ((exists $elt->{'comment'})) && (my $comment= $handler->can( 'comment'))) { $comment->( $handler, { Data => $elt->{comment} }); } elsif( ((exists $elt->{'ent'}))) { if( my $se= $handler->can( 'skipped_entity')) { $se->( $handler, { Name => $elt->ent_name }); } elsif( my $characters= $handler->can( 'characters')) { if( defined $elt->ent_string) { $characters->( $handler, {Data => $elt->ent_string}); } else { $characters->( $handler, {Data => $elt->ent_name}); } } } } } sub _start_tag_data_SAX1 { my( $elt)= @_; my $name= $XML::Twig::index2gi[$elt->{'gi'}]; return if( ( (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 1) eq '#') && (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 9) ne '#default:') )); my $attributes={}; my $atts= $elt->{att}; while( my( $att, $value)= each %$atts) { $attributes->{$att}= $value unless( ( $att=~ m{^#(?!default:)} )); } my $data= { Name => $name, Attributes => $attributes}; return $data; } sub _end_tag_data_SAX1 { my( $elt)= @_; return if( ( (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 1) eq '#') && (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 9) ne '#default:') )); return { Name => $XML::Twig::index2gi[$elt->{'gi'}] }; } sub _start_tag_data_SAX2 { my( $elt)= @_; my $data={}; my $name= $XML::Twig::index2gi[$elt->{'gi'}]; return if( ( (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 1) eq '#') && (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 9) ne '#default:') )); $data->{Name} = $name; $data->{Prefix} = $elt->ns_prefix; $data->{LocalName} = $elt->local_name; $data->{NamespaceURI} = $elt->namespace; # save a copy of the data so we can re-use it for the end tag my %sax2_data= %$data; $elt->{twig_elt_SAX2_data}= \%sax2_data; # add the attributes $data->{Attributes}= $elt->_atts_to_SAX2; return $data; } sub _atts_to_SAX2 { my $elt= shift; my $SAX2_atts= {}; foreach my $att (keys %{$elt->{att}}) { next if( ( $att=~ m{^#(?!default:)} )); my $SAX2_att={}; $SAX2_att->{Name} = $att; $SAX2_att->{Prefix} = _ns_prefix( $att); $SAX2_att->{LocalName} = _local_name( $att); $SAX2_att->{NamespaceURI} = $elt->namespace( $SAX2_att->{Prefix}); $SAX2_att->{Value} = $elt->{'att'}->{$att}; my $SAX2_att_name= "{$SAX2_att->{NamespaceURI}}$SAX2_att->{LocalName}"; $SAX2_atts->{$SAX2_att_name}= $SAX2_att; } return $SAX2_atts; } sub _start_prefix_mapping { my( $elt, $handler, $data)= @_; if( my $start_prefix_mapping= $handler->can( 'start_prefix_mapping') and my @new_prefix_mappings= grep { /^\{[^}]*\}xmlns/ || /^\{$XMLNS_URI\}/ } keys %{$data->{Attributes}} ) { foreach my $prefix (@new_prefix_mappings) { my $prefix_string= $data->{Attributes}->{$prefix}->{LocalName}; if( $prefix_string eq 'xmlns') { $prefix_string=''; } my $prefix_data= { Prefix => $prefix_string, NamespaceURI => $data->{Attributes}->{$prefix}->{Value} }; $start_prefix_mapping->( $handler, $prefix_data); $elt->{twig_end_prefix_mapping} ||= []; push @{$elt->{twig_end_prefix_mapping}}, $prefix_string; } } } sub _end_prefix_mapping { my( $elt, $handler)= @_; if( my $end_prefix_mapping= $handler->can( 'end_prefix_mapping')) { foreach my $prefix (@{$elt->{twig_end_prefix_mapping}}) { $end_prefix_mapping->( $handler, { Prefix => $prefix} ); } } } sub _end_tag_data_SAX2 { my( $elt)= @_; return if( ( (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 1) eq '#') && (substr( $XML::Twig::index2gi[$elt->{'gi'}], 0, 9) ne '#default:') )); return $elt->{twig_elt_SAX2_data}; } sub contains_text { my $elt= shift; my $child= $elt->{first_child}; while ($child) { return 1 if( $child->is_text || (exists $child->{'ent'})); $child= $child->{next_sibling}; } return 0; } # creates a single pcdata element containing the text as child of the element # options: # - force_pcdata: when set to a true value forces the text to be in a #PCDATA # even if the original element was a #CDATA sub set_text { my( $elt, $string, %option)= @_; if( $XML::Twig::index2gi[$elt->{'gi'}] eq $PCDATA) { return $elt->{pcdata}= (delete $elt->{empty} || 1) && $string; } elsif( $XML::Twig::index2gi[$elt->{'gi'}] eq $CDATA) { if( $option{force_pcdata}) { $elt->{gi}=$XML::Twig::gi2index{$PCDATA} or $elt->set_gi( $PCDATA); $elt->{cdata}= ''; return $elt->{pcdata}= (delete $elt->{empty} || 1) && $string; } else { $elt->{cdata}= $string; return $string; } } elsif( $elt->contains_a_single( $PCDATA) ) { # optimized so we have a slight chance of not losing embedded comments and pi's $elt->{first_child}->set_pcdata( $string); return $elt; } foreach my $child (@{[$elt->_children]}) { $child->delete; } my $pcdata= $elt->_new_pcdata( $string); $pcdata->paste( $elt); delete $elt->{empty}; return $elt; } # set the content of an element from a list of strings and elements sub set_content { my $elt= shift; return $elt unless defined $_[0]; # attributes can be given as a hash (passed by ref) if( ref $_[0] eq 'HASH') { my $atts= shift; $elt->del_atts; # usually useless but better safe than sorry $elt->set_atts( $atts); return $elt unless defined $_[0]; } # check next argument for #EMPTY if( !(ref $_[0]) && ($_[0] eq $EMPTY) ) { $elt->{empty}= 1; return $elt; } # case where we really want to do a set_text, the element is '#PCDATA' # or contains a single PCDATA and we only want to add text in it if( ($XML::Twig::index2gi[$elt->{'gi'}] eq $PCDATA || $elt->contains_a_single( $PCDATA)) && (@_ == 1) && !( ref $_[0])) { $elt->set_text( $_[0]); return $elt; } elsif( ($XML::Twig::index2gi[$elt->{'gi'}] eq $CDATA) && (@_ == 1) && !( ref $_[0])) { $elt->{cdata}= $_[0]; return $elt; } # delete the children foreach my $child (@{[$elt->_children]}) { $child->delete; } if( @_) { delete $elt->{empty}; } foreach my $child (@_) { if( ref( $child) && isa( $child, 'XML::Twig::Elt')) { # argument is an element $child->paste( 'last_child', $elt); } else { # argument is a string if( (my $pcdata= $elt->{last_child}) && $elt->{last_child}->is_pcdata) { # previous child is also pcdata: just concatenate $pcdata->{pcdata}= (delete $pcdata->{empty} || 1) && $pcdata->{pcdata} . $child } else { # previous child is not a string: create a new pcdata element $pcdata= $elt->_new_pcdata( $child); $pcdata->paste( 'last_child', $elt); } } } return $elt; } # inserts an element (whose gi is given) as child of the element # all children of the element are now children of the new element # returns the new element sub insert { my ($elt, @args)= @_; # first cut the children my @children= do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }; foreach my $child (@children) { $child->cut; } # insert elements while( my $gi= shift @args) { my $new_elt= $elt->new( $gi); # add attributes if needed if( defined( $args[0]) && ( isa( $args[0], 'HASH')) ) { $new_elt->set_atts( shift @args); } # paste the element $new_elt->paste( $elt); delete $elt->{empty}; $elt= $new_elt; } # paste back the children foreach my $child (@children) { $child->paste( 'last_child', $elt); } return $elt; } # insert a new element # $elt->insert_new_element( $opt_position, $gi, $opt_atts_hash, @opt_content); # the element is created with the same syntax as new # position is the same as in paste, first_child by default sub insert_new_elt { my $elt= shift; my $position= $_[0]; if( ($position eq 'before') || ($position eq 'after') || ($position eq 'first_child') || ($position eq 'last_child')) { shift; } else { $position= 'first_child'; } my $new_elt= $elt->new( @_); $new_elt->paste( $position, $elt); #if( defined $new_elt->{'att'}->{$ID}) { $new_elt->set_id( $new_elt->{'att'}->{$ID}); } return $new_elt; } # wraps an element in elements which gi's are given as arguments # $elt->wrap_in( 'td', 'tr', 'table') wraps the element as a single # cell in a table for example # returns the new element sub wrap_in { my $elt= shift; while( my $gi = shift @_) { my $new_elt = $elt->new( $gi); if( $elt->{twig_current}) { my $t= $elt->twig; $t->{twig_current}= $new_elt; delete $elt->{'twig_current'}; $new_elt->{'twig_current'}=1; } if( my $parent= $elt->{parent}) { $new_elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $new_elt->{parent});} ; if( $parent->{first_child} == $elt) { $parent->{first_child}= $new_elt; } if( $parent->{last_child} == $elt) { delete $parent->{empty}; $parent->{last_child}=$new_elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; } } else { # wrapping the root my $twig= $elt->twig; if( $twig && $twig->root && ($twig->root eq $elt) ) { $twig->set_root( $new_elt); } } if( my $prev_sibling= $elt->{prev_sibling}) { $new_elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $new_elt->{prev_sibling});} ; $prev_sibling->{next_sibling}= $new_elt; } if( my $next_sibling= $elt->{next_sibling}) { $new_elt->{next_sibling}= $next_sibling; $next_sibling->{prev_sibling}=$new_elt; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ; } $new_elt->{first_child}= $elt; delete $new_elt->{empty}; $new_elt->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $new_elt->{last_child});} ; $elt->{parent}=$new_elt; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; $elt->{prev_sibling}=undef; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; $elt->{next_sibling}= undef; # add the attributes if the next argument is a hash ref if( defined( $_[0]) && (isa( $_[0], 'HASH')) ) { $new_elt->set_atts( shift @_); } $elt= $new_elt; } return $elt; } sub replace { my( $elt, $ref)= @_; if( $elt->{parent}) { $elt->cut; } if( my $parent= $ref->{parent}) { $elt->{parent}=$parent; if( $XML::Twig::weakrefs) { weaken( $elt->{parent});} ; if( $parent->{first_child} == $ref) { $parent->{first_child}= $elt; } if( $parent->{last_child} == $ref) { delete $parent->{empty}; $parent->{last_child}=$elt; if( $XML::Twig::weakrefs) { weaken( $parent->{last_child});} ; } } elsif( $ref->twig && $ref == $ref->twig->root) { $ref->twig->set_root( $elt); } if( my $prev_sibling= $ref->{prev_sibling}) { $elt->{prev_sibling}=$prev_sibling; if( $XML::Twig::weakrefs) { weaken( $elt->{prev_sibling});} ; $prev_sibling->{next_sibling}= $elt; } if( my $next_sibling= $ref->{next_sibling}) { $elt->{next_sibling}= $next_sibling; $next_sibling->{prev_sibling}=$elt; if( $XML::Twig::weakrefs) { weaken( $next_sibling->{prev_sibling});} ; } $ref->{parent}=undef; if( $XML::Twig::weakrefs) { weaken( $ref->{parent});} ; $ref->{prev_sibling}=undef; if( $XML::Twig::weakrefs) { weaken( $ref->{prev_sibling});} ; $ref->{next_sibling}= undef; return $ref; } sub replace_with { my $ref= shift; my $elt= shift; $elt->replace( $ref); foreach my $new_elt (reverse @_) { $new_elt->paste( after => $elt); } return $elt; } # move an element, same syntax as paste, except the element is first cut sub move { my $elt= shift; $elt->cut; $elt->paste( @_); return $elt; } # adds a prefix to an element, creating a pcdata child if needed sub prefix { my ($elt, $prefix, $option)= @_; my $asis= ($option && ($option eq 'asis')) ? 1 : 0; if( (exists $elt->{'pcdata'}) && (($asis && $elt->{asis}) || (!$asis && ! $elt->{asis})) ) { $elt->{pcdata}= (delete $elt->{empty} || 1) && $prefix . $elt->{pcdata}; } elsif( $elt->{first_child} && $elt->{first_child}->is_pcdata && ( ($asis && $elt->{first_child}->{asis}) || (!$asis && ! $elt->{first_child}->{asis})) ) { $elt->{first_child}->set_pcdata( $prefix . $elt->{first_child}->pcdata); } else { my $new_elt= $elt->_new_pcdata( $prefix); my $pos= (exists $elt->{'pcdata'}) ? 'before' : 'first_child'; $new_elt->paste( $pos => $elt); if( $asis) { $new_elt->set_asis; } } return $elt; } # adds a suffix to an element, creating a pcdata child if needed sub suffix { my ($elt, $suffix, $option)= @_; my $asis= ($option && ($option eq 'asis')) ? 1 : 0; if( (exists $elt->{'pcdata'}) && (($asis && $elt->{asis}) || (!$asis && ! $elt->{asis})) ) { $elt->{pcdata}= (delete $elt->{empty} || 1) && $elt->{pcdata} . $suffix; } elsif( $elt->{last_child} && $elt->{last_child}->is_pcdata && ( ($asis && $elt->{last_child}->{asis}) || (!$asis && ! $elt->{last_child}->{asis})) ) { $elt->{last_child}->set_pcdata( $elt->{last_child}->pcdata . $suffix); } else { my $new_elt= $elt->_new_pcdata( $suffix); my $pos= (exists $elt->{'pcdata'}) ? 'after' : 'last_child'; $new_elt->paste( $pos => $elt); if( $asis) { $new_elt->set_asis; } } return $elt; } # create a path to an element ('/root/.../gi) sub path { my $elt= shift; my @context= ( $elt, $elt->ancestors); return "/" . join( "/", reverse map {$_->gi} @context); } sub xpath { my $elt= shift; my $xpath; foreach my $ancestor (reverse $elt->ancestors_or_self) { my $gi= $XML::Twig::index2gi[$ancestor->{'gi'}]; $xpath.= "/$gi"; my $index= $ancestor->prev_siblings( $gi) + 1; unless( ($index == 1) && !$ancestor->next_sibling( $gi)) { $xpath.= "[$index]"; } } return $xpath; } # methods used mainly by wrap_children # return a string with the # for an element ...... # returns '' sub _stringify_struct { my( $elt, %opt)= @_; my $string=''; my $pretty_print= set_pretty_print( 'none'); foreach my $child ($elt->_children) { $child->add_id; $string .= $child->start_tag( { escape_gt => 1 }) ||''; } set_pretty_print( $pretty_print); return $string; } # wrap a series of elements in a new one sub _wrap_range { my $elt= shift; my $gi= shift; my $atts= isa( $_[0], 'HASH') ? shift : undef; my $range= shift; # the string with the tags to wrap my $t= $elt->twig; # get the tags to wrap my @to_wrap; while( $range=~ m{<\w+\s+[^>]*id=("[^"]*"|'[^']*')[^>]*>}g) { push @to_wrap, $t->elt_id( substr( $1, 1, -1)); } return '' unless @to_wrap; my $to_wrap= shift @to_wrap; my %atts= %$atts; my $new_elt= $to_wrap->wrap_in( $gi, \%atts); $_->move( last_child => $new_elt) foreach (@to_wrap); return ''; } # wrap children matching a regexp in a new element sub wrap_children { my( $elt, $regexp, $gi, $atts)= @_; $atts ||={}; my $elt_as_string= $elt->_stringify_struct; # stringify the elt structure $regexp=~ s{(<[^>]*>)}{_match_expr( $1)}eg; # in the regexp, replace gi's by the proper regexp $elt_as_string=~ s{($regexp)}{$elt->_wrap_range( $gi, $atts, $1)}eg; # then do the actual replace return $elt; } sub _match_expr { my $tag= shift; my( $gi, %atts)= XML::Twig::_parse_start_tag( $tag); return _match_tag( $gi, %atts); } sub _match_tag { my( $elt, %atts)= @_; my $string= "<$elt\\b"; foreach my $key (sort keys %atts) { my $val= qq{\Q$atts{$key}\E}; $string.= qq{[^>]*$key=(?:"$val"|'$val')}; } $string.= qq{[^>]*>}; return "(?:$string)"; } sub field_to_att { my( $elt, $cond, $att)= @_; $att ||= $cond; my $child= $elt->first_child( $cond) or return undef; $elt->set_att( $att => $child->text); $child->cut; return $elt; } sub att_to_field { my( $elt, $att, $tag)= @_; $tag ||= $att; my $child= $elt->insert_new_elt( first_child => $tag, $elt->{'att'}->{$att}); $elt->del_att( $att); return $elt; } # sort children methods sub sort_children_on_field { my $elt = shift; my $field = shift; my $get_key= sub { return $_[0]->field( $field) }; return $elt->sort_children( $get_key, @_); } sub sort_children_on_att { my $elt = shift; my $att = shift; my $get_key= sub { return $_[0]->{'att'}->{$att} }; return $elt->sort_children( $get_key, @_); } sub sort_children_on_value { my $elt = shift; #my $get_key= eval qq{ sub { $NO_WARNINGS; return \$_[0]->text } }; my $get_key= \&text; return $elt->sort_children( $get_key, @_); } sub sort_children { my( $elt, $get_key, %opt)=@_; $opt{order} ||= 'normal'; $opt{type} ||= 'alpha'; my( $par_a, $par_b)= ($opt{order} eq 'reverse') ? qw( b a) : qw ( a b) ; my $op= ($opt{type} eq 'numeric') ? '<=>' : 'cmp' ; my @children= $elt->cut_children; if( $opt{type} eq 'numeric') { @children= map { $_->[1] } sort { $a->[0] <=> $b->[0] } map { [ $get_key->( $_), $_] } @children; } elsif( $opt{type} eq 'alpha') { @children= map { $_->[1] } sort { $a->[0] cmp $b->[0] } map { [ $get_key->( $_), $_] } @children; } else { croak "wrong sort type '$opt{type}', should be either 'alpha' or 'numeric'"; } @children= reverse @children if( $opt{order} eq 'reverse'); $elt->set_content( @children); } # comparison methods sub before { my( $a, $b)=@_; if( $a->cmp( $b) == -1) { return 1; } else { return 0; } } sub after { my( $a, $b)=@_; if( $a->cmp( $b) == 1) { return 1; } else { return 0; } } sub lt { my( $a, $b)=@_; return 1 if( $a->cmp( $b) == -1); return 0; } sub le { my( $a, $b)=@_; return 1 unless( $a->cmp( $b) == 1); return 0; } sub gt { my( $a, $b)=@_; return 1 if( $a->cmp( $b) == 1); return 0; } sub ge { my( $a, $b)=@_; return 1 unless( $a->cmp( $b) == -1); return 0; } sub cmp { my( $a, $b)=@_; # easy cases return 0 if( $a == $b); return 1 if( $a->in($b)); # a in b => a starts after b return -1 if( $b->in($a)); # b in a => a starts before b # ancestors does not include the element itself my @a_pile= ($a, $a->ancestors); my @b_pile= ($b, $b->ancestors); # the 2 elements are not in the same twig return undef unless( $a_pile[-1] == $b_pile[-1]); # find the first non common ancestors (they are siblings) my $a_anc= pop @a_pile; my $b_anc= pop @b_pile; while( $a_anc == $b_anc) { $a_anc= pop @a_pile; $b_anc= pop @b_pile; } # from there move left and right and figure out the order my( $a_prev, $a_next, $b_prev, $b_next)= ($a_anc, $a_anc, $b_anc, $b_anc); while() { $a_prev= $a_prev->{prev_sibling} || return( -1); return 1 if( $a_prev == $b_next); $a_next= $a_next->{next_sibling} || return( 1); return -1 if( $a_next == $b_prev); $b_prev= $b_prev->{prev_sibling} || return( 1); return -1 if( $b_prev == $a_next); $b_next= $b_next->{next_sibling} || return( -1); return 1 if( $b_next == $a_prev); } } sub _dump { my( $elt, $option)= @_; my $atts = defined $option->{atts} ? $option->{atts} : 1; my $extra = defined $option->{extra} ? $option->{extra} : 0; my $short_text = defined $option->{short_text} ? $option->{short_text} : 40; my $sp= '| '; my $indent= $sp x $elt->level; my $indent_sp= ' ' x $elt->level; my $dump=''; if( $elt->is_elt) { $dump .= $indent . '|-' . $XML::Twig::index2gi[$elt->{'gi'}]; if( $atts && (my @atts= $elt->att_names) ) { $dump .= ' ' . join( ' ', map { qq{$_="} . $elt->{'att'}->{$_} . qq{"} } @atts); } $dump .= "\n"; if( $extra) { $dump .= $elt->_dump_extra_data( $indent, $indent_sp, $short_text); } $dump .= join( "", map { $_->_dump( $option) } do { my $elt= $elt; my @children=(); my $child= $elt->{first_child}; while( $child) { push @children, $child; $child= $child->{next_sibling}; } @children; }); } else { if( (exists $elt->{'pcdata'})) { $dump .= "$indent|-PCDATA: '" . _short_text( $elt->{pcdata}, $short_text) . "'\n" } elsif( (exists $elt->{'ent'})) { $dump .= "$indent|-ENTITY: '" . _short_text( $elt->{ent}, $short_text) . "'\n" } elsif( (exists $elt->{'cdata'})) { $dump .= "$indent|-CDATA: '" . _short_text( $elt->{cdata}, $short_text) . "'\n" } elsif( (exists $elt->{'comment'})) { $dump .= "$indent|-COMMENT: '" . _short_text( $elt->comment_string, $short_text) . "'\n" } elsif( (exists $elt->{'target'})) { $dump .= "$indent|-PI: '" . $elt->{target} . "' - '" . _short_text( $elt->{data}, $short_text) . "'\n" } if( $extra) { $dump .= $elt->_dump_extra_data( $indent, $indent_sp, $short_text); } } return $dump; } sub _dump_extra_data { my( $elt, $indent, $indent_sp, $short_text)= @_; my $dump=''; if( $elt->extra_data) { my $extra_data = $indent . "|-- (cpi before) '" . _short_text( $elt->extra_data, $short_text) . "'"; $extra_data=~ s{\n}{$indent_sp}g; $dump .= $extra_data . "\n"; } if( $elt->{extra_data_in_pcdata}) { foreach my $data ( @{$elt->{extra_data_in_pcdata}}) { my $extra_data = $indent . "|-- (cpi offset $data->{offset}) '" . _short_text( $data->{text}, $short_text) . "'"; $extra_data=~ s{\n}{$indent_sp}g; $dump .= $extra_data . "\n"; } } if( $elt->{extra_data_before_end_tag}) { my $extra_data = $indent . "|-- (cpi end) '" . _short_text( $elt->{extra_data_before_end_tag}, $short_text) . "'"; $extra_data=~ s{\n}{$indent_sp}g; $dump .= $extra_data . "\n"; } return $dump; } sub _short_text { my( $string, $length)= @_; if( !$length || (length( $string) < $length) ) { return $string; } my $l1= (length( $string) -5) /2; my $l2= length( $string) - ($l1 + 5); return substr( $string, 0, $l1) . ' ... ' . substr( $string, -$l2); } sub _and { return _join_defined( ' && ', @_); } sub _join_defined { return join( shift(), grep { $_ } @_); } 1; __END__ =head1 NAME XML::Twig - A perl module for processing huge XML documents in tree mode. =head1 SYNOPSIS Note that this documentation is intended as a reference to the module. Complete docs, including a tutorial, examples, an easier to use HTML version, a quick reference card and a FAQ are available at L Small documents (loaded in memory as a tree): my $twig=XML::Twig->new(); # create the twig $twig->parsefile( 'doc.xml'); # build it my_process( $twig); # use twig methods to process it $twig->print; # output the twig Huge documents (processed in combined stream/tree mode): # at most one div will be loaded in memory my $twig=XML::Twig->new( twig_handlers => { title => sub { $_->set_tag( 'h2') }, # change title tags to h2 # $_ is the current element para => sub { $_->set_tag( 'p') }, # change para to p hidden => sub { $_->delete; }, # remove hidden elements list => \&my_list_process, # process list elements div => sub { $_[0]->flush; }, # output and free memory }, pretty_print => 'indented', # output will be nicely formatted empty_tags => 'html', # outputs ); $twig->parsefile( 'my_big.xml'); sub my_list_process { my( $twig, $list)= @_; # ... } See L for other ways to use the module, as a filter for example. =encoding utf8 =head1 DESCRIPTION This module provides a way to process XML documents. It is build on top of C. The module offers a tree interface to the document, while allowing you to output the parts of it that have been completely processed. It allows minimal resource (CPU and memory) usage by building the tree only for the parts of the documents that need actual processing, through the use of the C > and C > options. The C > and C > methods also help to increase performances. XML::Twig tries to make simple things easy so it tries its best to takes care of a lot of the (usually) annoying (but sometimes necessary) features that come with XML and XML::Parser. =head1 TOOLS XML::Twig comes with a few command-line utilities: =head2 xml_pp - xml pretty-printer XML pretty printer using XML::Twig =head2 xml_grep - grep XML files looking for specific elements C does a grep on XML files. Instead of using regular expressions it uses XPath expressions (in fact the subset of XPath supported by XML::Twig). =head2 xml_split - cut a big XML file into smaller chunks C takes a (presumably big) XML file and split it in several smaller files, based on various criteria (level in the tree, size or an XPath expression) =head2 xml_merge - merge back XML files split with xml_split C takes several xml files that have been split using C and recreates a single file. =head2 xml_spellcheck - spellcheck XML files C lets you spell check the content of an XML file. It extracts the text (the content of elements and optionally of attributes), call a spell checker on it and then recreates the XML document. =head1 XML::Twig 101 XML::Twig can be used either on "small" XML documents (that fit in memory) or on huge ones, by processing parts of the document and outputting or discarding them once they are processed. =head2 Loading an XML document and processing it my $t= XML::Twig->new(); $t->parse( 'titlep 1p 2'); my $root= $t->root; $root->set_tag( 'html'); # change doc to html $title= $root->first_child( 'title'); # get the title $title->set_tag( 'h1'); # turn it into h1 my @para= $root->children( 'para'); # get the para children foreach my $para (@para) { $para->set_tag( 'p'); } # turn them into p $t->print; # output the document Other useful methods include: L: C<< $elt->{'att'}->{'foo'} >> return the C attribute for an element, L : C<< $elt->set_att( foo => "bar") >> sets the C attribute to the C value, L: C<< $elt->{next_sibling} >> return the next sibling in the document (in the example C<< $title->{next_sibling} >> is the first C, you can also (and actually should) use C<< $elt->next_sibling( 'para') >> to get it The document can also be transformed through the use of the L, L, L and L methods: C<< $title->cut; $title->paste( after => $p); >> for example And much, much more, see L. =head2 Processing an XML document chunk by chunk One of the strengths of XML::Twig is that it let you work with files that do not fit in memory (BTW storing an XML document in memory as a tree is quite memory-expensive, the expansion factor being often around 10). To do this you can define handlers, that will be called once a specific element has been completely parsed. In these handlers you can access the element and process it as you see fit, using the navigation and the cut-n-paste methods, plus lots of convenient ones like C >. Once the element is completely processed you can then C > it, which will output it and free the memory. You can also C > it if you don't need to output it (if you are just extracting some data from the document for example). The handler will be called again once the next relevant element has been parsed. my $t= XML::Twig->new( twig_handlers => { section => \§ion, para => sub { $_->set_tag( 'p'); } }, ); $t->parsefile( 'doc.xml'); # the handler is called once a section is completely parsed, ie when # the end tag for section is found, it receives the twig itself and # the element (including all its sub-elements) as arguments sub section { my( $t, $section)= @_; # arguments for all twig_handlers $section->set_tag( 'div'); # change the tag name # let's use the attribute nb as a prefix to the title my $title= $section->first_child( 'title'); # find the title my $nb= $title->{'att'}->{'nb'}; # get the attribute $title->prefix( "$nb - "); # easy isn't it? $section->flush; # outputs the section and frees memory } There is of course more to it: you can trigger handlers on more elaborate conditions than just the name of the element, C
for example. my $t= XML::Twig->new( twig_handlers => { 'section/title' => sub { $_->print } } ) ->parsefile( 'doc.xml'); Here C<< sub { $_->print } >> simply prints the current element (C<$_> is aliased to the element in the handler). You can also trigger a handler on a test on an attribute: my $t= XML::Twig->new( twig_handlers => { 'section[@level="1"]' => sub { $_->print } } ); ->parsefile( 'doc.xml'); You can also use C > to process an element as soon as the start tag is found. Besides C > you can also use C >, =head2 Processing just parts of an XML document The twig_roots mode builds only the required sub-trees from the document Anything outside of the twig roots will just be ignored: my $t= XML::Twig->new( # the twig will include just the root and selected titles twig_roots => { 'section/title' => \&print_n_purge, 'annex/title' => \&print_n_purge } ); $t->parsefile( 'doc.xml'); sub print_n_purge { my( $t, $elt)= @_; print $elt->text; # print the text (including sub-element texts) $t->purge; # frees the memory } You can use that mode when you want to process parts of a documents but are not interested in the rest and you don't want to pay the price, either in time or memory, to build the tree for the it. =head2 Building an XML filter You can combine the C and the C options to build filters, which let you modify selected elements and will output the rest of the document as is. This would convert prices in $ to prices in Euro in a document: my $t= XML::Twig->new( twig_roots => { 'price' => \&convert, }, # process prices twig_print_outside_roots => 1, # print the rest ); $t->parsefile( 'doc.xml'); sub convert { my( $t, $price)= @_; my $currency= $price->{'att'}->{'currency'}; # get the currency if( $currency eq 'USD') { $usd_price= $price->text; # get the price # %rate is just a conversion table my $euro_price= $usd_price * $rate{usd2euro}; $price->set_text( $euro_price); # set the new price $price->set_att( currency => 'EUR'); # don't forget this! } $price->print; # output the price } =head2 XML::Twig and various versions of Perl, XML::Parser and expat: XML::Twig is a lot more sensitive to variations in versions of perl, XML::Parser and expat than to the OS, so this should cover some reasonable configurations. The "recommended configuration" is perl 5.8.3+ (for good Unicode support), XML::Parser 2.31+ and expat 1.95.5+ See L for the CPAN testers reports on XML::Twig, which list all tested configurations. An Atom feed of the CPAN Testers results is available at L Finally: =over 4 =item XML::Twig does B work with expat 1.95.4 =item XML::Twig only works with XML::Parser 2.27 in perl 5.6.* Note that I can't compile XML::Parser 2.27 anymore, so I can't guarantee that it still works =item XML::Parser 2.28 does not really work =back When in doubt, upgrade expat, XML::Parser and Scalar::Util Finally, for some optional features, XML::Twig depends on some additional modules. The complete list, which depends somewhat on the version of Perl that you are running, is given by running C =head1 Simplifying XML processing =over 4 =item Whitespaces Whitespaces that look non-significant are discarded, this behaviour can be controlled using the C >, C > and C > options. =item Encoding You can specify that you want the output in the same encoding as the input (provided you have valid XML, which means you have to specify the encoding either in the document or when you create the Twig object) using the C > option You can also use C> to convert the internal UTF-8 format to the required encoding. =item Comments and Processing Instructions (PI) Comments and PI's can be hidden from the processing, but still appear in the output (they are carried by the "real" element closer to them) =item Pretty Printing XML::Twig can output the document pretty printed so it is easier to read for us humans. =item Surviving an untimely death XML parsers are supposed to react violently when fed improper XML. XML::Parser just dies. XML::Twig provides the C > and the C > methods which wrap the parse in an eval and return either the parsed twig or 0 in case of failure. =item Private attributes Attributes with a name starting with # (illegal in XML) will not be output, so you can safely use them to store temporary values during processing. Note that you can store anything in a private attribute, not just text, it's just a regular Perl variable, so a reference to an object or a huge data structure is perfectly fine. =back =head1 CLASSES XML::Twig uses a very limited number of classes. The ones you are most likely to use are C> of course, which represents a complete XML document, including the document itself (the root of the document itself is C>), its handlers, its input or output filters... The other main class is C>, which models an XML element. Element here has a very wide definition: it can be a regular element, or but also text, with an element C> of C<#PCDATA> (or C<#CDATA>), an entity (tag is C<#ENT>), a Processing Instruction (C<#PI>), a comment (C<#COMMENT>). Those are the 2 commonly used classes. You might want to look the C> option if you want to subclass C. Attributes are just attached to their parent element, they are not objects per se. (Please use the provided methods C> and C> to access them, if you access them as a hash, then your code becomes implementation dependent and might break in the future). Other classes that are seldom used are C> and C>. If you use C> instead of C, elements are then created as C> =head1 METHODS =head2 XML::Twig A twig is a subclass of XML::Parser, so all XML::Parser methods can be called on a twig object, including parse and parsefile. C on the other hand cannot be used, see C > =over 4 =item new This is a class method, the constructor for XML::Twig. Options are passed as keyword value pairs. Recognized options are the same as XML::Parser, plus some (in fact a lot!) XML::Twig specifics. New Options: =over 4 =item twig_handlers This argument consists of a hash C<{ expression => \&handler}> where expression is a an I (+ some others). XPath expressions are limited to using the child and descendant axis (indeed you can't specify an axis), and predicates cannot be nested. You can use the C, or C<< string() >> function (except in C triggers). Additionally you can use regexps (/ delimited) to match attribute and string values. Examples: foo foo/bar foo//bar /foo/bar /foo//bar /foo/bar[@att1 = "val1" and @att2 = "val2"]/baz[@a >= 1] foo[string()=~ /^duh!+/] /foo[string(bar)=~ /\d+/]/baz[@att != 3] #CDATA can be used to call a handler for a CDATA section. #COMMENT can be used to call a handler for comments Some additional (non-XPath) expressions are also provided for convenience: =over 4 =item processing instructions C<'?'> or C<'#PI'> triggers the handler for any processing instruction, and C<< '?' >> or C<< '#PI ' >> triggers a handler for processing instruction with the given target( ex: C<'#PI xml-stylesheet'>). =item level() Triggers the handler on any element at that level in the tree (root is level 1) =item _all_ Triggers the handler for B elements in the tree =item _default_ Triggers the handler for each element that does NOT have any other handler. =back Expressions are evaluated against the input document. Which means that even if you have changed the tag of an element (changing the tag of a parent element from a handler for example) the change will not impact the expression evaluation. There is an exception to this: "private" attributes (which name start with a '#', and can only be created during the parsing, as they are not valid XML) are checked against the current twig. Handlers are triggered in fixed order, sorted by their type (xpath expressions first, then regexps, then level), then by whether they specify a full path (starting at the root element) or not, then by number of steps in the expression, then number of predicates, then number of tests in predicates. Handlers where the last step does not specify a step (C) are triggered after other XPath handlers. Finally C<_all_> handlers are triggered last. B: once a handler has been triggered if it returns 0 then no other handler is called, except a C<_all_> handler which will be called anyway. If a handler returns a true value and other handlers apply, then the next applicable handler will be called. Repeat, rinse, lather..; The exception to that rule is when the C> option is set, in which case only the first handler will be called. Note that it might be a good idea to explicitly return a short true value (like 1) from handlers: this ensures that other applicable handlers are called even if the last statement for the handler happens to evaluate to false. This might also speedup the code by avoiding the result of the last statement of the code to be copied and passed to the code managing handlers. It can really pay to have 1 instead of a long string returned. When the closing tag for an element is parsed the corresponding handler is called, with 2 arguments: the twig and the C >. The twig includes the document tree that has been built so far, the element is the complete sub-tree for the element. B. C<$_> is also set to the element, so it is easy to write inline handlers like para => sub { $_->set_tag( 'p'); } Text is stored in elements whose tag name is #PCDATA (due to mixed content, text and sub-element in an element there is no way to store the text as just an attribute of the enclosing element, this is similar to the DOM model). B: if you have used purge or flush on the twig the element might not be complete, some of its children might have been entirely flushed or purged, and the start tag might even have been printed (by C) already, so changing its tag might not give the expected result. =item twig_roots This argument let's you build the tree only for those elements you are interested in. Example: my $t= XML::Twig->new( twig_roots => { title => 1, subtitle => 1}); $t->parsefile( file); my $t= XML::Twig->new( twig_roots => { 'section/title' => 1}); $t->parsefile( file); return a twig containing a document including only C and C<subtitle> elements, as children of the root element. You can use I<generic_attribute_condition>, I<attribute_condition>, I<full_path>, I<partial_path>, I<tag>, I<tag_regexp>, I<_default_> and I<_all_> to trigger the building of the twig. I<string_condition> and I<regexp_condition> cannot be used as the content of the element, and the string, have not yet been parsed when the condition is checked. B<WARNING>: path are checked for the document. Even if the C<twig_roots> option is used they will be checked against the full document tree, not the virtual tree created by XML::Twig B<WARNING>: twig_roots elements should NOT be nested, that would hopelessly confuse XML::Twig ;--( Note: you can set handlers (twig_handlers) using twig_roots Example: my $t= XML::Twig->new( twig_roots => { title => sub { $_[1]->print;}, subtitle => \&process_subtitle } ); $t->parsefile( file); =item twig_print_outside_roots To be used in conjunction with the C<twig_roots> argument. When set to a true value this will print the document outside of the C<twig_roots> elements. Example: my $t= XML::Twig->new( twig_roots => { title => \&number_title }, twig_print_outside_roots => 1, ); $t->parsefile( file); { my $nb; sub number_title { my( $twig, $title); $nb++; $title->prefix( "$nb "); $title->print; } } This example prints the document outside of the title element, calls C<number_title> for each C<title> element, prints it, and then resumes printing the document. The twig is built only for the C<title> elements. If the value is a reference to a file handle then the document outside the C<twig_roots> elements will be output to this file handle: open( my $out, '>', 'out_file.xml') or die "cannot open out file.xml out_file:$!"; my $t= XML::Twig->new( twig_roots => { title => \&number_title }, # default output to $out twig_print_outside_roots => $out, ); { my $nb; sub number_title { my( $twig, $title); $nb++; $title->prefix( "$nb "); $title->print( $out); # you have to print to \*OUT here } } =item start_tag_handlers A hash C<{ expression => \&handler}>. Sets element handlers that are called when the element is open (at the end of the XML::Parser C<Start> handler). The handlers are called with 2 params: the twig and the element. The element is empty at that point, its attributes are created though. You can use I<generic_attribute_condition>, I<attribute_condition>, I<full_path>, I<partial_path>, I<tag>, I<tag_regexp>, I<_default_> and I<_all_> to trigger the handler. I<string_condition> and I<regexp_condition> cannot be used as the content of the element, and the string, have not yet been parsed when the condition is checked. The main uses for those handlers are to change the tag name (you might have to do it as soon as you find the open tag if you plan to C<flush> the twig at some point in the element, and to create temporary attributes that will be used when processing sub-element with C<twig_hanlders>. B<Note>: C<start_tag> handlers can be called outside of C<twig_roots> if this argument is used. Since the element object is not built, in this case handlers are called with the following arguments: C<$t> (the twig), C<$tag> (the tag of the element) and C<%att> (a hash of the attributes of the element). If the C<twig_print_outside_roots> argument is also used, if the last handler called returns a C<true> value, then the start tag will be output as it appeared in the original document, if the handler returns a C<false> value then the start tag will B<not> be printed (so you can print a modified string yourself for example). Note that you can use the L<ignore> method in C<start_tag_handlers> (and only there). =item end_tag_handlers A hash C<{ expression => \&handler}>. Sets element handlers that are called when the element is closed (at the end of the XML::Parser C<End> handler). The handlers are called with 2 params: the twig and the tag of the element. I<twig_handlers> are called when an element is completely parsed, so why have this redundant option? There is only one use for C<end_tag_handlers>: when using the C<twig_roots> option, to trigger a handler for an element B<outside> the roots. It is for example very useful to number titles in a document using nested sections: my @no= (0); my $no; my $t= XML::Twig->new( start_tag_handlers => { section => sub { $no[$#no]++; $no= join '.', @no; push @no, 0; } }, twig_roots => { title => sub { $_->prefix( $no); $_->print; } }, end_tag_handlers => { section => sub { pop @no; } }, twig_print_outside_roots => 1 ); $t->parsefile( $file); Using the C<end_tag_handlers> argument without C<twig_roots> will result in an error. =item do_not_chain_handlers If this option is set to a true value, then only one handler will be called for each element, even if several satisfy the condition Note that the C<_all_> handler will still be called regardless =item ignore_elts This option lets you ignore elements when building the twig. This is useful in cases where you cannot use C<twig_roots> to ignore elements, for example if the element to ignore is a sibling of elements you are interested in. Example: my $twig= XML::Twig->new( ignore_elts => { elt => 'discard' }); $twig->parsefile( 'doc.xml'); This will build the complete twig for the document, except that all C<elt> elements (and their children) will be left out. The keys in the hash are triggers, limited to the same subset as C<L<start_tag_handlers>>. The values can be C<discard>, to discard the element, C<print>, to output the element as-is, C<string> to store the text of the ignored element(s), including markup, in a field of the twig: C<< $t->{twig_buffered_string} >> or a reference to a scalar, in which case the text of the ignored element(s), including markup, will be stored in the scalar. Any other value will be treated as C<discard>. =item char_handler A reference to a subroutine that will be called every time C<PCDATA> is found. The subroutine receives the string as argument, and returns the modified string: # WE WANT ALL STRINGS IN UPPER CASE sub my_char_handler { my( $text)= @_; $text= uc( $text); return $text; } =item elt_class The name of a class used to store elements. this class should inherit from C<XML::Twig::Elt> (and by default it is C<XML::Twig::Elt>). This option is used to subclass the element class and extend it with new methods. This option is needed because during the parsing of the XML, elements are created by C<XML::Twig>, without any control from the user code. =item keep_atts_order Setting this option to a true value causes the attribute hash to be tied to a C<Tie::IxHash> object. This means that C<Tie::IxHash> needs to be installed for this option to be available. It also means that the hash keeps its order, so you will get the attributes in order. This allows outputting the attributes in the same order as they were in the original document. =item keep_encoding This is a (slightly?) evil option: if the XML document is not UTF-8 encoded and you want to keep it that way, then setting keep_encoding will use theC<Expat> original_string method for character, thus keeping the original encoding, as well as the original entities in the strings. See the C<t/test6.t> test file to see what results you can expect from the various encoding options. B<WARNING>: if the original encoding is multi-byte then attribute parsing will be EXTREMELY unsafe under any Perl before 5.6, as it uses regular expressions which do not deal properly with multi-byte characters. You can specify an alternate function to parse the start tags with the C<parse_start_tag> option (see below) B<WARNING>: this option is NOT used when parsing with XML::Parser non-blocking parser (C<parse_start>, C<parse_more>, C<parse_done> methods) which you probably should not use with XML::Twig anyway as they are totally untested! =item output_encoding This option generates an output_filter using C<Encode>, C<Text::Iconv> or C<Unicode::Map8> and C<Unicode::Strings>, and sets the encoding in the XML declaration. This is the easiest way to deal with encodings, if you need more sophisticated features, look at C<output_filter> below =item output_filter This option is used to convert the character encoding of the output document. It is passed either a string corresponding to a predefined filter or a subroutine reference. The filter will be called every time a document or element is processed by the "print" functions (C<print>, C<sprint>, C<flush>). Pre-defined filters: =over 4 =item latin1 uses either C<Encode>, C<Text::Iconv> or C<Unicode::Map8> and C<Unicode::String> or a regexp (which works only with XML::Parser 2.27), in this order, to convert all characters to ISO-8859-15 (usually latin1 is synonym to ISO-8859-1, but in practice it seems that ISO-8859-15, which includes the euro sign, is more useful and probably what most people want). =item html does the same conversion as C<latin1>, plus encodes entities using C<HTML::Entities> (oddly enough you will need to have HTML::Entities installed for it to be available). This should only be used if the tags and attribute names themselves are in US-ASCII, or they will be converted and the output will not be valid XML any more =item safe converts the output to ASCII (US) only plus I<character entities> (C<&#nnn;>) this should be used only if the tags and attribute names themselves are in US-ASCII, or they will be converted and the output will not be valid XML any more =item safe_hex same as C<safe> except that the character entities are in hex (C<&#xnnn;>) =item encode_convert ($encoding) Return a subref that can be used to convert utf8 strings to C<$encoding>). Uses C<Encode>. my $conv = XML::Twig::encode_convert( 'latin1'); my $t = XML::Twig->new(output_filter => $conv); =item iconv_convert ($encoding) this function is used to create a filter subroutine that will be used to convert the characters to the target encoding using C<Text::Iconv> (which needs to be installed, look at the documentation for the module and for the C<iconv> library to find out which encodings are available on your system, C<iconv -l> should give you a list of available encodings) my $conv = XML::Twig::iconv_convert( 'latin1'); my $t = XML::Twig->new(output_filter => $conv); =item unicode_convert ($encoding) this function is used to create a filter subroutine that will be used to convert the characters to the target encoding using C<Unicode::Strings> and C<Unicode::Map8> (which need to be installed, look at the documentation for the modules to find out which encodings are available on your system) my $conv = XML::Twig::unicode_convert( 'latin1'); my $t = XML::Twig->new(output_filter => $conv); =back The C<text> and C<att> methods do not use the filter, so their result are always in unicode. Those predeclared filters are based on subroutines that can be used by themselves (as C<XML::Twig::foo>). =over 4 =item html_encode ($string) Use C<HTML::Entities> to encode a utf8 string =item safe_encode ($string) Use either a regexp (perl < 5.8) or C<Encode> to encode non-ascii characters in the string in C<< &#<nnnn>; >> format =item safe_encode_hex ($string) Use either a regexp (perl < 5.8) or C<Encode> to encode non-ascii characters in the string in C<< &#x<nnnn>; >> format =item regexp2latin1 ($string) Use a regexp to encode a utf8 string into latin 1 (ISO-8859-1). Does not work with Perl 5.8.0! =back =item output_text_filter same as output_filter, except it doesn't apply to the brackets and quotes around attribute values. This is useful for all filters that could change the tagging, basically anything that does not just change the encoding of the output. C<html>, C<safe> and C<safe_hex> are better used with this option. =item input_filter This option is similar to C<output_filter> except the filter is applied to the characters before they are stored in the twig, at parsing time. =item remove_cdata Setting this option to a true value will force the twig to output CDATA sections as regular (escaped) PCDATA =item parse_start_tag If you use the C<keep_encoding> option then this option can be used to replace the default parsing function. You should provide a coderef (a reference to a subroutine) as the argument, this subroutine takes the original tag (given by XML::Parser::Expat C<original_string()> method) and returns a tag and the attributes in a hash (or in a list attribute_name/attribute value). =item no_xxe prevents external entities to be parsed. This is a security feature, in case the input XML cannot be trusted. With this option set to a true value defining external entities in the document will cause the parse to fail. This prevents an entity like C<< <!ENTITY xxe PUBLIC "bar" "/etc/passwd"> >> to make the password fiel available in the document. =item expand_external_ents When this option is used external entities (that are defined) are expanded when the document is output using "print" functions such as C<L<print> >, C<L<sprint> >, C<L<flush> > and C<L<xml_string> >. Note that in the twig the entity will be stored as an element with a tag 'C<#ENT>', the entity will not be expanded there, so you might want to process the entities before outputting it. If an external entity is not available, then the parse will fail. A special case is when the value of this option is -1. In that case a missing entity will not cause the parser to die, but its C<name>, C<sysid> and C<pubid> will be stored in the twig as C<< $twig->{twig_missing_system_entities} >> (a reference to an array of hashes { name => <name>, sysid => <sysid>, pubid => <pubid> }). Yes, this is a bit of a hack, but it's useful in some cases. =item load_DTD If this argument is set to a true value, C<parse> or C<parsefile> on the twig will load the DTD information. This information can then be accessed through the twig, in a C<DTD_handler> for example. This will load even an external DTD. Default and fixed values for attributes will also be filled, based on the DTD. Note that to do this the module will generate a temporary file in the current directory. If this is a problem let me know and I will add an option to specify an alternate directory. See L<DTD Handling> for more information =item DTD_base <path_to_DTD_directory> If the DTD is in a different directory, looks for it there, useful to make up somewhat for the lack of catalog suport in C<expat>. You still need a SYSTEM declaration =item DTD_handler Set a handler that will be called once the doctype (and the DTD) have been loaded, with 2 arguments, the twig and the DTD. =item no_prolog Does not output a prolog (XML declaration and DTD) =item id This optional argument gives the name of an attribute that can be used as an ID in the document. Elements whose ID is known can be accessed through the elt_id method. id defaults to 'id'. See C<L<BUGS> > =item discard_spaces If this optional argument is set to a true value then spaces are discarded when they look non-significant: strings containing only spaces and at least one line feed are discarded. This argument is set to true by default. The exact algorithm to drop spaces is: strings including only spaces (perl \s) and at least one \n right before an open or close tag are dropped. =item discard_all_spaces If this argument is set to a true value, spaces are discarded more aggressively than with C<discard_spaces>: strings not including a \n are also dropped. This option is appropriate for data-oriented XML. =item keep_spaces If this optional argument is set to a true value then all spaces in the document are kept, and stored as C<PCDATA>. B<Warning>: adding this option can result in changes in the twig generated: space that was previously discarded might end up in a new text element. see the difference by calling the following code with 0 and 1 as arguments: perl -MXML::Twig -e'print XML::Twig->new( keep_spaces => shift)->parse( "<d> \n<e/></d>")->_dump' C<keep_spaces> and C<discard_spaces> cannot be both set. =item discard_spaces_in This argument sets C<keep_spaces> to true but will cause the twig builder to discard spaces in the elements listed. The syntax for using this argument is: XML::Twig->new( discard_spaces_in => [ 'elt1', 'elt2']); =item keep_spaces_in This argument sets C<discard_spaces> to true but will cause the twig builder to keep spaces in the elements listed. The syntax for using this argument is: XML::Twig->new( keep_spaces_in => [ 'elt1', 'elt2']); B<Warning>: adding this option can result in changes in the twig generated: space that was previously discarded might end up in a new text element. =item pretty_print Set the pretty print method, amongst 'C<none>' (default), 'C<nsgmls>', 'C<nice>', 'C<indented>', 'C<indented_c>', 'C<indented_a>', 'C<indented_close_tag>', 'C<cvs>', 'C<wrapped>', 'C<record>' and 'C<record_c>' pretty_print formats: =over 4 =item none The document is output as one ling string, with no line breaks except those found within text elements =item nsgmls Line breaks are inserted in safe places: that is within tags, between a tag and an attribute, between attributes and before the > at the end of a tag. This is quite ugly but better than C<none>, and it is very safe, the document will still be valid (conforming to its DTD). This is how the SGML parser C<sgmls> splits documents, hence the name. =item nice This option inserts line breaks before any tag that does not contain text (so element with textual content are not broken as the \n is the significant). B<WARNING>: this option leaves the document well-formed but might make it invalid (not conformant to its DTD). If you have elements declared as <!ELEMENT foo (#PCDATA|bar)> then a C<foo> element including a C<bar> one will be printed as <foo> <bar>bar is just pcdata</bar> </foo> This is invalid, as the parser will take the line break after the C<foo> tag as a sign that the element contains PCDATA, it will then die when it finds the C<bar> tag. This may or may not be important for you, but be aware of it! =item indented Same as C<nice> (and with the same warning) but indents elements according to their level =item indented_c Same as C<indented> but a little more compact: the closing tags are on the same line as the preceding text =item indented_close_tag Same as C<indented> except that the closing tag is also indented, to line up with the tags within the element =item idented_a This formats XML files in a line-oriented version control friendly way. The format is described in L<http://tinyurl.com/2kwscq> (that's an Oracle document with an insanely long URL). Note that to be totaly conformant to the "spec", the order of attributes should not be changed, so if they are not already in alphabetical order you will need to use the C<L<keep_atts_order>> option. =item cvs Same as C<L<idented_a>>. =item wrapped Same as C<indented_c> but lines are wrapped using L<Text::Wrap::wrap>. The default length for lines is the default for C<$Text::Wrap::columns>, and can be changed by changing that variable. =item record This is a record-oriented pretty print, that display data in records, one field per line (which looks a LOT like C<indented>) =item record_c Stands for record compact, one record per line =back =item empty_tags Set the empty tag display style ('C<normal>', 'C<html>' or 'C<expand>'). C<normal> outputs an empty tag 'C<< <tag/> >>', C<html> adds a space 'C<< <tag /> >>' for elements that can be empty in XHTML and C<expand> outputs 'C<< <tag></tag> >>' =item quote Set the quote character for attributes ('C<single>' or 'C<double>'). =item escape_gt By default XML::Twig does not escape the character > in its output, as it is not mandated by the XML spec. With this option on, > will be replaced by C<>> =item comments Set the way comments are processed: 'C<drop>' (default), 'C<keep>' or 'C<process>' Comments processing options: =over 4 =item drop drops the comments, they are not read, nor printed to the output =item keep comments are loaded and will appear on the output, they are not accessible within the twig and will not interfere with processing though B<Note>: comments in the middle of a text element such as <p>text <!-- comment --> more text --></p> are kept at their original position in the text. Using Ë"print" methods like C<print> or C<sprint> will return the comments in the text. Using C<text> or C<field> on the other hand will not. Any use of C<set_pcdata> on the C<#PCDATA> element (directly or through other methods like C<set_content>) will delete the comment(s). =item process comments are loaded in the twig and will be treated as regular elements (their C<tag> is C<#COMMENT>) this can interfere with processing if you expect C<< $elt->{first_child} >> to be an element but find a comment there. Validation will not protect you from this as comments can happen anywhere. You can use C<< $elt->first_child( 'tag') >> (which is a good habit anyway) to get where you want. Consider using C<process> if you are outputting SAX events from XML::Twig. =back =item pi Set the way processing instructions are processed: 'C<drop>', 'C<keep>' (default) or 'C<process>' Note that you can also set PI handlers in the C<twig_handlers> option: '?' => \&handler '?target' => \&handler 2 The handlers will be called with 2 parameters, the twig and the PI element if C<pi> is set to C<process>, and with 3, the twig, the target and the data if C<pi> is set to C<keep>. Of course they will not be called if C<pi> is set to C<drop>. If C<pi> is set to C<keep> the handler should return a string that will be used as-is as the PI text (it should look like "C< <?target data?> >" or '' if you want to remove the PI), Only one handler will be called, C<?target> or C<?> if no specific handler for that target is available. =item map_xmlns This option is passed a hashref that maps uri's to prefixes. The prefixes in the document will be replaced by the ones in the map. The mapped prefixes can (actually have to) be used to trigger handlers, navigate or query the document. Here is an example: my $t= XML::Twig->new( map_xmlns => {'http://www.w3.org/2000/svg' => "svg"}, twig_handlers => { 'svg:circle' => sub { $_->set_att( r => 20) } }, pretty_print => 'indented', ) ->parse( '<doc xmlns:gr="http://www.w3.org/2000/svg"> <gr:circle cx="10" cy="90" r="10"/> </doc>' ) ->print; This will output: <doc xmlns:svg="http://www.w3.org/2000/svg"> <svg:circle cx="10" cy="90" r="20"/> </doc> =item keep_original_prefix When used with C<L<map_xmlns>> this option will make C<XML::Twig> use the original namespace prefixes when outputting a document. The mapped prefix will still be used for triggering handlers and in navigation and query methods. my $t= XML::Twig->new( map_xmlns => {'http://www.w3.org/2000/svg' => "svg"}, twig_handlers => { 'svg:circle' => sub { $_->set_att( r => 20) } }, keep_original_prefix => 1, pretty_print => 'indented', ) ->parse( '<doc xmlns:gr="http://www.w3.org/2000/svg"> <gr:circle cx="10" cy="90" r="10"/> </doc>' ) ->print; This will output: <doc xmlns:gr="http://www.w3.org/2000/svg"> <gr:circle cx="10" cy="90" r="20"/> </doc> =item original_uri ($prefix) called within a handler, this will return the uri bound to the namespace prefix in the original document. =item index ($arrayref or $hashref) This option creates lists of specific elements during the parsing of the XML. It takes a reference to either a list of triggering expressions or to a hash name => expression, and for each one generates the list of elements that match the expression. The list can be accessed through the C<L<index>> method. example: # using an array ref my $t= XML::Twig->new( index => [ 'div', 'table' ]) ->parsefile( "foo.xml"); my $divs= $t->index( 'div'); my $first_div= $divs->[0]; my $last_table= $t->index( table => -1); # using a hashref to name the indexes my $t= XML::Twig->new( index => { email => 'a[@href=~/^ \s*mailto:/]'}) ->parsefile( "foo.xml"); my $last_emails= $t->index( email => -1); Note that the index is not maintained after the parsing. If elements are deleted, renamed or otherwise hurt during processing, the index is NOT updated. (changing the id element OTOH will update the index) =item att_accessors <list of attribute names> creates methods that give direct access to attribute: my $t= XML::Twig->new( att_accessors => [ 'href', 'src']) ->parsefile( $file); my $first_href= $t->first_elt( 'img')->src; # same as ->att( 'src') $t->first_elt( 'img')->src( 'new_logo.png') # changes the attribute value =item elt_accessors creates methods that give direct access to the first child element (in scalar context) or the list of elements (in list context): the list of accessors to create can be given 1 2 different ways: in an array, or in a hash alias => expression my $t= XML::Twig->new( elt_accessors => [ 'head']) ->parsefile( $file); my $title_text= $t->root->head->field( 'title'); # same as $title_text= $t->root->first_child( 'head')->field( 'title'); my $t= XML::Twig->new( elt_accessors => { warnings => 'p[@class="warning"]', d2 => 'div[2]'}, ) ->parsefile( $file); my $body= $t->first_elt( 'body'); my @warnings= $body->warnings; # same as $body->children( 'p[@class="warning"]'); my $s2= $body->d2; # same as $body->first_child( 'div[2]') =item field_accessors creates methods that give direct access to the first child element text: my $t= XML::Twig->new( field_accessors => [ 'h1']) ->parsefile( $file); my $div_title_text= $t->first_elt( 'div')->title; # same as $title_text= $t->first_elt( 'div')->field( 'title'); =item use_tidy set this option to use HTML::Tidy instead of HTML::TreeBuilder to convert HTML to XML. HTML, especially real (real "crap") HTML found in the wild, so depending on the data, one module or the other does a better job at the conversion. Also, HTML::Tidy can be a bit difficult to install, so XML::Twig offers both option. TIMTOWTDI =item output_html_doctype when using HTML::TreeBuilder to convert HTML, this option causes the DOCTYPE declaration to be output, which may be important for some legacy browsers. Without that option the DOCTYPE definition is NOT output. Also if the definition is completely wrong (ie not easily parsable), it is not output either. =back B<Note>: I _HATE_ the Java-like name of arguments used by most XML modules. So in pure TIMTOWTDI fashion all arguments can be written either as C<UglyJavaLikeName> or as C<readable_perl_name>: C<twig_print_outside_roots> or C<TwigPrintOutsideRoots> (or even C<twigPrintOutsideRoots> {shudder}). XML::Twig normalizes them before processing them. =item parse ( $source) The C<$source> parameter should either be a string containing the whole XML document, or it should be an open C<IO::Handle> (aka a filehandle). A die call is thrown if a parse error occurs. Otherwise it will return the twig built by the parse. Use C<safe_parse> if you want the parsing to return even when an error occurs. If this method is called as a class method (C<< XML::Twig->parse( $some_xml_or_html) >>) then an XML::Twig object is created, using the parameters except the last one (eg C<< XML::Twig->parse( pretty_print => 'indented', $some_xml_or_html) >>) and C<L<xparse>> is called on it. Note that when parsing a filehandle, the handle should NOT be open with an encoding (ie open with C<open( my $in, '<', $filename)>. The file will be parsed by C<expat>, so specifying the encoding actually causes problems for the parser (as in: it can crash it, see https://rt.cpan.org/Ticket/Display.html?id=78877). For parsing a file it is actually recommended to use C<parsefile> on the file name, instead of <parse> on the open file. =item parsestring This is just an alias for C<parse> for backwards compatibility. =item parsefile (FILE [, OPT => OPT_VALUE [...]]) Open C<FILE> for reading, then call C<parse> with the open handle. The file is closed no matter how C<parse> returns. A C<die> call is thrown if a parse error occurs. Otherwise it will return the twig built by the parse. Use C<safe_parsefile> if you want the parsing to return even when an error occurs. =item parsefile_inplace ( $file, $optional_extension) Parse and update a file "in place". It does this by creating a temp file, selecting it as the default for print() statements (and methods), then parsing the input file. If the parsing is successful, then the temp file is moved to replace the input file. If an extension is given then the original file is backed-up (the rules for the extension are the same as the rule for the -i option in perl). =item parsefile_html_inplace ( $file, $optional_extension) Same as parsefile_inplace, except that it parses HTML instead of XML =item parseurl ($url $optional_user_agent) Gets the data from C<$url> and parse it. The data is piped to the parser in chunks the size of the XML::Parser::Expat buffer, so memory consumption and hopefully speed are optimal. For most (read "small") XML it is probably as efficient (and easier to debug) to just C<get> the XML file and then parse it as a string. use XML::Twig; use LWP::Simple; my $twig= XML::Twig->new(); $twig->parse( LWP::Simple::get( $URL )); or use XML::Twig; my $twig= XML::Twig->nparse( $URL); If the C<$optional_user_agent> argument is used then it is used, otherwise a new one is created. =item safe_parse ( SOURCE [, OPT => OPT_VALUE [...]]) This method is similar to C<parse> except that it wraps the parsing in an C<eval> block. It returns the twig on success and 0 on failure (the twig object also contains the parsed twig). C<$@> contains the error message on failure. Note that the parsing still stops as soon as an error is detected, there is no way to keep going after an error. =item safe_parsefile (FILE [, OPT => OPT_VALUE [...]]) This method is similar to C<parsefile> except that it wraps the parsing in an C<eval> block. It returns the twig on success and 0 on failure (the twig object also contains the parsed twig) . C<$@> contains the error message on failure Note that the parsing still stops as soon as an error is detected, there is no way to keep going after an error. =item safe_parseurl ($url $optional_user_agent) Same as C<parseurl> except that it wraps the parsing in an C<eval> block. It returns the twig on success and 0 on failure (the twig object also contains the parsed twig) . C<$@> contains the error message on failure =item parse_html ($string_or_fh) parse an HTML string or file handle (by converting it to XML using HTML::TreeBuilder, which needs to be available). This works nicely, but some information gets lost in the process: newlines are removed, and (at least on the version I use), comments get an extra CDATA section inside ( <!-- foo --> becomes <!-- <![CDATA[ foo ]]> --> =item parsefile_html ($file) parse an HTML file (by converting it to XML using HTML::TreeBuilder, which needs to be available, or HTML::Tidy if the C<use_tidy> option was used). The file is loaded completely in memory and converted to XML before being parsed. this method is to be used with caution though, as it doesn't know about the file encoding, it is usually better to use C<L<parse_html>>, which gives you a chance to open the file with the proper encoding layer. =item parseurl_html ($url $optional_user_agent) parse an URL as html the same way C<L<parse_html>> does =item safe_parseurl_html ($url $optional_user_agent) Same as C<L<parseurl_html>>> except that it wraps the parsing in an C<eval> block. It returns the twig on success and 0 on failure (the twig object also contains the parsed twig) . C<$@> contains the error message on failure =item safe_parsefile_html ($file $optional_user_agent) Same as C<L<parsefile_html>>> except that it wraps the parsing in an C<eval> block. It returns the twig on success and 0 on failure (the twig object also contains the parsed twig) . C<$@> contains the error message on failure =item safe_parse_html ($string_or_fh) Same as C<L<parse_html>> except that it wraps the parsing in an C<eval> block. It returns the twig on success and 0 on failure (the twig object also contains the parsed twig) . C<$@> contains the error message on failure =item xparse ($thing_to_parse) parse the C<$thing_to_parse>, whether it is a filehandle, a string, an HTML file, an HTML URL, an URL or a file. Note that this is mostly a convenience method for one-off scripts. For example files that end in '.htm' or '.html' are parsed first as XML, and if this fails as HTML. This is certainly not the most efficient way to do this in general. =item nparse ($optional_twig_options, $thing_to_parse) create a twig with the C<$optional_options>, and parse the C<$thing_to_parse>, whether it is a filehandle, a string, an HTML file, an HTML URL, an URL or a file. Examples: XML::Twig->nparse( "file.xml"); XML::Twig->nparse( error_context => 1, "file://file.xml"); =item nparse_pp ($optional_twig_options, $thing_to_parse) same as C<L<nparse>> but also sets the C<pretty_print> option to C<indented>. =item nparse_e ($optional_twig_options, $thing_to_parse) same as C<L<nparse>> but also sets the C<error_context> option to 1. =item nparse_ppe ($optional_twig_options, $thing_to_parse) same as C<L<nparse>> but also sets the C<pretty_print> option to C<indented> and the C<error_context> option to 1. =item parser This method returns the C<expat> object (actually the XML::Parser::Expat object) used during parsing. It is useful for example to call XML::Parser::Expat methods on it. To get the line of a tag for example use C<< $t->parser->current_line >>. =item setTwigHandlers ($handlers) Set the twig_handlers. C<$handlers> is a reference to a hash similar to the one in the C<twig_handlers> option of new. All previous handlers are unset. The method returns the reference to the previous handlers. =item setTwigHandler ($exp $handler) Set a single twig_handler for elements matching C<$exp>. C<$handler> is a reference to a subroutine. If the handler was previously set then the reference to the previous handler is returned. =item setStartTagHandlers ($handlers) Set the start_tag handlers. C<$handlers> is a reference to a hash similar to the one in the C<start_tag_handlers> option of new. All previous handlers are unset. The method returns the reference to the previous handlers. =item setStartTagHandler ($exp $handler) Set a single start_tag handlers for elements matching C<$exp>. C<$handler> is a reference to a subroutine. If the handler was previously set then the reference to the previous handler is returned. =item setEndTagHandlers ($handlers) Set the end_tag handlers. C<$handlers> is a reference to a hash similar to the one in the C<end_tag_handlers> option of new. All previous handlers are unset. The method returns the reference to the previous handlers. =item setEndTagHandler ($exp $handler) Set a single end_tag handlers for elements matching C<$exp>. C<$handler> is a reference to a subroutine. If the handler was previously set then the reference to the previous handler is returned. =item setTwigRoots ($handlers) Same as using the C<L<twig_roots>> option when creating the twig =item setCharHandler ($exp $handler) Set a C<char_handler> =item setIgnoreEltsHandler ($exp) Set a C<ignore_elt> handler (elements that match C<$exp> will be ignored =item setIgnoreEltsHandlers ($exp) Set all C<ignore_elt> handlers (previous handlers are replaced) =item dtd Return the dtd (an L<XML::Twig::DTD> object) of a twig =item xmldecl Return the XML declaration for the document, or a default one if it doesn't have one =item doctype Return the doctype for the document =item doctype_name returns the doctype of the document from the doctype declaration =item system_id returns the system value of the DTD of the document from the doctype declaration =item public_id returns the public doctype of the document from the doctype declaration =item internal_subset returns the internal subset of the DTD =item dtd_text Return the DTD text =item dtd_print Print the DTD =item model ($tag) Return the model (in the DTD) for the element C<$tag> =item root Return the root element of a twig =item set_root ($elt) Set the root of a twig =item first_elt ($optional_condition) Return the first element matching C<$optional_condition> of a twig, if no condition is given then the root is returned =item last_elt ($optional_condition) Return the last element matching C<$optional_condition> of a twig, if no condition is given then the last element of the twig is returned =item elt_id ($id) Return the element whose C<id> attribute is $id =item getEltById Same as C<L<elt_id>> =item index ($index_name, $optional_index) If the C<$optional_index> argument is present, return the corresponding element in the index (created using the C<index> option for C<XML::Twig->new>) If the argument is not present, return an arrayref to the index =item normalize merge together all consecutive pcdata elements in the document (if for example you have turned some elements into pcdata using C<L<erase>>, this will give you a "clean" document in which there all text elements are as long as possible). =item encoding This method returns the encoding of the XML document, as defined by the C<encoding> attribute in the XML declaration (ie it is C<undef> if the attribute is not defined) =item set_encoding This method sets the value of the C<encoding> attribute in the XML declaration. Note that if the document did not have a declaration it is generated (with an XML version of 1.0) =item xml_version This method returns the XML version, as defined by the C<version> attribute in the XML declaration (ie it is C<undef> if the attribute is not defined) =item set_xml_version This method sets the value of the C<version> attribute in the XML declaration. If the declaration did not exist it is created. =item standalone This method returns the value of the C<standalone> declaration for the document =item set_standalone This method sets the value of the C<standalone> attribute in the XML declaration. Note that if the document did not have a declaration it is generated (with an XML version of 1.0) =item set_output_encoding Set the C<encoding> "attribute" in the XML declaration =item set_doctype ($name, $system, $public, $internal) Set the doctype of the element. If an argument is C<undef> (or not present) then its former value is retained, if a false ('' or 0) value is passed then the former value is deleted; =item entity_list Return the entity list of a twig =item entity_names Return the list of all defined entities =item entity ($entity_name) Return the entity =item notation_list Return the notation list of a twig =item notation_names Return the list of all defined notations =item notation ($notation_name) Return the notation =item change_gi ($old_gi, $new_gi) Performs a (very fast) global change. All elements C<$old_gi> are now C<$new_gi>. This is a bit dangerous though and should be avoided if < possible, as the new tag might be ignored in subsequent processing. See C<L<BUGS> > =item flush ($optional_filehandle, %options) Flushes a twig up to (and including) the current element, then deletes all unnecessary elements from the tree that's kept in memory. C<flush> keeps track of which elements need to be open/closed, so if you flush from handlers you don't have to worry about anything. Just keep flushing the twig every time you're done with a sub-tree and it will come out well-formed. After the whole parsing don't forget toC<flush> one more time to print the end of the document. The doctype and entity declarations are also printed. flush take an optional filehandle as an argument. If you use C<flush> at any point during parsing, the document will be flushed one last time at the end of the parsing, to the proper filehandle. options: use the C<update_DTD> option if you have updated the (internal) DTD and/or the entity list and you want the updated DTD to be output The C<pretty_print> option sets the pretty printing of the document. Example: $t->flush( Update_DTD => 1); $t->flush( $filehandle, pretty_print => 'indented'); $t->flush( \*FILE); =item flush_up_to ($elt, $optional_filehandle, %options) Flushes up to the C<$elt> element. This allows you to keep part of the tree in memory when you C<flush>. options: see flush. =item purge Does the same as a C<flush> except it does not print the twig. It just deletes all elements that have been completely parsed so far. =item purge_up_to ($elt) Purges up to the C<$elt> element. This allows you to keep part of the tree in memory when you C<purge>. =item print ($optional_filehandle, %options) Prints the whole document associated with the twig. To be used only AFTER the parse. options: see C<flush>. =item print_to_file ($filename, %options) Prints the whole document associated with the twig to file C<$filename>. To be used only AFTER the parse. options: see C<flush>. =item safe_print_to_file ($filename, %options) Prints the whole document associated with the twig to file C<$filename>. This variant, which probably only works on *nix prints to a temp file, then move the temp file to overwrite the original file. This is a bit safer when 2 processes an potentiallywrite the same file: only the last one will succeed, but the file won't be corruted. I often use this for cron jobs, so testing the code doesn't interfere with the cron job running at the same time. options: see C<flush>. =item sprint Return the text of the whole document associated with the twig. To be used only AFTER the parse. options: see C<flush>. =item trim Trim the document: gets rid of initial and trailing spaces, and replaces multiple spaces by a single one. =item toSAX1 ($handler) Send SAX events for the twig to the SAX1 handler C<$handler> =item toSAX2 ($handler) Send SAX events for the twig to the SAX2 handler C<$handler> =item flush_toSAX1 ($handler) Same as flush, except that SAX events are sent to the SAX1 handler C<$handler> instead of the twig being printed =item flush_toSAX2 ($handler) Same as flush, except that SAX events are sent to the SAX2 handler C<$handler> instead of the twig being printed =item ignore This method should be called during parsing, usually in C<start_tag_handlers>. It causes the element to be skipped during the parsing: the twig is not built for this element, it will not be accessible during parsing or after it. The element will not take up any memory and parsing will be faster. Note that this method can also be called on an element. If the element is a parent of the current element then this element will be ignored (the twig will not be built any more for it and what has already been built will be deleted). =item set_pretty_print ($style) Set the pretty print method, amongst 'C<none>' (default), 'C<nsgmls>', 'C<nice>', 'C<indented>', C<indented_c>, 'C<wrapped>', 'C<record>' and 'C<record_c>' B<WARNING:> the pretty print style is a B<GLOBAL> variable, so once set it's applied to B<ALL> C<print>'s (and C<sprint>'s). Same goes if you use XML::Twig with C<mod_perl> . This should not be a problem as the XML that's generated is valid anyway, and XML processors (as well as HTML processors, including browsers) should not care. Let me know if this is a big problem, but at the moment the performance/cleanliness trade-off clearly favors the global approach. =item set_empty_tag_style ($style) Set the empty tag display style ('C<normal>', 'C<html>' or 'C<expand>'). As with C<L<set_pretty_print>> this sets a global flag. C<normal> outputs an empty tag 'C<< <tag/> >>', C<html> adds a space 'C<< <tag /> >>' for elements that can be empty in XHTML and C<expand> outputs 'C<< <tag></tag> >>' =item set_remove_cdata ($flag) set (or unset) the flag that forces the twig to output CDATA sections as regular (escaped) PCDATA =item print_prolog ($optional_filehandle, %options) Prints the prolog (XML declaration + DTD + entity declarations) of a document. options: see C<L<flush>>. =item prolog ($optional_filehandle, %options) Return the prolog (XML declaration + DTD + entity declarations) of a document. options: see C<L<flush>>. =item finish Call Expat C<finish> method. Unsets all handlers (including internal ones that set context), but expat continues parsing to the end of the document or until it finds an error. It should finish up a lot faster than with the handlers set. =item finish_print Stops twig processing, flush the twig and proceed to finish printing the document as fast as possible. Use this method when modifying a document and the modification is done. =item finish_now Stops twig processing, does not finish parsing the document (which could actually be not well-formed after the point where C<finish_now> is called). Execution resumes after the C<Lparse>> or C<L<parsefile>> call. The content of the twig is what has been parsed so far (all open elements at the time C<finish_now> is called are considered closed). =item set_expand_external_entities Same as using the C<L<expand_external_ents>> option when creating the twig =item set_input_filter Same as using the C<L<input_filter>> option when creating the twig =item set_keep_atts_order Same as using the C<L<keep_atts_order>> option when creating the twig =item set_keep_encoding Same as using the C<L<keep_encoding>> option when creating the twig =item escape_gt usually XML::Twig does not escape > in its output. Using this option makes it replace > by > =item do_not_escape_gt reverts XML::Twig behavior to its default of not escaping > in its output. =item set_output_filter Same as using the C<L<output_filter>> option when creating the twig =item set_output_text_filter Same as using the C<L<output_text_filter>> option when creating the twig =item add_stylesheet ($type, @options) Adds an external stylesheet to an XML document. Supported types and options: =over 4 =item xsl option: the url of the stylesheet Example: $t->add_stylesheet( xsl => "xsl_style.xsl"); will generate the following PI at the beginning of the document: <?xml-stylesheet type="text/xsl" href="xsl_style.xsl"?> =item css option: the url of the stylesheet =item active_twig a class method that returns the last processed twig, so you don't necessarily need the object to call methods on it. =back =item Methods inherited from XML::Parser::Expat A twig inherits all the relevant methods from XML::Parser::Expat. These methods can only be used during the parsing phase (they will generate a fatal error otherwise). Inherited methods are: =over 4 =item depth Returns the size of the context list. =item in_element Returns true if NAME is equal to the name of the innermost cur†rently opened element. If namespace processing is being used and you want to check against a name that may be in a namespace, then use the generate_ns_name method to create the NAME argument. =item within_element Returns the number of times the given name appears in the context list. If namespace processing is being used and you want to check against a name that may be in a namespace, then use the gener†ate_ns_name method to create the NAME argument. =item context Returns a list of element names that represent open elements, with the last one being the innermost. Inside start and end tag han†dlers, this will be the tag of the parent element. =item current_line Returns the line number of the current position of the parse. =item current_column Returns the column number of the current position of the parse. =item current_byte Returns the current position of the parse. =item position_in_context Returns a string that shows the current parse position. LINES should be an integer >= 0 that represents the number of lines on either side of the current parse line to place into the returned string. =item base ([NEWBASE]) Returns the current value of the base for resolving relative URIs. If NEWBASE is supplied, changes the base to that value. =item current_element Returns the name of the innermost currently opened element. Inside start or end handlers, returns the parent of the element associated with those tags. =item element_index Returns an integer that is the depth-first visit order of the cur†rent element. This will be zero outside of the root element. For example, this will return 1 when called from the start handler for the root element start tag. =item recognized_string Returns the string from the document that was recognized in order to call the current handler. For instance, when called from a start handler, it will give us the start-tag string. The string is encoded in UTF-8. This method doesn't return a meaningful string inside declaration handlers. =item original_string Returns the verbatim string from the document that was recognized in order to call the current handler. The string is in the original document encoding. This method doesn't return a meaningful string inside declaration handlers. =item xpcroak Concatenate onto the given message the current line number within the XML document plus the message implied by ErrorContext. Then croak with the formed message. =item xpcarp Concatenate onto the given message the current line number within the XML document plus the message implied by ErrorContext. Then carp with the formed message. =item xml_escape(TEXT [, CHAR [, CHAR ...]]) Returns TEXT with markup characters turned into character entities. Any additional characters provided as arguments are also turned into character references where found in TEXT. (this method is broken on some versions of expat/XML::Parser) =back =item path ( $optional_tag) Return the element context in a form similar to XPath's short form: 'C</root/tag1/../tag>' =item get_xpath ( $optional_array_ref, $xpath, $optional_offset) Performs a C<get_xpath> on the document root (see <Elt|"Elt">) If the C<$optional_array_ref> argument is used the array must contain elements. The C<$xpath> expression is applied to each element in turn and the result is union of all results. This way a first query can be refined in further steps. =item find_nodes ( $optional_array_ref, $xpath, $optional_offset) same as C<get_xpath> =item findnodes ( $optional_array_ref, $xpath, $optional_offset) same as C<get_xpath> (similar to the XML::LibXML method) =item findvalue ( $optional_array_ref, $xpath, $optional_offset) Return the C<join> of all texts of the results of applying C<L<get_xpath>> to the node (similar to the XML::LibXML method) =item findvalues ( $optional_array_ref, $xpath, $optional_offset) Return an array of all texts of the results of applying C<L<get_xpath>> to the node =item subs_text ($regexp, $replace) subs_text does text substitution on the whole document, similar to perl's C< s///> operator. =item dispose Useful only if you don't have C<Scalar::Util> or C<WeakRef> installed. Reclaims properly the memory used by an XML::Twig object. As the object has circular references it never goes out of scope, so if you want to parse lots of XML documents then the memory leak becomes a problem. Use C<< $twig->dispose >> to clear this problem. =item att_accessors (list_of_attribute_names) A convenience method that creates l-valued accessors for attributes. So C<< $twig->create_accessors( 'foo') >> will create a C<foo> method that can be called on elements: $elt->foo; # equivalent to $elt->{'att'}->{'foo'}; $elt->foo( 'bar'); # equivalent to $elt->set_att( foo => 'bar'); The methods are l-valued only under those perl's that support this feature (5.6 and above) =item create_accessors (list_of_attribute_names) Same as att_accessors =item elt_accessors (list_of_attribute_names) A convenience method that creates accessors for elements. So C<< $twig->create_accessors( 'foo') >> will create a C<foo> method that can be called on elements: $elt->foo; # equivalent to $elt->first_child( 'foo'); =item field_accessors (list_of_attribute_names) A convenience method that creates accessors for element values (C<field>). So C<< $twig->create_accessors( 'foo') >> will create a C<foo> method that can be called on elements: $elt->foo; # equivalent to $elt->field( 'foo'); =item set_do_not_escape_amp_in_atts An evil method, that I only document because Test::Pod::Coverage complaints otherwise, but really, you don't want to know about it. =back =head2 XML::Twig::Elt =over 4 =item new ($optional_tag, $optional_atts, @optional_content) The C<tag> is optional (but then you can't have a content ), the C<$optional_atts> argument is a reference to a hash of attributes, the content can be just a string or a list of strings and element. A content of 'C<#EMPTY>' creates an empty element; Examples: my $elt= XML::Twig::Elt->new(); my $elt= XML::Twig::Elt->new( para => { align => 'center' }); my $elt= XML::Twig::Elt->new( para => { align => 'center' }, 'foo'); my $elt= XML::Twig::Elt->new( br => '#EMPTY'); my $elt= XML::Twig::Elt->new( 'para'); my $elt= XML::Twig::Elt->new( para => 'this is a para'); my $elt= XML::Twig::Elt->new( para => $elt3, 'another para'); The strings are not parsed, the element is not attached to any twig. B<WARNING>: if you rely on ID's then you will have to set the id yourself. At this point the element does not belong to a twig yet, so the ID attribute is not known so it won't be stored in the ID list. Note that C<#COMMENT>, C<#PCDATA> or C<#CDATA> are valid tag names, that will create text elements. To create an element C<foo> containing a CDATA section: my $foo= XML::Twig::Elt->new( '#CDATA' => "content of the CDATA section") ->wrap_in( 'foo'); An attribute of '#CDATA', will create the content of the element as CDATA: my $elt= XML::Twig::Elt->new( 'p' => { '#CDATA' => 1}, 'foo < bar'); creates an element <p><![CDATA[foo < bar]]></> =item parse ($string, %args) Creates an element from an XML string. The string is actually parsed as a new twig, then the root of that twig is returned. The arguments in C<%args> are passed to the twig. As always if the parse fails the parser will die, so use an eval if you want to trap syntax errors. As obviously the element does not exist beforehand this method has to be called on the class: my $elt= parse XML::Twig::Elt( "<a> string to parse, with <sub/> <elements>, actually tons of </elements> h</a>"); =item set_inner_xml ($string) Sets the content of the element to be the tree created from the string =item set_inner_html ($string) Sets the content of the element, after parsing the string with an HTML parser (HTML::Parser) =item set_outer_xml ($string) Replaces the element with the tree created from the string =item print ($optional_filehandle, $optional_pretty_print_style) Prints an entire element, including the tags, optionally to a C<$optional_filehandle>, optionally with a C<$pretty_print_style>. The print outputs XML data so base entities are escaped. =item print_to_file ($filename, %options) Prints the element to file C<$filename>. options: see C<flush>. =item sprint ($elt, $optional_no_enclosing_tag) Return the xml string for an entire element, including the tags. If the optional second argument is true then only the string inside the element is returned (the start and end tag for $elt are not). The text is XML-escaped: base entities (& and < in text, & < and " in attribute values) are turned into entities. =item gi Return the gi of the element (the gi is the C<generic identifier> the tag name in SGML parlance). C<tag> and C<name> are synonyms of C<gi>. =item tag Same as C<L<gi>> =item name Same as C<L<tag>> =item set_gi ($tag) Set the gi (tag) of an element =item set_tag ($tag) Set the tag (=C<L<tag>>) of an element =item set_name ($name) Set the name (=C<L<tag>>) of an element =item root Return the root of the twig in which the element is contained. =item twig Return the twig containing the element. =item parent ($optional_condition) Return the parent of the element, or the first ancestor matching the C<$optional_condition> =item first_child ($optional_condition) Return the first child of the element, or the first child matching the C<$optional_condition> =item has_child ($optional_condition) Return the first child of the element, or the first child matching the C<$optional_condition> (same as L<first_child>) =item has_children ($optional_condition) Return the first child of the element, or the first child matching the C<$optional_condition> (same as L<first_child>) =item first_child_text ($optional_condition) Return the text of the first child of the element, or the first child matching the C<$optional_condition> If there is no first_child then returns ''. This avoids getting the child, checking for its existence then getting the text for trivial cases. Similar methods are available for the other navigation methods: =over 4 =item last_child_text =item prev_sibling_text =item next_sibling_text =item prev_elt_text =item next_elt_text =item child_text =item parent_text =back All this methods also exist in "trimmed" variant: =over 4 =item first_child_trimmed_text =item last_child_trimmed_text =item prev_sibling_trimmed_text =item next_sibling_trimmed_text =item prev_elt_trimmed_text =item next_elt_trimmed_text =item child_trimmed_text =item parent_trimmed_text =back =item field ($condition) Same method as C<first_child_text> with a different name =item fields ($condition_list) Return the list of field (text of first child matching the conditions), missing fields are returned as the empty string. Same method as C<first_child_text> with a different name =item trimmed_field ($optional_condition) Same method as C<first_child_trimmed_text> with a different name =item set_field ($condition, $optional_atts, @list_of_elt_and_strings) Set the content of the first child of the element that matches C<$condition>, the rest of the arguments is the same as for C<L<set_content>> If no child matches C<$condition> _and_ if C<$condition> is a valid XML element name, then a new element by that name is created and inserted as the last child. =item first_child_matches ($optional_condition) Return the element if the first child of the element (if it exists) passes the C<$optional_condition> C<undef> otherwise if( $elt->first_child_matches( 'title')) ... is equivalent to if( $elt->{first_child} && $elt->{first_child}->passes( 'title')) C<first_child_is> is an other name for this method Similar methods are available for the other navigation methods: =over 4 =item last_child_matches =item prev_sibling_matches =item next_sibling_matches =item prev_elt_matches =item next_elt_matches =item child_matches =item parent_matches =back =item is_first_child ($optional_condition) returns true (the element) if the element is the first child of its parent (optionally that satisfies the C<$optional_condition>) =item is_last_child ($optional_condition) returns true (the element) if the element is the last child of its parent (optionally that satisfies the C<$optional_condition>) =item prev_sibling ($optional_condition) Return the previous sibling of the element, or the previous sibling matching C<$optional_condition> =item next_sibling ($optional_condition) Return the next sibling of the element, or the first one matching C<$optional_condition>. =item next_elt ($optional_elt, $optional_condition) Return the next elt (optionally matching C<$optional_condition>) of the element. This is defined as the next element which opens after the current element opens. Which usually means the first child of the element. Counter-intuitive as it might look this allows you to loop through the whole document by starting from the root. The C<$optional_elt> is the root of a subtree. When the C<next_elt> is out of the subtree then the method returns undef. You can then walk a sub-tree with: my $elt= $subtree_root; while( $elt= $elt->next_elt( $subtree_root)) { # insert processing code here } =item prev_elt ($optional_condition) Return the previous elt (optionally matching C<$optional_condition>) of the element. This is the first element which opens before the current one. It is usually either the last descendant of the previous sibling or simply the parent =item next_n_elt ($offset, $optional_condition) Return the C<$offset>-th element that matches the C<$optional_condition> =item following_elt Return the following element (as per the XPath following axis) =item preceding_elt Return the preceding element (as per the XPath preceding axis) =item following_elts Return the list of following elements (as per the XPath following axis) =item preceding_elts Return the list of preceding elements (as per the XPath preceding axis) =item children ($optional_condition) Return the list of children (optionally which matches C<$optional_condition>) of the element. The list is in document order. =item children_count ($optional_condition) Return the number of children of the element (optionally which matches C<$optional_condition>) =item children_text ($optional_condition) In array context, returns an array containing the text of children of the element (optionally which matches C<$optional_condition>) In scalar context, returns the concatenation of the text of children of the element =item children_trimmed_text ($optional_condition) In array context, returns an array containing the trimmed text of children of the element (optionally which matches C<$optional_condition>) In scalar context, returns the concatenation of the trimmed text of children of the element =item children_copy ($optional_condition) Return a list of elements that are copies of the children of the element, optionally which matches C<$optional_condition> =item descendants ($optional_condition) Return the list of all descendants (optionally which matches C<$optional_condition>) of the element. This is the equivalent of the C<getElementsByTagName> of the DOM (by the way, if you are really a DOM addict, you can use C<getElementsByTagName> instead) =item getElementsByTagName ($optional_condition) Same as C<L<descendants>> =item find_by_tag_name ($optional_condition) Same as C<L<descendants>> =item descendants_or_self ($optional_condition) Same as C<L<descendants>> except that the element itself is included in the list if it matches the C<$optional_condition> =item first_descendant ($optional_condition) Return the first descendant of the element that matches the condition =item last_descendant ($optional_condition) Return the last descendant of the element that matches the condition =item ancestors ($optional_condition) Return the list of ancestors (optionally matching C<$optional_condition>) of the element. The list is ordered from the innermost ancestor to the outermost one NOTE: the element itself is not part of the list, in order to include it you will have to use ancestors_or_self =item ancestors_or_self ($optional_condition) Return the list of ancestors (optionally matching C<$optional_condition>) of the element, including the element (if it matches the condition>). The list is ordered from the innermost ancestor to the outermost one =item passes ($condition) Return the element if it passes the C<$condition> =item att ($att) Return the value of attribute C<$att> or C<undef> =item latt ($att) Return the value of attribute C<$att> or C<undef> this method is an lvalue, so you can do C<< $elt->latt( 'foo')= 'bar' >> or C<< $elt->latt( 'foo')++; >> =item set_att ($att, $att_value) Set the attribute of the element to the given value You can actually set several attributes this way: $elt->set_att( att1 => "val1", att2 => "val2"); =item del_att ($att) Delete the attribute for the element You can actually delete several attributes at once: $elt->del_att( 'att1', 'att2', 'att3'); =item att_exists ($att) Returns true if the attribute C<$att> exists for the element, false otherwise =item cut Cut the element from the tree. The element still exists, it can be copied or pasted somewhere else, it is just not attached to the tree anymore. Note that the "old" links to the parent, previous and next siblings can still be accessed using the former_* methods =item former_next_sibling Returns the former next sibling of a cut node (or undef if the node has not been cut) This makes it easier to write loops where you cut elements: my $child= $parent->first_child( 'achild'); while( $child->{'att'}->{'cut'}) { $child->cut; $child= ($child->{former} && $child->{former}->{next_sibling}); } =item former_prev_sibling Returns the former previous sibling of a cut node (or undef if the node has not been cut) =item former_parent Returns the former parent of a cut node (or undef if the node has not been cut) =item cut_children ($optional_condition) Cut all the children of the element (or all of those which satisfy the C<$optional_condition>). Return the list of children =item cut_descendants ($optional_condition) Cut all the descendants of the element (or all of those which satisfy the C<$optional_condition>). Return the list of descendants =item copy ($elt) Return a copy of the element. The copy is a "deep" copy: all sub-elements of the element are duplicated. =item paste ($optional_position, $ref) Paste a (previously C<cut> or newly generated) element. Die if the element already belongs to a tree. Note that the calling element is pasted: $child->paste( first_child => $existing_parent); $new_sibling->paste( after => $this_sibling_is_already_in_the_tree); or my $new_elt= XML::Twig::Elt->new( tag => $content); $new_elt->paste( $position => $existing_elt); Example: my $t= XML::Twig->new->parse( 'doc.xml') my $toc= $t->root->new( 'toc'); $toc->paste( $t->root); # $toc is pasted as first child of the root foreach my $title ($t->findnodes( '/doc/section/title')) { my $title_toc= $title->copy; # paste $title_toc as the last child of toc $title_toc->paste( last_child => $toc) } Position options: =over 4 =item first_child (default) The element is pasted as the first child of C<$ref> =item last_child The element is pasted as the last child of C<$ref> =item before The element is pasted before C<$ref>, as its previous sibling. =item after The element is pasted after C<$ref>, as its next sibling. =item within In this case an extra argument, C<$offset>, should be supplied. The element will be pasted in the reference element (or in its first text child) at the given offset. To achieve this the reference element will be split at the offset. =back Note that you can call directly the underlying method: =over 4 =item paste_before =item paste_after =item paste_first_child =item paste_last_child =item paste_within =back =item move ($optional_position, $ref) Move an element in the tree. This is just a C<cut> then a C<paste>. The syntax is the same as C<paste>. =item replace ($ref) Replaces an element in the tree. Sometimes it is just not possible toC<cut> an element then C<paste> another in its place, so C<replace> comes in handy. The calling element replaces C<$ref>. =item replace_with (@elts) Replaces the calling element with one or more elements =item delete Cut the element and frees the memory. =item prefix ($text, $optional_option) Add a prefix to an element. If the element is a C<PCDATA> element the text is added to the pcdata, if the elements first child is a C<PCDATA> then the text is added to it's pcdata, otherwise a new C<PCDATA> element is created and pasted as the first child of the element. If the option is C<asis> then the prefix is added asis: it is created in a separate C<PCDATA> element with an C<asis> property. You can then write: $elt1->prefix( '<b>', 'asis'); to create a C<< <b> >> in the output of C<print>. =item suffix ($text, $optional_option) Add a suffix to an element. If the element is a C<PCDATA> element the text is added to the pcdata, if the elements last child is a C<PCDATA> then the text is added to it's pcdata, otherwise a new PCDATA element is created and pasted as the last child of the element. If the option is C<asis> then the suffix is added asis: it is created in a separate C<PCDATA> element with an C<asis> property. You can then write: $elt2->suffix( '</b>', 'asis'); =item trim Trim the element in-place: spaces at the beginning and at the end of the element are discarded and multiple spaces within the element (or its descendants) are replaced by a single space. Note that in some cases you can still end up with multiple spaces, if they are split between several elements: <doc> text <b> hah! </b> yep</doc> gets trimmed to <doc>text <b> hah! </b> yep</doc> This is somewhere in between a bug and a feature. =item normalize merge together all consecutive pcdata elements in the element (if for example you have turned some elements into pcdata using C<L<erase>>, this will give you a "clean" element in which there all text fragments are as long as possible). =item simplify (%options) Return a data structure suspiciously similar to XML::Simple's. Options are identical to XMLin options, see XML::Simple doc for more details (or use DATA::dumper or YAML to dump the data structure) B<Note>: there is no magic here, if you write C<< $twig->parsefile( $file )->simplify(); >> then it will load the entire document in memory. I am afraid you will have to put some work into it to get just the bits you want and discard the rest. Look at the synopsis or the XML::Twig 101 section at the top of the docs for more information. =over 4 =item content_key =item forcearray =item keyattr =item noattr =item normalize_space aka normalise_space =item variables (%var_hash) %var_hash is a hash { name => value } This option allows variables in the XML to be expanded when the file is read. (there is no facility for putting the variable names back if you regenerate XML using XMLout). A 'variable' is any text of the form ${name} (or $name) which occurs in an attribute value or in the text content of an element. If 'name' matches a key in the supplied hashref, ${name} will be replaced with the corresponding value from the hashref. If no matching key is found, the variable will not be replaced. =item var_att ($attribute_name) This option gives the name of an attribute that will be used to create variables in the XML: <dirs> <dir name="prefix">/usr/local</dir> <dir name="exec_prefix">$prefix/bin</dir> </dirs> use C<< var => 'name' >> to get $prefix replaced by /usr/local in the generated data structure By default variables are captured by the following regexp: /$(\w+)/ =item var_regexp (regexp) This option changes the regexp used to capture variables. The variable name should be in $1 =item group_tags { grouping tag => grouped tag, grouping tag 2 => grouped tag 2...} Option used to simplify the structure: elements listed will not be used. Their children will be, they will be considered children of the element parent. If the element is: <config host="laptop.xmltwig.org"> <server>localhost</server> <dirs> <dir name="base">/home/mrodrigu/standards</dir> <dir name="tools">$base/tools</dir> </dirs> <templates> <template name="std_def">std_def.templ</template> <template name="dummy">dummy</template> </templates> </config> Then calling simplify with C<< group_tags => { dirs => 'dir', templates => 'template'} >> makes the data structure be exactly as if the start and end tags for C<dirs> and C<templates> were not there. A YAML dump of the structure base: '/home/mrodrigu/standards' host: laptop.xmltwig.org server: localhost template: - std_def.templ - dummy.templ tools: '$base/tools' =back =item split_at ($offset) Split a text (C<PCDATA> or C<CDATA>) element in 2 at C<$offset>, the original element now holds the first part of the string and a new element holds the right part. The new element is returned If the element is not a text element then the first text child of the element is split =item split ( $optional_regexp, $tag1, $atts1, $tag2, $atts2...) Split the text descendants of an element in place, the text is split using the C<$regexp>, if the regexp includes () then the matched separators will be wrapped in elements. C<$1> is wrapped in $tag1, with attributes C<$atts1> if C<$atts1> is given (as a hashref), C<$2> is wrapped in $tag2... if $elt is C<< <p>tati tata <b>tutu tati titi</b> tata tati tata</p> >> $elt->split( qr/(ta)ti/, 'foo', {type => 'toto'} ) will change $elt to <p><foo type="toto">ta</foo> tata <b>tutu <foo type="toto">ta</foo> titi</b> tata <foo type="toto">ta</foo> tata</p> The regexp can be passed either as a string or as C<qr//> (perl 5.005 and later), it defaults to \s+ just as the C<split> built-in (but this would be quite a useless behaviour without the C<$optional_tag> parameter) C<$optional_tag> defaults to PCDATA or CDATA, depending on the initial element type The list of descendants is returned (including un-touched original elements and newly created ones) =item mark ( $regexp, $optional_tag, $optional_attribute_ref) This method behaves exactly as L<split>, except only the newly created elements are returned =item wrap_children ( $regexp_string, $tag, $optional_attribute_hashref) Wrap the children of the element that match the regexp in an element C<$tag>. If $optional_attribute_hashref is passed then the new element will have these attributes. The $regexp_string includes tags, within pointy brackets, as in C<< <title><para>+ >> and the usual Perl modifiers (+*?...). Tags can be further qualified with attributes: C<< <para type="warning" classif="cosmic_secret">+ >>. The values for attributes should be xml-escaped: C<< <candy type="M&Ms">* >> (C<E<lt>>, C<&> B<C<E<gt>>> and C<"> should be escaped). Note that elements might get extra C<id> attributes in the process. See L<add_id>. Use L<strip_att> to remove unwanted id's. Here is an example: If the element C<$elt> has the following content: <elt> <p>para 1</p> <l_l1_1>list 1 item 1 para 1</l_l1_1> <l_l1>list 1 item 1 para 2</l_l1> <l_l1_n>list 1 item 2 para 1 (only para)</l_l1_n> <l_l1_n>list 1 item 3 para 1</l_l1_n> <l_l1>list 1 item 3 para 2</l_l1> <l_l1>list 1 item 3 para 3</l_l1> <l_l1_1>list 2 item 1 para 1</l_l1_1> <l_l1>list 2 item 1 para 2</l_l1> <l_l1_n>list 2 item 2 para 1 (only para)</l_l1_n> <l_l1_n>list 2 item 3 para 1</l_l1_n> <l_l1>list 2 item 3 para 2</l_l1> <l_l1>list 2 item 3 para 3</l_l1> </elt> Then the code $elt->wrap_children( q{<l_l1_1><l_l1>*} , li => { type => "ul1" }); $elt->wrap_children( q{<l_l1_n><l_l1>*} , li => { type => "ul" }); $elt->wrap_children( q{<li type="ul1"><li type="ul">+}, "ul"); $elt->strip_att( 'id'); $elt->strip_att( 'type'); $elt->print; will output: <elt> <p>para 1</p> <ul> <li> <l_l1_1>list 1 item 1 para 1</l_l1_1> <l_l1>list 1 item 1 para 2</l_l1> </li> <li> <l_l1_n>list 1 item 2 para 1 (only para)</l_l1_n> </li> <li> <l_l1_n>list 1 item 3 para 1</l_l1_n> <l_l1>list 1 item 3 para 2</l_l1> <l_l1>list 1 item 3 para 3</l_l1> </li> </ul> <ul> <li> <l_l1_1>list 2 item 1 para 1</l_l1_1> <l_l1>list 2 item 1 para 2</l_l1> </li> <li> <l_l1_n>list 2 item 2 para 1 (only para)</l_l1_n> </li> <li> <l_l1_n>list 2 item 3 para 1</l_l1_n> <l_l1>list 2 item 3 para 2</l_l1> <l_l1>list 2 item 3 para 3</l_l1> </li> </ul> </elt> =item subs_text ($regexp, $replace) subs_text does text substitution, similar to perl's C< s///> operator. C<$regexp> must be a perl regexp, created with the C<qr> operator. C<$replace> can include C<$1, $2>... from the C<$regexp>. It can also be used to create element and entities, by using C<< &elt( tag => { att => val }, text) >> (similar syntax as C<L<new>>) and C<< &ent( name) >>. Here is a rather complex example: $elt->subs_text( qr{(?<!do not )link to (http://([^\s,]*))}, 'see &elt( a =>{ href => $1 }, $2)' ); This will replace text like I<link to http://www.xmltwig.org> by I<< see <a href="www.xmltwig.org">www.xmltwig.org</a> >>, but not I<do not link to...> Generating entities (here replacing spaces with  ): $elt->subs_text( qr{ }, '&ent( " ")'); or, using a variable: my $ent=" "; $elt->subs_text( qr{ }, "&ent( '$ent')"); Note that the substitution is always global, as in using the C<g> modifier in a perl substitution, and that it is performed on all text descendants of the element. B<Bug>: in the C<$regexp>, you can only use C<\1>, C<\2>... if the replacement expression does not include elements or attributes. eg $t->subs_text( qr/((t[aiou])\2)/, '$2'); # ok, replaces toto, tata, titi, tutu by to, ta, ti, tu $t->subs_text( qr/((t[aiou])\2)/, '&elt(p => $1)' ); # NOK, does not find toto... =item add_id ($optional_coderef) Add an id to the element. The id is an attribute, C<id> by default, see the C<id> option for XML::Twig C<new> to change it. Use an id starting with C<#> to get an id that's not output by L<print>, L<flush> or L<sprint>, yet that allows you to use the L<elt_id> method to get the element easily. If the element already has an id, no new id is generated. By default the method create an id of the form C<< twig_id_<nnnn> >>, where C<< <nnnn> >> is a number, incremented each time the method is called successfully. =item set_id_seed ($prefix) by default the id generated by C<L<add_id>> is C<< twig_id_<nnnn> >>, C<set_id_seed> changes the prefix to C<$prefix> and resets the number to 1 =item strip_att ($att) Remove the attribute C<$att> from all descendants of the element (including the element) Return the element =item change_att_name ($old_name, $new_name) Change the name of the attribute from C<$old_name> to C<$new_name>. If there is no attribute C<$old_name> nothing happens. =item lc_attnames Lower cases the name all the attributes of the element. =item sort_children_on_value( %options) Sort the children of the element in place according to their text. All children are sorted. Return the element, with its children sorted. C<%options> are type : numeric | alpha (default: alpha) order : normal | reverse (default: normal) Return the element, with its children sorted =item sort_children_on_att ($att, %options) Sort the children of the element in place according to attribute C<$att>. C<%options> are the same as for C<sort_children_on_value> Return the element. =item sort_children_on_field ($tag, %options) Sort the children of the element in place, according to the field C<$tag> (the text of the first child of the child with this tag). C<%options> are the same as for C<sort_children_on_value>. Return the element, with its children sorted =item sort_children( $get_key, %options) Sort the children of the element in place. The C<$get_key> argument is a reference to a function that returns the sort key when passed an element. For example: $elt->sort_children( sub { $_[0]->{'att'}->{"nb"} + $_[0]->text }, type => 'numeric', order => 'reverse' ); =item field_to_att ($cond, $att) Turn the text of the first sub-element matched by C<$cond> into the value of attribute C<$att> of the element. If C<$att> is omitted then C<$cond> is used as the name of the attribute, which makes sense only if C<$cond> is a valid element (and attribute) name. The sub-element is then cut. =item att_to_field ($att, $tag) Take the value of attribute C<$att> and create a sub-element C<$tag> as first child of the element. If C<$tag> is omitted then C<$att> is used as the name of the sub-element. =item get_xpath ($xpath, $optional_offset) Return a list of elements satisfying the C<$xpath>. C<$xpath> is an XPATH-like expression. A subset of the XPATH abbreviated syntax is covered: tag tag[1] (or any other positive number) tag[last()] tag[@att] (the attribute exists for the element) tag[@att="val"] tag[@att=~ /regexp/] tag[att1="val1" and att2="val2"] tag[att1="val1" or att2="val2"] tag[string()="toto"] (returns tag elements which text (as per the text method) is toto) tag[string()=~/regexp/] (returns tag elements which text (as per the text method) matches regexp) expressions can start with / (search starts at the document root) expressions can start with . (search starts at the current element) // can be used to get all descendants instead of just direct children * matches any tag So the following examples from the F<XPath recommendationL<http://www.w3.org/TR/xpath.html#path-abbrev>> work: para selects the para element children of the context node * selects all element children of the context node para[1] selects the first para child of the context node para[last()] selects the last para child of the context node */para selects all para grandchildren of the context node /doc/chapter[5]/section[2] selects the second section of the fifth chapter of the doc chapter//para selects the para element descendants of the chapter element children of the context node //para selects all the para descendants of the document root and thus selects all para elements in the same document as the context node //olist/item selects all the item elements in the same document as the context node that have an olist parent .//para selects the para element descendants of the context node .. selects the parent of the context node para[@type="warning"] selects all para children of the context node that have a type attribute with value warning employee[@secretary and @assistant] selects all the employee children of the context node that have both a secretary attribute and an assistant attribute The elements will be returned in the document order. If C<$optional_offset> is used then only one element will be returned, the one with the appropriate offset in the list, starting at 0 Quoting and interpolating variables can be a pain when the Perl syntax and the XPATH syntax collide, so use alternate quoting mechanisms like q or qq (I like q{} and qq{} myself). Here are some more examples to get you started: my $p1= "p1"; my $p2= "p2"; my @res= $t->get_xpath( qq{p[string( "$p1") or string( "$p2")]}); my $a= "a1"; my @res= $t->get_xpath( qq{//*[@att="$a"]}); my $val= "a1"; my $exp= qq{//p[ \@att='$val']}; # you need to use \@ or you will get a warning my @res= $t->get_xpath( $exp); Note that the only supported regexps delimiters are / and that you must backslash all / in regexps AND in regular strings. XML::Twig does not provide natively full XPATH support, but you can use C<L<XML::Twig::XPath>> to get C<findnodes> to use C<XML::XPath> as the XPath engine, with full coverage of the spec. C<L<XML::Twig::XPath>> to get C<findnodes> to use C<XML::XPath> as the XPath engine, with full coverage of the spec. =item find_nodes same asC<get_xpath> =item findnodes same as C<get_xpath> =item text @optional_options Return a string consisting of all the C<PCDATA> and C<CDATA> in an element, without any tags. The text is not XML-escaped: base entities such as C<&> and C<< < >> are not escaped. The 'C<no_recurse>' option will only return the text of the element, not of any included sub-elements (same as C<L<text_only>>). =item text_only Same as C<L<text>> except that the text returned doesn't include the text of sub-elements. =item trimmed_text Same as C<text> except that the text is trimmed: leading and trailing spaces are discarded, consecutive spaces are collapsed =item set_text ($string) Set the text for the element: if the element is a C<PCDATA>, just set its text, otherwise cut all the children of the element and create a single C<PCDATA> child for it, which holds the text. =item merge ($elt2) Move the content of C<$elt2> within the element =item insert ($tag1, [$optional_atts1], $tag2, [$optional_atts2],...) For each tag in the list inserts an element C<$tag> as the only child of the element. The element gets the optional attributes inC<< $optional_atts<n>. >> All children of the element are set as children of the new element. The upper level element is returned. $p->insert( table => { border=> 1}, 'tr', 'td') put C<$p> in a table with a visible border, a single C<tr> and a single C<td> and return the C<table> element: <p><table border="1"><tr><td>original content of p</td></tr></table></p> =item wrap_in (@tag) Wrap elements in C<@tag> as the successive ancestors of the element, returns the new element. C<< $elt->wrap_in( 'td', 'tr', 'table') >> wraps the element as a single cell in a table for example. Optionally each tag can be followed by a hashref of attributes, that will be set on the wrapping element: $elt->wrap_in( p => { class => "advisory" }, div => { class => "intro", id => "div_intro" }); =item insert_new_elt ($opt_position, $tag, $opt_atts_hashref, @opt_content) Combines a C<L<new> > and a C<L<paste> >: creates a new element using C<$tag>, C<$opt_atts_hashref >and C<@opt_content> which are arguments similar to those for C<new>, then paste it, using C<$opt_position> or C<'first_child'>, relative to C<$elt>. Return the newly created element =item erase Erase the element: the element is deleted and all of its children are pasted in its place. =item set_content ( $optional_atts, @list_of_elt_and_strings) ( $optional_atts, '#EMPTY') Set the content for the element, from a list of strings and elements. Cuts all the element children, then pastes the list elements as the children. This method will create a C<PCDATA> element for any strings in the list. The C<$optional_atts> argument is the ref of a hash of attributes. If this argument is used then the previous attributes are deleted, otherwise they are left untouched. B<WARNING>: if you rely on ID's then you will have to set the id yourself. At this point the element does not belong to a twig yet, so the ID attribute is not known so it won't be stored in the ID list. A content of 'C<#EMPTY>' creates an empty element; =item namespace ($optional_prefix) Return the URI of the namespace that C<$optional_prefix> or the element name belongs to. If the name doesn't belong to any namespace, C<undef> is returned. =item local_name Return the local name (without the prefix) for the element =item ns_prefix Return the namespace prefix for the element =item current_ns_prefixes Return a list of namespace prefixes valid for the element. The order of the prefixes in the list has no meaning. If the default namespace is currently bound, '' appears in the list. =item inherit_att ($att, @optional_tag_list) Return the value of an attribute inherited from parent tags. The value returned is found by looking for the attribute in the element then in turn in each of its ancestors. If the C<@optional_tag_list> is supplied only those ancestors whose tag is in the list will be checked. =item all_children_are ($optional_condition) return 1 if all children of the element pass the C<$optional_condition>, 0 otherwise =item level ($optional_condition) Return the depth of the element in the twig (root is 0). If C<$optional_condition> is given then only ancestors that match the condition are counted. B<WARNING>: in a tree created using the C<twig_roots> option this will not return the level in the document tree, level 0 will be the document root, level 1 will be the C<twig_roots> elements. During the parsing (in a C<twig_handler>) you can use the C<depth> method on the twig object to get the real parsing depth. =item in ($potential_parent) Return true if the element is in the potential_parent (C<$potential_parent> is an element) =item in_context ($cond, $optional_level) Return true if the element is included in an element which passes C<$cond> optionally within C<$optional_level> levels. The returned value is the including element. =item pcdata Return the text of a C<PCDATA> element or C<undef> if the element is not C<PCDATA>. =item pcdata_xml_string Return the text of a C<PCDATA> element or undef if the element is not C<PCDATA>. The text is "XML-escaped" ('&' and '<' are replaced by '&' and '<') =item set_pcdata ($text) Set the text of a C<PCDATA> element. This method does not check that the element is indeed a C<PCDATA> so usually you should use C<L<set_text>> instead. =item append_pcdata ($text) Add the text at the end of a C<PCDATA> element. =item is_cdata Return 1 if the element is a C<CDATA> element, returns 0 otherwise. =item is_text Return 1 if the element is a C<CDATA> or C<PCDATA> element, returns 0 otherwise. =item cdata Return the text of a C<CDATA> element or C<undef> if the element is not C<CDATA>. =item cdata_string Return the XML string of a C<CDATA> element, including the opening and closing markers. =item set_cdata ($text) Set the text of a C<CDATA> element. =item append_cdata ($text) Add the text at the end of a C<CDATA> element. =item remove_cdata Turns all C<CDATA> sections in the element into regular C<PCDATA> elements. This is useful when converting XML to HTML, as browsers do not support CDATA sections. =item extra_data Return the extra_data (comments and PI's) attached to an element =item set_extra_data ($extra_data) Set the extra_data (comments and PI's) attached to an element =item append_extra_data ($extra_data) Append extra_data to the existing extra_data before the element (if no previous extra_data exists then it is created) =item set_asis Set a property of the element that causes it to be output without being XML escaped by the print functions: if it contains C<< a < b >> it will be output as such and not as C<< a < b >>. This can be useful to create text elements that will be output as markup. Note that all C<PCDATA> descendants of the element are also marked as having the property (they are the ones that are actually impacted by the change). If the element is a C<CDATA> element it will also be output asis, without the C<CDATA> markers. The same goes for any C<CDATA> descendant of the element =item set_not_asis Unsets the C<asis> property for the element and its text descendants. =item is_asis Return the C<asis> property status of the element ( 1 or C<undef>) =item closed Return true if the element has been closed. Might be useful if you are somewhere in the tree, during the parse, and have no idea whether a parent element is completely loaded or not. =item get_type Return the type of the element: 'C<#ELT>' for "real" elements, or 'C<#PCDATA>', 'C<#CDATA>', 'C<#COMMENT>', 'C<#ENT>', 'C<#PI>' =item is_elt Return the tag if the element is a "real" element, or 0 if it is C<PCDATA>, C<CDATA>... =item contains_only_text Return 1 if the element does not contain any other "real" element =item contains_only ($exp) Return the list of children if all children of the element match the expression C<$exp> if( $para->contains_only( 'tt')) { ... } =item contains_a_single ($exp) If the element contains a single child that matches the expression C<$exp> returns that element. Otherwise returns 0. =item is_field same as C<contains_only_text> =item is_pcdata Return 1 if the element is a C<PCDATA> element, returns 0 otherwise. =item is_ent Return 1 if the element is an entity (an unexpanded entity) element, return 0 otherwise. =item is_empty Return 1 if the element is empty, 0 otherwise =item set_empty Flags the element as empty. No further check is made, so if the element is actually not empty the output will be messed. The only effect of this method is that the output will be C<< <tag att="value""/> >>. =item set_not_empty Flags the element as not empty. if it is actually empty then the element will be output as C<< <tag att="value""></tag> >> =item is_pi Return 1 if the element is a processing instruction (C<#PI>) element, return 0 otherwise. =item target Return the target of a processing instruction =item set_target ($target) Set the target of a processing instruction =item data Return the data part of a processing instruction =item set_data ($data) Set the data of a processing instruction =item set_pi ($target, $data) Set the target and data of a processing instruction =item pi_string Return the string form of a processing instruction (C<< <?target data?> >>) =item is_comment Return 1 if the element is a comment (C<#COMMENT>) element, return 0 otherwise. =item set_comment ($comment_text) Set the text for a comment =item comment Return the content of a comment (just the text, not the C<< <!-- >> and C<< --> >>) =item comment_string Return the XML string for a comment (C<< <!-- comment --> >>) Note that an XML comment cannot start or end with a '-', or include '--' (http://www.w3.org/TR/2008/REC-xml-20081126/#sec-comments), if that is the case (because you have created the comment yourself presumably, as it could not be in the input XML), then a space will be inserted before an initial '-', after a trailing one or between two '-' in the comment (which could presumably mangle javascript "hidden" in an XHTML comment); =item set_ent ($entity) Set an (non-expanded) entity (C<#ENT>). C<$entity>) is the entity text (C<&ent;>) =item ent Return the entity for an entity (C<#ENT>) element (C<&ent;>) =item ent_name Return the entity name for an entity (C<#ENT>) element (C<ent>) =item ent_string Return the entity, either expanded if the expanded version is available, or non-expanded (C<&ent;>) otherwise =item child ($offset, $optional_condition) Return the C<$offset>-th child of the element, optionally the C<$offset>-th child that matches C<$optional_condition>. The children are treated as a list, so C<< $elt->child( 0) >> is the first child, while C<< $elt->child( -1) >> is the last child. =item child_text ($offset, $optional_condition) Return the text of a child or C<undef> if the sibling does not exist. Arguments are the same as child. =item last_child ($optional_condition) Return the last child of the element, or the last child matching C<$optional_condition> (ie the last of the element children matching the condition). =item last_child_text ($optional_condition) Same as C<first_child_text> but for the last child. =item sibling ($offset, $optional_condition) Return the next or previous C<$offset>-th sibling of the element, or the C<$offset>-th one matching C<$optional_condition>. If C<$offset> is negative then a previous sibling is returned, if $offset is positive then a next sibling is returned. C<$offset=0> returns the element if there is no condition or if the element matches the condition>, C<undef> otherwise. =item sibling_text ($offset, $optional_condition) Return the text of a sibling or C<undef> if the sibling does not exist. Arguments are the same as C<sibling>. =item prev_siblings ($optional_condition) Return the list of previous siblings (optionally matching C<$optional_condition>) for the element. The elements are ordered in document order. =item next_siblings ($optional_condition) Return the list of siblings (optionally matching C<$optional_condition>) following the element. The elements are ordered in document order. =item siblings ($optional_condition) Return the list of siblings (optionally matching C<$optional_condition>) of the element (excluding the element itself). The elements are ordered in document order. =item pos ($optional_condition) Return the position of the element in the children list. The first child has a position of 1 (as in XPath). If the C<$optional_condition> is given then only siblings that match the condition are counted. If the element itself does not match the condition then 0 is returned. =item atts Return a hash ref containing the element attributes =item set_atts ({ att1=>$att1_val, att2=> $att2_val... }) Set the element attributes with the hash ref supplied as the argument. The previous attributes are lost (ie the attributes set by C<set_atts> replace all of the attributes of the element). You can also pass a list instead of a hashref: C<< $elt->set_atts( att1 => 'val1',...) >> =item del_atts Deletes all the element attributes. =item att_nb Return the number of attributes for the element =item has_atts Return true if the element has attributes (in fact return the number of attributes, thus being an alias to C<L<att_nb>> =item has_no_atts Return true if the element has no attributes, false (0) otherwise =item att_names return a list of the attribute names for the element =item att_xml_string ($att, $options) Return the attribute value, where '&', '<' and quote (" or the value of the quote option at twig creation) are XML-escaped. The options are passed as a hashref, setting C<escape_gt> to a true value will also escape '>' ($elt( 'myatt', { escape_gt => 1 }); =item set_id ($id) Set the C<id> attribute of the element to the value. See C<L<elt_id> > to change the id attribute name =item id Gets the id attribute value =item del_id ($id) Deletes the C<id> attribute of the element and remove it from the id list for the document =item class Return the C<class> attribute for the element (methods on the C<class> attribute are quite convenient when dealing with XHTML, or plain XML that will eventually be displayed using CSS) =item lclass same as class, except that this method is an lvalue, so you can do C<< $elt->lclass= "foo" >> =item set_class ($class) Set the C<class> attribute for the element to C<$class> =item add_class ($class) Add C<$class> to the element C<class> attribute: the new class is added only if it is not already present. Note that classes are then sorted alphabetically, so the C<class> attribute can be changed even if the class is already there =item remove_class ($class) Remove C<$class> from the element C<class> attribute. Note that classes are then sorted alphabetically, so the C<class> attribute can be changed even if the class is already there =item add_to_class ($class) alias for add_class =item att_to_class ($att) Set the C<class> attribute to the value of attribute C<$att> =item add_att_to_class ($att) Add the value of attribute C<$att> to the C<class> attribute of the element =item move_att_to_class ($att) Add the value of attribute C<$att> to the C<class> attribute of the element and delete the attribute =item tag_to_class Set the C<class> attribute of the element to the element tag =item add_tag_to_class Add the element tag to its C<class> attribute =item set_tag_class ($new_tag) Add the element tag to its C<class> attribute and sets the tag to C<$new_tag> =item in_class ($class) Return true (C<1>) if the element is in the class C<$class> (if C<$class> is one of the tokens in the element C<class> attribute) =item tag_to_span Change the element tag tp C<span> and set its class to the old tag =item tag_to_div Change the element tag tp C<div> and set its class to the old tag =item DESTROY Frees the element from memory. =item start_tag Return the string for the start tag for the element, including the C<< /> >> at the end of an empty element tag =item end_tag Return the string for the end tag of an element. For an empty element, this returns the empty string (''). =item xml_string @optional_options Equivalent to C<< $elt->sprint( 1) >>, returns the string for the entire element, excluding the element's tags (but nested element tags are present) The 'C<no_recurse>' option will only return the text of the element, not of any included sub-elements (same as C<L<xml_text_only>>). =item inner_xml Another synonym for xml_string =item outer_xml An other synonym for sprint =item xml_text Return the text of the element, encoded (and processed by the current C<L<output_filter>> or C<L<output_encoding>> options, without any tag. =item xml_text_only Same as C<L<xml_text>> except that the text returned doesn't include the text of sub-elements. =item set_pretty_print ($style) Set the pretty print method, amongst 'C<none>' (default), 'C<nsgmls>', 'C<nice>', 'C<indented>', 'C<record>' and 'C<record_c>' pretty_print styles: =over 4 =item none the default, no C<\n> is used =item nsgmls nsgmls style, with C<\n> added within tags =item nice adds C<\n> wherever possible (NOT SAFE, can lead to invalid XML) =item indented same as C<nice> plus indents elements (NOT SAFE, can lead to invalid XML) =item record table-oriented pretty print, one field per line =item record_c table-oriented pretty print, more compact than C<record>, one record per line =back =item set_empty_tag_style ($style) Set the method to output empty tags, amongst 'C<normal>' (default), 'C<html>', and 'C<expand>', C<normal> outputs an empty tag 'C<< <tag/> >>', C<html> adds a space 'C<< <tag /> >>' for elements that can be empty in XHTML and C<expand> outputs 'C<< <tag></tag> >>' =item set_remove_cdata ($flag) set (or unset) the flag that forces the twig to output CDATA sections as regular (escaped) PCDATA =item set_indent ($string) Set the indentation for the indented pretty print style (default is 2 spaces) =item set_quote ($quote) Set the quotes used for attributes. can be 'C<double>' (default) or 'C<single>' =item cmp ($elt) Compare the order of the 2 elements in a twig. C<$a> is the <A>..</A> element, C<$b> is the <B>...</B> element document $a->cmp( $b) <A> ... </A> ... <B> ... </B> -1 <A> ... <B> ... </B> ... </A> -1 <B> ... </B> ... <A> ... </A> 1 <B> ... <A> ... </A> ... </B> 1 $a == $b 0 $a and $b not in the same tree undef =item before ($elt) Return 1 if C<$elt> starts before the element, 0 otherwise. If the 2 elements are not in the same twig then return C<undef>. if( $a->cmp( $b) == -1) { return 1; } else { return 0; } =item after ($elt) Return 1 if $elt starts after the element, 0 otherwise. If the 2 elements are not in the same twig then return C<undef>. if( $a->cmp( $b) == -1) { return 1; } else { return 0; } =item other comparison methods =over 4 =item lt =item le =item gt =item ge =back =item path Return the element context in a form similar to XPath's short form: 'C</root/tag1/../tag>' =item xpath Return a unique XPath expression that can be used to find the element again. It looks like C</doc/sect[3]/title>: unique elements do not have an index, the others do. =item flush flushes the twig up to the current element (strictly equivalent to C<< $elt->root->flush >>) =item private methods Low-level methods on the twig: =over 4 =item set_parent ($parent) =item set_first_child ($first_child) =item set_last_child ($last_child) =item set_prev_sibling ($prev_sibling) =item set_next_sibling ($next_sibling) =item set_twig_current =item del_twig_current =item twig_current =item contains_text =back Those methods should not be used, unless of course you find some creative and interesting, not to mention useful, ways to do it. =back =head2 cond Most of the navigation functions accept a condition as an optional argument The first element (or all elements for C<L<children> > or C<L<ancestors> >) that passes the condition is returned. The condition is a single step of an XPath expression using the XPath subset defined by C<L<get_xpath>>. Additional conditions are: The condition can be =over 4 =item #ELT return a "real" element (not a PCDATA, CDATA, comment or pi element) =item #TEXT return a PCDATA or CDATA element =item regular expression return an element whose tag matches the regexp. The regexp has to be created with C<qr//> (hence this is available only on perl 5.005 and above) =item code reference applies the code, passing the current element as argument, if the code returns true then the element is returned, if it returns false then the code is applied to the next candidate. =back =head2 XML::Twig::XPath XML::Twig implements a subset of XPath through the C<L<get_xpath>> method. If you want to use the whole XPath power, then you can use C<XML::Twig::XPath> instead. In this case C<XML::Twig> uses C<XML::XPath> to execute XPath queries. You will of course need C<XML::XPath> installed to be able to use C<XML::Twig::XPath>. See L<XML::XPath> for more information. The methods you can use are: =over 4 =item findnodes ($path) return a list of nodes found by C<$path>. =item findnodes_as_string ($path) return the nodes found reproduced as XML. The result is not guaranteed to be valid XML though. =item findvalue ($path) return the concatenation of the text content of the result nodes =back In order for C<XML::XPath> to be used as the XPath engine the following methods are included in C<XML::Twig>: in XML::Twig =over 4 =item getRootNode =item getParentNode =item getChildNodes =back in XML::Twig::Elt =over 4 =item string_value =item toString =item getName =item getRootNode =item getNextSibling =item getPreviousSibling =item isElementNode =item isTextNode =item isPI =item isPINode =item isProcessingInstructionNode =item isComment =item isCommentNode =item getTarget =item getChildNodes =item getElementById =back =head2 XML::Twig::XPath::Elt The methods you can use are the same as on C<XML::Twig::XPath> elements: =over 4 =item findnodes ($path) return a list of nodes found by C<$path>. =item findnodes_as_string ($path) return the nodes found reproduced as XML. The result is not guaranteed to be valid XML though. =item findvalue ($path) return the concatenation of the text content of the result nodes =back =head2 XML::Twig::Entity_list =over 4 =item new Create an entity list. =item add ($ent) Add an entity to an entity list. =item add_new_ent ($name, $val, $sysid, $pubid, $ndata, $param) Create a new entity and add it to the entity list =item delete ($ent or $tag). Delete an entity (defined by its name or by the Entity object) from the list. =item print ($optional_filehandle) Print the entity list. =item list Return the list as an array =back =head2 XML::Twig::Entity =over 4 =item new ($name, $val, $sysid, $pubid, $ndata, $param) Same arguments as the Entity handler for XML::Parser. =item print ($optional_filehandle) Print an entity declaration. =item name Return the name of the entity =item val Return the value of the entity =item sysid Return the system id for the entity (for NDATA entities) =item pubid Return the public id for the entity (for NDATA entities) =item ndata Return true if the entity is an NDATA entity =item param Return true if the entity is a parameter entity =item text Return the entity declaration text. =back =head2 XML::Twig::Notation_list =over 4 =item new Create an notation list. =item add ($notation) Add an notation to an notation list. =item add_new_notation ($name, $base, $sysid, $pubid) Create a new notation and add it to the notation list =item delete ($notation or $tag). Delete an notation (defined by its name or by the Notation object) from the list. =item print ($optional_filehandle) Print the notation list. =item list Return the list as an array =back =head2 XML::Twig::Notation =over 4 =item new ($name, $base, $sysid, $pubid) Same argumnotations as the Notation handler for XML::Parser. =item print ($optional_filehandle) Print an notation declaration. =item name Return the name of the notation =item base Return the base to be used for resolving a relative URI =item sysid Return the system id for the notation =item pubid Return the public id for the notation =item text Return the notation declaration text. =back =head1 EXAMPLES Additional examples (and a complete tutorial) can be found on the F<XML::Twig PageL<http://www.xmltwig.org/xmltwig/>> To figure out what flush does call the following script with an XML file and an element name as arguments use XML::Twig; my ($file, $elt)= @ARGV; my $t= XML::Twig->new( twig_handlers => { $elt => sub {$_[0]->flush; print "\n[flushed here]\n";} }); $t->parsefile( $file, ErrorContext => 2); $t->flush; print "\n"; =head1 NOTES =head2 Subclassing XML::Twig Useful methods: =over 4 =item elt_class In order to subclass C<XML::Twig> you will probably need to subclass also C<L<XML::Twig::Elt>>. Use the C<elt_class> option when you create the C<XML::Twig> object to get the elements created in a different class (which should be a subclass of C<XML::Twig::Elt>. =item add_options If you inherit C<XML::Twig> new method but want to add more options to it you can use this method to prevent XML::Twig to issue warnings for those additional options. =back =head2 DTD Handling There are 3 possibilities here. They are: =over 4 =item No DTD No doctype, no DTD information, no entity information, the world is simple... =item Internal DTD The XML document includes an internal DTD, and maybe entity declarations. If you use the load_DTD option when creating the twig the DTD information and the entity declarations can be accessed. The DTD and the entity declarations will be C<flush>'ed (or C<print>'ed) either as is (if they have not been modified) or as reconstructed (poorly, comments are lost, order is not kept, due to it's content this DTD should not be viewed by anyone) if they have been modified. You can also modify them directly by changing the C<< $twig->{twig_doctype}->{internal} >> field (straight from XML::Parser, see the C<Doctype> handler doc) =item External DTD The XML document includes a reference to an external DTD, and maybe entity declarations. If you use the C<load_DTD> when creating the twig the DTD information and the entity declarations can be accessed. The entity declarations will be C<flush>'ed (or C<print>'ed) either as is (if they have not been modified) or as reconstructed (badly, comments are lost, order is not kept). You can change the doctype through the C<< $twig->set_doctype >> method and print the dtd through the C<< $twig->dtd_text >> or C<< $twig->dtd_print >> methods. If you need to modify the entity list this is probably the easiest way to do it. =back =head2 Flush Remember that element handlers are called when the element is CLOSED, so if you have handlers for nested elements the inner handlers will be called first. It makes it for example trickier than it would seem to number nested sections (or clauses, or divs), as the titles in the inner sections are handled before the outer sections. =head1 BUGS =over 4 =item segfault during parsing This happens when parsing huge documents, or lots of small ones, with a version of Perl before 5.16. This is due to a bug in the way weak references are handled in Perl itself. The fix is either to upgrade to Perl 5.16 or later (C<perlbrew> is a great tool to manage several installations of perl on the same machine). An other, NOT RECOMMENDED, way of fixing the problem, is to switch off weak references by writing C<XML::Twig::_set_weakrefs( 0);> at the top of the code. This is totally unsupported, and may lead to other problems though, =item entity handling Due to XML::Parser behaviour, non-base entities in attribute values disappear if they are not declared in the document: C<att="val&ent;"> will be turned into C<< att => val >>, unless you use the C<keep_encoding> argument to C<< XML::Twig->new >> =item DTD handling The DTD handling methods are quite bugged. No one uses them and it seems very difficult to get them to work in all cases, including with several slightly incompatible versions of XML::Parser and of libexpat. Basically you can read the DTD, output it back properly, and update entities, but not much more. So use XML::Twig with standalone documents, or with documents referring to an external DTD, but don't expect it to properly parse and even output back the DTD. =item memory leak If you use a REALLY old Perl (5.005!) and a lot of twigs you might find that you leak quite a lot of memory (about 2Ks per twig). You can use the C<L<dispose> > method to free that memory after you are done. If you create elements the same thing might happen, use the C<L<delete>> method to get rid of them. Alternatively installing the C<Scalar::Util> (or C<WeakRef>) module on a version of Perl that supports it (>5.6.0) will get rid of the memory leaks automagically. =item ID list The ID list is NOT updated when elements are cut or deleted. =item change_gi This method will not function properly if you do: $twig->change_gi( $old1, $new); $twig->change_gi( $old2, $new); $twig->change_gi( $new, $even_newer); =item sanity check on XML::Parser method calls XML::Twig should really prevent calls to some XML::Parser methods, especially the C<setHandlers> method. =item pretty printing Pretty printing (at least using the 'C<indented>' style) is hard to get right! Only elements that belong to the document will be properly indented. Printing elements that do not belong to the twig makes it impossible for XML::Twig to figure out their depth, and thus their indentation level. Also there is an unavoidable bug when using C<flush> and pretty printing for elements with mixed content that start with an embedded element: <elt><b>b</b>toto<b>bold</b></elt> will be output as <elt> <b>b</b>toto<b>bold</b></elt> if you flush the twig when you find the C<< <b> >> element =back =head1 Globals These are the things that can mess up calling code, especially if threaded. They might also cause problem under mod_perl. =over 4 =item Exported constants Whether you want them or not you get them! These are subroutines to use as constant when creating or testing elements PCDATA return '#PCDATA' CDATA return '#CDATA' PI return '#PI', I had the choice between PROC and PI :--( =item Module scoped values: constants these should cause no trouble: %base_ent= ( '>' => '>', '<' => '<', '&' => '&', "'" => ''', '"' => '"', ); CDATA_START = "<![CDATA["; CDATA_END = "]]>"; PI_START = "<?"; PI_END = "?>"; COMMENT_START = "<!--"; COMMENT_END = "-->"; pretty print styles ( $NSGMLS, $NICE, $INDENTED, $INDENTED_C, $WRAPPED, $RECORD1, $RECORD2)= (1..7); empty tag output style ( $HTML, $EXPAND)= (1..2); =item Module scoped values: might be changed Most of these deal with pretty printing, so the worst that can happen is probably that XML output does not look right, but is still valid and processed identically by XML processors. C<$empty_tag_style> can mess up HTML bowsers though and changing C<$ID> would most likely create problems. $pretty=0; # pretty print style $quote='"'; # quote for attributes $INDENT= ' '; # indent for indented pretty print $empty_tag_style= 0; # how to display empty tags $ID # attribute used as an id ('id' by default) =item Module scoped values: definitely changed These 2 variables are used to replace tags by an index, thus saving some space when creating a twig. If they really cause you too much trouble, let me know, it is probably possible to create either a switch or at least a version of XML::Twig that does not perform this optimization. %gi2index; # tag => index @index2gi; # list of tags =back If you need to manipulate all those values, you can use the following methods on the XML::Twig object: =over 4 =item global_state Return a hashref with all the global variables used by XML::Twig The hash has the following fields: C<pretty>, C<quote>, C<indent>, C<empty_tag_style>, C<keep_encoding>, C<expand_external_entities>, C<output_filter>, C<output_text_filter>, C<keep_atts_order> =item set_global_state ($state) Set the global state, C<$state> is a hashref =item save_global_state Save the current global state =item restore_global_state Restore the previously saved (using C<Lsave_global_state>> state =back =head1 TODO =over 4 =item SAX handlers Allowing XML::Twig to work on top of any SAX parser =item multiple twigs are not well supported A number of twig features are just global at the moment. These include the ID list and the "tag pool" (if you use C<change_gi> then you change the tag for ALL twigs). A future version will try to support this while trying not to be to hard on performance (at least when a single twig is used!). =back =head1 AUTHOR Michel Rodriguez <mirod@cpan.org> =head1 LICENSE This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Bug reports should be sent using: F<RT L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=XML-Twig>> Comments can be sent to mirod@cpan.org The XML::Twig page is at L<http://www.xmltwig.org/xmltwig/> It includes the development version of the module, a slightly better version of the documentation, examples, a tutorial and a: F<Processing XML efficiently with Perl and XML::Twig: L<http://www.xmltwig.org/xmltwig/tutorial/index.html>> =head1 SEE ALSO Complete docs, including a tutorial, examples, an easier to use HTML version of the docs, a quick reference card and a FAQ are available at L<http://www.xmltwig.org/xmltwig/> git repository at L<http://github.com/mirod/xmltwig> L<XML::Parser>, L<XML::Parser::Expat>, L<XML::XPath>, L<Encode>, L<Text::Iconv>, L<Scalar::Utils> =head2 Alternative Modules XML::Twig is not the only XML::Processing module available on CPAN (far from it!). The main alternative I would recommend is L<XML::LibXML>. Here is a quick comparison of the 2 modules: XML::LibXML, actually C<libxml2> on which it is based, sticks to the standards, and implements a good number of them in a rather strict way: XML, XPath, DOM, RelaxNG, I must be forgetting a couple (XInclude?). It is fast and rather frugal memory-wise. XML::Twig is older: when I started writing it XML::Parser/expat was the only game in town. It implements XML and that's about it (plus a subset of XPath, and you can use XML::Twig::XPath if you have XML::XPathEngine installed for full support). It is slower and requires more memory for a full tree than XML::LibXML. On the plus side (yes, there is a plus side!) it lets you process a big document in chunks, and thus let you tackle documents that couldn't be loaded in memory by XML::LibXML, and it offers a lot (and I mean a LOT!) of higher-level methods, for everything, from adding structure to "low-level" XML, to shortcuts for XHTML conversions and more. It also DWIMs quite a bit, getting comments and non-significant whitespaces out of the way but preserving them in the output for example. As it does not stick to the DOM, is also usually leads to shorter code than in XML::LibXML. Beyond the pure features of the 2 modules, XML::LibXML seems to be preferred by "XML-purists", while XML::Twig seems to be more used by Perl Hackers who have to deal with XML. As you have noted, XML::Twig also comes with quite a lot of docs, but I am sure if you ask for help about XML::LibXML here or on Perlmonks you will get answers. Note that it is actually quite hard for me to compare the 2 modules: on one hand I know XML::Twig inside-out and I can get it to do pretty much anything I need to (or I improve it ;--), while I have a very basic knowledge of XML::LibXML. So feature-wise, I'd rather use XML::Twig ;--). On the other hand, I am painfully aware of some of the deficiencies, potential bugs and plain ugly code that lurk in XML::Twig, even though you are unlikely to be affected by them (unless for example you need to change the DTD of a document programmatically), while I haven't looked much into XML::LibXML so it still looks shinny and clean to me. That said, if you need to process a document that is too big to fit memory and XML::Twig is too slow for you, my reluctant advice would be to use "bare" XML::Parser. It won't be as easy to use as XML::Twig: basically with XML::Twig you trade some speed (depending on what you do from a factor 3 to... none) for ease-of-use, but it will be easier IMHO than using SAX (albeit not standard), and at this point a LOT faster (see the last test in L<http://www.xmltwig.org/article/simple_benchmark/>). =cut �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������XML-Twig-3.52/README��������������������������������������������������������������������������������0000644�0001750�0001750�00000006256�12732215763�014156� 0����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������NAME XML::Twig - Tree interface to XML documents allowing processing chunk by chunk of huge documents. SUMMARY (see perldoc XML::Twig for full details) XML::Twig is (yet another!) XML transformation module. Its strong points: can be used to process huge documents while still being in tree mode; not bound by DOM or SAX, so it is very perlish and offers a very comprehensive set of methods; simple to use; DWIMs as much as possible What it doesn't offer: full SAX support (it can export SAX, but only reads XML), full XPath support (unless you use XML::Twig::XPath), nor DOM support. Other drawbacks: it is a big module, and with over 500 methods available it can be a bit overwhelming. A good starting point is the tutorial at http://xmltwig.org/xmltwig/tutorial/index.html. In fact the whole XML::Twig page at http://xmltwig.org/xmltwig/ has plenty of information to get you started with XML::Twig TOOLS XML::Twig comes with a few tools built on top of it: xml_pp XML pretty printer xml_grep XML grep - grep XML files using XML::Twig's subset of XPath xml_split split big XML files xml_merge merge back files created by xml_split xml_spellcheck spellcheck XML files skipping tags Running perl Makefile.PL will prompt you for each tool installation. perl Makefile.PL -y will install all of the tools without prompt perl Makefile.PL -n will skip the installation of the tools SYNOPSYS single-tree mode my $t= XML::Twig->new(); $t->parsefile( 'doc.xml'); $t->print; chunk mode # print the document, at most one full section is loaded in memory my $t= XML::Twig->new( twig_handlers => { section => \&flush}); $t->parsefile( 'doc.xml'); $t->flush; sub flush { (my $twig, $section)= @_; $twig->flush; } sub-tree mode # print all section title's in the document, # all other elements are ignored (and not stored) my $t= XML::Twig->new( twig_roots => { 'section/title' => sub { $_->print, "\n" } } ); $t->parsefile( 'doc.xml'); INSTALLATION perl Makefile.PL make make test make install DEPENDENCIES XML::Twig needs XML::Parser (and the expat library) installed Modules that can enhance XML::Twig are: Scalar::Util or WeakRef to avoid memory leaks Encode or Text::Iconv or Unicode::Map8 and Unicode::Strings to do encoding conversions Tie::IxHash to use the keep_atts_order option XML::XPathEngine to use XML::Twig::XPath LWP to use parseurl HTML::Entities to use the html_encode filter HTML::TreeBuilder to process HTML instead of XML CHANGES See the Changes file AUTHOR Michel Rodriguez (mirod@cpan.org) The Twig page is at http://www.xmltwig.org/xmltwig git project repository: http://github.com/mirod/xmltwig See the XML::Twig tutorial at http://www.xmltwig.org/xmltwig/tutorial/index.html COPYRIGHT Copyright (c) 1999-2012, Michel Rodriguez. All Rights Reserved. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������XML-Twig-3.52/Twig/���������������������������������������������������������������������������������0000755�0001750�0001750�00000000000�13015347632�014173� 5����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������XML-Twig-3.52/Twig/XPath.pm�������������������������������������������������������������������������0000644�0001750�0001750�00000020570�13015347616�015563� 0����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# $Id: /xmltwig/trunk/Twig/XPath.pm 32 2008-01-18T13:11:52.128782Z mrodrigu $ package XML::Twig::XPath; use strict; use warnings; use XML::Twig; my $XPATH; # XPath engine (XML::XPath or XML::XPathEngine); my $XPATH_NUMBER; # <$XPATH>::Number, the XPath number class BEGIN { foreach my $xpath_engine ( qw( XML::XPathEngine XML::XPath) ) { if( XML::Twig::_use( $xpath_engine) ) { $XPATH= $xpath_engine; last; } } unless( $XPATH) { die "cannot use XML::Twig::XPath: neither XML::XPathEngine 0.09+ nor XML::XPath are available"; } $XPATH_NUMBER= "${XPATH}::Number"; } use vars qw($VERSION); $VERSION="0.02"; BEGIN { package # hide from PAUSE XML::XPath::NodeSet; no warnings; # to avoid the "Subroutine sort redefined" message # replace the native sort routine by a Twig'd one sub sort { my $self = CORE::shift; @$self = CORE::sort { $a->node_cmp( $b) } @$self; return $self; } package # hide from PAUSE XML::XPathEngine::NodeSet; no warnings; # to avoid the "Subroutine sort redefined" message # replace the native sort routine by a Twig'd one sub sort { my $self = CORE::shift; @$self = CORE::sort { $a->node_cmp( $b) } @$self; return $self; } } package XML::Twig::XPath; use base 'XML::Twig'; my $XP; # the global xp object; sub to_number { return $XPATH_NUMBER->new( $_[0]->root->text); } sub new { my $class= shift; my $t= XML::Twig->new( elt_class => 'XML::Twig::XPath::Elt', @_); $t->{twig_xp}= $XPATH->new(); bless $t, $class; return $t; } sub set_namespace { my $t= shift; $t->{twig_xp}->set_namespace( @_); } sub set_strict_namespaces { my $t= shift; $t->{twig_xp}->set_strict_namespaces( @_); } sub node_cmp($$) { return $_[1] == $_[0] ? 0 : -1; } # document is before anything but itself sub isElementNode { 0 } sub isAttributeNode { 0 } sub isTextNode { 0 } sub isProcessingInstructionNode { 0 } sub isPINode { 0 } sub isCommentNode { 0 } sub isNamespaceNode { 0 } sub getAttributes { [] } sub getValue { return $_[0]->root->text; } sub findnodes { my( $t, $path)= @_; return $t->{twig_xp}->findnodes( $path, $t); } sub findnodes_as_string { my( $t, $path)= @_; return $t->{twig_xp}->findnodes_as_string( $path, $t); } sub findvalue { my( $t, $path)= @_; return $t->{twig_xp}->findvalue( $path, $t); } sub exists { my( $t, $path)= @_; return $t->{twig_xp}->exists( $path, $t); } sub find { my( $t, $path)= @_; return $t->{twig_xp}->find( $path, $t); } sub matches { my( $t, $path, $node)= @_; $node ||= $t; return $t->{twig_xp}->matches( $node, $path, $t) || 0; } sub getNamespaces { $_[0]->root->getNamespaces(); } #TODO: it would be nice to be able to pass in any object in this #distribution and cast it to the proper $XPATH class to use as a #variable (via 'nodes' argument or something) sub set_var { my ($t, $name, $value) = @_; if( ! ref $value) { $value= $t->findnodes( qq{"$value"}); } $t->{twig_xp}->set_var($name, $value); } 1; # adds the appropriate methods to XML::Twig::Elt so XML::XPath can be used as the XPath engine package XML::Twig::XPath::Elt; use base 'XML::Twig::Elt'; *getLocalName= *XML::Twig::Elt::local_name; *getValue = *XML::Twig::Elt::text; sub isAttributeNode { 0 } sub isNamespaceNode { 0 } sub to_number { return $XPATH_NUMBER->new( $_[0]->text); } sub getAttributes { my $elt= shift; my $atts= $elt->atts; # alternate, faster but less clean, way my @atts= map { bless( { name => $_, value => $atts->{$_}, elt => $elt }, 'XML::Twig::XPath::Attribute') } sort keys %$atts; # my @atts= map { XML::Twig::XPath::Attribute->new( $elt, $_) } sort keys %$atts; return wantarray ? @atts : \@atts; } sub getNamespace { my $elt= shift; my $prefix= shift() || $elt->ns_prefix; if( my $expanded= $elt->namespace( $prefix)) { return XML::Twig::XPath::Namespace->new( $prefix, $expanded); } else { return XML::Twig::XPath::Namespace->new( $prefix, ''); } } # returns namespaces declared in the element sub getNamespaces #_get_namespaces { my( $elt)= @_; my @namespaces; foreach my $att ($elt->att_names) { if( $att=~ m{^xmlns(?::(\w+))?$}) { my $prefix= $1 || ''; my $expanded= $elt->att( $att); push @namespaces, XML::Twig::XPath::Namespace->new( $prefix, $expanded); } } return wantarray() ? @namespaces : \@namespaces; } sub node_cmp($$) { my( $a, $b)= @_; if( UNIVERSAL::isa( $b, 'XML::Twig::XPath::Elt')) { # 2 elts, compare them return $a->cmp( $b); } elsif( UNIVERSAL::isa( $b, 'XML::Twig::XPath::Attribute')) { # elt <=> att, compare the elt to the att->{elt} # if the elt is the att->{elt} (cmp return 0) then -1, elt is before att return ($a->cmp( $b->{elt}) ) || -1 ; } elsif( UNIVERSAL::isa( $b, 'XML::Twig::XPath')) { # elt <=> document, elt is after document return 1; } else { die "unknown node type ", ref( $b); } } sub getParentNode { return $_[0]->_parent || $_[0]->twig; } sub findnodes { my( $elt, $path)= @_; return $elt->twig->{twig_xp}->findnodes( $path, $elt); } sub findnodes_as_string { my( $elt, $path)= @_; return $elt->twig->{twig_xp}->findnodes_as_string( $path, $elt); } sub findvalue { my( $elt, $path)= @_; return $elt->twig->{twig_xp}->findvalue( $path, $elt); } sub exists { my( $elt, $path)= @_; return $elt->twig->{twig_xp}->exists( $path, $elt); } sub find { my( $elt, $path)= @_; return $elt->twig->{twig_xp}->find( $path, $elt); } sub matches { my( $elt, $path)= @_; return $elt->twig->{twig_xp}->matches( $elt, $path, $elt->getParentNode) || 0; } 1; # this package is only used to allow XML::XPath as the XPath engine, otherwise # attributes are just attached to their parent element and are not considered objects package XML::Twig::XPath::Attribute; sub new { my( $class, $elt, $att)= @_; return bless { name => $att, value => $elt->att( $att), elt => $elt }, $class; } sub getValue { return $_[0]->{value}; } sub getName { return $_[0]->{name} ; } sub getLocalName { (my $name= $_[0]->{name}) =~ s{^.*:}{}; $name; } sub string_value { return $_[0]->{value}; } sub to_number { return $XPATH_NUMBER->new( $_[0]->{value}); } sub isElementNode { 0 } sub isAttributeNode { 1 } sub isNamespaceNode { 0 } sub isTextNode { 0 } sub isProcessingInstructionNode { 0 } sub isPINode { 0 } sub isCommentNode { 0 } sub toString { return qq{$_[0]->{name}="$_[0]->{value}"}; } sub getNamespace { my $att= shift; my $prefix= shift(); if( ! defined( $prefix)) { if($att->{name}=~ m{^(.*):}) { $prefix= $1; } else { $prefix=''; } } if( my $expanded= $att->{elt}->namespace( $prefix)) { return XML::Twig::XPath::Namespace->new( $prefix, $expanded); } } sub node_cmp($$) { my( $a, $b)= @_; if( UNIVERSAL::isa( $b, 'XML::Twig::XPath::Attribute')) { # 2 attributes, compare their elements, then their name return ($a->{elt}->cmp( $b->{elt}) ) || ($a->{name} cmp $b->{name}); } elsif( UNIVERSAL::isa( $b, 'XML::Twig::XPath::Elt')) { # att <=> elt : compare the att->elt and the elt # if att->elt is the elt (cmp returns 0) then 1 (elt is before att) return ($a->{elt}->cmp( $b) ) || 1 ; } elsif( UNIVERSAL::isa( $b, 'XML::Twig::XPath')) { # att <=> document, att is after document return 1; } else { die "unknown node type ", ref( $b); } } *cmp=*node_cmp; 1; package XML::Twig::XPath::Namespace; sub new { my( $class, $prefix, $expanded)= @_; bless { prefix => $prefix, expanded => $expanded }, $class; } sub isNamespaceNode { 1; } sub getPrefix { $_[0]->{prefix}; } sub getExpanded { $_[0]->{expanded}; } sub getValue { $_[0]->{expanded}; } sub getData { $_[0]->{expanded}; } sub node_cmp($$) { my( $a, $b)= @_; if( UNIVERSAL::isa( $b, 'XML::Twig::XPath::Namespace')) { # 2 attributes, compare their elements, then their name return $a->{prefix} cmp $b->{prefix}; } else { die "unknown node type ", ref( $b); } } *cmp=*node_cmp; 1 ����������������������������������������������������������������������������������������������������������������������������������������XML-Twig-3.52/Makefile.PL���������������������������������������������������������������������������0000644�0001750�0001750�00000013010�12732215763�015232� 0����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# $Id: /xmltwig/trunk/Makefile.PL 33 2008-04-30T08:03:41.004487Z mrodrigu $ # tool installation part shamelessly lifted from YAML's Makefile.PL use ExtUtils::MakeMaker; my @prompts=( [ xml_pp => y => "XML pretty printer" ], [ xml_grep => y => "XML grep - grep XML files using XML::Twig's subset of XPath" ], [ xml_split => y => "split big XML files" ], [ xml_merge => y => "merge back files created by xml_split" ], [ xml_spellcheck => y => "spellcheck XML files skipping tags" ], ); my @programs; my $opt= $ARGV[0] ? $ARGV[0] : ''; if( $opt eq "-n") { @programs=(); } elsif( $opt eq "-y") { @programs= map { $_->[0] } @prompts; } elsif( $opt eq "-d") { @programs= map { $_->[0] if( $_->[1] eq 'y') } @prompts; } elsif( $ENV{AUTOMATED_TESTING} || $ENV{NONINTERACTIVE_TESTING}) { @programs=(); } else { print "run 'perl Makefile.PL -y' to install all tools,\n", " 'perl Makefile.PL -n' to skip installation\n"; foreach my $prompt (@prompts) { my ($program, $default, $description) = @$prompt; if( prompt("Do you want to install '$program' ($description)?", $default) =~ /^y/i) { push(@programs, $program); } } } MyWriteMakefile( META_MERGE => { resources => { repository => 'http://github.com/mirod/xmltwig', }, }, META_ADD => { prereqs => { build => { requires => { 'ExtUtils::MakeMaker' => "0", } }, configure => { requires => { 'ExtUtils::MakeMaker' => "0", } }, test => { recommends => { 'Test' => '1.25_02', 'IO::Scalar' => '2.110', 'IO::CaptureOutput' => '1.1102', }, suggests => { 'Test::Pod' => '1.45', 'XML::Simple' => '2.18', 'XML::Handler::YAWriter' => '0.23', 'XML::SAX::Writer' => '0.53', 'XML::Filter::BufferText' => '1.01', }, }, runtime => { requires => { 'XML::Parser' => '2.23', }, recommends => { 'Scalar::Util' => '1.23', 'Encode' => '2.42_01', 'XML::XPathEngine' => '0.13', }, suggests => { 'LWP' => '6.04', 'HTML::TreeBuilder' => '4.2', 'HTML::Entities::Numbered' => '0.04', 'HTML::Tidy' => '1.50', 'HTML::Entities' => '3.69', 'Tie::IxHash' => '1.22', 'Text::Wrap' => '2009.0305', }, } } }, #BUILD_REQUIRES => { #}, NAME => 'XML::Twig', ABSTRACT => 'XML, The Perl Way', AUTHOR => 'Michel Rodriguez <mirod@cpan.org>', LICENSE => 'perl', EXE_FILES => [ map {"tools/$_/$_"} @programs], VERSION_FROM => 'Twig.pm', PREREQ_PM => { 'XML::Parser' => '2.23' }, dist => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', }, depend => { 'Twig.pm' => "FORCE\n\t\$(PERL) speedup Twig_pm.slow > Twig.pm\n\t\$(PERL) -i_bak -p filter_for_5.005 Twig.pm Twig/XPath.pm\n\t\$(PERL) check_optional_modules", 'FORCE' => '', }, ); sub MyWriteMakefile { #Written by Alexandr Ciornii, version 0.21. Added by eumm-upgrade. my %params=@_; my $eumm_version=$ExtUtils::MakeMaker::VERSION; $eumm_version=eval $eumm_version; die "EXTRA_META is deprecated" if exists $params{EXTRA_META}; die "License not specified" if not exists $params{LICENSE}; if ($params{BUILD_REQUIRES} and $eumm_version < 6.5503) { #EUMM 6.5502 has problems with BUILD_REQUIRES $params{PREREQ_PM}={ %{$params{PREREQ_PM} || {}} , %{$params{BUILD_REQUIRES}} }; delete $params{BUILD_REQUIRES}; } delete $params{CONFIGURE_REQUIRES} if $eumm_version < 6.52; delete $params{MIN_PERL_VERSION} if $eumm_version < 6.48; delete $params{META_MERGE} if $eumm_version < 6.46; delete $params{META_ADD} if $eumm_version < 6.46; delete $params{LICENSE} if $eumm_version < 6.31; delete $params{AUTHOR} if $] < 5.005; delete $params{ABSTRACT_FROM} if $] < 5.005; delete $params{BINARY_LOCATION} if $] < 5.005; WriteMakefile(%params); add_prereqs_to_mymeta( $params{META_ADD}->{prereqs}); } sub add_prereqs_to_mymeta { my $prereqs= shift; my $MYJSON= 'MYMETA.json'; my $MYYAML= 'MYMETA.yml'; my $JSON = 'META.json'; my $YAML = 'META.yml'; rename $MYYAML, $YAML; if( eval { require JSON; }) { my $json= JSON->new()->pretty->canonical; if( my $meta= eval { $json->decode( slurp( -s $MYJSON ? $MYJSON : $JSON )); }) { $meta->{prereqs}= $prereqs; spit( $JSON, $json->encode( $meta)); warn "updated prereqs in $JSON\n"; } } } sub slurp { my( $file)= @_; my $in; open( $in, "<$file") or return ''; # can't use fancy open so this works in 5.005 local undef $/; return <$in>; } sub spit { my $file= shift; my $out; open( $out, ">$file") or ( warn "cannot update $file: $!" && return); print {$out} @_; } ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������XML-Twig-3.52/META.yml������������������������������������������������������������������������������0000664�0001750�0001750�00000001135�13015347632�014534� 0����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������--- abstract: 'XML, The Perl Way' author: - 'Michel Rodriguez <mirod@cpan.org>' build_requires: ExtUtils::MakeMaker: '0' configure_requires: ExtUtils::MakeMaker: '0' dynamic_config: 1 generated_by: 'ExtUtils::MakeMaker version 7.1001, CPAN::Meta::Converter version 2.150005' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: XML-Twig no_index: directory: - t - inc requires: XML::Parser: '2.23' resources: repository: http://github.com/mirod/xmltwig version: '3.52' x_serialization_backend: 'CPAN::Meta::YAML version 0.018' �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������XML-Twig-3.52/check_optional_modules����������������������������������������������������������������0000644�0001750�0001750�00000001711�12732215763�017722� 0����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/bin/perl -w # $Id: /xmltwig/trunk/check_optional_modules 4 2007-03-16T12:16:25.259192Z mrodrigu $ use strict; exit if( $] >= 5.008); if( $] >= 5.0060) { unless( eval 'require Scalar::Util' or eval 'require WeakRef' ) { warn "Neither Scalar::Util nor WeakRef is installed. ", "Installing one of these modules would improve ", "XML::Twig memory management and eliminate memory ", "leaks when re-using twigs.\n"; } else { warn "weaken is available\n"; } } unless( eval 'require Text::Iconv') { my $version= `iconv -V` || ''; if($version) { warn "The iconv library was found on your system ", "but the Text::Iconv module is not installed. ", "Installing Text::Iconv would make character ", "encoding translations fast and efficient.\n"; } else { warn "Did not find iconv\n"; } } else { warn "Text::Iconv is installed\n"; } �������������������������������������������������������XML-Twig-3.52/Changes�������������������������������������������������������������������������������0000644�0001750�0001750�00000162424�13015347442�014564� 0����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������CHANGES 3.52 - 2016-11-23 - minor maintenance release - fixed: the previous fix was buggy... 3.51 - 2016-11-23 - minor maintenance release - fixed: failing tests when XML::XPathEngine and XML::XPath not available 3.50 - 2016-11-22 - minor maintenance release - added: the no_xxe option to XML::Twig::new, which causes the parse to fail if external entities are used (to prevent malicious XML to access the filesystem). See RT#118097 https://rt.cpan.org/Public/Bug/Display.html?id=118097 - fixed: warning (and soon error) due to unescaped literal left braces in regular expressions in the code generating Twig.pm reported by trwyant https://github.com/mirod/xmltwig/issues/26 - fixed: (partial fix) implement getNamespaces in XML::Twig::XPath::Elt the expression doesn't crash the code, but doesn't return anything interesting (yet) reported by Nathan Glenn https://github.com/mirod/xmltwig/issues/12 - fixed: various spelling mistakes https://github.com/mirod/xmltwig/pull/24 thanks to James McCoy for the patch - git repo cleanup, thanks to mjg17 3.49 - 2015-04-12 - minor maintenance release - added: the DTD_base option to XML::Twig new, that forces XML::Twig to look for the DTD in a given directory thanks to Arun lakhana for the idea - Prevent PAUSE from trying to index packages that are only used for monkey patching (to re-use XML::XPath as the XPath engine for XML::Twig::XPath). Will also prevent UNAUTHORIZED flag on metacpan. patch sent by Graham Knop - fixed: RT # 96009 keep_atts_order => 0 behaviour. Spotted by Dolmen https://rt.cpan.org/Public/Bug/Display.html?id=96009 - fixed: bug RT #97461 https://rt.cpan.org/Public/Bug/Display.html?id=97461 wrong error message was returned calling parse on an invalid filehandle Thanks to Slaven Rezic for the bug report and test case - COMPATIBILITY WARNING fixed: bug RT # inconsistency between simplify and XML::Simple for empty elements (including elements with start and end tags but no contents) the XML::Simple behaviour is to map them to an empty hash, not an empty/undef scalar (depending of whether the element is a PCDATA or not) as was the case in previous versions of the module. This has the potential to break some existing code, but simplify should be strictly the same as XML::Simple's XMLin Thanks to Vangelis Katsikaros for the bug report and test case 3.48 - 2014-03-30 - minor maintenance release - fixed: tests 3.47 - 2014-03-27 - minor maintenance release - fixed: missing entities when parsing HTML RT #93604 https://rt.cpan.org/Public/Bug/Display.html?id=93604 - fixed: tests failed when using a version of HTML::TreeBuilder with a non-numeric version - fixed in twig_handlers, '=' in regexps on attributes are turned into 'eq' RT #94295 https://rt.cpan.org/Public/Bug/Display.html?id=94295 3.46 - 2014-03-05 - minor maintenance release - fixed: test failed on Windows 3.45 - 2014-02-27 - minor maintenance release - fixed: link to idented_a format description RT #85400 https://rt.cpan.org/Public/Bug/Display.html?id=85400 fixed by Martin McGrath - fixed: code that gave a warning in 5.19.9 - fixed: RT #86651 https://rt.cpan.org/Ticket/Display.html?id=86773 xml_pp, quote not escaped in attribute values - fixed: various typos in docs RT#87660 thanks to David Steinbrunner - fixed: RT #86773 https://rt.cpan.org/Ticket/Display.html?id=86773 CDATA sections in HTML were not properly escaped when using the (default) HTML::TreeBuilder conversion spotted by Marco Pessotto - fixed: RT #85933 https://rt.cpan.org/Ticket/Display.html?id=85933 quotes in attributes were not properly escaped spotted by Arun Lakhana - added: docs for tools and safe_print_to_file - added: support for XPath variables thanks to Nathan Glenn for the initial implementation - updated: Changes to conform to CPAN::Changes + test 3.44 - 2013-05-13 - minor maintenance release - added: XML::Twig::Elt new method now acccepts literal content, eg my $e= XML::Twig::Elt->new( '<div><p>foo</p><p>bar</p></div>'); - added: twig handler triggers now accept the syntax <tag>#<id> use *#<id> if you don't want to specify the tag name (#<id> would not work since this is the syntax for "private" elements, this makes it ugly, but is due to the fact that when I started working on XML::Twig CSS wasn't really around) - fixed: merge had some problems dealing with embedded comments - improved: more tests - improved: make Changes conform to the CPAN::Changes spec 3.43 - 2013-02-31 - minor maintenance release - improved: docs for parse, see RT #78877 https://rt.cpan.org/Ticket/Display.html?id=78877 - fixed: xml_pp -i now preserves the permissions of the original file, see RT #81165 https://rt.cpan.org/Ticket/Display.html?id=81165 reported by Alberto Simoes - fixed: RT #80503 Newlines in attribute values https://rt.cpan.org/Ticket/Display.html?id=80503 reported (and explained) by Ambrus Zsban: \r, \n and \n explicitely set in attribute values should be escaped (with &#x<nb>;) when output 3.42 - 2012-11-06 - minor maintenance release - fixed: bug, elements created with XML::Twig::Elt->parse were garbage collected too early, see http://stackoverflow.com/questions/13263193/xmltwig-changes-erased - added: some tests 3.41 - 2012-08-08 - minor maintenance release - fixed: META.json generation 3.40 - 2012-05-10 - minor maintenance release - added: support for alternations ('|') at the top level of handler triggers and navigation you can now have twig_handlers => { 't1|t2' => \&handler } and $elt->children( 't1|t2') - added: the discard_all_spaces option that discards more aggressively non-significant white spaces see RT #71164 https://rt.cpan.org/Ticket/Display.html?id=71164 - fixed: used $[ instead of $] in 3 tests, see RT#72765 https://rt.cpan.org/Ticket/Display.html?id=72765 reported by Kevin Ryde - fixed: did not use Text::Wrap with indented_c see RT #71375 https://rt.cpan.org/Ticket/Display.html?id=71375 reported and patch provided by Martin Str?mberg - fixed: doc change for XML::Twig::Elt flush, see RT #72279 https://rt.cpan.org/Ticket/Display.html?id=72279 - fixed: replaced HTML::TreeBuilder::as_XML with am XML::Twig specific version, to avoid bugs in the original version and improve stability of the output 3.39 - 2011-09-22 - minor maintenance release - fixed: xml_pp -i would blank all files after the first one thanks to dvercande for spotting this - added: findvalues method (XML::Twig and XML::Twig::Elt) same as findvalue except that it returns an array of value - added: the output_html_doctype option to XML::Twig::new, that outputs the DOCTYPE declaration for HTML docs converted by HTML::TreeBuilder (fixing it if necessary) see RT #71009: https://rt.cpan.org/Ticket/Display.html?id=71009 - fixed: t/test_autoencoding_conversion.t failed with $PERL_UNICODE set to SA* (which prevents autoconversion) reported by Martin J Evans, RT #71084 https://rt.cpan.org/Ticket/Display.html?id=71084 3.38 - 2011-02-27 - minor maintenance release - fixed: RT 65865: _ should be allowed at the start on an XML name https://rt.cpan.org/Ticket/Display.html?id=65865 reported by Steve Prokopowich 3.37 - 2010-10-08 - minor maintenance release - fixed: more tests fixed for HTML::TreeBuilder, hopefully will pass now - changed: making att and class lvalues created problems: in certain context they made regular calls to the method create empty attributes. I could find no satisfactory fix,they were either incompletes, or to complex for often used methods. So att and class are back to being regular, non l-value methods. latt and lclass are the l-value versions. - added: documented the -html option for xml_grep, that allows processing HTML input - added: the -Tidy option to xml_grep, that uses HTML::Tidy to convert HTML to XML 3.36 - 2010-10-07 - minor maintenance release - added: the use_tidy option to XML::Twig->new, which uses HTML::Tidy to convert HTML to well-formed XHTML, as an alternative to the default conversion which uses HTML::TreeBuilder - added: XML::Twig::Elt method siblings which returns the siblings of the element - added: methods att_accessors, elt_accessors and field_accessor as well as the similarly named options when creating an XML::Twig - added: set_outer_xml XML::Twig::Elt method - added: print_to_file on an XML::Twig::Elt - added: can use the tag[nested] form in twig handlers that triggers on elements 'tag' that include a child 'nested' - added: aliased the add_to_class XML::Twig::Elt method to add_class, which seems more natural - added: the remove_class method - added: made att and class lvalues (in perl 5.6 and up) - fixed: copy did not copy the empty status of an element RT#31664 spotted by Roland Minner https://rt.cpan.org/Ticket/Display.html?id=31664 - fixed: cut_children would always set the empty status of an element, even if it had children left - fixed: tests did not pass with HTML::TreeBuilder 3.23_1 due to a change in an error message 3.35 - 2010-05-15 - minor maintenance release - added: the by_file option to xml_grep that limits the number of hits per file - added: allowed the text of ignored elements to be buffered in a string - fixed: comments need to be escaped (you can't have 2 '-' in a row), RT#57389 spotted by Konstantin Tchernov https://rt.cpan.org/Ticket/Display.html?id=57389 - fixed: after $elt->cut_children, $elt->empty is false RT#54570 spotted and patched by Andrew Pimlott https://rt.cpan.org/Ticket/Display.html?id=54570 - fixed: documented the fact that latin1 is ISO-8859-15, see RT#37431 https://rt.cpan.org/Ticket/Display.html?id=37431 3.34 - 2010-01-18 - minor maintenance release, test suite fixes - fixed: tests failed when XML::XPath was used as the XPath engine 3.33 - 2010-01-15 - minor maintenance release, bug fixes - added: XML::Twig::Elt method att_exists which returns true if the attribute exists in the XML - added: XML::Twig::Elt method lc_attnames which lower cases the names of all the attribute of the element - added: better error message if find_nodes or get_xpath are called instead of findnodes when using XML::Twig::XPath (suggested by Zed Pobre) - added: indented_close_tag pretty_print option (suggested by H.Merijn Brand) - added: RT #49692 xml_split test on win 32 systems. Patch sent through RT http://rt.cpan.org/Ticket/Display.html?id=49692 - added: using position selector (eg foo[2]) in handler triggers now raises an error, spotted by Selvakumar - added: you can use css like selectors for class in navigation: 'p.title' will select p elements with a class that contains title. In order to preserve backward compatibility and to allow the use of elements with a dot in their name, if there are already parsed elements with a tag name of 'p.title' then they will be selected instead - added: you can also use css class selectors in trigger handlers. - fixed: avoids expat (and XML::Parser) "Ran out of memory for input buffer" error, and instead reports an "empty file" error (and does not attempt to parse the file). - fixed: RT #51432 attributes containing quote character don't escape properly found, and patch provided by Jeremy Kahn https://rt.cpan.org/Ticket/Display.html?id=51432 - fixed: RT #48616 handler condition of foo/* crashed the module reported by Osfameron http://rt.cpan.org/Public/Bug/Display.html?id=48616 - fixed: xml_grep bug: warning when --count is used and no match is found https://rt.cpan.org/Ticket/Display.html?id=33269 found by Hermann Peifer - fixed: xml_split bug when using an XML declaration and a utf8 encoding. Spotted by Chris Price. - fixed: xml_pp bug, pod2text command to display help was not properly quoted. Spotted by Chris Price. - fixed: failing tests when LWP::UserAgent is not available - fixed: RT #41147: use of uninitialized value in eval when attribute isn't found reported by Zed Pobre http://rt.cpan.org/Ticket/Display.html?id=41147 - fixed: memory leak when the XML included id's - fixed: XML::Twig::Elt->set_content fails when argument is 'XML::Twig::Elt' (or the name of a subclass of XML::Twig::Elt) http://rt.cpan.org/Ticket/Display.html?id=40399 - fixed: bug RT #39849, set_output_encoding( 'utf-8') did not work quite right on filehandles that were already open in >:utf-8 mode spotted by Zed Pobre http://rt.cpan.org/Ticket/Display.html?id=39849 - fixed: xml_pp now accepts all formating options available in XML::Twig - fixed: RT #31664, element attributes are not preserving their order when using elt->copy spotted, and fix provided by jbubbabrown - fixed: RT #31832, wrapped link to xmltwig.com in L< > tag in the doc spotted by Slaven Srezic http://rt.cpan.org/Ticket/Display.html?id=31832 - fixed: RT #31833 doc fix, spotted by Slaven Srezic - fixed: Makefile.PL doesn't nag the poor tester anymore when running with $AUTOMATED_TESTING set - fixed: bug calling set_text when using XML::Twig::XPath, spotted by Ted Sung - fixed: improved speed when parsing big elements, RT#35672, reported by Seth Viebrock (fi is to explicitely return null from the character handler, instead of the text already parsed... a few hundred thousand times) http://rt.cpan.org/Ticket/Display.html?id=35672 - fixed: RT #47257, minor doc bug, spotted by David Steinbrunner http://rt.cpan.org/Ticket/Display.html?id=47257 - fixed: bug in navigation conditions of the form elt[text()=~ /text with 'or' or 'and'/] - improved: speed, somewhat -improved: put the project on github: http://github.com/mirod/xmltwig 3.32 - 2007-11-13 - minor maintenance release with a bug fix - fixed: change to the regexp that parses XPath-like conditions so it can accept leading non-ascii letters ([^\W\d] does not work), not used in perl 5.005 - fixed: set use utf8 (except in 5.005), which gets rid of the dreaded "SWASHNEW" error in 5.6.*, fixed things that then broke in 5.6. 3.31 - 2007-11-07 - minor maintenance release, fixing some tests - fixed: fixes to stop tests from failing in various configurations 3.30 - 2007-11-06 - fixed: a couple of bugs in namespace handling, spotted by Shlomo Yonas (see https://rt.cpan.org/Ticket/Display.html?id=27617 and http://www.perlmonks.org/?node_id=624830) - added: the XML::Twig::Elt fields method which returns a list of fields - added: the normalize method in XML::Twig and XML::Twig::Elt, which merge together consecutive pcdata elements. As much as possible (so far after a cut, delete or erase), the twig is kept normalized, eg there are no consecutive #PCDATA elements in it. Suggestion of someone whose name (and emails) I can't find at the moment. - added: the indented_a / cvs format for pretty_print, that makes the output friendly to line-oriented version control tools, as described in http://tinyurl.com/2kwscq (RT #24954). Thanks to Sjur Moshagen for a patch that I adapted to the current version. - fixed: bug RT #25113: system entities were not properly resolved if the XML file was not in the current directory. Thanks to Dave Charness for the patch. - added: the XML::Twig method finish_now that terminates parsing immediately, without checking the rest of the XML. This feature was half suggested by Nick Clayton - added: the -s option to xml_split, which splits when the given size is reached for a file, suggested by Radek Saturka. - added: the -g option to xml_split, which groups elements to be split, suggested and tested by Dhirendra Singh Kholia. - added: the safe_parsefile_html and safe_parseurl_html methods, and a --html option to xml_grep. Suggested by Bill Ricker. - improved: by default xml_grep now skips non well-formed files, the --strict option makes it die when it finds one - fixed: a bunch of bugs in xml_grep - fixed: a warning when using optional modules with a version number that includes an _, spotted and fix suggested by Bill Ricker. - fixed: test failure on cygwin, thanks to Erik Rantapaa for the patch. - fixed: a bunch of typos in docs, RT #25836, spotted and fixed by David Steinbrunner - improved: re-use of XML::Twig objects for repetitive parsing. It looks like it should be OK now , but I am sure I haven't tested all cases yet (especially when DTDs and entities are involved). - improved: HTML parsing; XML::Twig now tries to find the proper encoding for the document (that's not done by HTML::TreeBuilder at the moment). -fixed: XML::Twig::Elt purge and flush methods now only purge/flush up to the element, not up to the current element in the twig (duh!) - fixed: bug in handlers of the form elt[string(subelt)="foo"] and elt[string(subelt)=1] which did not work at all - fixed: bug in parameter entity output, spotted by BenHopkins on perlmonks (see http://www.perlmonks.org/?node_id=618360) - fixed: bug in xml_string: options were not used - improved error reporting for missing SYSTEM entities, including the option to set twig_expand_external_ents to -1, which makes missing SYSTEM entities not fatal, but reports them in $t->{twig_missing_system_entities} Thanks to Frank Wegmann for his suggestions and for testing the various versions of the feature - fixed: internals so new versions of Pod::Coverage won't barf 3.29 - 2007-01-22 - fixed: a bug in the handling of handlers after an ignore (RT #24392, reported by Robert Eden). 3.28 - 2007-01-05 - now builds on Windows and OS2 - improved: refactored the code that triggers handlers, more complex expressions can now be handled, such as '/doc/section[@def="1"]/title' - COMPATIBILITY WARNING Up to version 3.26, you could change the attribute of a parent of a node on which you had a handler, and be able to trigger a handler on that parent node based on the new attribute value: XML::Twig->new( twig_handlers => { 'sect/title' => sub { $_->parent->set_att( has_title => 1)}, 'sect[@has_title="1"]'=> sub { ... }, # called for any sect that has } # a title ); This won't work now. The trigger expression ('sect[@has_title="1"]') is evaluated strictly against the input XML. This is more logical and consistent (if you changed the element name, the new name was never used in the evaluation of the trigger). The only exception to that rule is if you use "private attributes": attributes which name starts with a '#'. By definition this in an invalid XML name, so it can't be in the input, and has to have been created . In that case the code that evaluates the trigger looks at the attribute in the element in the tree in memory (if it exists). So in the example above, if you replace 'has_title' by '#has_title', everything will work fine. Note that private attributes are not output when using the print/sprint/xml_string... methods. - fixed: xml_pp so it does not leave a tempfile and a broken original file all when the original file is not well-formed. - added: the nparse_pp method that does an nparse with pretty_print set to 'indented', nparse_e that sets error_context, and nparse_ppe that does both - added: XML::Twig::Elt tag_to_span and tag_to_div methods (turn an element into a span/div and set its class to the old tag name) - added: the quote option for XML::Twig new, which sets the output quote character for attributes ('single' or 'double') - added: the text_only and xml_text_only methods that return the text of the element, but not of the sub-elements. - added: outer_xml method (synonym for sprint) - fixed: bug where entity names were not matched properly (RT #22854, spotted by Bob Faist) - fixed: bug on some DOCTYPE config with twig_print_outside_roots - fixed: bug in set_keep_encoding (the method, not the option). - fixed: bug in simplify: the code attempted to replace variables in attribute values even if no option required it, spotted by Klaus Rush - improved: clean-up and fixed bugs in ignore: the method can now be called from a regular handler (it always could but the docs did not say so, thanks to kudra for noticing this). It can also be called to ignore a parent of the current element. There were bugs there, and the tree was not built properly - added: error message when an XPath query with a leading / is used on a node that does not belong to a whole twig (because it's been cut or because the twig itself went out of scope) - improved: when parsing HTML with error_context set, the HTML is indented, in order to give better error report 3.26 - 2006-07-01 - added: argument to -i in the Makefile to prevent problem in win32 - added: XML::Twig::Elt former_next_sibling, former_prev_sibling and former_parent methods - squashed a memory leak when parsing html (forgot to call delete on the HTML::Tree object) - fixed: bug that caused XML::Twig to hang if there was a syntax error in a predicate (RT#19499, reported by Dan Dascalescu) -improved: made start_tag and end_tag more consistent: they now both return the empty string for comments, PIs... (reported by Dan Dascalescu) - added: parsefile_inplace and parsefile_html_inplace methods (thanks to GrandFather on perlmonks) - added: support to add css stylesheet in the add_stylesheet method (thanks to Georgi Sotirov) - patched tests to work on Win32 - added: set_inner_xml inner_xml and set_inner_html methods 3.25 - 2006-05-10 - patched to work with perl 5.005! - fixed: a bug in xml_pp when pretty printing a file in place in a different file system 3.24 - 2006-05-09 - added: loading the text of entities stored in separate files (using SYSTEM) when the (awfully named!) expand_external_ents option is used. Thanks to jhx for spotting this. - changed: set_cdata, set_pi and set_comment so that if you call them on an element of the wrong kind, everything works as expected, instead of swallowing silently the data. Bug spotted by cmccutcheon - fixed: a whole bunch of things to make the module run and the tests pass on VMS, thanks to Peter (Stig) Edwards who reported bug RT #18655 and provided a patch. - fixed: bug on get_xpath( '/root[1]') expressions, RT #18789 spotted by memfrob. - added: the add_stylesheet method, that... adds a stylesheet (xsl type is supported, let me know if other types are needed) to a document. - improved: allowed pasting PI/Comment elements before or after the root of a document (see discussion at http://perlmonks.org/index.pl?node_id=538550). Thanks to rogue90 for noticing the problem, and to Tanktalus for finding the best way to solve it. - added: aliased unwrap to erase (ie added the unwrap method to XML::Twig::Elt, identical to the existing erase) suggested by Chris Burbridge. - fixed: bug RT #17522: flushing twice at the end of the the parse would output the last fragment twice. Spotted by Harco de Hilster. - fixed: bug RT #17500: parsing a pipe when using the UTF8 perlIO layer (through PERL_UNICODE or -C) now raises an error, found by Nikolaus Rath. cwimproved: made the tests pass when the UTF8 perlIO layer is used. At this point potential problems when parsing non-UTF8 XML in this configuration are not trapped. 3.23 - 2006-01-23 - added: autoflush: there is no more need for the last $twig->flush after the parsing, it is done automatically at the end of the parsing, with the same arguments as the first flush on the twig. This can be turned of by setting $twig->{twig_autoflush} to 0. WARNING: if you finished the output with a direct print instead of a flush, then this change will cause a bug. Hopefully this should not be the case and is easily fixable. - fixed: bug RT #17145 where get_xpath('//root/elt[1]/child') would produce a fatal error if there were no elt element under root. Spotted by Dan Dascalescu. - fixed: bug RT #17064 (comments and PIs after the root element were not properly processed), spotted by Dan Dascalescu. - fixed: bug RT #17044: the SYSTEM value was not output in UpdateDTD mode, thanks to Michal Lewandowski for pointing this out. - changed: the way empty tags are expanded with the 'html' style: only tags that are allowed to be empty in XHTML are output as '<tag />', thanks to Tom Rathborne for proding me to look into this. - added: a 'wrapped' pretty_print option, that is a bit dodgy I think but that might please some. - fixed: bug RT #16540 (tags with specific names (like 'level'), tripped XML::Twig, spotted by Graham - added: comparison with XML::LibXML in the SEE ALSO section (and in the FAQ), following a question from surf on c.l.p.m - added: XML::Twig now rejects string/regexp condition in twig_roots - added: better error checking in xml_grep - fixed: string/regexp condition in xml_grep - added: support for ! @att (or not @att) in get_xpath - added: support for several predicates in get_xpath (not nested predicates though). - fixed: bug RT #15671 (wrong condition interpretation for attribute value 0) - added: XML::Twig print_to_file method - added: XML::Twig::Elt methods: following_elt, following_elts, preceding_elt, preceding_elts (needed to support the corresponding axis in get_xpath) 3.22 - 2005-10-14 - added: the XML::Twig xparse method, which parses whatever is thrown at it (filehandle, string, HTML file, HTML URL, URL or file). - added: the XML::Twig nparse method, which creates a twig and then calls xparse on the last parameter. - added: the parse_html and parsefile_html methods, which parse HTML strings (or fh) and files respectively, with the help of HTML::TreeBuilder. the implementation may still change. Note that at the moment there seems to be encoding problems with it (if the input is not UTF8). - added: info to t/zz_dump_config.t - fixed: a bug that caused subs_text to leave empty #PCDATA elements if the regexp matched at the beginning or at the end of the text of an element. - fixed: RT #15014: in a few methods objects were created as XML::Twig::Elt, instead of in the class?!F of the calling object. - fixed: RT #14959: problem with wrap_children when an attribute of one of the child element includes a '>' - improved: the docs for wrap_children - added: a better error message when re-using an existing twig during the parse - fixed: (partially) a bug with windows line-endings in CDATA sections with keep_encoding set (RT #14815) - added: Test::Pod::Coverage test to please the kwalitee police ;--) 3.21 - 2005-08-12 - fixed: a test that failed if Tie::IxHash was not available - added: link to Atom feed for the CPAN testers results at http://xmltwig.com/rss/twig_testers.rss 3.20 - 2005-08-11 - fixed: the pod (which caused the tests to fail) 3.19 - 2005-08-10 - fixed: the fix to RT # 14008, this one should be ok restructured tests - added: the _dump method (probably not finished) 3.18 - 2005-08-08 - added: a fix to deal with a bug in XML::Parser in the original_string method when used in CDATA sections longer than 1024 chars (RT # 14008) thanks to Dan Dascalescu for spotting the bug and providing a test case. - added: better error diagnostics when the wrong arguments are used in paste - fixed: a bug in subs_text when the text of an element included \n (RT #13665) spotted by Dan Dascalescu - improved: cleaned up the behaviour of erase when the element being erased has extra_data (comments or pis) attached - fixed: a bug in subs_text that sometimes messed up text after the matching text - fixed: the erase/group_tags option of simplify to make it exactly similar to XML::Simple's - fixed: a bug that caused XML::Twig to crash when ignore was used with twig_roots (RT #13382) spotted by Larry Siden - fixed: bug in xml_split with default entities (they ended up being doubly escaped) - fixed: various bugs when dealing with ids (changing existing ids, setting the attribute directly...) - improved mark and split, both methods now accepts several tags/ as arguments, so you can write for example: $elt->mark( qr/^(\w+): (.*)$/, 'dt', 'dd'); - added: XML::Twig::Elt children_trimmed_text method, patch sent by ambrus (RT #12510) - changed: children_text and children_trimmed_text to have them return the entire text in scalar context - fixed: bug that caused XML::Twig not to play nice with XML::Xerces (due to improper import of UNIVERSAL::isa) spotted and patched by Colin Robertson. - changed: most references to 'gi' in the docs, replaced them by tag. I guess Robin Berjon's relentless teasing is to be credited with this one. - added: tag_regexp condition on handlers (a regexp instead of a regular condition will trigger the handler if the tag matches), suggested by Franck Porcher, implementation helped by a few Perl Monks (http://perlmonks.org/index.pl?node_id=445677). - fixed: typos in xml_split (RT #11911 and #11911), reported by Alexey Tourbin - added: tests for xml_split and xml_merge and fixed a few bugs in the process - added: the -i option to xml_split and xml_merge, that use XInclude instead of PIs (preliminary support, the XInclude namespace is not declared for example). - added the XML::Twig and XML::Twig::Elt trim method that trims an element in-place -added the XML::Twig last_elt method and the XML::Twig::Elt last_descendant method - added: more tests 3.17 - 2005-03-16 - improved: documentation, mostly to point better to the resources at http://www.xmltwig.com -fixed: a few tests that would fail under perl 5.6.* and Solaris (t/test_safe_encode.t and t/test_bug_3.15.t), see RT bug # 11844, thanks to Sven Neuhaus - changed: the licensing terms in the README to match the ones in the main module (same as Perl), see RT bug #11725 - added: a test on XML::SAX::Writer version number to avoid failing tests with old versions (<0.39) - improved: xml_split 3.16 - 2005-02-11 - added: the xml_split/xml_merge tools - fixed: PI handler behaviour when used in twig_roots mode - fixed: bug that prevented the DTD to be output when update_DTD mode is on, no DTD is present but entities have been created - added: level(<n>) trigger for handlers - fixed: bug that prevented the output_filter to be called when printing an element. Spotted thanks to Louis Strous. - fixed: bug in the nsgmls pretty printer that output invalid XML (an extra \n was added in the end tag) found by Lee Goddard - fixed: test 284 in test_additional to make it pass in RedHat's version of perl 5.8.0, thanks to rdhayes for debugging and fixing that test. - improved: first shot at getting Pis and comments back in the proper place, even in 'keep' mode. At the moment using set_pcdata (or set_cdata) removes all embedded comments/pis - fixed: a bug with pi's in keep mode (pi's would not be copied if they were within an element) found by Pascal Sternis - added: a fix to get rid of spurious warnings, sent by Anthony Persaud - added: the remove_cdata option to the XML::Twig new method, that will output CDATA sections as regular (escaped) PCDATA - added: the index option to the XML::Twig new method, and the associated XML::Twig index method, which generates a list of element matching a condition during parsing - added: the XML::Twig::Elt first_descendant method - fixed: bug with the keep_encoding option where attributes were not parsed when the element name was followed by more than one space (spotted by Gerald Sedrati-Dinet), see https://rt.cpan.org/Ticket/Display.html?id=8137 - fixed: a bug where whitespace at the beginning of an element could be dropped (if followed by an element before any other character). Now whitespace is dropped only if it includes a \n - added: feature: when load_DTD is used, default attributes are now filled - fixed: bug on xmlns in path expression trigger (would not replace prefixes in path expressions), spotted by amonroy on perlmonks, see http://perlmonks.org/index.pl?node_id=386764 - optimized: XML::Twig text, thanks to Nick Lassonde for the patch - fixed: bug that generated an empty line before some comments (pointed out by Tanya Huang) - fixed: tests to check XML::Filter::BufferText version (1.00 has a bug in the CDATA handling that makes XML::Twig tests fail). - added: new options --nowrap and --exclude (-v) to xml_grep - fixed: warning in tests under 5.8.0 (spotted by Ed Avis) - improved: skipped HTML::Entities tests in 5.8.0 (make test for this module seem to fail on my system, it might be the same elsewhere) - fixed: bug RT #6067 (problems with non-standard versions of Scalar::Utils which do not include weaken) - fixed: bug RT #6092 (error when using safe output filter) - fixed: bug when using map_xmlns, tags in default namespace were not output 3.15 - 2004-04-05 - fixed: tests now pass on more systems (thanks to Ed Avis for his testing) - added: normalize_space option for simplify (suggestion of Lambert Lum) - improved: removed usage of $& - improved: the doc for paste, as it was a bit short (suggestion of Richard Jolly) 3.14 - 2004-03-17 - improved: namespace processing , it should work fine now, as long as twig_roots is not used. - COMPATIBILITY WARNING: Potentially uncompatible change: the behaviour of simplify has been changed to mimic as exactly as possible XML::Simple's XMLin - improved: the pod to cover the entire API - improved: tests, now pass with perl 5.005_04-RC1 (fail with 5.005 reported by David Claughton), added more tests and a config summary at the end of the tests - added: methods on the class attribute, convenient for dealing with XHTML or preparing display with CSS: class set_class add_to_class att_to_class add_att_to_class move_att_to_class tag_to_class add_tag_to_class set_tag_class in_class navigation functions can use '.<class>' expressions - fixed: (yet another!) bug in the way DTDs were output - fixed: bug for pi => 'drop' option - changed: the names of lots on internal (undocumented) methods, prefixed them with _ 3.13 - 2004-03-16 - maintenance release to get the tests to pass on various platforms - improved: the README file - fixed: problem with encoding conversions (using safe_encode and safe_encode_hex) under perl 5.8.0, see RT ticket #5111 - fixed: tests to pass when trying to use an unsupported iconv filter 3.12 - 2004-01-29 - new features and greatly increased test coverage - added: lots of tests (>900), thanks to David Rigaudiere, Forrest Cahoon, Sebastien Aperghis-Tramoni, Henrik Tougaard and Sam Tregar for testing this release on various OSs, Perl, XML::Parser and expat versions. - added: XML::Twig::XPath that uses XML::XPath as the XPath engine for findnodes, findnodes_as_string, findvalue, exists, find and matches. Just use XML::Twig::XPath instead of use XML::Twig; (see the tests in t/xmlxpath_*). - added: special case to output some HTML tags ('script' to start with) as not empty. - fixed: XML::Twig::Elt->new now properly flags empty elements (spotted by Dave Roe) - added: XML::Twig::Elt contains_a_single method - added: #ENT twig_handlers (not necessarily complete, so not yet documented, needs more tests) - added: doc for XML::Twig and XML::Twig::Elt subs_text methods tags starting with # are now "invisible" (they are not output), useful for example for pretty_printing - added: new options --wrap '' and --date to xml_grep improved XPath support (added [nb] support) - added: xpath method, which generates a unique XPath for an element - added: has_child and has_children as synonyms of first_child - added: XML::Twig::set_id_seed to control how generated id's are created - improved: when using ignore on an element, end_tag_handlers are now tested at the end of the element (so you can for example get the byte offset in the document), suggestion of Philippe Verdret - added: XML::Twig::Elt change_att_name - fixed: XML::Twig::Elt new now properly works when called as an object (and not a class) method - fixed: namespace processing somewhat - fixed: SAX output methods - fixed: bug when keep_atts_order on and using set_att on an element with no existing attribute (spotted by scharloi) - COMPATIBILITY WARNING: WARNING - potentially incompatible changes - when using finish_print, the document used to be flushed. This is no longer the case, you will have to do it before calling finish_print. This way you have the choice of doing it or not. - improved: removed XML::Twig::Elt::unescape function (was no longer used) 3.11 - 2003-08-28 - added: --text_only option to xml_grep (outputs the text of the result, without tags) - fixed: bug where "Comments [was] always dropped after a twig object set 'comments' to 'drop'" (RT#3711), bug report and first patch by Simon Flack - added: option "keep_atts_order" that keeps the original attribute order in the output. This option needs the Tie::IxHash module to work. 3.10 - 2003-06-09 - added: xml_pp xml_grep and xml_spellcheck to the distribution - improved: the print method now calls 'print $elt->sprint' instead of printing content as it converts them to text, in order to reduce the number of calls to Perl's print (which should increase performance) - changed: XML::Twig::Elt erase to allow erasing the root element if it has only 1 child - added: findvalue method to XML::Twig and XML::Twig::Elt - added: aliased findnodes to get_xpath in XML::Twig and XML::Twig::Elt - added: the elt_class option to XML::Twig::new - added: the do_not_chain_handlers option to XML::Twig::new - added: the XML::Twig::Elt is_first_child and is_last_child methods - improved: set_gi,set_text, prefix, suffix, set_att, set_atts, del_atts, del_att now return the element for easier chains - fixed: bug in pretty printing comments before elements (RT #2315) - added: the XML::Twig::Elt children_copy method which returns a list of elements that are copies of the children of the element - fixed: a bug in wrap_in when the element wrapped is not attached to a tree - fixed: bug with get_xpath: regexp modifiers were not taken into account spotted by Eric Safern (RT #2284) - fixed: bug in methods inherited from XML::Parser::Expat (arguments were not properly passed) - improved: installed local empty SIG handlers to trap error messages triggered by require for optional modules, so that user signal handlers would not have to deal with them (suggestion from Philippe Verdret) - fixed: bug in the navigation XPath engine: text() was used instead of string(). Both are now allowed. - added: XML::Twig::Elt sort_children, sort_children_on_value, sort_children_on_att and sort_children_on_field methods that sort the children of an element in place - added:XML::Twig::Elt field_to_att and att_to_field methods - fixed:a memory leak due to ids not being weak references - added: the XML::Twig::Elt wrap_children method that wraps children of an element that satisfy a regexp in a new element - added: the XML::Twig::Elt add_id method that adds an id to an element - added: the XML::Twig::Elt strip_add method that deletes an attribute from an element and its descendants - COMPATIBILITY WARNING fixed:a quasi-bug in set_att where the hash passed in reference was used directly, which makes it a problem when the same reference is passed several times: all the elements share the same attributes. This is a potentially incompatible change for code that relied on this feature. Please report problems to the author. - fixed: bug in set_id - fixed: bug spotted by Bill Gunter: allowed _ as the initial character for XML names. Also now allow ':' as the first element - added: the simplify methods, which load a twig into an XML::Simple like data structure - fixed: bug in get_type and is_elt, spotted and fixed by Paul Stodghill - added: the XML::Twig::Elt ancestors_or_self method - fixed: bug when doc root is also a twig_root (twig was not built) - improved: the README (fleshed out examples, added OS X to the list of tested platforms) - fixed: bug when using the no_dtd_output option - added: doc for the XML::Twig::Elt children_count method - added: the XML::Twig::Elt children_text method - improved: updated the doc so it can be properly formatted by my custom pod2html, the generated doc (with a bigger ToC and better links) is available from the XML::Twig page at http://xmltwig.com/xmltwig/ 3.09 - 2002-11-10 - added: XML::Twig::Elt xml_text method - fixed: several bugs in the split method under 5.8.0 when matching a utf8 character (thanks to Dominic Mitchell who spotted them) - improved: cleaned-up the pod (still in progress) - added: the XML::Twig::Elt pos method that gives the position of an element in its parent's child list - fixed: re-introduced parseurl (thanks to Denis Kolokol for spotting its absence in this version) - fixed: ent_tag_handlers were not called on the root (thanks to Philippe Verdret - improved: #PI (also declared as '?') and #COMMENT handler support - added: check on reference type (must be XML::Twig::Elt) in XML::Twig::Elt::paste (patch by Forrest Cahoon) 3.08 - 2002-09-17 - fixed: the previous fix wasn't enough :--( 3.07 - 2002-09-17 - fixed:the way weaken is imported from Scalar::Util 3.06 - 2002-09-17 - added: XML::Twig::Elt trimmed_text and related methods (trimmed_field, first_child_trimmed_text, last_child_trimmed_text...) - added: XML::Twig::Elt replace_with method - added: XML::Twig::Elt cut_children method - added: XML::Twig contains_only method - added: *[att=~ /regexp/] condition type (suggested by Nikola Janceski) - fixed: bug in the way handlers for gi, path and subpath were chained (Thanks to Tommy Wareing) - fixed: bug where entities caused an error on other handlers (Thanks to Tommy Wareing) - fixed: bug with string(sub_elt)=~ /regexp/ (thanks to Tommy Wareing) - fixed: bug with output_filter used with expand_external_entities (thanks to Tommy Wareing) - fixed: (yet another!) bug with whitespace handling (whitespace, then an entity made the whitespace move after the entity) (spotted by the usual Tommy Wareing) - added: an error message when pasting on an undef reference (suggestion of Tommy Wareing) - fixed: bug in in_context (found by Tommy Wareing) - fixed: bug when loading the DTD (local undef $/ did not stay local, bug found and patch sent by Steve Pomeroy and Henry Cipolla) - fixed: bug in setting output filter - fixed: bug in using a filehandle with twig_print_outside_roots - added: safe_encode_hex filter - fixed: bug in set_indent, $INDENT not set properly (thanks to Eric Jain) - fixed: dependencies (no check with 5.8.0, added Scalar::Util as a possible source for weaken) - added: no_prolog option to XML:Twig::new - improved: tested build on Windows (thanks to Cory Trese and Josh Hawkins) - changed:in 3.05 - added: _ALPHA_ SAX export methods: XML::Twig toSAX1, toSAX2, flush_toSAX1, flush_toSAX2 XML::Twig::Elt toSAX1, toSAX2 The following gotchas apply: + these methods work only for documents that are completely loaded by XML::Twig (ie if you use twig_roots the data outside of the roots will not be output as SAX). + SAX1 support is a bit dodgy: the encoding is not preserved (it is always set to 'UTF-8'), + locator is not supported (and probably will not, what's the location of a newly created element?) Also when exporting SAX you should consider setting Twig to a mode where all aspects of the XML are treated as nodes by XML::Twig, by setting the following options when you create the twig: comments => 'process', pi => 'process', keep_spaces => 1 - improved: twig_print_outside_roots now supports a file handle ref as argument: the untouched part of the tree will be output to the filehandle: - added: the 'indented_c' style that gives a slightly more compact pretty print than 'indented': the end tags are on the same line as the preceding text (suggestion of Hugh Myers) - added: option in get_xpath (aka find_nodes) to apply the query to a list of elements - added: processing of conditions on the current node in get_xpath: my @result= get_xpath( q{.[@att="val"]}); This is of course mostly useful with the previous option. The idea stemmed from a post from Liam Quin to the perl-xml list - added: XML::Twig xml_version, set_xml_version, standalone, set_standalone methods on the XML declaration - fixed: bug in change_gi (which simply did not work at all), found by Ron Hayden. - fixed: bug in space handling with CDATA (spaces before the CDATA section were moved to within the section), comments and PI's - fixed: bug in parse_url (exit was not called at the end of the child), found by David Kulp - improved: cleanup a bit the code that parses xpath expressions (still some work to be done on this though), fixed a bug with last, found by Roel de Cock - fixed: the SYNOPSIS (parsefile is used to parse files, spotted by e.sammer) - fixed: bug in pretty printing (reported by Zhu Zhou) - fixed: bugin the install: the Makefile now uses the same perl used to perl Makefile.PL to run speedup and check_optional_modules (reported by Ralf Santos) - fixed: bugs in pretty printing when using flush, trying to figure out as well as possible if an element contains other elements or text (there is still a gotcha, see the BUGS section in the docs) - fixed: bug that caused the XML declaration and the DTD not to be reset between parses - improved: the conversion functions (errors are now reported when the function is created and not when it is first used) - added: the output_encoding option to XML::Twig->new, which allows specifying an encoding for the output: the conversion filter is created using Encode (perl 5.8.0) Text::Iconv or Unicode::* The XML declaration is also updated - added: #CDATA and #ENT can now be used in handler expressions - added: XML::Twig::Elt remove_cdata method, which turns CDATA sections into regular PCDATA elements - improved: set_asis can now be used to output CDATA sections un-escaped (and without the CDATA section markers) 3.04 - 2002-04-01 - fixed: handlers for XML::Parser 2.27 so the module can pass the tests 3.03 - 2002-03-26 - fixed: bugs in entity handling in twig_roots mode - added: the ignore_elts option, to skip completely elements - improved: enhanced the XPath-like syntax in navigation and get_xpath methods: added operators (>, < ...) - fixed: [RT 168]: setTwigHandler failed when no handler was already set (thanks to Jerry) - improved: turned %valid_option into a package global so AnyData can access it - fixed: bug in sprint that prevented it from working with filters - fixed: bug in erase when erasing an empty element that was the last child of its parent ([RT390], thanks to Julian Arnold) - fixed: copy now correctly copies the asis status of elements - fixed:typos on the docs (thanks to Shlomo Yona) - added: tests (for erase and entities in twig_roots mode) 3.02 - 2002-01-16 - fixed: tweaked speedup to replace constructs that did not work in perl 5.005003 3.01 - 2002-01-09 - fixed: the directory name in the tar file 3.00 - 2002-01-09 - COMPATIBILITY WARNING: THIS CHANGE IS NOT BACKWARD COMPATIBLE But it is The Right Thing To Do In normal mode (when KeepEncoding is not used) the XML data is now stored as parsed by XML::Parser, ie the base entities are expanded. The "print" methods (print, sprint and flush, plus the new xml_string, pcdata_xml_string and att_xml_string) return the data in XML-escaped form: & and < are escaped in PCDATA and &, < and the quote (" by default) are turned to & < and " (or ' if the quote is '). The "text" methods (text, att and pcdata) return the stored text as is. So if you want to output XML you should use the "print" methods and if you want to output text you should use the "text" methods. Note that this breaks the trick consisting in adding tags to the content of an element: $elt->prefix( "<b>") no longer adds a <b> tag before an element. $elt->print will now output "<b>...". (but you can still use it by marking those elements as 'asis'). It also fixes the annoying ' thingie that used to replace ' in the data. When the KeepEncoding option is used this is not true, the data is stored asis, base entities are kept un-escaped. Note that KeepEncoding is a global setting, if you use several twigs, some with KeepEncoding and some without then you will have to manually set the option using the set_keep_encoding method, otherwise the last XML::Twig::new call will have set it In addition when the KeepEncoding option is used the start tag is parsed using a custom function parse_start_tag, which works only for 1-byte encodings (it is regexp-based). This method can be overridden using the ParseStartTag (or parse_start_tag) option when creating the twig. This function takes the original string as input and returns the gi and the attributes (in a hash). If you write a function that works for multi-byte encodings I would very much appreciate if you could send it back to me so I can add it to the module, so other users can benefit from it. An additional option ExpansExternalEnts will expand external entity references to their text (in the output, the text stored is &ent;). - added: when handlers (twig_handlers or start_tag_handlers) are called $_ is set to the element node, so quick hacks look better: my $t= new XML::Twig( twig_handlers => { elt => sub { print $_->att( 'id'), ": ", $_->text, "\n"; } } ); - added: XML::Twig dispose method which properly reclaims all the memory used by the object (useful if you don't have WeakRef installed) - added: XML::Twig and XML::Twig::Elt ignore methods, which can be called from a start_tag_handlers handler and cause the element (or the current element if called on a twig) to be ignored by the parsing - added: XML::Twig parse_start_tag option that overrides the default function used to parse start tags when KeepEncoding is used - added: XML::Twig::Elt xml_string, pcdata_xml_string and att_xml_string all return an XML-escaped string for an element (including sub-elements and their tags but not the enclosing tags for the element), a #PCDATA element and an attribute - added: XML::Twig::Elt methods tag and set_tag, equivalent respectively to gi and set_gi - added: XML::Twig and XML::Twig::Elt set_keep_encoding methods can be used to set the keep_encoding value if you use several twigs with different keep_encoding options - improved: option names for XML::Twig::new are now checked (a warning is output if the option is not a valid one); - improved: when using pretty_print nice or indented keep_spaces_in is now checked so the elements within an element listed in keep_spaces_in are not indented - added: XML::Twig::Elt insert_new_elt method that does a new and a paste - added: XML::Twig::Elt split_at method splits a #PCDATA element in 2 - added: XML::Twig::Elt split method splits all the text descendants of an element, on a regep, wrapping text captured in brackets in the regexp in a specified element, all elements are returned - added: XML::Twig::Elt mark method is similar to the split method, except that only newly created elements (matched by the regexp) are returned - added: XML::Twig::Elt get_type method returns #ELT for elements and the gi (#PCDATA, #CDATA...) otherwise - added: XML::Twig::Elt is_elt returns the gi if the element is a real element and 0 if it is #PCDATA, #CDATA... - added: XML::Twig::Elt contains_only_text returns 1 if the element contains no "real" element (is_field is another name for it) - added: First implementation of the output_filter option which filters the text before it is output by the print, sprint, flush and text methods (only works for print at the moment, and still under test with various versions of XML::Parser). Standard filters are also available Example: #!/bin/perl -w use strict; use XML::Twig; my $t = new XML::Twig(output_filter => 'latin1'); $t->parse( \*DATA); $t->print; __DATA__ <?xml version="1.0" encoding="ISO-8859-1"?> <docù atté="valuè">Un homme soupçonné d'être impliqué dans la mort d'un motard de la police, renversé </docù> The 'latin1', 'html' and 'safe' filters are predefined, you can also build additional filters using Iconv (requires text::Iconv) and Unicode::String (requires Unicode::String and Unicode::Map8): my $conv = XML::Twig::iconv_convert( 'latin1'); my $t = new XML::Twig(output_filter => $conv); my $conv = XML::Twig::unicode_convert( 'latin1'); my $t = new XML::Twig(output_filter => $conv); warning: conversions work fine with XML::Parser 2.27 but sometimes fail with XML::Parser 2.30 (on Perl 5.6.1, Linux 2.4 on a PC) when using 'latin1' without Text::Iconv or Unicode::String and Unicode::Map8 installed. The input_filter option works the same way, except the text is converted before it is stored in the twig (so you can use regexp in your native encoding for example) - added: the XML::Twig::Elt set_asis method sets a property of an element that causes it to be output asis (without XML-escaping < " and &) so you can still create tagged text - added: the XML::Twig::Elt prefix and suffix methods accept an optional 'asis' argument that causes the prefix or suffix to get the asis property (so you can do $elt->prefix( '<b>foo</b>', 'asis') for example) - added: the XML::Twig and XML::Twig::Elt find_nodes methods are aliases to the get_xpath method (this is the name used in XML::XPath) - added: the XML::Twig parseurl and safe_parseurl methods parse a document whose url is given - added: XML::Twig::Elt extra_data, set_extra_data and append_extra_data to access the... extra data (PI's and comments) attached to an element - added: XML::Twig method parser returns the XML::Parser::Expat object used by the twig - improved: Most XML::Parser::Expat methods are now inherited by XML::Twig objects - added: XML::Twig::Elt descendant_or_self method that returns the element and its descendants - fixed: element (and attribute) names can now include '.' - fixed: get_xpath now works for root based XPath expressions ('/doc/elt') - fixed: get_xpath now works for regexps (including regexps on attribute values) - fixed: you can now properly restore pretty_print and empty_tag_style values - fixed: speedup (at install) now checks the Perl version and uses qr or "" so XML::Twig works in 5.004 - fixed: XML::Twig::Elt wrap_in now allows wrapping the root element - fixed: various bugs in the DOCTYPE and DTD output with XML::Parser 2.30 - fixed: the tests to fix a bug when working with XML::Parser 2.27 - fixed: the tests to fix a bug preventing test2 to pass under windows - fixed: _default_ handlers now work (thanks Zoogie) - fixed: the text method now returns the XML base entities (<>&'") un-escaped (thanks to Hakan Kallberg's persistence to ask for it ;--) - fixed: pretty_print works better for elements without content - fixed: end_tag_handlers now work properly (thanks to Phil Glanville for the patch). - improved: attributes which name starts with # are not output by the print methods, and thus can be used to store private data on elements - improved: WeakRef is used if installed, so no more memory leaks - improved: sped-up print and flush by creating the _print and _flush methods which do not check for file handle and pretty print options - improved: the doc has been enhanced and somewhat restructured. All options are now written as this_is_an_option although the legacy form thisIsAnOption can still be used. Links now display properly in the text form (thanks to Dominic Mitchell for spotting this and sending a patch) - improved: navigation functions (including descendants) now allow not only a gi to be used as filter, but also the '#ELT' token, to filter only "real" elements (as opposed to #PCDATA, #CDATA, #PI, #COMMENT, #ENT), the '#TEXT' token, to filter only text (PCDATA and CDATA elements), regular expressions (built with qr//) applied on the elements gi's, code references, the code is passed the element as argument, and a subset of XPath. Functions that can use this token are: children, first_child, last_child, prev_sibling, last_sibling, next_elt, last_elt, descendants, get_xpath, child, sibling, sibling_text, prev_siblings, next_siblings field, first_child_text - improved: the paste method now accepts a 'within' position, which inserts the element at the $offset argument (a 3rd, required, argument) in the reference element or in its first text child - improved: the XML::Twig::Elt insert method now accepts attributes (hashrefs) applied to the element(s) being inserted: $elt->insert( e1 => { a => 'v'}, e2 => e3 => { a1 =>'v1', a2 => 'v2'}); - improved: the XML::Twig::erase method now outputs a meaningful error message if applied to the root (or a cut element) - improved: optimizations for better performances (in the end performances are about the same or a little worse than XML::Twig 2.02 but the module is much more powerful) [Known bugs] - the DTD interface is completely broken, and I have little hope of fixing it considering I have to deal with 2 incompatible versions of XML::Parser. Plus no one seems to be using it... - some XPath/Navigation expressions using " or ' in the text()="" part of the expression will cause a fatal error - note that this version works better (but doesn't necessarily require) with WeakRef (Perl version 5.6.0 and above) and Text::Iconv for all its encoding conversions. ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������XML-Twig-3.52/filter_for_5.005����������������������������������������������������������������������0000644�0001750�0001750�00000000320�12732215763�016065� 0����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# $Id: /xmltwig/trunk/filter_for_5.005 4 2007-03-16T12:16:25.259192Z mrodrigu $ if( $] < 5.006) { s{^(\s*)no warnings;}{$1# no warnings;}; } else { s{^(\s*)# no warnings; }{$1no warnings;}; } ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������XML-Twig-3.52/Twig_pm.slow��������������������������������������������������������������������������0000644�0001750�0001750�00001645545�13015347463�015623� 0����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������use strict; use warnings; # > perl 5.5 # This is created in the caller's space # I realize (now!) that it's not clean, but it's been there for 10+ years... BEGIN { sub ::PCDATA { '#PCDATA' } ## no critic (Subroutines::ProhibitNestedSubs); sub ::CDATA { '#CDATA' } ## no critic (Subroutines::ProhibitNestedSubs); } use UNIVERSAL(); ## if a sub returns a scalar, it better not bloody disappear in list context ## no critic (Subroutines::ProhibitExplicitReturnUndef); my $perl_version; my $parser_version; ###################################################################### package XML::Twig; ###################################################################### require 5.004; use utf8; # > perl 5.5 use vars qw($VERSION @ISA %valid_option); use Carp; use File::Spec; use File::Basename; use Config; # to get perl's path name in case we need to know if perlio is available *isa= *UNIVERSAL::isa; # flag, set to true if the weaken sub is available use vars qw( $weakrefs); # flag set to true if the version of expat seems to be 1.95.2, which has annoying bugs # wrt doctype handling. This is global for performance reasons. my $expat_1_95_2=0; # a slight non-xml mod: # is allowed as a first character my $REG_TAG_FIRST_LETTER; $REG_TAG_FIRST_LETTER= q{(?:[^\W\d]|[:#_])}; # < perl 5.6 - does not work for leading non-ascii letters $REG_TAG_FIRST_LETTER= q{(?:[[:alpha:]:#_])}; # >= perl 5.6 my $REG_TAG_LETTER= q{(?:[\w_.-]*)}; # a simple name (no colon) my $REG_NAME_TOKEN= qq{(?:$REG_TAG_FIRST_LETTER$REG_TAG_LETTER*)}; # a tag name, possibly including namespace my $REG_NAME= qq{(?:(?:$REG_NAME_TOKEN:)?$REG_NAME_TOKEN)}; # tag name (leading # allowed) # first line is for perl 5.005, second line for modern perl, that accept character classes my $REG_TAG_NAME=$REG_NAME; # name or wildcard (* or '') (leading # allowed) my $REG_NAME_W = qq{(?:$REG_NAME|[*])}; # class and ids are deliberately permissive my $REG_NTOKEN_FIRST_LETTER; $REG_NTOKEN_FIRST_LETTER= q{(?:[^\W\d]|[:_])}; # < perl 5.6 - does not work for leading non-ascii letters $REG_NTOKEN_FIRST_LETTER= q{(?:[[:alpha:]:_])}; # >= perl 5.6 my $REG_NTOKEN_LETTER= q{(?:[\w_:.-]*)}; my $REG_NTOKEN= qq{(?:$REG_NTOKEN_FIRST_LETTER$REG_NTOKEN_LETTER*)}; my $REG_CLASS = $REG_NTOKEN; my $REG_ID = $REG_NTOKEN; # allow <tag> #<tag> (private elt) * <tag>.<class> *.<class> <tag>#<id> *#<id> my $REG_TAG_PART= qq{(?:$REG_NAME_W(?:[.]$REG_CLASS|[#]$REG_ID)?|[.]$REG_CLASS)}; my $REG_REGEXP = q{(?:/(?:[^\\/]|\\.)*/[eimsox]*)}; # regexp my $REG_MATCH = q{[!=]~}; # match (or not) my $REG_STRING = q{(?:"(?:[^\\"]|\\.)*"|'(?:[^\\']|\\.)*')}; # string (simple or double quoted) my $REG_NUMBER = q{(?:\d+(?:\.\d*)?|\.\d+)}; # number my $REG_VALUE = qq{(?:$REG_STRING|$REG_NUMBER)}; # value my $REG_OP = q{==|!=|>|<|>=|<=|eq|ne|lt|gt|le|ge|=}; # op my $REG_FUNCTION = q{(?:string|text)\(\s*\)}; my $REG_STRING_ARG = qq{(?:string|text)\\(\\s*$REG_NAME_W\\s*\\)}; my $REG_COMP = q{(?:>=|<=|!=|<|>|=)}; my $REG_TAG_IN_PREDICATE= $REG_NAME_W . q{(?=\s*(?i:and\b|or\b|\]|$))}; # keys in the context stack, chosen not to interfere with att names, even private (#-prefixed) ones my $ST_TAG = '##tag'; my $ST_ELT = '##elt'; my $ST_NS = '##ns' ; # used in the handler trigger code my $REG_NAKED_PREDICATE= qq{((?:"[^"]*"|'[^']*'|$REG_STRING_ARG|$REG_FUNCTION|\@$REG_NAME_W|$REG_MATCH\\s*$REG_REGEXP|[\\s\\d><=!()+.-]|(?i:and)|(?i:or)|$REG_TAG_IN_PREDICATE)*)}; my $REG_PREDICATE= qq{\\[$REG_NAKED_PREDICATE\\]}; # not all axis, only supported ones (in get_xpath) my @supported_axis= ( 'ancestor', 'ancestor-or-self', 'child', 'descendant', 'descendant-or-self', 'following', 'following-sibling', 'parent', 'preceding', 'preceding-sibling', 'self' ); my $REG_AXIS = "(?:" . join( '|', @supported_axis) .")"; # only used in the "xpath"engine (for get_xpath/findnodes) for now my $REG_PREDICATE_ALT = qr{\[(?:(?:string\(\s*\)|\@$REG_TAG_NAME)\s*$REG_MATCH\s*$REG_REGEXP\s*|[^\]]*)\]}; # used to convert XPath tests on strings to the perl equivalent my %PERL_ALPHA_TEST= ( '=' => ' eq ', '!=' => ' ne ', '>' => ' gt ', '>=' => ' ge ', '<' => ' lt ', '<=' => ' le '); my( $FB_HTMLCREF, $FB_XMLCREF); my $NO_WARNINGS= $perl_version >= 5.006 ? 'no warnings' : 'local $^W=0'; # default namespaces, both ways my %DEFAULT_NS= ( xml => "http://www.w3.org/XML/1998/namespace", xmlns => "http://www.w3.org/2000/xmlns/", ); my %DEFAULT_URI2NS= map { $DEFAULT_NS{$_} => $_ } keys %DEFAULT_NS; # constants my( $PCDATA, $CDATA, $PI, $COMMENT, $ENT, $ELT, $NOTATION, $TEXT, $ASIS, $EMPTY, $BUFSIZE); # used when an HTML doc only has a PUBLIC declaration, to generate the SYSTEM one # this should really be done by HTML::TreeBuilder, but as of HTML::TreeBuilder 4.2 it isn't # the various declarations are taken from http://en.wikipedia.org/wiki/Document_Type_Declaration my %HTML_DECL= ( "-//W3C//DTD HTML 4.0 Transitional//EN" => "http://www.w3.org/TR/REC-html40/loose.dtd", "-//W3C//DTD HTML 4.01//EN" => "http://www.w3.org/TR/html4/strict.dtd", "-//W3C//DTD HTML 4.01 Transitional//EN" => "http://www.w3.org/TR/html4/loose.dtd", "-//W3C//DTD HTML 4.01 Frameset//EN" => "http://www.w3.org/TR/html4/frameset.dtd", "-//W3C//DTD XHTML 1.0 Strict//EN" => "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd", "-//W3C//DTD XHTML 1.0 Transitional//EN" => "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", "-//W3C//DTD XHTML 1.0 Frameset//EN" => "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd", "-//W3C//DTD XHTML 1.1//EN" => "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", "-//W3C//DTD XHTML Basic 1.0//EN" => "http://www.w3.org/TR/xhtml-basic/xhtml-basic10.dtd", "-//W3C//DTD XHTML Basic 1.1//EN" => "http://www.w3.org/TR/xhtml-basic/xhtml-basic11.dtd", "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" => "http://www.wapforum.org/DTD/xhtml-mobile10.dtd", "-//WAPFORUM//DTD XHTML Mobile 1.1//EN" => "http://www.openmobilealliance.org/tech/DTD/xhtml-mobile11.dtd", "-//WAPFORUM//DTD XHTML Mobile 1.2//EN" => "http://www.openmobilealliance.org/tech/DTD/xhtml-mobile12.dtd", "-//W3C//DTD XHTML+RDFa 1.0//EN" => "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd", ); my $DEFAULT_HTML_TYPE= "-//W3C//DTD HTML 4.0 Transitional//EN"; my $SEP= qr/\s*(?:$|\|)/; BEGIN { $VERSION = '3.52'; use XML::Parser; my $needVersion = '2.23'; ($parser_version= $XML::Parser::VERSION)=~ s{_\d+}{}; # remove _<n> from version so numeric tests do not warn croak "need at least XML::Parser version $needVersion" unless $parser_version >= $needVersion; ($perl_version= $])=~ s{_\d+}{}; if( $perl_version >= 5.008) { eval "use Encode qw( :all)"; ## no critic ProhibitStringyEval $FB_XMLCREF = 0x0400; # Encode::FB_XMLCREF; $FB_HTMLCREF = 0x0200; # Encode::FB_HTMLCREF; } # test whether we can use weak references # set local empty signal handler to trap error messages { local $SIG{__DIE__}; if( eval( 'require Scalar::Util') && defined( \&Scalar::Util::weaken)) { import Scalar::Util( 'weaken'); $weakrefs= 1; } elsif( eval( 'require WeakRef')) { import WeakRef; $weakrefs= 1; } else { $weakrefs= 0; } } import XML::Twig::Elt; import XML::Twig::Entity; import XML::Twig::Entity_list; # used to store the gi's # should be set for each twig really, at least when there are several # the init ensures that special gi's are always the same # constants: element types $PCDATA = '#PCDATA'; $CDATA = '#CDATA'; $PI = '#PI'; $COMMENT = '#COMMENT'; $ENT = '#ENT'; $NOTATION = '#NOTATION'; # element classes $ELT = '#ELT'; $TEXT = '#TEXT'; # element properties $ASIS = '#ASIS'; $EMPTY = '#EMPTY'; # used in parseurl to set the buffer size to the same size as in XML::Parser::Expat $BUFSIZE = 32768; # gi => index %XML::Twig::gi2index=( '', 0, $PCDATA => 1, $CDATA => 2, $PI => 3, $COMMENT => 4, $ENT => 5); # list of gi's @XML::Twig::index2gi=( '', $PCDATA, $CDATA, $PI, $COMMENT, $ENT); # gi's under this value are special $XML::Twig::SPECIAL_GI= @XML::Twig::index2gi; %XML::Twig::base_ent= ( '>' => '>', '<' => '<', '&' => '&', "'" => ''', '"' => '"',); foreach my $c ( "\n", "\r", "\t") { $XML::Twig::base_ent{$c}= sprintf( "&#x%02x;", ord( $c)); } # now set some aliases *find_nodes = *get_xpath; # same as XML::XPath *findnodes = *get_xpath; # same as XML::LibXML *getElementsByTagName = *descendants; *descendants_or_self = *descendants; # valid in XML::Twig, not in XML::Twig::Elt *find_by_tag_name = *descendants; *getElementById = *elt_id; *getEltById = *elt_id; *toString = *sprint; *create_accessors = *att_accessors; } @ISA = qw(XML::Parser); # fake gi's used in twig_handlers and start_tag_handlers my $ALL = '_all_'; # the associated function is always called my $DEFAULT= '_default_'; # the function is called if no other handler has been # some defaults my $COMMENTS_DEFAULT= 'keep'; my $PI_DEFAULT = 'keep'; # handlers used in regular mode my %twig_handlers=( Start => \&_twig_start, End => \&_twig_end, Char => \&_twig_char, Entity => \&_twig_entity, Notation => \&_twig_notation, XMLDecl => \&_twig_xmldecl, Doctype => \&_twig_doctype, Element => \&_twig_element, Attlist => \&_twig_attlist, CdataStart => \&_twig_cdatastart, CdataEnd => \&_twig_cdataend, Proc => \&_twig_pi, Comment => \&_twig_comment, Default => \&_twig_default, ExternEnt => \&_twig_extern_ent, ); # handlers used when twig_roots is used and we are outside of the roots my %twig_handlers_roots= ( Start => \&_twig_start_check_roots, End => \&_twig_end_check_roots, Doctype => \&_twig_doctype, Char => undef, Entity => undef, XMLDecl => \&_twig_xmldecl, Element => undef, Attlist => undef, CdataStart => undef, CdataEnd => undef, Proc => undef, Comment => undef, Proc => \&_twig_pi_check_roots, Default => sub {}, # hack needed for XML::Parser 2.27 ExternEnt => \&_twig_extern_ent, ); # handlers used when twig_roots and print_outside_roots are used and we are # outside of the roots my %twig_handlers_roots_print_2_30= ( Start => \&_twig_start_check_roots, End => \&_twig_end_check_roots, Char => \&_twig_print, Entity => \&_twig_print_entity, ExternEnt => \&_twig_print_entity, DoctypeFin => \&_twig_doctype_fin_print, XMLDecl => sub { _twig_xmldecl( @_); _twig_print( @_); }, Doctype => \&_twig_print_doctype, # because recognized_string is broken here # Element => \&_twig_print, Attlist => \&_twig_print, CdataStart => \&_twig_print, CdataEnd => \&_twig_print, Proc => \&_twig_pi_check_roots, Comment => \&_twig_print, Default => \&_twig_print_check_doctype, ExternEnt => \&_twig_extern_ent, ); # handlers used when twig_roots, print_outside_roots and keep_encoding are used # and we are outside of the roots my %twig_handlers_roots_print_original_2_30= ( Start => \&_twig_start_check_roots, End => \&_twig_end_check_roots, Char => \&_twig_print_original, # I have no idea why I should not be using this handler! Entity => \&_twig_print_entity, ExternEnt => \&_twig_print_entity, DoctypeFin => \&_twig_doctype_fin_print, XMLDecl => sub { _twig_xmldecl( @_); _twig_print_original( @_) }, Doctype => \&_twig_print_original_doctype, # because original_string is broken here Element => \&_twig_print_original, Attlist => \&_twig_print_original, CdataStart => \&_twig_print_original, CdataEnd => \&_twig_print_original, Proc => \&_twig_pi_check_roots, Comment => \&_twig_print_original, Default => \&_twig_print_original_check_doctype, ); # handlers used when twig_roots and print_outside_roots are used and we are # outside of the roots my %twig_handlers_roots_print_2_27= ( Start => \&_twig_start_check_roots, End => \&_twig_end_check_roots, Char => \&_twig_print, # if the Entity handler is set then it prints the entity declaration # before the entire internal subset (including the declaration!) is output Entity => sub {}, XMLDecl => \&_twig_print, Doctype => \&_twig_print, CdataStart => \&_twig_print, CdataEnd => \&_twig_print, Proc => \&_twig_pi_check_roots, Comment => \&_twig_print, Default => \&_twig_print, ExternEnt => \&_twig_extern_ent, ); # handlers used when twig_roots, print_outside_roots and keep_encoding are used # and we are outside of the roots my %twig_handlers_roots_print_original_2_27= ( Start => \&_twig_start_check_roots, End => \&_twig_end_check_roots, Char => \&_twig_print_original, # for some reason original_string is wrong here # this can be a problem if the doctype includes non ascii characters XMLDecl => \&_twig_print, Doctype => \&_twig_print, # if the Entity handler is set then it prints the entity declaration # before the entire internal subset (including the declaration!) is output Entity => sub {}, #Element => undef, Attlist => undef, CdataStart => \&_twig_print_original, CdataEnd => \&_twig_print_original, Proc => \&_twig_pi_check_roots, Comment => \&_twig_print_original, Default => \&_twig_print, # _twig_print_original does not work ExternEnt => \&_twig_extern_ent, ); my %twig_handlers_roots_print= $parser_version > 2.27 ? %twig_handlers_roots_print_2_30 : %twig_handlers_roots_print_2_27; my %twig_handlers_roots_print_original= $parser_version > 2.27 ? %twig_handlers_roots_print_original_2_30 : %twig_handlers_roots_print_original_2_27; # handlers used when the finish_print method has been called my %twig_handlers_finish_print= ( Start => \&_twig_print, End => \&_twig_print, Char => \&_twig_print, Entity => \&_twig_print, XMLDecl => \&_twig_print, Doctype => \&_twig_print, Element => \&_twig_print, Attlist => \&_twig_print, CdataStart => \&_twig_print, CdataEnd => \&_twig_print, Proc => \&_twig_print, Comment => \&_twig_print, Default => \&_twig_print, ExternEnt => \&_twig_extern_ent, ); # handlers used when the finish_print method has been called and the keep_encoding # option is used my %twig_handlers_finish_print_original= ( Start => \&_twig_print_original, End => \&_twig_print_end_original, Char => \&_twig_print_original, Entity => \&_twig_print_original, XMLDecl => \&_twig_print_original, Doctype => \&_twig_print_original, Element => \&_twig_print_original, Attlist => \&_twig_print_original, CdataStart => \&_twig_print_original, CdataEnd => \&_twig_print_original, Proc => \&_twig_print_original, Comment => \&_twig_print_original, Default => \&_twig_print_original, ); # handlers used within ignored elements my %twig_handlers_ignore= ( Start => \&_twig_ignore_start, End => \&_twig_ignore_end, Char => undef, Entity => undef, XMLDecl => undef, Doctype => undef, Element => undef, Attlist => undef, CdataStart => undef, CdataEnd => undef, Proc => undef, Comment => undef, Default => undef, ExternEnt => undef, ); # those handlers are only used if the entities are NOT to be expanded my %twig_noexpand_handlers= ( ExternEnt => undef, Default => \&_twig_default ); my @saved_default_handler; my $ID= 'id'; # default value, set by the Id argument my $css_sel=0; # set through the css_sel option to allow .class selectors in triggers # all allowed options %valid_option= ( # XML::Twig options TwigHandlers => 1, Id => 1, TwigRoots => 1, TwigPrintOutsideRoots => 1, StartTagHandlers => 1, EndTagHandlers => 1, ForceEndTagHandlersUsage => 1, DoNotChainHandlers => 1, IgnoreElts => 1, Index => 1, AttAccessors => 1, EltAccessors => 1, FieldAccessors => 1, CharHandler => 1, TopDownHandlers => 1, KeepEncoding => 1, DoNotEscapeAmpInAtts => 1, ParseStartTag => 1, KeepAttsOrder => 1, LoadDTD => 1, DTDHandler => 1, DTDBase => 1, NoXxe => 1, DoNotOutputDTD => 1, NoProlog => 1, ExpandExternalEnts => 1, DiscardSpaces => 1, KeepSpaces => 1, DiscardAllSpaces => 1, DiscardSpacesIn => 1, KeepSpacesIn => 1, PrettyPrint => 1, EmptyTags => 1, EscapeGt => 1, Quote => 1, Comments => 1, Pi => 1, OutputFilter => 1, InputFilter => 1, OutputTextFilter => 1, OutputEncoding => 1, RemoveCdata => 1, EltClass => 1, MapXmlns => 1, KeepOriginalPrefix => 1, SkipMissingEnts => 1, # XML::Parser options ErrorContext => 1, ProtocolEncoding => 1, Namespaces => 1, NoExpand => 1, Stream_Delimiter => 1, ParseParamEnt => 1, NoLWP => 1, Non_Expat_Options => 1, Xmlns => 1, CssSel => 1, UseTidy => 1, TidyOptions => 1, OutputHtmlDoctype => 1, ); my $active_twig; # last active twig,for XML::Twig::s # predefined input and output filters use vars qw( %filter); %filter= ( html => \&html_encode, safe => \&safe_encode, safe_hex => \&safe_encode_hex, ); # trigger types (used to sort them) my ($LEVEL_TRIGGER, $REGEXP_TRIGGER, $XPATH_TRIGGER)=(1..3); sub new { my ($class, %args) = @_; my $handlers; # change all nice_perlish_names into nicePerlishNames %args= _normalize_args( %args); # check options unless( $args{MoreOptions}) { foreach my $arg (keys %args) { carp "invalid option $arg" unless $valid_option{$arg}; } } # a twig is really an XML::Parser # my $self= XML::Parser->new(%args); my $self; $self= XML::Parser->new(%args); bless $self, $class; $self->{_twig_context_stack}= []; # allow tag.class selectors in handler triggers $css_sel= $args{CssSel} || 0; if( exists $args{TwigHandlers}) { $handlers= $args{TwigHandlers}; $self->setTwigHandlers( $handlers); delete $args{TwigHandlers}; } # take care of twig-specific arguments if( exists $args{StartTagHandlers}) { $self->setStartTagHandlers( $args{StartTagHandlers}); delete $args{StartTagHandlers}; } if( exists $args{DoNotChainHandlers}) { $self->{twig_do_not_chain_handlers}= $args{DoNotChainHandlers}; } if( exists $args{IgnoreElts}) { # change array to hash so you can write ignore_elts => [ qw(foo bar baz)] if( isa( $args{IgnoreElts}, 'ARRAY')) { $args{IgnoreElts}= { map { $_ => 1 } @{$args{IgnoreElts}} }; } $self->setIgnoreEltsHandlers( $args{IgnoreElts}); delete $args{IgnoreElts}; } if( exists $args{Index}) { my $index= $args{Index}; # we really want a hash name => path, we turn an array into a hash if necessary if( ref( $index) eq 'ARRAY') { my %index= map { $_ => $_ } @$index; $index= \%index; } while( my( $name, $exp)= each %$index) { $self->setTwigHandler( $exp, sub { push @{$_[0]->{_twig_index}->{$name}}, $_; 1; }); } } $self->{twig_elt_class}= $args{EltClass} || 'XML::Twig::Elt'; if( defined( $args{EltClass}) && $args{EltClass} ne 'XML::Twig::Elt') { $self->{twig_alt_elt_class}=1; } if( exists( $args{EltClass})) { delete $args{EltClass}; } if( exists( $args{MapXmlns})) { $self->{twig_map_xmlns}= $args{MapXmlns}; $self->{Namespaces}=1; delete $args{MapXmlns}; } if( exists( $args{KeepOriginalPrefix})) { $self->{twig_keep_original_prefix}= $args{KeepOriginalPrefix}; delete $args{KeepOriginalPrefix}; } $self->{twig_dtd_handler}= $args{DTDHandler}; delete $args{DTDHandler}; if( $args{ExpandExternalEnts}) { $self->set_expand_external_entities( 1); $self->{twig_expand_external_ents}= $args{ExpandExternalEnts}; $self->{twig_read_external_dtd}= 1; # implied by ExpandExternalEnts if( $args{ExpandExternalEnts} == -1) { $self->{twig_extern_ent_nofail}= 1; $self->setHandlers( ExternEnt => \&_twig_extern_ent_nofail); } delete $args{LoadDTD}; delete $args{ExpandExternalEnts}; } else { $self->set_expand_external_entities( 0); } if( !$args{NoLWP} && ! _use( 'URI') && ! _use( 'URI::File') && ! _use( 'LWP')) { $self->{twig_ext_ent_handler}= \&XML::Parser::initial_ext_ent_handler } elsif( $args{NoXxe}) { $self->{twig_ext_ent_handler}= sub { my($xp, $base, $path) = @_; $xp->{ErrorMessage}.= "cannot use entities in document when the no_xxe option is on"; return undef; }; } else { $self->{twig_ext_ent_handler}= \&XML::Parser::file_ext_ent_handler } if( $args{DoNotEscapeAmpInAtts}) { $self->set_do_not_escape_amp_in_atts( 1); $self->{twig_do_not_escape_amp_in_atts}=1; } else { $self->set_do_not_escape_amp_in_atts( 0); $self->{twig_do_not_escape_amp_in_atts}=0; } # deal with TwigRoots argument, a hash of elements for which # subtrees will be built (and associated handlers) if( $args{TwigRoots}) { $self->setTwigRoots( $args{TwigRoots}); delete $args{TwigRoots}; } if( $args{EndTagHandlers}) { unless ($self->{twig_roots} || $args{ForceEndTagHandlersUsage}) { croak "you should not use EndTagHandlers without TwigRoots\n", "if you want to use it anyway, normally because you have ", "a start_tag_handlers that calls 'ignore' and you want to ", "call an ent_tag_handlers at the end of the element, then ", "pass 'force_end_tag_handlers_usage => 1' as an argument ", "to new"; } $self->setEndTagHandlers( $args{EndTagHandlers}); delete $args{EndTagHandlers}; } if( $args{TwigPrintOutsideRoots}) { croak "cannot use twig_print_outside_roots without twig_roots" unless( $self->{twig_roots}); # if the arg is a filehandle then store it if( _is_fh( $args{TwigPrintOutsideRoots}) ) { $self->{twig_output_fh}= $args{TwigPrintOutsideRoots}; } $self->{twig_default_print}= $args{TwigPrintOutsideRoots}; } # space policy if( $args{KeepSpaces}) { croak "cannot use both keep_spaces and discard_spaces" if( $args{DiscardSpaces}); croak "cannot use both keep_spaces and discard_all_spaces" if( $args{DiscardAllSpaces}); croak "cannot use both keep_spaces and keep_spaces_in" if( $args{KeepSpacesIn}); $self->{twig_keep_spaces}=1; delete $args{KeepSpaces}; } if( $args{DiscardSpaces}) { croak "cannot use both discard_spaces and keep_spaces_in" if( $args{KeepSpacesIn}); croak "cannot use both discard_spaces and discard_all_spaces" if( $args{DiscardAllSpaces}); croak "cannot use both discard_spaces and discard_spaces_in" if( $args{DiscardSpacesIn}); $self->{twig_discard_spaces}=1; delete $args{DiscardSpaces}; } if( $args{KeepSpacesIn}) { croak "cannot use both keep_spaces_in and discard_spaces_in" if( $args{DiscardSpacesIn}); croak "cannot use both keep_spaces_in and discard_all_spaces" if( $args{DiscardAllSpaces}); $self->{twig_discard_spaces}=1; $self->{twig_keep_spaces_in}={}; my @tags= @{$args{KeepSpacesIn}}; foreach my $tag (@tags) { $self->{twig_keep_spaces_in}->{$tag}=1; } delete $args{KeepSpacesIn}; } if( $args{DiscardAllSpaces}) { croak "cannot use both discard_all_spaces and discard_spaces_in" if( $args{DiscardSpacesIn}); $self->{twig_discard_all_spaces}=1; delete $args{DiscardAllSpaces}; } if( $args{DiscardSpacesIn}) { $self->{twig_keep_spaces}=1; $self->{twig_discard_spaces_in}={}; my @tags= @{$args{DiscardSpacesIn}}; foreach my $tag (@tags) { $self->{twig_discard_spaces_in}->{$tag}=1; } delete $args{DiscardSpacesIn}; } # discard spaces by default $self->{twig_discard_spaces}= 1 unless( $self->{twig_keep_spaces}); $args{Comments}||= $COMMENTS_DEFAULT; if( $args{Comments} eq 'drop') { $self->{twig_keep_comments}= 0; } elsif( $args{Comments} eq 'keep') { $self->{twig_keep_comments}= 1; } elsif( $args{Comments} eq 'process') { $self->{twig_process_comments}= 1; } else { croak "wrong value for comments argument: '$args{Comments}' (should be 'drop', 'keep' or 'process')"; } delete $args{Comments}; $args{Pi}||= $PI_DEFAULT; if( $args{Pi} eq 'drop') { $self->{twig_keep_pi}= 0; } elsif( $args{Pi} eq 'keep') { $self->{twig_keep_pi}= 1; } elsif( $args{Pi} eq 'process') { $self->{twig_process_pi}= 1; } else { croak "wrong value for pi argument: '$args{Pi}' (should be 'drop', 'keep' or 'process')"; } delete $args{Pi}; if( $args{KeepEncoding}) { # set it in XML::Twig::Elt so print functions know what to do $self->set_keep_encoding( 1); $self->{parse_start_tag}= $args{ParseStartTag} || \&_parse_start_tag; delete $args{ParseStartTag} if defined( $args{ParseStartTag}) ; delete $args{KeepEncoding}; } else { $self->set_keep_encoding( 0); if( $args{ParseStartTag}) { $self->{parse_start_tag}= $args{ParseStartTag}; } else { delete $self->{parse_start_tag}; } delete $args{ParseStartTag}; } if( $args{OutputFilter}) { $self->set_output_filter( $args{OutputFilter}); delete $args{OutputFilter}; } else { $self->set_output_filter( 0); } if( $args{RemoveCdata}) { $self->set_remove_cdata( $args{RemoveCdata}); delete $args{RemoveCdata}; } else { $self->set_remove_cdata( 0); } if( $args{OutputTextFilter}) { $self->set_output_text_filter( $args{OutputTextFilter}); delete $args{OutputTextFilter}; } else { $self->set_output_text_filter( 0); } if( $args{KeepAttsOrder}) { $self->{keep_atts_order}= $args{KeepAttsOrder}; if( _use( 'Tie::IxHash')) { $self->set_keep_atts_order( $self->{keep_atts_order}); } else { croak "Tie::IxHash not available, option keep_atts_order not allowed"; } } else { $self->set_keep_atts_order( 0); } if( $args{PrettyPrint}) { $self->set_pretty_print( $args{PrettyPrint}); } if( $args{EscapeGt}) { $self->escape_gt( $args{EscapeGt}); } if( $args{EmptyTags}) { $self->set_empty_tag_style( $args{EmptyTags}) } if( exists $args{Id}) { $ID= $args{Id}; delete $args{ID}; } if( $args{NoProlog}) { $self->{no_prolog}= 1; delete $args{NoProlog}; } if( $args{DoNotOutputDTD}) { $self->{no_dtd_output}= 1; delete $args{DoNotOutputDTD}; } if( $args{LoadDTD}) { $self->{twig_read_external_dtd}= 1; delete $args{LoadDTD}; } if( $args{CharHandler}) { $self->setCharHandler( $args{CharHandler}); delete $args{CharHandler}; } if( $args{InputFilter}) { $self->set_input_filter( $args{InputFilter}); delete $args{InputFilter}; } if( $args{NoExpand}) { $self->setHandlers( %twig_noexpand_handlers); $self->{twig_no_expand}=1; } if( my $output_encoding= $args{OutputEncoding}) { $self->set_output_encoding( $output_encoding); delete $args{OutputFilter}; } if( my $tdh= $args{TopDownHandlers}) { $self->{twig_tdh}=1; delete $args{TopDownHandlers}; } if( my $acc_a= $args{AttAccessors}) { $self->att_accessors( @$acc_a); } if( my $acc_e= $args{EltAccessors}) { $self->elt_accessors( isa( $acc_e, 'ARRAY') ? @$acc_e : $acc_e); } if( my $acc_f= $args{FieldAccessors}) { $self->field_accessors( isa( $acc_f, 'ARRAY') ? @$acc_f : $acc_f); } if( $args{UseTidy}) { $self->{use_tidy}= 1; } $self->{tidy_options}= $args{TidyOptions} || {}; if( $args{OutputHtmlDoctype}) { $self->{html_doctype}= 1; } $self->set_quote( $args{Quote} || 'double'); # set handlers if( $self->{twig_roots}) { if( $self->{twig_default_print}) { if( $self->{twig_keep_encoding}) { $self->setHandlers( %twig_handlers_roots_print_original); } else { $self->setHandlers( %twig_handlers_roots_print); } } else { $self->setHandlers( %twig_handlers_roots); } } else { $self->setHandlers( %twig_handlers); } # XML::Parser::Expat does not like these handler to be set. So in order to # use the various sets of handlers on XML::Parser or XML::Parser::Expat # objects when needed, these ones have to be set only once, here, at # XML::Parser level $self->setHandlers( Init => \&_twig_init, Final => \&_twig_final); $self->{twig_entity_list}= XML::Twig::Entity_list->new; $self->{twig_notation_list}= XML::Twig::Notation_list->new; $self->{twig_id}= $ID; $self->{twig_stored_spaces}=''; $self->{twig_autoflush}= 1; # auto flush by default $self->{twig}= $self; if( $weakrefs) { weaken( $self->{twig}); } return $self; } sub parse { my $t= shift; # if called as a class method, calls nparse, which creates the twig then parses it if( !ref( $t) || !isa( $t, 'XML::Twig')) { return $t->nparse( @_); } # requires 5.006 at least (or the ${^UNICODE} causes a problem) # > perl 5.5 # trap underlying bug in IO::Handle (see RT #17500) # > perl 5.5 # croak if perl 5.8+, -CD (or PERL_UNICODE set to D) and parsing a pipe # > perl 5.5 if( $perl_version>=5.008 && ${^UNICODE} && (${^UNICODE} & 24) && isa( $_[0], 'GLOB') && -p $_[0] ) # > perl 5.5 { croak "cannot parse the output of a pipe when perl is set to use the UTF8 perlIO layer\n" # > perl 5.5 . "set the environment variable PERL_UNICODE or use the -C option (see perldoc perlrun)\n" # > perl 5.5 . "not to include 'D'"; # > perl 5.5 } # > perl 5.5 $t= eval { $t->SUPER::parse( @_); }; if( !$t && $@=~m{(syntax error at line 1, column 0, byte 0|not well-formed \(invalid token\) at line 1, column 1, byte 1)} && -f $_[0] && ( ! ref( $_[0]) || ref( $_[0])) ne 'GLOB' # -f works on a filehandle, so this make sure $_[0] is a real file ) { croak "you seem to have used the parse method on a filename ($_[0]), you probably want parsefile instead"; } return _checked_parse_result( $t, $@); } sub parsefile { my $t= shift; if( -f $_[0] && ! -s $_[0]) { return _checked_parse_result( undef, "empty file '$_[0]'"); } $t= eval { $t->SUPER::parsefile( @_); }; return _checked_parse_result( $t, $@); } sub _checked_parse_result { my( $t, $returned)= @_; if( !$t) { if( isa( $returned, 'XML::Twig') && $returned->{twig_finish_now}) { $t= $returned; delete $t->{twig_finish_now}; return $t->_twig_final; } else { _croak( $returned, 0); } } $active_twig= $t; return $t; } sub active_twig { return $active_twig; } sub finish_now { my $t= shift; $t->{twig_finish_now}=1; # XML::Parser 2.43 changed xpcroak in a way that caused test failures for XML::Twig # the change was reverted in 2.44, but this is here to ensure that tests pass with 2.43 if( $XML::Parser::VERSION == 2.43) { no warnings; $t->parser->{twig_error}= $t; *XML::Parser::Expat::xpcroak= sub { die $_[0]->{twig_error}; }; die $t; } else { die $t; } } sub parsefile_inplace { shift->_parse_inplace( parsefile => @_); } sub parsefile_html_inplace { shift->_parse_inplace( parsefile_html => @_); } sub _parse_inplace { my( $t, $method, $file, $suffix)= @_; _use( 'File::Temp') || croak "need File::Temp to use inplace methods\n"; _use( 'File::Basename'); my $tmpdir= dirname( $file); my( $tmpfh, $tmpfile)= File::Temp::tempfile( DIR => $tmpdir); my $original_fh= select $tmpfh; # we can only use binmode :utf8 if perl was compiled with useperlio # might be a problem if keep_encoding used but the file is already in utf8 if( $perl_version > 5.006 && !$t->{twig_keep_encoding} && _use_perlio()) { binmode( $tmpfh, ":utf8" ); } $t->$method( $file); select $original_fh; close $tmpfh; my $mode= (stat( $file))[2] & oct(7777); chmod $mode, $tmpfile or croak "cannot change temp file mode to $mode: $!"; if( $suffix) { my $backup; if( $suffix=~ m{\*}) { ($backup = $suffix) =~ s/\*/$file/g; } else { $backup= $file . $suffix; } rename( $file, $backup) or croak "cannot backup initial file ($file) to $backup: $!"; } rename( $tmpfile, $file) or croak "cannot rename temp file ($tmpfile) to initial file ($file): $!"; return $t; } sub parseurl { my $t= shift; $t->_parseurl( 0, @_); } sub safe_parseurl { my $t= shift; $t->_parseurl( 1, @_); } sub safe_parsefile_html { my $t= shift; eval { $t->parsefile_html( @_); }; return $@ ? $t->_reset_twig_after_error : $t; } sub safe_parseurl_html { my $t= shift; _use( 'LWP::Simple') or croak "missing LWP::Simple"; eval { $t->parse_html( LWP::Simple::get( shift()), @_); } ; return $@ ? $t->_reset_twig_after_error : $t; } sub parseurl_html { my $t= shift; _use( 'LWP::Simple') or croak "missing LWP::Simple"; $t->parse_html( LWP::Simple::get( shift()), @_); } # uses eval to catch the parser's death sub safe_parse_html { my $t= shift; eval { $t->parse_html( @_); } ; return $@ ? $t->_reset_twig_after_error : $t; } sub parsefile_html { my $t= shift; my $file= shift; my $indent= $t->{ErrorContext} ? 1 : 0; $t->set_empty_tag_style( 'html'); my $html2xml= $t->{use_tidy} ? \&_tidy_html : \&_html2xml; my $options= $t->{use_tidy} ? $t->{tidy_options} || {} : { indent => $indent, html_doctype => $t->{html_doctype} }; $t->parse( $html2xml->( _slurp( $file), $options), @_); return $t; } sub parse_html { my $t= shift; my $options= ref $_[0] && ref $_[0] eq 'HASH' ? shift() : {}; my $use_tidy= exists $options->{use_tidy} ? $options->{use_tidy} : $t->{use_tidy}; my $content= shift; my $indent= $t->{ErrorContext} ? 1 : 0; $t->set_empty_tag_style( 'html'); my $html2xml= $use_tidy ? \&_tidy_html : \&_html2xml; my $conv_options= $use_tidy ? $t->{tidy_options} || {} : { indent => $indent, html_doctype => $t->{html_doctype} }; $t->parse( $html2xml->( isa( $content, 'GLOB') ? _slurp_fh( $content) : $content, $conv_options), @_); return $t; } sub xparse { my $t= shift; my $to_parse= $_[0]; if( isa( $to_parse, 'GLOB')) { $t->parse( @_); } elsif( $to_parse=~ m{^\s*<}) { $to_parse=~ m{<html}i ? $t->_parse_as_xml_or_html( @_) : $t->parse( @_); } elsif( $to_parse=~ m{^\w+://.*\.html?$}) { _use( 'LWP::Simple') or croak "missing LWP::Simple"; $t->_parse_as_xml_or_html( LWP::Simple::get( shift()), @_); } elsif( $to_parse=~ m{^\w+://}) { _use( 'LWP::Simple') or croak "missing LWP::Simple"; my $doc= LWP::Simple::get( shift); if( ! defined $doc) { $doc=''; } my $xml_parse_ok= $t->safe_parse( $doc, @_); if( $xml_parse_ok) { return $xml_parse_ok; } else { my $diag= $@; if( $doc=~ m{<html}i) { $t->parse_html( $doc, @_); } else { croak $diag; } } } elsif( $to_parse=~ m{\.html?$}) { my $content= _slurp( shift); $t->_parse_as_xml_or_html( $content, @_); } else { $t->parsefile( @_); } } sub _parse_as_xml_or_html { my $t= shift; if( _is_well_formed_xml( $_[0])) { $t->parse( @_) } else { my $html2xml= $t->{use_tidy} ? \&_tidy_html : \&_html2xml; my $options= $t->{use_tidy} ? $t->{tidy_options} || {} : { indent => 0, html_doctype => $t->{html_doctype} }; my $html= $html2xml->( $_[0], $options, @_); if( _is_well_formed_xml( $html)) { $t->parse( $html); } else { croak $@; } # can't really test this because HTML::Parser or HTML::Tidy may change how they deal with bas HTML between versions } } { my $parser; sub _is_well_formed_xml { $parser ||= XML::Parser->new; eval { $parser->parse( $_[0]); }; return $@ ? 0 : 1; } } sub nparse { my $class= shift; my $to_parse= pop; $class->new( @_)->xparse( $to_parse); } sub nparse_pp { shift()->nparse( pretty_print => 'indented', @_); } sub nparse_e { shift()->nparse( error_context => 1, @_); } sub nparse_ppe { shift()->nparse( pretty_print => 'indented', error_context => 1, @_); } sub _html2xml { my( $html, $options)= @_; _use( 'HTML::TreeBuilder', '3.13') or croak "cannot parse HTML: missing HTML::TreeBuilder v >= 3.13\n"; my $tree= HTML::TreeBuilder->new; $tree->ignore_ignorable_whitespace( 0); $tree->ignore_unknown( 0); $tree->no_space_compacting( 1); $tree->store_comments( 1); $tree->store_pis(1); $tree->parse( $html); $tree->eof; my $xml=''; if( $options->{html_doctype} && exists $tree->{_decl} ) { my $decl= $tree->{_decl}->as_XML; # first try to fix declarations that are missing the SYSTEM part $decl =~ s{^\s*<!DOCTYPE \s+ ((?i)html) \s+ PUBLIC \s+ "([^"]*)" \s* >} { my $system= $HTML_DECL{$2} || $HTML_DECL{$DEFAULT_HTML_TYPE}; qq{<!DOCTYPE $1 PUBLIC "$2" "$system">} }xe; # then check that the declaration looks OK (so it parses), if not remove it, # better to parse without the declaration than to die stupidly if( $decl =~ m{<!DOCTYPE \s+ (?i:HTML) (\s+ PUBLIC \s+ "[^"]*" \s+ (SYSTEM \s+)? "[^"]*")? \s*>}x # PUBLIC then SYSTEM || $decl =~ m{<!DOCTYPE \s+ (?i:HTML) \s+ SYSTEM \s+ "[^"]*" \s*>}x # just SYSTEM ) { $xml= $decl; } } $xml.= _as_XML( $tree); _fix_xml( $tree, \$xml); if( $options->{indent}) { _indent_xhtml( \$xml); } $tree->delete; $xml=~ s{\s+$}{}s; # trim end return $xml; } sub _tidy_html { my( $html, $options)= @_; _use( 'HTML::Tidy') or croak "cannot cleanup HTML using HTML::Tidy (required by the use_tidy option): $@\n"; ; my $TIDY_DEFAULTS= { output_xhtml => 1, # duh! tidy_mark => 0, # do not add the "generated by tidy" comment numeric_entities => 1, char_encoding => 'utf8', bare => 1, clean => 1, doctype => 'transitional', fix_backslash => 1, merge_divs => 0, merge_spans => 0, sort_attributes => 'alpha', indent => 0, wrap => 0, break_before_br => 0, }; $options ||= {}; my $tidy_options= { %$TIDY_DEFAULTS, %$options}; my $tidy = HTML::Tidy->new( $tidy_options); $tidy->ignore( type => 1, type => 2 ); # 1 is TIDY_WARNING, 2 is TIDY_ERROR, not clean my $xml= $tidy->clean( $html ); return $xml; } { my %xml_parser_encoding; sub _fix_xml { my( $tree, $xml)= @_; # $xml is a ref to the xml string my $max_tries=5; my $add_decl; while( ! _check_xml( $xml) && $max_tries--) { # a couple of fixes for weird HTML::TreeBuilder errors if( $@=~ m{^\s*xml (or text )?declaration not at start of (external )?entity}i) { $$xml=~ s{<\?xml.*?\?>}{}g; #warn " fixed xml declaration in the wrong place\n"; } elsif( $@=~ m{undefined entity}) { $$xml=~ s{&(amp;)?Amp;}{&}g if $HTML::TreeBuilder::VERSION < 4.00; if( _use( 'HTML::Entities::Numbered')) { $$xml=name2hex_xml( $$xml); } $$xml=~ s{&(\w+);}{ my $ent= $1; if( $ent !~ m{^(amp|lt|gt|apos|quote)$}) { "&$ent;" } }eg; } elsif( $@=~ m{&Amp; used in html}) # if $Amp; is used instead of & then HTML::TreeBuilder's as_xml is tripped (old version) { $$xml=~ s{&(amp;)?Amp;}{&}g if $HTML::TreeBuilder::VERSION < 4.00; } elsif( $@=~ m{^\s*not well-formed \(invalid token\)}) { if( $HTML::TreeBuilder::VERSION < 4.00) { $$xml=~ s{&(amp;)?Amp;}{&}g; $$xml=~ s{(<[^>]* )(\d+=)"}{$1a$2"}g; # <table 1> comes out as <table 1="1">, "fix the attribute } my $q= '<img "=""" '; # extracted so vim doesn't get confused if( _use( 'HTML::Entities::Numbered')) { $$xml=name2hex_xml( $$xml); } if( $$xml=~ m{$q}) { $$xml=~ s{$q}{<img }g; # happens with <img src="foo.png"" ... } else { my $encoding= _encoding_from_meta( $tree); unless( keys %xml_parser_encoding) { %xml_parser_encoding= _xml_parser_encodings(); } if( ! $add_decl) { if( $xml_parser_encoding{$encoding}) { $add_decl=1; } elsif( $encoding eq 'euc-jp' && $xml_parser_encoding{'x-euc-jp-jisx0221'}) { $encoding="x-euc-jp-jisx0221"; $add_decl=1;} elsif( $encoding eq 'shift-jis' && $xml_parser_encoding{'x-sjis-jisx0221'}) { $encoding="x-sjis-jisx0221"; $add_decl=1;} if( $add_decl) { $$xml=~ s{^(<\?xml.*?\?>)?}{<?xml version="1.0" encoding="$encoding"?>}s; #warn " added decl (encoding $encoding)\n"; } else { $$xml=~ s{^(<\?xml.*?\?>)?}{}s; #warn " converting to utf8 from $encoding\n"; $$xml= _to_utf8( $encoding, $$xml); } } else { $$xml=~ s{^(<\?xml.*?\?>)?}{}s; #warn " converting to utf8 from $encoding\n"; $$xml= _to_utf8( $encoding, $$xml); } } } } # some versions of HTML::TreeBuilder escape CDATA sections $$xml=~ s{(<!\[CDATA\[.*?\]\]>)}{_unescape_cdata( $1)}eg; } sub _xml_parser_encodings { my @encodings=( 'iso-8859-1'); # this one is included by default, there is no map for it in @INC foreach my $inc (@INC) { push @encodings, map { basename( $_, '.enc') } glob( File::Spec->catdir( $inc => XML => Parser => Encodings => '*.enc')); } return map { $_ => 1 } @encodings; } } sub _unescape_cdata { my( $cdata)= @_; $cdata=~s{<}{<}g; $cdata=~s{>}{>}g; $cdata=~s{&}{&}g; return $cdata; } sub _as_XML { # fork of HTML::Element::as_XML, which is a little too buggy and inconsistent between versions for my liking my ($elt) = @_; my $xml= ''; my $empty_element_map = $elt->_empty_element_map; my ( $tag, $node, $start ); # per-iteration scratch $elt->traverse( sub { ( $node, $start ) = @_; if ( ref $node ) { # it's an element $tag = $node->{'_tag'}; if ($start) { # on the way in foreach my $att ( grep { ! m{^(_|/$)} } keys %$node ) { # fix attribute names instead of dying my $new_att= $att; if( $att=~ m{^\d}) { $new_att= "a$att"; } $new_att=~ s{[^\w\d:_-]}{}g; $new_att ||= 'a'; if( $new_att ne $att) { $node->{$new_att}= delete $node->{$att}; } } if ( $empty_element_map->{$tag} && (!@{ $node->{'_content'} || []}) ) { $xml.= $node->starttag_XML( undef, 1 ); } else { $xml.= $node->starttag_XML(undef); } } else { # on the way out unless ( $empty_element_map->{$tag} and !@{ $node->{'_content'} || [] } ) { $xml.= $node->endtag_XML(); } # otherwise it will have been an <... /> tag. } } elsif( $node=~ /<!\[CDATA\[/) # the content includes CDATA { foreach my $chunk (split /(<!\[CDATA\[.*?\]\]>)/s, $node) # chunks are CDATA sections or normal text { $xml.= $chunk =~ m{<!\[CDATA\[} ? $chunk : _xml_escape( $chunk); } } else # it's just text { $xml .= _xml_escape($node); } 1; # keep traversing } ); return $xml; } sub _xml_escape { my( $html)= @_; $html =~ s{&(?! # An ampersand that isn't followed by... ( \#[0-9]+; | # A hash mark, digits and semicolon, or \#x[0-9a-fA-F]+; | # A hash mark, "x", hex digits and semicolon, or [\w]+; # A valid unicode entity name and semicolon ) ) } {&}gx if 0; # Needs to be escaped to amp $html=~ s{&}{&}g; # in old versions of HTML::TreeBuilder & can come out as &Amp; if( $HTML::TreeBuilder::VERSION && $HTML::TreeBuilder::VERSION <= 3.23) { $html=~ s{&Amp;}{&}g; } # simple character escapes $html =~ s/</</g; $html =~ s/>/>/g; $html =~ s/"/"/g; $html =~ s/'/'/g; return $html; } sub _check_xml { my( $xml)= @_; # $xml is a ref to the xml string my $ok= eval { XML::Parser->new->parse( $$xml); }; #if( $ok) { warn " parse OK\n"; } return $ok; } sub _encoding_from_meta { my( $tree)= @_; my $enc="iso-8859-1"; my @meta= $tree->find( 'meta'); foreach my $meta (@meta) { if( $meta->{'http-equiv'} && ($meta->{'http-equiv'} =~ m{^\s*content-type\s*}i) && $meta->{content} && ($meta->{content} =~ m{^\s*text/html\s*;\s*charset\s*=\s*(\S*)\s*}i) ) { $enc= lc $1; #warn " encoding from meta tag is '$enc'\n"; last; } } return $enc; } { sub _to_utf8 { my( $encoding, $string)= @_; local $SIG{__DIE__}; if( _use( 'Encode')) { Encode::from_to( $string, $encoding => 'utf8', 0x0400); } # 0x0400 is Encode::FB_XMLCREF elsif( _use( 'Text::Iconv')) { my $converter = eval { Text::Iconv->new( $encoding => "utf8") }; if( $converter) { $string= $converter->convert( $string); } } elsif( _use( 'Unicode::Map8') && _use( 'Unicode::String')) { my $map= Unicode::Map8->new( $encoding); $string= $map->tou( $string)->utf8; } $string=~ s{[\x00-\x08\x0B\x0C\x0E-\x1F]}{}g; # get rid of control chars, portable in 5.6 return $string; } } sub _indent_xhtml { my( $xhtml)= @_; # $xhtml is a ref my %block_tag= map { $_ => 1 } qw( html head meta title link script base body h1 h2 h3 h4 h5 h6 p br address blockquote pre ol ul li dd dl dt table tr td th tbody tfoot thead col colgroup caption div frame frameset hr ); my $level=0; $$xhtml=~ s{( (?:<!(?:--.*?-->|[CDATA[.*?]]>)) # ignore comments and CDATA sections | <(\w+)((?:\s+\w+\s*=\s*(?:"[^"]*"|'[^']*'))*\s*/>) # empty tag | <(\w+) # start tag |</(\w+) # end tag ) } { if( $2 && $block_tag{$2}) { my $indent= " " x $level; "\n$indent<$2$3"; } elsif( $4 && $block_tag{$4}) { my $indent= " " x $level; $level++ unless( $4=~ m{/>}); my $nl= $4 eq 'html' ? '' : "\n"; "$nl$indent<$4"; } elsif( $5 && $block_tag{$5}) { $level--; "</$5"; } else { $1; } }xesg; } sub add_stylesheet { my( $t, $type, $href)= @_; my %text_type= map { $_ => 1 } qw( xsl css); my $ss= $t->{twig_elt_class}->new( $PI); if( $text_type{$type}) { $ss->_set_pi( 'xml-stylesheet', qq{type="text/$type" href="$href"}); } else { croak "unsupported style sheet type '$type'"; } $t->_add_cpi_outside_of_root( leading_cpi => $ss); return $t; } { my %used; # module => 1 if require ok, 0 otherwise my %disallowed; # for testing, refuses to _use modules in this hash sub _disallow_use ## no critic (Subroutines::ProhibitNestedSubs); { my( @modules)= @_; $disallowed{$_}= 1 foreach (@modules); } sub _allow_use ## no critic (Subroutines::ProhibitNestedSubs); { my( @modules)= @_; $disallowed{$_}= 0 foreach (@modules); } sub _use ## no critic (Subroutines::ProhibitNestedSubs); { my( $module, $version)= @_; $version ||= 0; if( $disallowed{$module}) { return 0; } if( $used{$module}) { return 1; } if( eval "require $module") { import $module; $used{$module}= 1; # no critic ProhibitStringyEval if( $version) { ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; if( ${"${module}::VERSION"} >= $version ) { return 1; } else { return 0; } } else { return 1; } } else { $used{$module}= 0; return 0; } } } # used to solve the [n] predicates while avoiding getting the entire list # needs a prototype to accept passing bare blocks sub _first_n(&$@) ## no critic (Subroutines::ProhibitSubroutinePrototypes); { my $coderef= shift; my $n= shift; my $i=0; if( $n > 0) { foreach (@_) { if( &$coderef) { $i++; return $_ if( $i == $n); } } } elsif( $n < 0) { foreach (reverse @_) { if( &$coderef) { $i--; return $_ if( $i == $n); } } } else { croak "illegal position number 0"; } return undef; } sub _slurp_uri { my( $uri, $base)= @_; if( $uri=~ m{^\w+://}) { _use( 'LWP::Simple'); return LWP::Simple::get( $uri); } else { return _slurp( _based_filename( $uri, $base)); } } sub _based_filename { my( $filename, $base)= @_; # cf. XML/Parser.pm's file_ext_ent_handler if (defined($base) and not ($filename =~ m{^(?:[\\/]|\w+:)})) { my $newpath = $base; $newpath =~ s{[^\\/:]*$}{$filename}; $filename = $newpath; } return $filename; } sub _slurp { my( $filename)= @_; my $to_slurp; open( $to_slurp, "<$filename") or croak "cannot open '$filename': $!"; local $/= undef; my $content= <$to_slurp>; close $to_slurp; return $content; } sub _slurp_fh { my( $fh)= @_; local $/= undef; my $content= <$fh>; return $content; } # I should really add extra options to allow better configuration of the # LWP::UserAgent object # this method forks (except on VMS!) # - the child gets the data and copies it to the pipe, # - the parent reads the stream and sends it to XML::Parser # the data is cut it chunks the size of the XML::Parser::Expat buffer # the method returns the twig and the status sub _parseurl { my( $t, $safe, $url, $agent)= @_; _use( 'LWP') || croak "LWP not available, needed to use parseurl methods"; if( $^O ne 'VMS') { pipe( README, WRITEME) or croak "cannot create connected pipes: $!"; if( my $pid= fork) { # parent code: parse the incoming file close WRITEME; # no need to write my $result= $safe ? $t->safe_parse( \*README) : $t->parse( \*README); close README; return $@ ? 0 : $t; } else { # child close README; # no need to read local $|=1; $agent ||= LWP::UserAgent->new; my $request = HTTP::Request->new( GET => $url); # _pass_url_content is called with chunks of data the same size as # the XML::Parser buffer my $response = $agent->request( $request, sub { _pass_url_content( \*WRITEME, @_); }, $BUFSIZE); $response->is_success or croak "$url ", $response->message; close WRITEME; CORE::exit(); # CORE is there for mod_perl (which redefines exit) } } else { # VMS branch (hard to test!) local $|=1; $agent ||= LWP::UserAgent->new; my $request = HTTP::Request->new( GET => $url); my $response = $agent->request( $request); $response->is_success or croak "$url ", $response->message; my $result= $safe ? $t->safe_parse($response->content) : $t->parse($response->content); return $@ ? 0 : $t; } } # get the (hopefully!) XML data from the URL and sub _pass_url_content { my( $fh, $data, $response, $protocol)= @_; print {$fh} $data; } sub add_options { my %args= map { $_, 1 } @_; %args= _normalize_args( %args); foreach (keys %args) { $valid_option{$_}++; } } sub _pretty_print_styles { return XML::Twig::Elt::_pretty_print_styles(); } sub _twig_store_internal_dtd { # warn " in _twig_store_internal_dtd...\n"; # DEBUG handler my( $p, $string)= @_; my $t= $p->{twig}; if( $t->{twig_keep_encoding}) { $string= $p->original_string(); } $t->{twig_doctype}->{internal} .= $string; return; } sub _twig_stop_storing_internal_dtd { # warn " in _twig_stop_storing_internal_dtd...\n"; # DEBUG handler my $p= shift; if( @saved_default_handler && defined $saved_default_handler[1]) { $p->setHandlers( @saved_default_handler); } else { $p->setHandlers( Default => undef); } $p->{twig}->{twig_doctype}->{internal}=~ s{^\s*\[}{}; $p->{twig}->{twig_doctype}->{internal}=~ s{\]\s*$}{}; return; } sub _twig_doctype_fin_print { # warn " in _twig_doctype_fin_print...\n"; # DEBUG handler my( $p)= shift; if( $p->{twig}->{twig_doctype}->{has_internal} && !$expat_1_95_2) { print ' ]>'; } return; } sub _normalize_args { my %normalized_args; while( my $key= shift ) { $key= join '', map { ucfirst } split /_/, $key; #$key= "Twig".$key unless( substr( $key, 0, 4) eq 'Twig'); $normalized_args{$key}= shift ; } return %normalized_args; } sub _is_fh { return unless $_[0]; return $_[0] if( isa( $_[0], 'GLOB') || isa( $_[0], 'IO::Scalar')); } sub _set_handler { my( $handlers, $whole_path, $handler)= @_; my $H_SPECIAL = qr{($ALL|$DEFAULT|$COMMENT|$TEXT)}; my $H_PI = qr{(\?|$PI)\s*(([^\s]*)\s*)}; my $H_LEVEL = qr{level \s* \( \s* ([0-9]+) \s* \)}x; my $H_REGEXP = qr{\(\?([\^xism]*)(-[\^xism]*)?:(.*)\)}x; my $H_XPATH = qr{(/?/?$REG_TAG_PART? \s* ($REG_PREDICATE\s*)?)+}x; my $prev_handler; my $cpath= $whole_path; #warn "\$cpath: '$cpath\n"; while( $cpath && $cpath=~ s{^\s*($H_SPECIAL|$H_PI|$H_LEVEL|$H_REGEXP|$H_XPATH)\s*($|\|)}{}) { my $path= $1; #warn "\$cpath: '$cpath' - $path: '$path'\n"; $prev_handler ||= $handlers->{handlers}->{string}->{$path} || undef; # $prev_handler gets the first found handler _set_special_handler ( $handlers, $path, $handler, $prev_handler) || _set_pi_handler ( $handlers, $path, $handler, $prev_handler) || _set_level_handler ( $handlers, $path, $handler, $prev_handler) || _set_regexp_handler ( $handlers, $path, $handler, $prev_handler) || _set_xpath_handler ( $handlers, $path, $handler, $prev_handler) || croak "unrecognized expression in handler: '$whole_path'"; # this both takes care of the simple (gi) handlers and store # the handler code reference for other handlers $handlers->{handlers}->{string}->{$path}= $handler; } if( $cpath) { croak "unrecognized expression in handler: '$whole_path'"; } return $prev_handler; } sub _set_special_handler { my( $handlers, $path, $handler, $prev_handler)= @_; if( $path =~ m{^\s*($ALL|$DEFAULT|$COMMENT|$TEXT)\s*$}io ) { $handlers->{handlers}->{$1}= $handler; return 1; } else { return 0; } } sub _set_xpath_handler { my( $handlers, $path, $handler, $prev_handler)= @_; if( my $handler_data= _parse_xpath_handler( $path, $handler)) { _add_handler( $handlers, $handler_data, $path, $prev_handler); return 1; } else { return 0; } } sub _add_handler { my( $handlers, $handler_data, $path, $prev_handler)= @_; my $tag= $handler_data->{tag}; my @handlers= $handlers->{xpath_handler}->{$tag} ? @{$handlers->{xpath_handler}->{$tag}} : (); if( $prev_handler) { @handlers= grep { $_->{path} ne $path } @handlers; } push @handlers, $handler_data if( $handler_data->{handler}); if( @handlers > 1) { @handlers= sort { (($b->{score}->{type} || 0) <=> ($a->{score}->{type} || 0)) || (($b->{score}->{anchored} || 0) <=> ($a->{score}->{anchored} || 0)) || (($b->{score}->{steps} || 0) <=> ($a->{score}->{steps} || 0)) || (($b->{score}->{predicates} || 0) <=> ($a->{score}->{predicates} || 0)) || (($b->{score}->{tests} || 0) <=> ($a->{score}->{tests} || 0)) || ($a->{path} cmp $b->{path}) } @handlers; } $handlers->{xpath_handler}->{$tag}= \@handlers; } sub _set_pi_handler { my( $handlers, $path, $handler, $prev_handler)= @_; # PI conditions ( '?target' => \&handler or '?' => \&handler # or '#PItarget' => \&handler or '#PI' => \&handler) if( $path=~ /^\s*(?:\?|$PI)\s*(?:([^\s]*)\s*)$/) { my $target= $1 || ''; # update the path_handlers count, knowing that # either the previous or the new handler can be undef $handlers->{pi_handlers}->{$1}= $handler; return 1; } else { return 0; } } sub _set_level_handler { my( $handlers, $path, $handler, $prev_handler)= @_; if( $path =~ m{^ \s* level \s* \( \s* ([0-9]+) \s* \) \s* $}ox ) { my $level= $1; my $sub= sub { my( $stack)= @_; return( ($stack->[-1]->{$ST_TAG} !~ m{^#}) && (scalar @$stack == $level + 1) ) }; my $handler_data= { tag=> '*', score => { type => $LEVEL_TRIGGER}, trigger => $sub, path => $path, handler => $handler, test_on_text => 0 }; _add_handler( $handlers, $handler_data, $path, $prev_handler); return 1; } else { return 0; } } sub _set_regexp_handler { my( $handlers, $path, $handler, $prev_handler)= @_; # if the expression was a regexp it is now a string (it was stringified when it became a hash key) if( $path=~ m{^\(\?([\^xism]*)(?:-[\^xism]*)?:(.*)\)$}) { my $regexp= qr/(?$1:$2)/; # convert it back into a regexp my $sub= sub { my( $stack)= @_; return( $stack->[-1]->{$ST_TAG} =~ $regexp ) }; my $handler_data= { tag=> '*', score => { type => $REGEXP_TRIGGER} , trigger => $sub, path => $path, handler => $handler, test_on_text => 0 }; _add_handler( $handlers, $handler_data, $path, $prev_handler); return 1; } else { return 0; } } my $DEBUG_HANDLER= 0; # 0 or 1 (output the handler checking code) or 2 (super verbose) my $handler_string; # store the handler itself sub _set_debug_handler { $DEBUG_HANDLER= shift; } sub _warn_debug_handler { if( $DEBUG_HANDLER < 3) { warn @_; } else { $handler_string .= join( '', @_); } } sub _return_debug_handler { my $string= $handler_string; $handler_string=''; return $string; } sub _parse_xpath_handler { my( $xpath, $handler)= @_; my $xpath_original= $xpath; if( $DEBUG_HANDLER >=1) { _warn_debug_handler( "\n\nparsing path '$xpath'\n"); } my $path_to_check= $xpath; $path_to_check=~ s{/?/?$REG_TAG_PART?\s*(?:$REG_PREDICATE\s*)?}{}g; if( $DEBUG_HANDLER && $path_to_check=~ /\S/) { _warn_debug_handler( "left: $path_to_check\n"); } return if( $path_to_check=~ /\S/); (my $xpath_to_display= $xpath)=~ s{(["{}'\[\]\@\$])}{\\$1}g; my @xpath_steps; my $last_token_is_sep; while( $xpath=~ s{^\s* ( (//?) # separator | (?:$REG_TAG_PART\s*(?:$REG_PREDICATE\s*)?) # tag name and optional predicate | (?:$REG_PREDICATE) # just a predicate ) } {}x ) { # check that we have alternating separators and steps if( $2) # found a separator { if( $last_token_is_sep) { return 0; } # 2 separators in a row $last_token_is_sep= 1; } else { if( defined( $last_token_is_sep) && !$last_token_is_sep) { return 0; } # 2 steps in a row $last_token_is_sep= 0; } push @xpath_steps, $1; } if( $last_token_is_sep) { return 0; } # expression cannot end with a separator my $i=-1; my $perlfunc= _join_n( $NO_WARNINGS . ';', q|my( $stack)= @_; |, q|my @current_elts= (scalar @$stack); |, q|my @new_current_elts; |, q|my $elt; |, ($DEBUG_HANDLER >= 1) && (qq#warn q{checking path '$xpath_to_display'\n};#), ); my $last_tag=''; my $anchored= $xpath_original=~ m{^\s*/(?!/)} ? 1 : 0; my $score={ type => $XPATH_TRIGGER, anchored => $anchored }; my $flag= { test_on_text => 0 }; my $sep='/'; # '/' or '//' while( my $xpath_step= pop @xpath_steps) { my( $tag, $predicate)= $xpath_step =~ m{^($REG_TAG_PART)?(?:\[(.*)\])?\s*$}; $score->{steps}++; $tag||='*'; my $warn_empty_stack= $DEBUG_HANDLER >= 2 ? qq{warn "return with empty stack\\n";} : ''; if( $predicate) { if( $DEBUG_HANDLER >= 2) { _warn_debug_handler( "predicate is: '$predicate'\n"); } # changes $predicate (from an XPath expression to a Perl one) if( $predicate=~ m{^\s*$REG_NUMBER\s*$}) { croak "position selector [$predicate] not supported on twig_handlers"; } _parse_predicate_in_handler( $predicate, $flag, $score); if( $DEBUG_HANDLER >= 2) { _warn_debug_handler( "predicate becomes: '$predicate'\n"); } } my $tag_cond= _tag_cond( $tag); my $cond= join( " && ", grep { $_ } $tag_cond, $predicate) || 1; if( $css_sel && $tag=~ m{\.}) { $tag=~s{\.[^.]*$}{}; $tag ||='*'; } $tag=~ s{(.)#.+$}{$1}; $last_tag ||= $tag; if( $sep eq '/') { $perlfunc .= sprintf( _join_n( q#foreach my $current_elt (@current_elts) #, q# { next if( !$current_elt); #, q# $current_elt--; #, q# $elt= $stack->[$current_elt]; #, q# if( %s) { push @new_current_elts, $current_elt;} #, q# } #, ), $cond ); } elsif( $sep eq '//') { $perlfunc .= sprintf( _join_n( q#foreach my $current_elt (@current_elts) #, q# { next if( !$current_elt); #, q# $current_elt--; #, q# my $candidate= $current_elt; #, q# while( $candidate >=0) #, q# { $elt= $stack->[$candidate]; #, q# if( %s) { push @new_current_elts, $candidate;} #, q# $candidate--; #, q# } #, q# } #, ), $cond ); } my $warn= $DEBUG_HANDLER >= 2 ? _join_n( qq#warn qq%fail at cond '$cond'%;#) : ''; $perlfunc .= sprintf( _join_n( q#unless( @new_current_elts) { %s return 0; } #, q#@current_elts= @new_current_elts; #, q#@new_current_elts=(); #, ), $warn ); $sep= pop @xpath_steps; } if( $anchored) # there should be a better way, but this works { my $warn= $DEBUG_HANDLER >= 2 ? _join_n( qq#warn qq{fail, stack not empty};#) : ''; $perlfunc .= sprintf( _join_n( q#if( ! grep { $_ == 0 } @current_elts) { %s return 0;}#), $warn); } $perlfunc.= qq{warn "handler for '$xpath_to_display' triggered\\n";\n} if( $DEBUG_HANDLER >=2); $perlfunc.= qq{return q{$xpath_original};\n}; _warn_debug_handler( "\nperlfunc:\n$perlfunc\n") if( $DEBUG_HANDLER>=1); my $s= eval "sub { $perlfunc }"; if( $@) { croak "wrong handler condition '$xpath' ($@);" } _warn_debug_handler( "last tag: '$last_tag', test_on_text: '$flag->{test_on_text}'\n") if( $DEBUG_HANDLER >=1); _warn_debug_handler( "score: ", join( ' ', map { "$_: $score->{$_}" } sort keys %$score), "\n") if( $DEBUG_HANDLER >=1); return { tag=> $last_tag, score => $score, trigger => $s, path => $xpath_original, handler => $handler, test_on_text => $flag->{test_on_text} }; } sub _join_n { return join( "\n", @_, ''); } # the "tag" part can be <tag>, <tag>.<class> or <tag>#<id> (where tag can be *, or start with # for hidden tags) sub _tag_cond { my( $full_tag)= @_; my( $tag, $class, $id); if( $full_tag=~ m{^(.+)#(.+)$}) { ($tag, $id)= ($1, $2); } # <tag>#<id> else { ( $tag, $class)= $css_sel ? $full_tag=~ m{^(.*?)(?:\.([^.]*))?$} : ($full_tag, undef); } my $tag_cond = $tag && $tag ne '*' ? qq#(\$elt->{'$ST_TAG'} eq "$tag")# : ''; my $id_cond = defined $id ? qq#(\$elt->{id} eq "$id")# : ''; my $class_cond = defined $class ? qq#(\$elt->{class}=~ m{(^| )$class( |\$)})# : ''; my $full_cond= join( ' && ', grep { $_ } ( $tag_cond, $class_cond, $id_cond)); return $full_cond; } # input: the predicate ($_[0]) which will be changed in place # flags, a hashref with various flags (like test_on_text) # the score sub _parse_predicate_in_handler { my( $flag, $score)= @_[1..2]; $_[0]=~ s{( ($REG_STRING) # strings |\@($REG_TAG_NAME)(\s* $REG_MATCH \s* $REG_REGEXP) # @att and regexp |\@($REG_TAG_NAME)(?=\s*(?:[><=!])) # @att followed by a comparison operator |\@($REG_TAG_NAME) # @att (not followed by a comparison operator) |=~|!~ # matching operators |([><]=?|=|!=)(?=\s*[\d+-]) # test before a number |([><]=?|=|!=) # test, other cases |($REG_FUNCTION) # no arg functions # this bit is a mess, but it is the only solution with this half-baked parser |(string\(\s*$REG_NAME\s*\)\s*$REG_MATCH\s*$REG_REGEXP) # string( child)=~ /regexp/ |(string\(\s*$REG_NAME\s*\)\s*$REG_COMP\s*$REG_STRING) # string( child) = "value" (or other test) |(string\(\s*$REG_NAME\s*\)\s*$REG_COMP\s*$REG_NUMBER) # string( child) = nb (or other test) |(and|or) # |($REG_NAME(?=\s*(and|or|$))) # nested tag name (needs to be after all other unquoted strings) |($REG_TAG_IN_PREDICATE) # nested tag name (needs to be after all other unquoted strings) )} { my( $token, $str, $att_re_name, $att_re_regexp, $att, $bare_att, $num_test, $alpha_test, $func, $str_regexp, $str_test_alpha, $str_test_num, $and_or, $tag) = ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14); $score->{predicates}++; # store tests on text (they are not always allowed) if( $func || $str_regexp || $str_test_num || $str_test_alpha ) { $flag->{test_on_text}= 1; } if( defined $str) { $token } elsif( $tag) { qq{(\$elt->{'$ST_ELT'} && \$elt->{'$ST_ELT'}->has_child( '$tag'))} } elsif( $att) { $att=~ m{^#} ? qq{ (\$elt->{'$ST_ELT'} && \$elt->{'$ST_ELT'}->{att}->{'$att'})} : qq{\$elt->{'$att'}} } elsif( $att_re_name) { $att_re_name=~ m{^#} ? qq{ (\$elt->{'$ST_ELT'} && \$elt->{'$ST_ELT'}->{att}->{'$att_re_name'}$att_re_regexp)} : qq{\$elt->{'$att_re_name'}$att_re_regexp} } # for some reason Devel::Cover flags the following lines as not tested. They are though. elsif( $bare_att) { $bare_att=~ m{^#} ? qq{(\$elt->{'$ST_ELT'} && defined(\$elt->{'$ST_ELT'}->{att}->{'$bare_att'}))} : qq{defined( \$elt->{'$bare_att'})} } elsif( $num_test && ($num_test eq '=') ) { "==" } # others tests are unchanged elsif( $alpha_test) { $PERL_ALPHA_TEST{$alpha_test} } elsif( $func && $func=~ m{^string}) { "\$elt->{'$ST_ELT'}->text"; } elsif( $str_regexp && $str_regexp =~ m{string\(\s*($REG_TAG_NAME)\s*\)\s*($REG_MATCH)\s*($REG_REGEXP)}) { "defined( _first_n { \$_->text $2 $3 } 1, \$elt->{'$ST_ELT'}->_children( '$1'))"; } elsif( $str_test_alpha && $str_test_alpha =~ m{string\(\s*($REG_TAG_NAME)\s*\)\s*($REG_COMP)\s*($REG_STRING)}) { my( $tag, $op, $str)= ($1, $2, $3); $str=~ s{(?<=.)'(?=.)}{\\'}g; # escape a quote within the string $str=~ s{^"}{'}; $str=~ s{"$}{'}; "defined( _first_n { \$_->text $PERL_ALPHA_TEST{$op} $str } 1, \$elt->{'$ST_ELT'}->children( '$tag'))"; } elsif( $str_test_num && $str_test_num =~ m{string\(\s*($REG_TAG_NAME)\s*\)\s*($REG_COMP)\s*($REG_NUMBER)}) { my $test= ($2 eq '=') ? '==' : $2; "defined( _first_n { \$_->text $test $3 } 1, \$elt->{'$ST_ELT'}->children( '$1'))"; } elsif( $and_or) { $score->{tests}++; $and_or eq 'and' ? '&&' : '||' ; } else { $token; } }gexs; } sub setCharHandler { my( $t, $handler)= @_; $t->{twig_char_handler}= $handler; } sub _reset_handlers { my $handlers= shift; delete $handlers->{handlers}; delete $handlers->{path_handlers}; delete $handlers->{subpath_handlers}; $handlers->{attcond_handlers_exp}=[] if( $handlers->{attcond_handlers}); delete $handlers->{attcond_handlers}; } sub _set_handlers { my $handlers= shift || return; my $set_handlers= {}; foreach my $path (keys %{$handlers}) { _set_handler( $set_handlers, $path, $handlers->{$path}); } return $set_handlers; } sub setTwigHandler { my( $t, $path, $handler)= @_; $t->{twig_handlers} ||={}; return _set_handler( $t->{twig_handlers}, $path, $handler); } sub setTwigHandlers { my( $t, $handlers)= @_; my $previous_handlers= $t->{twig_handlers} || undef; _reset_handlers( $t->{twig_handlers}); $t->{twig_handlers}= _set_handlers( $handlers); return $previous_handlers; } sub setStartTagHandler { my( $t, $path, $handler)= @_; $t->{twig_starttag_handlers}||={}; return _set_handler( $t->{twig_starttag_handlers}, $path, $handler); } sub setStartTagHandlers { my( $t, $handlers)= @_; my $previous_handlers= $t->{twig_starttag_handlers} || undef; _reset_handlers( $t->{twig_starttag_handlers}); $t->{twig_starttag_handlers}= _set_handlers( $handlers); return $previous_handlers; } sub setIgnoreEltsHandler { my( $t, $path, $action)= @_; $t->{twig_ignore_elts_handlers}||={}; return _set_handler( $t->{twig_ignore_elts_handlers}, $path, $action ); } sub setIgnoreEltsHandlers { my( $t, $handlers)= @_; my $previous_handlers= $t->{twig_ignore_elts_handlers}; _reset_handlers( $t->{twig_ignore_elts_handlers}); $t->{twig_ignore_elts_handlers}= _set_handlers( $handlers); return $previous_handlers; } sub setEndTagHandler { my( $t, $path, $handler)= @_; $t->{twig_endtag_handlers}||={}; return _set_handler( $t->{twig_endtag_handlers}, $path,$handler); } sub setEndTagHandlers { my( $t, $handlers)= @_; my $previous_handlers= $t->{twig_endtag_handlers}; _reset_handlers( $t->{twig_endtag_handlers}); $t->{twig_endtag_handlers}= _set_handlers( $handlers); return $previous_handlers; } # a little more complex: set the twig_handlers only if a code ref is given sub setTwigRoots { my( $t, $handlers)= @_; my $previous_roots= $t->{twig_roots}; _reset_handlers($t->{twig_roots}); $t->{twig_roots}= _set_handlers( $handlers); _check_illegal_twig_roots_handlers( $t->{twig_roots}); foreach my $path (keys %{$handlers}) { $t->{twig_handlers}||= {}; _set_handler( $t->{twig_handlers}, $path, $handlers->{$path}) if( ref($handlers->{$path}) && isa( $handlers->{$path}, 'CODE')); } return $previous_roots; } sub _check_illegal_twig_roots_handlers { my( $handlers)= @_; foreach my $tag_handlers (values %{$handlers->{xpath_handler}}) { foreach my $handler_data (@$tag_handlers) { if( my $type= $handler_data->{test_on_text}) { croak "string() condition not supported on twig_roots option"; } } } return; } # just store the reference to the expat object in the twig sub _twig_init { # warn " in _twig_init...\n"; # DEBUG handler my $p= shift; my $t=$p->{twig}; if( $t->{twig_parsing} ) { croak "cannot reuse a twig that is already parsing"; } $t->{twig_parsing}=1; $t->{twig_parser}= $p; if( $weakrefs) { weaken( $t->{twig_parser}); } # in case they had been created by a previous parse delete $t->{twig_dtd}; delete $t->{twig_doctype}; delete $t->{twig_xmldecl}; delete $t->{twig_root}; # if needed set the output filehandle $t->_set_fh_to_twig_output_fh(); return; } # uses eval to catch the parser's death sub safe_parse { my $t= shift; eval { $t->parse( @_); } ; return $@ ? $t->_reset_twig_after_error : $t; } sub safe_parsefile { my $t= shift; eval { $t->parsefile( @_); } ; return $@ ? $t->_reset_twig_after_error : $t; } # restore a twig in a proper state so it can be reused for a new parse sub _reset_twig { my $t= shift; $t->{twig_parsing}= 0; delete $t->{twig_current}; delete $t->{extra_data}; delete $t->{twig_dtd}; delete $t->{twig_in_pcdata}; delete $t->{twig_in_cdata}; delete $t->{twig_stored_space}; delete $t->{twig_entity_list}; $t->root->delete if( $t->root); delete $t->{twig_root}; return $t; } sub _reset_twig_after_error { my $t= shift; $t->_reset_twig; return undef; } sub _add_or_discard_stored_spaces { my $t= shift; $t->{twig_right_after_root}=0; #XX my $current= $t->{twig_current} or return; # ugly hack, with ignore on, twig_current can disappear return unless length $t->{twig_stored_spaces}; my $current_gi= $current->gi; if( ! $t->{twig_discard_all_spaces}) { if( ! defined( $t->{twig_space_policy}->{$current_gi})) { $t->{twig_space_policy}->{$current_gi}= _space_policy( $t, $current_gi); } if( $t->{twig_space_policy}->{$current_gi} || ($t->{twig_stored_spaces}!~ m{\n}) || $t->{twig_preserve_space}) { _insert_pcdata( $t, $t->{twig_stored_spaces} ); } } $t->{twig_stored_spaces}=''; return; } # the default twig handlers, which build the tree sub _twig_start { # warn " in _twig_start...\n"; # DEBUG handler #foreach my $s (@_) { next if ref $s; warn "$s: ", is_utf8( $s) ? "has flag" : "FLAG NOT SET"; } # YYY my ($p, $gi, @att)= @_; my $t=$p->{twig}; # empty the stored pcdata (space stored in case they are really part of # a pcdata element) or stored it if the space policy dictates so # create a pcdata element with the spaces if need be _add_or_discard_stored_spaces( $t); my $parent= $t->{twig_current}; # if we were parsing PCDATA then we exit the pcdata if( $t->{twig_in_pcdata}) { $t->{twig_in_pcdata}= 0; $parent->del_twig_current; $parent= $parent->_parent; } # if we choose to keep the encoding then we need to parse the tag if( my $func = $t->{parse_start_tag}) { ($gi, @att)= &$func($p->original_string); } elsif( $t->{twig_entities_in_attribute}) { ($gi,@att)= _parse_start_tag( $p->recognized_string); $t->{twig_entities_in_attribute}=0; } # if we are using an external DTD, we need to fill the default attributes if( $t->{twig_read_external_dtd}) { _fill_default_atts( $t, $gi, \@att); } # filter the input data if need be if( my $filter= $t->{twig_input_filter}) { $gi= $filter->( $gi); foreach my $att (@att) { $att= $filter->($att); } } my $ns_decl; if( $t->{twig_map_xmlns}) { $ns_decl= _replace_ns( $t, \$gi, \@att); } my $elt= $t->{twig_elt_class}->new( $gi); $elt->set_atts( @att); # now we can store the tag and atts my $context= { $ST_TAG => $gi, $ST_ELT => $elt, @att}; $context->{$ST_NS}= $ns_decl if $ns_decl; if( $weakrefs) { weaken( $context->{$ST_ELT}); } push @{$t->{_twig_context_stack}}, $context; $parent->del_twig_current if( $parent); $t->{twig_current}= $elt; $elt->set_twig_current; if( $parent) { my $prev_sibling= $parent->_last_child; if( $prev_sibling) { $prev_sibling->set_next_sibling( $elt); $elt->set_prev_sibling( $prev_sibling); } $elt->set_parent( $parent); unless( $parent->_first_child) { $parent->set_first_child( $elt); } $parent->set_last_child( $elt); } else { # processing root $t->set_root( $elt); # call dtd handler if need be $t->{twig_dtd_handler}->($t, $t->{twig_dtd}) if( defined $t->{twig_dtd_handler}); # set this so we can catch external entities # (the handler was modified during DTD processing) if( $t->{twig_default_print}) { $p->setHandlers( Default => \&_twig_print); } elsif( $t->{twig_roots}) { $p->setHandlers( Default => sub { return }); } else { $p->setHandlers( Default => \&_twig_default); } } $elt->set_empty( $p->recognized_string=~ m{/\s*>$}s ? 1 : 0); $elt->{extra_data}= $t->{extra_data} if( $t->{extra_data}); $t->{extra_data}=''; # if the element is ID-ed then store that info my $id= $elt->id; if( defined $id) { $t->{twig_id_list}->{$id}= $elt; if( $weakrefs) { weaken( $t->{twig_id_list}->{$id}); } } # call user handler if need be if( $t->{twig_starttag_handlers}) { # call all appropriate handlers my @handlers= _handler( $t, $t->{twig_starttag_handlers}, $gi); local $_= $elt; foreach my $handler ( @handlers) { $handler->($t, $elt) || last; } # call _all_ handler if needed if( my $all= $t->{twig_starttag_handlers}->{handlers}->{$ALL}) { $all->($t, $elt); } } # check if the tag is in the list of tags to be ignored if( $t->{twig_ignore_elts_handlers}) { my @handlers= _handler( $t, $t->{twig_ignore_elts_handlers}, $gi); # only the first handler counts, it contains the action (discard/print/string) if( @handlers) { my $action= shift @handlers; $t->ignore( $elt, $action); } } if( $elt->att( 'xml:space') && ( $elt->att( 'xml:space') eq 'preserve')) { $t->{twig_preserve_space}++; } return; } sub _replace_ns { my( $t, $gi, $atts)= @_; my $decls; foreach my $new_prefix ( $t->parser->new_ns_prefixes) { my $uri= $t->parser->expand_ns_prefix( $new_prefix); # replace the prefix if it is mapped $decls->{$new_prefix}= $uri; if( !$t->{twig_keep_original_prefix} && (my $mapped_prefix= $t->{twig_map_xmlns}->{$uri})) { $new_prefix= $mapped_prefix; } # now put the namespace declaration back in the element if( $new_prefix eq '#default') { push @$atts, "xmlns" => $uri; } else { push @$atts, "xmlns:$new_prefix" => $uri; } } if( $t->{twig_keep_original_prefix}) { # things become more complex: we need to find the original prefix # and store both prefixes my $ns_info= $t->_ns_info( $$gi); my $map_att; if( $ns_info->{mapped_prefix}) { $$gi= "$ns_info->{mapped_prefix}:$$gi"; $map_att->{$ns_info->{mapped_prefix}}= $ns_info->{prefix}; } my $att_name=1; foreach( @$atts) { if( $att_name) { my $ns_info= $t->_ns_info( $_); if( $ns_info->{mapped_prefix}) { $_= "$ns_info->{mapped_prefix}:$_"; $map_att->{$ns_info->{mapped_prefix}}= $ns_info->{prefix}; } $att_name=0; } else { $att_name=1; } } push @$atts, '#original_gi', $map_att if( $map_att); } else { $$gi= $t->_replace_prefix( $$gi); my $att_name=1; foreach( @$atts) { if( $att_name) { $_= $t->_replace_prefix( $_); $att_name=0; } else { $att_name=1; } } } return $decls; } # extract prefix, local_name, uri, mapped_prefix from a name # will only work if called from a start or end tag handler sub _ns_info { my( $t, $name)= @_; my $ns_info={}; my $p= $t->parser; $ns_info->{uri}= $p->namespace( $name); return $ns_info unless( $ns_info->{uri}); $ns_info->{prefix}= _a_proper_ns_prefix( $p, $ns_info->{uri}); $ns_info->{mapped_prefix}= $t->{twig_map_xmlns}->{$ns_info->{uri}} || $ns_info->{prefix}; return $ns_info; } sub _a_proper_ns_prefix { my( $p, $uri)= @_; foreach my $prefix ($p->current_ns_prefixes) { if( $p->expand_ns_prefix( $prefix) eq $uri) { return $prefix; } } return; } # returns the uri bound to a prefix in the original document # only works in a handler # can be used to deal with xsi:type attributes sub original_uri { my( $t, $prefix)= @_; my $ST_NS = '##ns' ; foreach my $ns (map { $_->{$ST_NS} if $_->{$ST_NS} } reverse @{$t->{_twig_context_stack}}) { return $ns->{$prefix} || next; } return; } sub _fill_default_atts { my( $t, $gi, $atts)= @_; my $dtd= $t->{twig_dtd}; my $attlist= $dtd->{att}->{$gi}; my %value= @$atts; foreach my $att (keys %$attlist) { if( !exists( $value{$att}) && exists( $attlist->{$att}->{default}) && ( $attlist->{$att}->{default} ne '#IMPLIED') ) { # the quotes are included in the default, so we need to remove them my $default_value= substr( $attlist->{$att}->{default}, 1, -1); push @$atts, $att, $default_value; } } return; } # the default function to parse a start tag (in keep_encoding mode) # can be overridden with the parse_start_tag method # only works for 1-byte character sets sub _parse_start_tag { my $string= shift; my( $gi, @atts); # get the gi (between < and the first space, / or > character) #if( $string=~ s{^<\s*([^\s>/]*)[\s>/]*}{}s) if( $string=~ s{^<\s*($REG_TAG_NAME)\s*[\s>/]}{}s) { $gi= $1; } else { croak "error parsing tag '$string'"; } while( $string=~ s{^([^\s=]*)\s*=\s*(["'])(.*?)\2\s*}{}s) { push @atts, $1, $3; } return $gi, @atts; } sub set_root { my( $t, $elt)= @_; $t->{twig_root}= $elt; if( $elt) { $elt->{twig}= $t; if( $weakrefs) { weaken( $elt->{twig}); } } return $t; } sub _twig_end { # warn " in _twig_end...\n"; # DEBUG handler my ($p, $gi) = @_; my $t=$p->{twig}; if( $t->{twig_in_pcdata} && (my $text_handler= $t->{TwigHandlers}->{$TEXT}) ) { local $_= $t->{twig_current}; $text_handler->( $t, $_) if $_; } if( $t->{twig_map_xmlns}) { $gi= $t->_replace_prefix( $gi); } _add_or_discard_stored_spaces( $t); # the new twig_current is the parent my $elt= $t->{twig_current}; $elt->del_twig_current; # if we were parsing PCDATA then we exit the pcdata too if( $t->{twig_in_pcdata}) { $t->{twig_in_pcdata}= 0; $elt= $elt->_parent if($elt->_parent); $elt->del_twig_current; } # parent is the new current element my $parent= $elt->_parent; $t->{twig_current}= $parent; if( $parent) { $parent->set_twig_current; # twig_to_be_normalized if( $parent->{twig_to_be_normalized}) { $parent->normalize; $parent->{twig_to_be_normalized}=0; } } if( $t->{extra_data}) { $elt->_set_extra_data_before_end_tag( $t->{extra_data}); $t->{extra_data}=''; } if( $t->{twig_handlers}) { # look for handlers my @handlers= _handler( $t, $t->{twig_handlers}, $gi); if( $t->{twig_tdh}) { if( @handlers) { push @{$t->{twig_handlers_to_trigger}}, [ $elt, \@handlers ]; } if( my $all= $t->{twig_handlers}->{handlers}->{$ALL}) { push @{$t->{twig_handlers_to_trigger}}, [ $elt, [$all] ]; } } else { local $_= $elt; # so we can use $_ in the handlers foreach my $handler ( @handlers) { $handler->($t, $elt) || last; } # call _all_ handler if needed my $all= $t->{twig_handlers}->{handlers}->{$ALL}; if( $all) { $all->($t, $elt); } if( @handlers || $all) { $t->{twig_right_after_root}=0; } } } # if twig_roots is set for the element then set appropriate handler if( $t->{twig_root_depth} and ($p->depth == $t->{twig_root_depth}) ) { if( $t->{twig_default_print}) { # select the proper fh (and store the currently selected one) $t->_set_fh_to_twig_output_fh(); if( !$p->depth==1) { $t->{twig_right_after_root}=1; } #XX if( $t->{twig_keep_encoding}) { $p->setHandlers( %twig_handlers_roots_print_original); } else { $p->setHandlers( %twig_handlers_roots_print); } } else { $p->setHandlers( %twig_handlers_roots); } } if( $elt->att( 'xml:space') && ( $elt->att( 'xml:space') eq 'preserve')) { $t->{twig_preserve_space}--; } pop @{$t->{_twig_context_stack}}; return; } sub _trigger_tdh { my( $t)= @_; if( @{$t->{twig_handlers_to_trigger}}) { my @handlers_to_trigger_now= sort { $a->[0]->cmp( $b->[0]) } @{$t->{twig_handlers_to_trigger}}; foreach my $elt_handlers (@handlers_to_trigger_now) { my( $handled_elt, $handlers_to_trigger)= @$elt_handlers; foreach my $handler ( @$handlers_to_trigger) { local $_= $handled_elt; $handler->($t, $handled_elt) || last; } } } return; } # return the list of handler that can be activated for an element # (either of CODE ref's or 1's for twig_roots) sub _handler { my( $t, $handlers, $gi)= @_; my @found_handlers=(); my $found_handler; foreach my $handler ( map { @$_ } grep { $_ } $handlers->{xpath_handler}->{$gi}, $handlers->{xpath_handler}->{'*'}) { my $trigger= $handler->{trigger}; if( my $found_path= $trigger->( $t->{_twig_context_stack})) { my $found_handler= $handler->{handler}; push @found_handlers, $found_handler; } } # if no handler found call default handler if defined if( !@found_handlers && defined $handlers->{handlers}->{$DEFAULT}) { push @found_handlers, $handlers->{handlers}->{$DEFAULT}; } if( @found_handlers and $t->{twig_do_not_chain_handlers}) { @found_handlers= ($found_handlers[0]); } return @found_handlers; # empty if no handler found } sub _replace_prefix { my( $t, $name)= @_; my $p= $t->parser; my $uri= $p->namespace( $name); # try to get the namespace from default if none is found (for attributes) # this should probably be an option if( !$uri and( $name!~/^xml/)) { $uri= $p->expand_ns_prefix( '#default'); } if( $uri) { if (my $mapped_prefix= $t->{twig_map_xmlns}->{$uri} || $DEFAULT_URI2NS{$uri}) { return "$mapped_prefix:$name"; } else { my $prefix= _a_proper_ns_prefix( $p, $uri); if( $prefix eq '#default') { $prefix=''; } return $prefix ? "$prefix:$name" : $name; } } else { return $name; } } sub _twig_char { # warn " in _twig_char...\n"; # DEBUG handler my ($p, $string)= @_; my $t=$p->{twig}; if( $t->{twig_keep_encoding}) { if( !$t->{twig_in_cdata}) { $string= $p->original_string(); } else { use bytes; # > perl 5.5 if( length( $string) < 1024) { $string= $p->original_string(); } else { #warn "dodgy case"; # TODO original_string does not hold the entire string, but $string is wrong # I believe due to a bug in XML::Parser # for now, we use the original string, even if it means that it's been converted to utf8 } } } if( $t->{twig_input_filter}) { $string= $t->{twig_input_filter}->( $string); } if( $t->{twig_char_handler}) { $string= $t->{twig_char_handler}->( $string); } my $elt= $t->{twig_current}; if( $t->{twig_in_cdata}) { # text is the continuation of a previously created cdata $elt->append_cdata( $t->{twig_stored_spaces} . $string); } elsif( $t->{twig_in_pcdata}) { # text is the continuation of a previously created pcdata if( $t->{extra_data}) { $elt->_push_extra_data_in_pcdata( $t->{extra_data}, length( $elt->{pcdata})); $t->{extra_data}=''; } $elt->append_pcdata( $string); } else { # text is just space, which might be discarded later if( $string=~/\A\s*\Z/s) { if( $t->{extra_data}) { # we got extra data (comment, pi), lets add the spaces to it $t->{extra_data} .= $string; } else { # no extra data, just store the spaces $t->{twig_stored_spaces}.= $string; } } else { my $new_elt= _insert_pcdata( $t, $t->{twig_stored_spaces}.$string); $elt->del_twig_current; $new_elt->set_twig_current; $t->{twig_current}= $new_elt; $t->{twig_in_pcdata}=1; if( $t->{extra_data}) { $new_elt->_push_extra_data_in_pcdata( $t->{extra_data}, 0); $t->{extra_data}=''; } } } return; } sub _twig_cdatastart { # warn " in _twig_cdatastart...\n"; # DEBUG handler my $p= shift; my $t=$p->{twig}; $t->{twig_in_cdata}=1; my $cdata= $t->{twig_elt_class}->new( $CDATA); my $twig_current= $t->{twig_current}; if( $t->{twig_in_pcdata}) { # create the node as a sibling of the PCDATA $cdata->set_prev_sibling( $twig_current); $twig_current->set_next_sibling( $cdata); my $parent= $twig_current->_parent; $cdata->set_parent( $parent); $parent->set_last_child( $cdata); $t->{twig_in_pcdata}=0; } else { # we have to create a PCDATA element if we need to store spaces if( $t->_space_policy($twig_current->gi) && $t->{twig_stored_spaces}) { _insert_pcdata( $t, $t->{twig_stored_spaces}); } $t->{twig_stored_spaces}=''; # create the node as a child of the current element $cdata->set_parent( $twig_current); if( my $prev_sibling= $twig_current->_last_child) { $cdata->set_prev_sibling( $prev_sibling); $prev_sibling->set_next_sibling( $cdata); } else { $twig_current->set_first_child( $cdata); } $twig_current->set_last_child( $cdata); } $twig_current->del_twig_current; $t->{twig_current}= $cdata; $cdata->set_twig_current; if( $t->{extra_data}) { $cdata->set_extra_data( $t->{extra_data}); $t->{extra_data}='' }; return; } sub _twig_cdataend { # warn " in _twig_cdataend...\n"; # DEBUG handler my $p= shift; my $t=$p->{twig}; $t->{twig_in_cdata}=0; my $elt= $t->{twig_current}; $elt->del_twig_current; my $cdata= $elt->cdata; $elt->_set_cdata( $cdata); push @{$t->{_twig_context_stack}}, { $ST_TAG => $CDATA }; if( $t->{twig_handlers}) { # look for handlers my @handlers= _handler( $t, $t->{twig_handlers}, $CDATA); local $_= $elt; # so we can use $_ in the handlers foreach my $handler ( @handlers) { $handler->($t, $elt) || last; } } pop @{$t->{_twig_context_stack}}; $elt= $elt->_parent; $t->{twig_current}= $elt; $elt->set_twig_current; $t->{twig_long_cdata}=0; return; } sub _pi_elt_handlers { my( $t, $pi)= @_; my $pi_handlers= $t->{twig_handlers}->{pi_handlers} || return; foreach my $handler ( $pi_handlers->{$pi->target}, $pi_handlers->{''}) { if( $handler) { local $_= $pi; $handler->( $t, $pi) || last; } } } sub _pi_text_handler { my( $t, $target, $data)= @_; if( my $handler= $t->{twig_handlers}->{pi_handlers}->{$target}) { return $handler->( $t, $target, $data); } if( my $handler= $t->{twig_handlers}->{pi_handlers}->{''}) { return $handler->( $t, $target, $data); } return defined( $data) && $data ne '' ? "<?$target $data?>" : "<?$target?>" ; } sub _comment_elt_handler { my( $t, $comment)= @_; if( my $handler= $t->{twig_handlers}->{handlers}->{$COMMENT}) { local $_= $comment; $handler->($t, $comment); } } sub _comment_text_handler { my( $t, $comment)= @_; if( my $handler= $t->{twig_handlers}->{handlers}->{$COMMENT}) { $comment= $handler->($t, $comment); if( !defined $comment || $comment eq '') { return ''; } } return "<!--$comment-->"; } sub _twig_comment { # warn " in _twig_comment...\n"; # DEBUG handler my( $p, $comment_text)= @_; my $t=$p->{twig}; if( $t->{twig_keep_encoding}) { $comment_text= substr( $p->original_string(), 4, -3); } $t->_twig_pi_comment( $p, $COMMENT, $t->{twig_keep_comments}, $t->{twig_process_comments}, '_set_comment', '_comment_elt_handler', '_comment_text_handler', $comment_text ); return; } sub _twig_pi { # warn " in _twig_pi...\n"; # DEBUG handler my( $p, $target, $data)= @_; my $t=$p->{twig}; if( $t->{twig_keep_encoding}) { my $pi_text= substr( $p->original_string(), 2, -2); ($target, $data)= split( /\s+/, $pi_text, 2); } $t->_twig_pi_comment( $p, $PI, $t->{twig_keep_pi}, $t->{twig_process_pi}, '_set_pi', '_pi_elt_handlers', '_pi_text_handler', $target, $data ); return; } sub _twig_pi_comment { my( $t, $p, $type, $keep, $process, $set, $elt_handler, $text_handler, @parser_args)= @_; if( $t->{twig_input_filter}) { foreach my $arg (@parser_args) { $arg= $t->{twig_input_filter}->( $arg); } } # if pi/comments are to be kept then we piggyback them to the current element if( $keep) { # first add spaces if( $t->{twig_stored_spaces}) { $t->{extra_data}.= $t->{twig_stored_spaces}; $t->{twig_stored_spaces}= ''; } my $extra_data= $t->$text_handler( @parser_args); $t->{extra_data}.= $extra_data; } elsif( $process) { my $twig_current= $t->{twig_current}; # defined unless we are outside of the root my $elt= $t->{twig_elt_class}->new( $type); $elt->$set( @parser_args); if( $t->{extra_data}) { $elt->set_extra_data( $t->{extra_data}); $t->{extra_data}=''; } unless( $t->root) { $t->_add_cpi_outside_of_root( leading_cpi => $elt); } elsif( $t->{twig_in_pcdata}) { # create the node as a sibling of the PCDATA $elt->paste_after( $twig_current); $t->{twig_in_pcdata}=0; } elsif( $twig_current) { # we have to create a PCDATA element if we need to store spaces if( $t->_space_policy($twig_current->gi) && $t->{twig_stored_spaces}) { _insert_pcdata( $t, $t->{twig_stored_spaces}); } $t->{twig_stored_spaces}=''; # create the node as a child of the current element $elt->paste_last_child( $twig_current); } else { $t->_add_cpi_outside_of_root( trailing_cpi => $elt); } if( $twig_current) { $twig_current->del_twig_current; my $parent= $elt->_parent; $t->{twig_current}= $parent; $parent->set_twig_current; } $t->$elt_handler( $elt); } } # add a comment or pi before the first element sub _add_cpi_outside_of_root { my($t, $type, $elt)= @_; # $type is 'leading_cpi' or 'trailing_cpi' $t->{$type} ||= $t->{twig_elt_class}->new( '#CPI'); # create the node as a child of the current element $elt->paste_last_child( $t->{$type}); return $t; } sub _twig_final { # warn " in _twig_final...\n"; # DEBUG handler my $p= shift; my $t= $p->isa( 'XML::Twig') ? $p : $p->{twig}; # store trailing data if( $t->{extra_data}) { $t->{trailing_cpi_text} = $t->{extra_data}; $t->{extra_data}=''; } $t->{trailing_spaces}= $t->{twig_stored_spaces} || ''; my $s= $t->{twig_stored_spaces}; $s=~s{\n}{\\n}g; if( $t->{twig_stored_spaces}) { my $s= $t->{twig_stored_spaces}; } # restore the selected filehandle if needed $t->_set_fh_to_selected_fh(); $t->_trigger_tdh if( $t->{twig_tdh}); select $t->{twig_original_selected_fh} if($t->{twig_original_selected_fh}); # probably dodgy if( exists $t->{twig_autoflush_data}) { my @args; push @args, $t->{twig_autoflush_data}->{fh} if( $t->{twig_autoflush_data}->{fh}); push @args, @{$t->{twig_autoflush_data}->{args}} if( $t->{twig_autoflush_data}->{args}); $t->flush( @args); delete $t->{twig_autoflush_data}; $t->root->delete if $t->root; } # tries to clean-up (probably not very well at the moment) #undef $p->{twig}; undef $t->{twig_parser}; delete $t->{twig_parsing}; @{$t}{ qw( twig_parser twig_parsing _twig_context_stack twig_current) }=(); return $t; } sub _insert_pcdata { my( $t, $string)= @_; # create a new PCDATA element my $parent= $t->{twig_current}; # always defined my $elt; if( exists $t->{twig_alt_elt_class}) { $elt= $t->{twig_elt_class}->new( $PCDATA); $elt->_set_pcdata( $string); } else { $elt= bless( { gi => $XML::Twig::gi2index{$PCDATA}, pcdata => $string }, 'XML::Twig::Elt'); } my $prev_sibling= $parent->_last_child; if( $prev_sibling) { $prev_sibling->set_next_sibling( $elt); $elt->set_prev_sibling( $prev_sibling); } else { $parent->set_first_child( $elt); } $elt->set_parent( $parent); $parent->set_last_child( $elt); $t->{twig_stored_spaces}=''; return $elt; } sub _space_policy { my( $t, $gi)= @_; my $policy; $policy=0 if( $t->{twig_discard_spaces}); $policy=1 if( $t->{twig_keep_spaces}); $policy=1 if( $t->{twig_keep_spaces_in} && $t->{twig_keep_spaces_in}->{$gi}); $policy=0 if( $t->{twig_discard_spaces_in} && $t->{twig_discard_spaces_in}->{$gi}); return $policy; } sub _twig_entity { # warn " in _twig_entity...\n"; # DEBUG handler my( $p, $name, $val, $sysid, $pubid, $ndata, $param)= @_; my $t=$p->{twig}; #{ no warnings; my $base= $p->base; warn "_twig_entity called: expand: '$t->{twig_expand_external_ents}', base: '$base', name: '$name', val: '$val', sysid: '$sysid', pubid: '$pubid', ndata: '$ndata', param: '$param'\n";} my $missing_entity=0; if( $sysid) { if($ndata) { if( ! -f _based_filename( $sysid, $p->base)) { $missing_entity= 1; } } else { if( $t->{twig_expand_external_ents}) { $val= eval { _slurp_uri( $sysid, $p->base) }; if( ! defined $val) { if( $t->{twig_extern_ent_nofail}) { $missing_entity= 1; } else { _croak( "cannot load SYSTEM entity '$name' from '$sysid': $@", 3); } } } } } my $ent=XML::Twig::Entity->new( $name, $val, $sysid, $pubid, $ndata, $param); if( $missing_entity) { $t->{twig_missing_system_entities}->{$name}= $ent; } my $entity_list= $t->entity_list; if( $entity_list) { $entity_list->add( $ent); } if( $parser_version > 2.27) { # this is really ugly, but with some versions of XML::Parser the value # of the entity is not properly returned by the default handler my $ent_decl= $ent->text; if( $t->{twig_keep_encoding}) { if( defined $ent->{val} && ($ent_decl !~ /["']/)) { my $val= $ent->{val}; $ent_decl .= $val =~ /"/ ? qq{'$val' } : qq{"$val" }; } # for my solaris box (perl 5.6.1, XML::Parser 2.31, expat?) $t->{twig_doctype}->{internal}=~ s{<!ENTITY\s+$name\s+$}{substr( $ent_decl, 0, -1)}e; } $t->{twig_doctype}->{internal} .= $ent_decl unless( $t->{twig_doctype}->{internal}=~ m{<!ENTITY\s+$name\s+}); } return; } sub _twig_notation { my( $p, $name, $base, $sysid, $pubid ) = @_; my $t = $p->{twig}; my $notation = XML::Twig::Notation->new( $name, $base, $sysid, $pubid ); my $notation_list = $t->notation_list(); if( $notation_list ) { $notation_list->add( $notation ); } # internal should get the recognized_string, but XML::Parser does not provide it # so we need to re-create it ( $notation->text) and stick it there. $t->{twig_doctype}->{internal} .= $notation->text; return; } sub _twig_extern_ent { # warn " in _twig_extern_ent...I (", $_[0]->original_string, ")\n"; # DEBUG handler my( $p, $base, $sysid, $pubid)= @_; my $t= $p->{twig}; if( $t->{twig_no_expand}) { my $ent_name= $t->{twig_keep_encoding} ? $p->original_string : $p->recognized_string; _twig_insert_ent( $t, $ent_name); return ''; } my $ent_content= eval { $t->{twig_ext_ent_handler}->( $p, $base, $sysid) }; if( ! defined $ent_content) { my $ent_name = $p->recognized_string; my $file = _based_filename( $sysid, $base); my $error_message= "cannot expand $ent_name - cannot load '$file'"; if( $t->{twig_extern_ent_nofail}) { return "<!-- $error_message -->"; } else { _croak( $error_message); } } return $ent_content; } # I use this so I can change the $Carp::CarpLevel (which determines how many call frames to skip when reporting an error) sub _croak { my( $message, $level)= @_; $Carp::CarpLevel= $level || 0; croak $message; } sub _twig_xmldecl { # warn " in _twig_xmldecl...\n"; # DEBUG handler my $p= shift; my $t=$p->{twig}; $t->{twig_xmldecl}||={}; # could have been set by set_output_encoding $t->{twig_xmldecl}->{version}= shift; $t->{twig_xmldecl}->{encoding}= shift; $t->{twig_xmldecl}->{standalone}= shift; return; } sub _twig_doctype { # warn " in _twig_doctype...\n"; # DEBUG handler my( $p, $name, $sysid, $pub, $internal)= @_; my $t=$p->{twig}; $t->{twig_doctype}||= {}; # create $t->{twig_doctype}->{name}= $name; # always there $t->{twig_doctype}->{sysid}= $sysid; # $t->{twig_doctype}->{pub}= $pub; # # now let's try to cope with XML::Parser 2.28 and above if( $parser_version > 2.27) { @saved_default_handler= $p->setHandlers( Default => \&_twig_store_internal_dtd, Entity => \&_twig_entity, ); $p->setHandlers( DoctypeFin => \&_twig_stop_storing_internal_dtd); $t->{twig_doctype}->{internal}=''; } else # for XML::Parser before 2.28 { $internal||=''; $internal=~ s{^\s*\[}{}; $internal=~ s{]\s*$}{}; $t->{twig_doctype}->{internal}=$internal; } # now check if we want to get the DTD info if( $t->{twig_read_external_dtd} && $sysid) { # let's build a fake document with an internal DTD if( $t->{DTDBase}) { _use( 'File::Spec'); $sysid=File::Spec->catfile($t->{DTDBase}, $sysid); } my $dtd= _slurp_uri( $sysid); # if the DTD includes an XML declaration, it needs to be moved before the DOCTYPE bit if( $dtd=~ s{^(\s*<\?xml(\s+\w+\s*=\s*("[^"]*"|'[^']*'))*\s*\?>)}{}) { $dtd= "$1<!DOCTYPE $name [$dtd]><$name/>"; } else { $dtd= "<!DOCTYPE $name [$dtd]><$name/>"; } $t->save_global_state(); # save the globals (they will be reset by the following new) my $t_dtd= XML::Twig->new( load_DTD => 1, ParseParamEnt => 1, error_context => $t->{ErrorContext} || 0); # create a temp twig $t_dtd->parse( $dtd); # parse it $t->{twig_dtd}= $t_dtd->{twig_dtd}; # grab the dtd info #$t->{twig_dtd_is_external}=1; $t->entity_list->_add_list( $t_dtd->entity_list) if( $t_dtd->entity_list); # grab the entity info $t->notation_list->_add_list( $t_dtd->notation_list) if( $t_dtd->notation_list); # grab the notation info $t->restore_global_state(); } return; } sub _twig_element { # warn " in _twig_element...\n"; # DEBUG handler my( $p, $name, $model)= @_; my $t=$p->{twig}; $t->{twig_dtd}||= {}; # may create the dtd $t->{twig_dtd}->{model}||= {}; # may create the model hash $t->{twig_dtd}->{elt_list}||= []; # ordered list of elements push @{$t->{twig_dtd}->{elt_list}}, $name; # store the elt $t->{twig_dtd}->{model}->{$name}= $model; # store the model if( ($parser_version > 2.27) && ($t->{twig_doctype}->{internal}=~ m{(^|>)\s*$}) ) { my $text= $XML::Twig::Elt::keep_encoding ? $p->original_string : $p->recognized_string; unless( $text) { # this version of XML::Parser does not return the text in the *_string method # we need to rebuild it $text= "<!ELEMENT $name $model>"; } $t->{twig_doctype}->{internal} .= $text; } return; } sub _twig_attlist { # warn " in _twig_attlist...\n"; # DEBUG handler my( $p, $gi, $att, $type, $default, $fixed)= @_; #warn "in attlist: gi: '$gi', att: '$att', type: '$type', default: '$default', fixed: '$fixed'\n"; my $t=$p->{twig}; $t->{twig_dtd}||= {}; # create dtd if need be $t->{twig_dtd}->{$gi}||= {}; # create elt if need be #$t->{twig_dtd}->{$gi}->{att}||= {}; # create att if need be if( ($parser_version > 2.27) && ($t->{twig_doctype}->{internal}=~ m{(^|>)\s*$}) ) { my $text= $XML::Twig::Elt::keep_encoding ? $p->original_string : $p->recognized_string; unless( $text) { # this version of XML::Parser does not return the text in the *_string method # we need to rebuild it my $att_decl="$att $type"; $att_decl .= " #FIXED" if( $fixed); $att_decl .= " $default" if( defined $default); # 2 cases: there is already an attlist on that element or not if( $t->{twig_dtd}->{att}->{$gi}) { # there is already an attlist, add to it $t->{twig_doctype}->{internal}=~ s{(<!ATTLIST\s*$gi )(.*?)\n?>} { "$1$2\n" . ' ' x length( $1) . "$att_decl\n>"}es; } else { # create the attlist $t->{twig_doctype}->{internal}.= "<!ATTLIST $gi $att_decl>" } } } $t->{twig_dtd}->{att}->{$gi}->{$att}= {} ; $t->{twig_dtd}->{att}->{$gi}->{$att}->{type}= $type; $t->{twig_dtd}->{att}->{$gi}->{$att}->{default}= $default if( defined $default); $t->{twig_dtd}->{att}->{$gi}->{$att}->{fixed}= $fixed; return; } sub _twig_default { # warn " in _twig_default...\n"; # DEBUG handler my( $p, $string)= @_; my $t= $p->{twig}; # we need to process the data in 2 cases: entity, or spaces after the closing tag # after the closing tag (no twig_current and root has been created) if( ! $t->{twig_current} && $t->{twig_root} && $string=~ m{^\s+$}m) { $t->{twig_stored_spaces} .= $string; } # process only if we have an entity if( $string=~ m{^&([^;]*);$}) { # the entity has to be pure pcdata, or we have a problem if( ($p->original_string=~ m{^<}) && ($p->original_string=~ m{>$}) ) { # string is a tag, entity is in an attribute $t->{twig_entities_in_attribute}=1 if( $t->{twig_do_not_escape_amp_in_atts}); } else { my $ent; if( $t->{twig_keep_encoding}) { _twig_char( $p, $string); $ent= substr( $string, 1, -1); } else { $ent= _twig_insert_ent( $t, $string); } return $ent; } } } sub _twig_insert_ent { my( $t, $string)=@_; my $twig_current= $t->{twig_current}; my $ent= $t->{twig_elt_class}->new( $ENT); $ent->set_ent( $string); _add_or_discard_stored_spaces( $t); if( $t->{twig_in_pcdata}) { # create the node as a sibling of the #PCDATA $ent->set_prev_sibling( $twig_current); $twig_current->set_next_sibling( $ent); my $parent= $twig_current->_parent; $ent->set_parent( $parent); $parent->set_last_child( $ent); # the twig_current is now the parent $twig_current->del_twig_current; $t->{twig_current}= $parent; # we left pcdata $t->{twig_in_pcdata}=0; } else { # create the node as a child of the current element $ent->set_parent( $twig_current); if( my $prev_sibling= $twig_current->_last_child) { $ent->set_prev_sibling( $prev_sibling); $prev_sibling->set_next_sibling( $ent); } else { if( $twig_current) { $twig_current->set_first_child( $ent); } } if( $twig_current) { $twig_current->set_last_child( $ent); } } # meant to trigger entity handler, does not seem to be activated at this time #if( my $handler= $t->{twig_handlers}->{gi}->{$ENT}) # { local $_= $ent; $handler->( $t, $ent); } return $ent; } sub parser { return $_[0]->{twig_parser}; } # returns the declaration text (or a default one) sub xmldecl { my $t= shift; return '' unless( $t->{twig_xmldecl} || $t->{output_encoding}); my $decl_string; my $decl= $t->{twig_xmldecl}; if( $decl) { my $version= $decl->{version}; $decl_string= q{<?xml}; $decl_string .= qq{ version="$version"}; # encoding can either have been set (in $decl->{output_encoding}) # or come from the document (in $decl->{encoding}) if( $t->{output_encoding}) { my $encoding= $t->{output_encoding}; $decl_string .= qq{ encoding="$encoding"}; } elsif( $decl->{encoding}) { my $encoding= $decl->{encoding}; $decl_string .= qq{ encoding="$encoding"}; } if( defined( $decl->{standalone})) { $decl_string .= q{ standalone="}; $decl_string .= $decl->{standalone} ? "yes" : "no"; $decl_string .= q{"}; } $decl_string .= "?>\n"; } else { my $encoding= $t->{output_encoding}; $decl_string= qq{<?xml version="1.0" encoding="$encoding"?>}; } my $output_filter= XML::Twig::Elt::output_filter(); return $output_filter ? $output_filter->( $decl_string) : $decl_string; } sub set_doctype { my( $t, $name, $system, $public, $internal)= @_; $t->{twig_doctype}= {} unless defined $t->{twig_doctype}; my $doctype= $t->{twig_doctype}; $doctype->{name} = $name if( defined $name); $doctype->{sysid} = $system if( defined $system); $doctype->{pub} = $public if( defined $public); $doctype->{internal} = $internal if( defined $internal); } sub doctype_name { my $t= shift; my $doctype= $t->{twig_doctype} or return ''; return $doctype->{name} || ''; } sub system_id { my $t= shift; my $doctype= $t->{twig_doctype} or return ''; return $doctype->{sysid} || ''; } sub public_id { my $t= shift; my $doctype= $t->{twig_doctype} or return ''; return $doctype->{pub} || ''; } sub internal_subset { my $t= shift; my $doctype= $t->{twig_doctype} or return ''; return $doctype->{internal} || ''; } # return the dtd object sub dtd { my $t= shift; return $t->{twig_dtd}; } # return an element model, or the list of element models sub model { my $t= shift; my $elt= shift; return $t->dtd->{model}->{$elt} if( $elt); return (sort keys %{$t->dtd->{model}}); } # return the entity_list object sub entity_list { my $t= shift; return $t->{twig_entity_list}; } # return the list of entity names sub entity_names { my $t= shift; return $t->entity_list->entity_names; } # return the entity object sub entity { my $t= shift; my $entity_name= shift; return $t->entity_list->ent( $entity_name); } # return the notation_list object sub notation_list { my $t= shift; return $t->{twig_notation_list}; } # return the list of notation names sub notation_names { my $t= shift; return $t->notation_list->notation_names; } # return the notation object sub notation { my $t= shift; my $notation_name= shift; return $t->notation_list->notation( $notation_name); } sub print_prolog { my $t= shift; my $fh= _is_fh($_[0]) ? shift : $t->{twig_output_fh} || select() || \*STDOUT; ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; print {$fh} $t->prolog( @_); } sub prolog { my $t= shift; if( $t->{no_prolog}){ return ''; } return $t->{no_prolog} ? '' : defined $t->{no_dtd_output} ? $t->xmldecl : $t->xmldecl . $t->doctype( @_); } sub doctype { my $t= shift; my %args= _normalize_args( @_); my $update_dtd = $args{UpdateDTD} || ''; my $doctype_text=''; my $doctype= $t->{twig_doctype}; if( $doctype) { $doctype_text .= qq{<!DOCTYPE $doctype->{name}} if( $doctype->{name}); $doctype_text .= qq{ PUBLIC "$doctype->{pub}"} if( $doctype->{pub}); $doctype_text .= qq{ SYSTEM} if( $doctype->{sysid} && !$doctype->{pub}); $doctype_text .= qq{ "$doctype->{sysid}"} if( $doctype->{sysid}); } if( $update_dtd) { if( $doctype) { my $internal=$doctype->{internal}; # awful hack, but at least it works a little better that what was there before if( $internal) { # remove entity and notation declarations (they will be re-generated from the updated entity list) $internal=~ s{<! \s* ENTITY \s+ $REG_TAG_NAME \s+ ( ("[^"]*"|'[^']*') \s* | SYSTEM [^>]*) >\s*}{}xg; $internal=~ s{<! \s* NOTATION .*? >\s*}{}sxg; $internal=~ s{^\n}{}; } $internal .= $t->entity_list->text ||'' if( $t->entity_list); $internal .= $t->notation_list->text ||'' if( $t->notation_list); if( $internal) { $doctype_text .= "[\n$internal]>\n"; } } elsif( !$t->{'twig_dtd'} && ( keys %{$t->entity_list} || keys %{$t->notation_list} ) ) { $doctype_text .= "<!DOCTYPE " . $t->root->gi . " [\n" . $t->entity_list->text . $t->notation_list->text . "\n]>";} else { $doctype_text= $t->{twig_dtd}; $doctype_text .= $t->dtd_text; } } elsif( $doctype) { if( my $internal= $doctype->{internal}) { # add opening and closing brackets if not already there # plus some spaces and newlines for a nice formating # I test it here because I can't remember which version of # XML::Parser need it or not, nor guess which one will in the # future, so this about the best I can do $internal=~ s{^\s*(\[\s*)?}{ [\n}; $internal=~ s{\s*(\]\s*(>\s*)?)?\s*$}{\n]>\n}; # XML::Parser does not include the NOTATION declarations in the DTD # at least in the current version. So put them back #if( $t->notation_list && $internal !~ m{<!\s*NOTATION}) # { $internal=~ s{(\n]>\n)$}{ "\n" . $t->notation_list->text . $1}es; } $doctype_text .= $internal; } } if( $doctype_text) { # terrible hack, as I can't figure out in which case the darn prolog # should get an extra > (depends on XML::Parser and expat versions) $doctype_text=~ s/(>\s*)*$/>\n/; # if($doctype_text); my $output_filter= XML::Twig::Elt::output_filter(); return $output_filter ? $output_filter->( $doctype_text) : $doctype_text; } else { return $doctype_text; } } sub _leading_cpi { my $t= shift; my $leading_cpi= $t->{leading_cpi} || return ''; return $leading_cpi->xml_string; } sub _trailing_cpi { my $t= shift; my $trailing_cpi= $t->{trailing_cpi} || return ''; return $trailing_cpi->xml_string; } sub _trailing_cpi_text { my $t= shift; return $t->{trailing_cpi_text} || ''; } sub print_to_file { my( $t, $filename)= (shift, shift); my $out_fh; open( $out_fh, ">$filename") or _croak( "cannot create file $filename: $!"); # < perl 5.8 my $mode= $t->{twig_keep_encoding} && ! _use_perlio() ? '>' : '>:utf8'; # >= perl 5.8 open( $out_fh, $mode, $filename) or _croak( "cannot create file $filename: $!"); # >= perl 5.8 $t->print( $out_fh, @_); close $out_fh; return $t; } # probably only works on *nix (at least the chmod bit) # first print to a temporary file, then rename that file to the desired file name, then change permissions # to the original file permissions (or to the current umask) sub safe_print_to_file { my( $t, $filename)= (shift, shift); my $perm= -f $filename ? (stat $filename)[2] & 07777 : ~umask() ; XML::Twig::_use( 'File::Temp') || croak "need File::Temp to use safe_print_to_file\n"; my $tmpdir= dirname( $filename); my( $fh, $tmpfilename) = File::Temp::tempfile( DIR => $tmpdir); $t->print_to_file( $tmpfilename, @_); rename( $tmpfilename, $filename) or unlink $tmpfilename && _croak( "cannot move temporary file to $filename: $!"); chmod $perm, $filename; return $t; } sub print { my $t= shift; my $fh= _is_fh( $_[0]) ? shift : undef; my %args= _normalize_args( @_); my $old_select = defined $fh ? select $fh : undef; my $old_pretty = defined ($args{PrettyPrint}) ? $t->set_pretty_print( $args{PrettyPrint}) : undef; my $old_empty_tag = defined ($args{EmptyTags}) ? $t->set_empty_tag_style( $args{EmptyTags}) : undef; #if( !$t->{encoding} || lc( $t->{encoding}) eq 'utf-8') { my $out= $fh || \*STDOUT; binmode $out, ':utf8'; } if( $perl_version > 5.006 && ! $t->{twig_keep_encoding} && _use_perlio() ) { binmode( $fh || \*STDOUT, ":utf8" ); } print $t->prolog( %args) . $t->_leading_cpi( %args); $t->{twig_root}->print; print $t->_trailing_cpi # trailing comments and pi's (elements, in 'process' mode) . $t->_trailing_cpi_text # trailing comments and pi's (in 'keep' mode) . ( ($t->{twig_keep_spaces}||'') && ($t->{trailing_spaces} || '')) ; $t->set_pretty_print( $old_pretty) if( defined $old_pretty); $t->set_empty_tag_style( $old_empty_tag) if( defined $old_empty_tag); if( $fh) { select $old_select; } return $t; } sub flush { my $t= shift; $t->_trigger_tdh if $t->{twig_tdh}; return if( $t->{twig_completely_flushed}); my $fh= _is_fh( $_[0]) ? shift : undef; my $old_select= defined $fh ? select $fh : undef; my $up_to= ref $_[0] ? shift : undef; my %args= _normalize_args( @_); my $old_pretty; if( defined $args{PrettyPrint}) { $old_pretty= $t->set_pretty_print( $args{PrettyPrint}); delete $args{PrettyPrint}; } my $old_empty_tag_style; if( $args{EmptyTags}) { $old_empty_tag_style= $t->set_empty_tag_style( $args{EmptyTags}); delete $args{EmptyTags}; } # the "real" last element processed, as _twig_end has closed it my $last_elt; my $flush_trailing_data=0; if( $up_to) { $last_elt= $up_to; } elsif( $t->{twig_current}) { $last_elt= $t->{twig_current}->_last_child; } else { $last_elt= $t->{twig_root}; $flush_trailing_data=1; $t->{twig_completely_flushed}=1; } # flush the DTD unless it has ready flushed (ie root has been flushed) my $elt= $t->{twig_root}; unless( $elt->_flushed) { # store flush info so we can auto-flush later if( $t->{twig_autoflush}) { $t->{twig_autoflush_data}={}; $t->{twig_autoflush_data}->{fh} = $fh if( $fh); $t->{twig_autoflush_data}->{args} = \@_ if( @_); } $t->print_prolog( %args); print $t->_leading_cpi; } while( $elt) { my $next_elt; if( $last_elt && $last_elt->in( $elt)) { unless( $elt->_flushed) { # just output the front tag print $elt->start_tag(); $elt->_set_flushed; } $next_elt= $elt->_first_child; } else { # an element before the last one or the last one, $next_elt= $elt->_next_sibling; $elt->_flush(); $elt->delete; last if( $last_elt && ($elt == $last_elt)); } $elt= $next_elt; } if( $flush_trailing_data) { print $t->_trailing_cpi # trailing comments and pi's (elements, in 'process' mode) , $t->_trailing_cpi_text # trailing comments and pi's (in 'keep' mode) } select $old_select if( defined $old_select); $t->set_pretty_print( $old_pretty) if( defined $old_pretty); $t->set_empty_tag_style( $old_empty_tag_style) if( defined $old_empty_tag_style); if( my $ids= $t->{twig_id_list}) { while( my ($id, $elt)= each %$ids) { if( ! defined $elt) { delete $t->{twig_id_list}->{$id} } } } return $t; } # flushes up to an element # this method just reorders the arguments and calls flush sub flush_up_to { my $t= shift; my $up_to= shift; if( _is_fh( $_[0])) { my $fh= shift; $t->flush( $fh, $up_to, @_); } else { $t->flush( $up_to, @_); } return $t; } # same as print except the entire document text is returned as a string sub sprint { my $t= shift; my %args= _normalize_args( @_); my $old_pretty; if( defined $args{PrettyPrint}) { $old_pretty= $t->set_pretty_print( $args{PrettyPrint}); delete $args{PrettyPrint}; } my $old_empty_tag_style; if( defined $args{EmptyTags}) { $old_empty_tag_style= $t->set_empty_tag_style( $args{EmptyTags}); delete $args{EmptyTags}; } my $string= $t->prolog( %args) # xml declaration and doctype . $t->_leading_cpi( %args) # leading comments and pi's in 'process' mode . ( ($t->{twig_root} && $t->{twig_root}->sprint) || '') . $t->_trailing_cpi # trailing comments and pi's (elements, in 'process' mode) . $t->_trailing_cpi_text # trailing comments and pi's (in 'keep' mode) ; if( $t->{twig_keep_spaces} && $t->{trailing_spaces}) { $string .= $t->{trailing_spaces}; } $t->set_pretty_print( $old_pretty) if( defined $old_pretty); $t->set_empty_tag_style( $old_empty_tag_style) if( defined $old_empty_tag_style); return $string; } # this method discards useless elements in a tree # it does the same thing as a flush except it does not print it # the second argument is an element, the last purged element # (this argument is usually set through the purge_up_to method) sub purge { my $t= shift; my $up_to= shift; $t->_trigger_tdh if $t->{twig_tdh}; # the "real" last element processed, as _twig_end has closed it my $last_elt; if( $up_to) { $last_elt= $up_to; } elsif( $t->{twig_current}) { $last_elt= $t->{twig_current}->_last_child; } else { $last_elt= $t->{twig_root}; } my $elt= $t->{twig_root}; while( $elt) { my $next_elt; if( $last_elt && $last_elt->in( $elt)) { $elt->_set_flushed; $next_elt= $elt->_first_child; } else { # an element before the last one or the last one, $next_elt= $elt->_next_sibling; $elt->delete; last if( $last_elt && ($elt == $last_elt) ); } $elt= $next_elt; } if( my $ids= $t->{twig_id_list}) { while( my ($id, $elt)= each %$ids) { if( ! defined $elt) { delete $t->{twig_id_list}->{$id} } } } return $t; } # flushes up to an element. This method just calls purge sub purge_up_to { my $t= shift; return $t->purge( @_); } sub root { return $_[0]->{twig_root}; } sub normalize { return $_[0]->root->normalize; } # create accessor methods on attribute names { my %accessor; # memorize accessor names so re-creating them won't trigger an error sub att_accessors { my $twig_or_class= shift; my $elt_class= ref $twig_or_class ? $twig_or_class->{twig_elt_class} : 'XML::Twig::Elt' ; ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; foreach my $att (@_) { _croak( "attempt to redefine existing method $att using att_accessors") if( $elt_class->can( $att) && !$accessor{$att}); if( !$accessor{$att}) { *{"$elt_class\::$att"}= sub :lvalue # > perl 5.5 { my $elt= shift; if( @_) { $elt->{att}->{$att}= $_[0]; } $elt->{att}->{$att}; }; $accessor{$att}=1; } } return $twig_or_class; } } { my %accessor; # memorize accessor names so re-creating them won't trigger an error sub elt_accessors { my $twig_or_class= shift; my $elt_class= ref $twig_or_class ? $twig_or_class->{twig_elt_class} : 'XML::Twig::Elt' ; # if arg is a hash ref, it's exp => name, otherwise it's a list of tags my %exp_to_alias= ref( $_[0]) && isa( $_[0], 'HASH') ? %{$_[0]} : map { $_ => $_ } @_; ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; while( my( $alias, $exp)= each %exp_to_alias ) { if( $elt_class->can( $alias) && !$accessor{$alias}) { _croak( "attempt to redefine existing method $alias using elt_accessors"); } if( !$accessor{$alias}) { *{"$elt_class\::$alias"}= sub { my $elt= shift; return wantarray ? $elt->children( $exp) : $elt->first_child( $exp); }; $accessor{$alias}=1; } } return $twig_or_class; } } { my %accessor; # memorize accessor names so re-creating them won't trigger an error sub field_accessors { my $twig_or_class= shift; my $elt_class= ref $twig_or_class ? $twig_or_class->{twig_elt_class} : 'XML::Twig::Elt' ; my %exp_to_alias= ref( $_[0]) && isa( $_[0], 'HASH') ? %{$_[0]} : map { $_ => $_ } @_; ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; while( my( $alias, $exp)= each %exp_to_alias ) { if( $elt_class->can( $alias) && !$accessor{$alias}) { _croak( "attempt to redefine existing method $exp using field_accessors"); } if( !$accessor{$alias}) { *{"$elt_class\::$alias"}= sub { my $elt= shift; $elt->field( $exp) }; $accessor{$alias}=1; } } return $twig_or_class; } } sub first_elt { my( $t, $cond)= @_; my $root= $t->root || return undef; return $root if( $root->passes( $cond)); return $root->next_elt( $cond); } sub last_elt { my( $t, $cond)= @_; my $root= $t->root || return undef; return $root->last_descendant( $cond); } sub next_n_elt { my( $t, $offset, $cond)= @_; $offset -- if( $t->root->matches( $cond) ); return $t->root->next_n_elt( $offset, $cond); } sub get_xpath { my $twig= shift; if( isa( $_[0], 'ARRAY')) { my $elt_array= shift; return _unique_elts( map { $_->get_xpath( @_) } @$elt_array); } else { return $twig->root->get_xpath( @_); } } # get a list of elts and return a sorted list of unique elts sub _unique_elts { my @sorted= sort { $a ->cmp( $b) } @_; my @unique; while( my $current= shift @sorted) { push @unique, $current unless( @unique && ($unique[-1] == $current)); } return @unique; } sub findvalue { my $twig= shift; if( isa( $_[0], 'ARRAY')) { my $elt_array= shift; return join( '', map { $_->findvalue( @_) } @$elt_array); } else { return $twig->root->findvalue( @_); } } sub findvalues { my $twig= shift; if( isa( $_[0], 'ARRAY')) { my $elt_array= shift; return map { $_->findvalues( @_) } @$elt_array; } else { return $twig->root->findvalues( @_); } } sub set_id_seed { my $t= shift; XML::Twig::Elt->set_id_seed( @_); return $t; } # return an array ref to an index, or undef sub index { my( $twig, $name, $index)= @_; return defined( $index) ? $twig->{_twig_index}->{$name}->[$index] : $twig->{_twig_index}->{$name}; } # return a list with just the root # if a condition is given then return an empty list unless the root matches sub children { my( $t, $cond)= @_; my $root= $t->root; unless( $cond && !($root->passes( $cond)) ) { return ($root); } else { return (); } } sub _children { return ($_[0]->root); } # weird, but here for completude # used to solve (non-sensical) /doc[1] XPath queries sub child { my $t= shift; my $nb= shift; return ($t->children( @_))[$nb]; } sub descendants { my( $t, $cond)= @_; my $root= $t->root; if( $root->passes( $cond) ) { return ($root, $root->descendants( $cond)); } else { return ( $root->descendants( $cond)); } } sub simplify { my $t= shift; $t->root->simplify( @_); } sub subs_text { my $t= shift; $t->root->subs_text( @_); } sub trim { my $t= shift; $t->root->trim( @_); } sub set_keep_encoding { my( $t, $keep)= @_; $t->{twig_keep_encoding}= $keep; $t->{NoExpand}= $keep; return XML::Twig::Elt::set_keep_encoding( $keep); } sub set_expand_external_entities { return XML::Twig::Elt::set_expand_external_entities( @_); } sub escape_gt { my $t= shift; $t->{twig_escape_gt}= 1; return XML::Twig::Elt::escape_gt( @_); } sub do_not_escape_gt { my $t= shift; $t->{twig_escape_gt}= 0; return XML::Twig::Elt::do_not_escape_gt( @_); } sub elt_id { return $_[0]->{twig_id_list}->{$_[1]}; } # change it in ALL twigs at the moment sub change_gi { my( $twig, $old_gi, $new_gi)= @_; my $index; return unless($index= $XML::Twig::gi2index{$old_gi}); $XML::Twig::index2gi[$index]= $new_gi; delete $XML::Twig::gi2index{$old_gi}; $XML::Twig::gi2index{$new_gi}= $index; return $twig; } # builds the DTD from the stored (possibly updated) data sub dtd_text { my $t= shift; my $dtd= $t->{twig_dtd}; my $doctype= $t->{twig_doctype} or return ''; my $string= "<!DOCTYPE ".$doctype->{name}; $string .= " [\n"; foreach my $gi (@{$dtd->{elt_list}}) { $string.= "<!ELEMENT $gi ".$dtd->{model}->{$gi}.">\n" ; if( $dtd->{att}->{$gi}) { my $attlist= $dtd->{att}->{$gi}; $string.= "<!ATTLIST $gi\n"; foreach my $att ( sort keys %{$attlist}) { if( $attlist->{$att}->{fixed}) { $string.= " $att $attlist->{$att}->{type} #FIXED $attlist->{$att}->{default}"; } else { $string.= " $att $attlist->{$att}->{type} $attlist->{$att}->{default}"; } $string.= "\n"; } $string.= ">\n"; } } $string.= $t->entity_list->text if( $t->entity_list); $string.= "\n]>\n"; return $string; } # prints the DTD from the stored (possibly updated) data sub dtd_print { my $t= shift; my $fh= _is_fh( $_[0]) ? shift : undef; if( $fh) { print $fh $t->dtd_text; } else { print $t->dtd_text; } return $t; } # build the subs that call directly expat BEGIN { my @expat_methods= qw( depth in_element within_element context current_line current_column current_byte recognized_string original_string xpcroak xpcarp base current_element element_index xml_escape position_in_context); foreach my $method (@expat_methods) { ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; *{$method}= sub { my $t= shift; _croak( "calling $method after parsing is finished") unless( $t->{twig_parsing}); return $t->{twig_parser}->$method(@_); }; } } sub path { my( $t, $gi)= @_; if( $t->{twig_map_xmlns}) { return "/" . join( "/", map { $t->_replace_prefix( $_)} ($t->{twig_parser}->context, $gi)); } else { return "/" . join( "/", ($t->{twig_parser}->context, $gi)); } } sub finish { my $t= shift; return $t->{twig_parser}->finish; } # just finish the parse by printing the rest of the document sub finish_print { my( $t, $fh)= @_; my $old_fh; unless( defined $fh) { $t->_set_fh_to_twig_output_fh(); } elsif( defined $fh) { $old_fh= select $fh; $t->{twig_original_selected_fh}= $old_fh if( $old_fh); } my $p=$t->{twig_parser}; if( $t->{twig_keep_encoding}) { $p->setHandlers( %twig_handlers_finish_print); } else { $p->setHandlers( %twig_handlers_finish_print_original); } return $t; } sub set_remove_cdata { return XML::Twig::Elt::set_remove_cdata( @_); } sub output_filter { return XML::Twig::Elt::output_filter( @_); } sub set_output_filter { return XML::Twig::Elt::set_output_filter( @_); } sub output_text_filter { return XML::Twig::Elt::output_text_filter( @_); } sub set_output_text_filter { return XML::Twig::Elt::set_output_text_filter( @_); } sub set_input_filter { my( $t, $input_filter)= @_; my $old_filter= $t->{twig_input_filter}; if( !$input_filter || isa( $input_filter, 'CODE') ) { $t->{twig_input_filter}= $input_filter; } elsif( $input_filter eq 'latin1') { $t->{twig_input_filter}= latin1(); } elsif( $filter{$input_filter}) { $t->{twig_input_filter}= $filter{$input_filter}; } else { _croak( "invalid input filter: $input_filter"); } return $old_filter; } sub set_empty_tag_style { return XML::Twig::Elt::set_empty_tag_style( @_); } sub set_pretty_print { return XML::Twig::Elt::set_pretty_print( @_); } sub set_quote { return XML::Twig::Elt::set_quote( @_); } sub set_indent { return XML::Twig::Elt::set_indent( @_); } sub set_keep_atts_order { shift; return XML::Twig::Elt::set_keep_atts_order( @_); } sub keep_atts_order { return XML::Twig::Elt::keep_atts_order( @_); } sub set_do_not_escape_amp_in_atts { return XML::Twig::Elt::set_do_not_escape_amp_in_atts( @_); } # save and restore package globals (the ones in XML::Twig::Elt) # should probably return the XML::Twig object itself, but instead # returns the state (as a hashref) for backward compatibility sub save_global_state { my $t= shift; return $t->{twig_saved_state}= XML::Twig::Elt::global_state(); } sub restore_global_state { my $t= shift; XML::Twig::Elt::set_global_state( $t->{twig_saved_state}); } sub global_state { return XML::Twig::Elt::global_state(); } sub set_global_state { return XML::Twig::Elt::set_global_state( $_[1]); } sub dispose { my $t= shift; $t->DESTROY; return; } sub DESTROY { my $t= shift; if( $t->{twig_root} && isa( $t->{twig_root}, 'XML::Twig::Elt')) { $t->{twig_root}->delete } # added to break circular references undef $t->{twig}; undef $t->{twig_root}->{twig} if( $t->{twig_root}); undef $t->{twig_parser}; undef %$t;# prevents memory leaks (especially when using mod_perl) undef $t; } # return true if perl was compiled using perlio # if perl is not available return true, these days perlio should be used sub _use_perlio { my $perl= _this_perl(); return $perl ? grep /useperlio=define/, `$perl -V` : 1; } # returns the parth to the perl executable (if available) sub _this_perl { # straight from perlvar my $secure_perl_path= $Config{perlpath}; if ($^O ne 'VMS') { $secure_perl_path .= $Config{_exe} unless $secure_perl_path =~ m/$Config{_exe}$/i; } if( ! -f $secure_perl_path) { $secure_perl_path= ''; } # when perl is not available (PDK) return $secure_perl_path; } # # non standard handlers # # kludge: expat 1.95.2 calls both Default AND Doctype handlers # so if the default handler finds '<!DOCTYPE' then it must # unset itself (_twig_print_doctype will reset it) sub _twig_print_check_doctype { # warn " in _twig_print_check_doctype...\n"; # DEBUG handler my $p= shift; my $string= $p->recognized_string(); if( $string eq '<!DOCTYPE') { $p->setHandlers( Default => undef); $p->setHandlers( Entity => undef); $expat_1_95_2=1; } else { print $string; } return; } sub _twig_print { # warn " in _twig_print...\n"; # DEBUG handler my $p= shift; if( $expat_1_95_2 && ($p->recognized_string eq '[') && !$p->{twig}->{expat_1_95_2_seen_bracket}) { # otherwise the opening square bracket of the doctype gets printed twice $p->{twig}->{expat_1_95_2_seen_bracket}=1; } else { if( $p->{twig}->{twig_right_after_root}) { my $s= $p->recognized_string(); print $s if $s=~ m{\S}; } else { print $p->recognized_string(); } } return; } # recognized_string does not seem to work for entities, go figure! # so this handler is used to print them anyway sub _twig_print_entity { # warn " in _twig_print_entity...\n"; # DEBUG handler my $p= shift; XML::Twig::Entity->new( @_)->print; } # kludge: expat 1.95.2 calls both Default AND Doctype handlers # so if the default handler finds '<!DOCTYPE' then it must # unset itself (_twig_print_doctype will reset it) sub _twig_print_original_check_doctype { # warn " in _twig_print_original_check_doctype...\n"; # DEBUG handler my $p= shift; my $string= $p->original_string(); if( $string eq '<!DOCTYPE') { $p->setHandlers( Default => undef); $p->setHandlers( Entity => undef); $expat_1_95_2=1; } else { print $string; } return; } sub _twig_print_original { # warn " in _twig_print_original...\n"; # DEBUG handler my $p= shift; print $p->original_string(); return; } sub _twig_print_original_doctype { # warn " in _twig_print_original_doctype...\n"; # DEBUG handler my( $p, $name, $sysid, $pubid, $internal)= @_; if( $name) { # with recent versions of XML::Parser original_string does not work, # hence we need to rebuild the doctype declaration my $doctype=''; $doctype .= qq{<!DOCTYPE $name} if( $name); $doctype .= qq{ PUBLIC "$pubid"} if( $pubid); $doctype .= qq{ SYSTEM} if( $sysid && !$pubid); $doctype .= qq{ "$sysid"} if( $sysid); $doctype .= ' [' if( $internal && !$expat_1_95_2) ; $doctype .= qq{>} unless( $internal || $expat_1_95_2); $p->{twig}->{twig_doctype}->{has_internal}=$internal; print $doctype; } $p->setHandlers( Default => \&_twig_print_original); return; } sub _twig_print_doctype { # warn " in _twig_print_doctype...\n"; # DEBUG handler my( $p, $name, $sysid, $pubid, $internal)= @_; if( $name) { # with recent versions of XML::Parser original_string does not work, # hence we need to rebuild the doctype declaration my $doctype=''; $doctype .= qq{<!DOCTYPE $name} if( $name); $doctype .= qq{ PUBLIC "$pubid"} if( $pubid); $doctype .= qq{ SYSTEM} if( $sysid && !$pubid); $doctype .= qq{ "$sysid"} if( $sysid); $doctype .= ' [' if( $internal) ; $doctype .= qq{>} unless( $internal || $expat_1_95_2); $p->{twig}->{twig_doctype}->{has_internal}=$internal; print $doctype; } $p->setHandlers( Default => \&_twig_print); return; } sub _twig_print_original_default { # warn " in _twig_print_original_default...\n"; # DEBUG handler my $p= shift; print $p->original_string(); return; } # account for the case where the element is empty sub _twig_print_end_original { # warn " in _twig_print_end_original...\n"; # DEBUG handler my $p= shift; print $p->original_string(); return; } sub _twig_start_check_roots { # warn " in _twig_start_check_roots...\n"; # DEBUG handler my $p= shift; my $gi= shift; my $t= $p->{twig}; my $fh= $t->{twig_output_fh} || select() || \*STDOUT; my $ns_decl; unless( $p->depth == 0) { if( $t->{twig_map_xmlns}) { $ns_decl= _replace_ns( $t, \$gi, \@_); } } my $context= { $ST_TAG => $gi, @_}; $context->{$ST_NS}= $ns_decl if $ns_decl; push @{$t->{_twig_context_stack}}, $context; my %att= @_; if( _handler( $t, $t->{twig_roots}, $gi)) { $p->setHandlers( %twig_handlers); # restore regular handlers $t->{twig_root_depth}= $p->depth; pop @{$t->{_twig_context_stack}}; # will be pushed back in _twig_start _twig_start( $p, $gi, @_); return; } # $tag will always be true if it needs to be printed (the tag string is never empty) my $tag= $t->{twig_default_print} ? $t->{twig_keep_encoding} ? $p->original_string : $p->recognized_string : ''; if( $p->depth == 0) { ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; print {$fh} $tag if( $tag); pop @{$t->{_twig_context_stack}}; # will be pushed back in _twig_start _twig_start( $p, $gi, @_); $t->root->_set_flushed; # or the root start tag gets output the first time we flush } elsif( $t->{twig_starttag_handlers}) { # look for start tag handlers my @handlers= _handler( $t, $t->{twig_starttag_handlers}, $gi); my $last_handler_res; foreach my $handler ( @handlers) { $last_handler_res= $handler->($t, $gi, %att); last unless $last_handler_res; } ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; print {$fh} $tag if( $tag && (!@handlers || $last_handler_res)); } else { ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; print {$fh} $tag if( $tag); } return; } sub _twig_end_check_roots { # warn " in _twig_end_check_roots...\n"; # DEBUG handler my( $p, $gi, %att)= @_; my $t= $p->{twig}; # $tag can be empty (<elt/>), hence the undef and the tests for defined my $tag= $t->{twig_default_print} ? $t->{twig_keep_encoding} ? $p->original_string : $p->recognized_string : undef; my $fh= $t->{twig_output_fh} || select() || \*STDOUT; if( $t->{twig_endtag_handlers}) { # look for end tag handlers my @handlers= _handler( $t, $t->{twig_endtag_handlers}, $gi); my $last_handler_res=1; foreach my $handler ( @handlers) { $last_handler_res= $handler->($t, $gi) || last; } #if( ! $last_handler_res) # { pop @{$t->{_twig_context_stack}}; warn "tested"; # return; # } } { ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; print {$fh} $tag if( defined $tag); } if( $p->depth == 0) { _twig_end( $p, $gi); $t->root->{end_tag_flushed}=1; } pop @{$t->{_twig_context_stack}}; return; } sub _twig_pi_check_roots { # warn " in _twig_pi_check_roots...\n"; # DEBUG handler my( $p, $target, $data)= @_; my $t= $p->{twig}; my $pi= $t->{twig_default_print} ? $t->{twig_keep_encoding} ? $p->original_string : $p->recognized_string : undef; my $fh= $t->{twig_output_fh} || select() || \*STDOUT; if( my $handler= $t->{twig_handlers}->{pi_handlers}->{$target} || $t->{twig_handlers}->{pi_handlers}->{''} ) { # if handler is called on pi, then it needs to be processed as a regular node my @flags= qw( twig_process_pi twig_keep_pi); my @save= @{$t}{@flags}; # save pi related flags @{$t}{@flags}= (1, 0); # override them, pi needs to be processed _twig_pi( @_); # call handler on the pi @{$t}{@flags}= @save;; # restore flag } else { ## no critic (TestingAndDebugging::ProhibitNoStrict); no strict 'refs'; print {$fh} $pi if( defined( $pi)); } return; } sub _output_ignored { my( $t, $p)= @_; my $action= $t->{twig_ignore_action}; my $get_string= $t->{twig_keep_encoding} ? 'original_string' : 'recognized_string'; if( $action eq 'print' ) { print $p->$get_string; } else { my $string_ref; if( $action eq 'string') { $string_ref= \$t->{twig_buffered_string}; } elsif( ref( $action) && ref( $action) eq 'SCALAR') { $string_ref= $action; } else { _croak( "wrong ignore action: $action"); } $$string_ref .= $p->$get_string; } } sub _twig_ignore_start { # warn " in _twig_ignore_start...\n"; # DEBUG handler my( $p, $gi)= @_; my $t= $p->{twig}; $t->{twig_ignore_level}++; my $action= $t->{twig_ignore_action}; $t->_output_ignored( $p) unless $action eq 'discard'; return; } sub _twig_ignore_end { # warn " in _twig_ignore_end...\n"; # DEBUG handler my( $p, $gi)= @_; my $t= $p->{twig}; my $action= $t->{twig_ignore_action}; $t->_output_ignored( $p) unless $action eq 'discard'; $t->{twig_ignore_level}--; if( ! $t->{twig_ignore_level}) { $t->{twig_current} = $t->{twig_ignore_elt}; $t->{twig_current}->set_twig_current; $t->{twig_ignore_elt}->cut; # there could possibly be a memory leak here (delete would avoid it, # but could also delete elements that should not be deleted) # restore the saved stack to the current level splice( @{$t->{_twig_context_stack}}, $p->depth+ 1 ); #warn "stack: ", _dump_stack( $t->{_twig_context_stack}), "\n"; $p->setHandlers( @{$t->{twig_saved_handlers}}); # test for handlers if( $t->{twig_endtag_handlers}) { # look for end tag handlers my @handlers= _handler( $t, $t->{twig_endtag_handlers}, $gi); my $last_handler_res=1; foreach my $handler ( @handlers) { $last_handler_res= $handler->($t, $gi) || last; } } pop @{$t->{_twig_context_stack}}; }; return; } #sub _dump_stack { my( $stack)= @_; return join( ":", map { $_->{$ST_TAG} } @$stack); } sub ignore { my( $t, $elt, $action)= @_; my $current= $t->{twig_current}; if( ! ($elt && ref( $elt) && isa( $elt, 'XML::Twig::Elt'))) { $elt= $current; } #warn "ignore: current = ", $current->tag, ", elt = ", $elt->tag, ")\n"; # we need the ($elt == $current->_last_child) test because the current element is set to the # parent _before_ handlers are called (and I can't figure out how to fix this) unless( ($elt == $current) || ($current->_last_child && ($elt == $current->_last_child)) || $current->in( $elt)) { _croak( "element to be ignored must be ancestor of current element"); } $t->{twig_ignore_level}= $current == $elt ? 1 : $t->_level_in_stack( $current) - $t->_level_in_stack($elt) + 1; #warn "twig_ignore_level: $t->{twig_ignore_level} (current: ", $current->tag, ", elt: ", $elt->tag, ")\n"; $t->{twig_ignore_elt} = $elt; # save it, so we can delete it later $action ||= 'discard'; if( !($action eq 'print' || $action eq 'string' || ( ref( $action) && ref( $action) eq 'SCALAR'))) { $action= 'discard'; } $t->{twig_ignore_action}= $action; my $p= $t->{twig_parser}; my @saved_handlers= $p->setHandlers( %twig_handlers_ignore); # set handlers my $get_string= $t->{twig_keep_encoding} ? 'original_string' : 'recognized_string'; my $default_handler; if( $action ne 'discard') { if( $action eq 'print') { $p->setHandlers( Default => sub { print $_[0]->$get_string; }); } else { my $string_ref; if( $action eq 'string') { if( ! exists $t->{twig_buffered_string}) { $t->{twig_buffered_string}=''; } $string_ref= \$t->{twig_buffered_string}; } elsif( ref( $action) && ref( $action) eq 'SCALAR') { $string_ref= $action; } $p->setHandlers( Default => sub { $$string_ref .= $_[0]->$get_string; }); } $t->_output_ignored( $p, $action); } $t->{twig_saved_handlers}= \@saved_handlers; # save current handlers } sub _level_in_stack { my( $t, $elt)= @_; my $level=1; foreach my $elt_in_stack ( @{$t->{_twig_context_stack}} ) { if( $elt_in_stack->{$ST_ELT} && ($elt == $elt_in_stack->{$ST_ELT})) { return $level } $level++; } } # select $t->{twig_output_fh} and store the current selected fh sub _set_fh_to_twig_output_fh { my $t= shift; my $output_fh= $t->{twig_output_fh}; if( $output_fh && !$t->{twig_output_fh_selected}) { # there is an output fh $t->{twig_selected_fh}= select(); # store the currently selected fh $t->{twig_output_fh_selected}=1; select $output_fh; # select the output fh for the twig } } # select the fh that was stored in $t->{twig_selected_fh} # (before $t->{twig_output_fh} was selected) sub _set_fh_to_selected_fh { my $t= shift; return unless( $t->{twig_output_fh}); my $selected_fh= $t->{twig_selected_fh}; $t->{twig_output_fh_selected}=0; select $selected_fh; return; } sub encoding { return $_[0]->{twig_xmldecl}->{encoding} if( $_[0]->{twig_xmldecl}); } sub set_encoding { my( $t, $encoding)= @_; $t->{twig_xmldecl} ||={}; $t->set_xml_version( "1.0") unless( $t->xml_version); $t->{twig_xmldecl}->{encoding}= $encoding; return $t; } sub output_encoding { return $_[0]->{output_encoding}; } sub set_output_encoding { my( $t, $encoding)= @_; my $output_filter= $t->output_filter || ''; if( ($encoding && $encoding !~ m{^utf-?8$}i) || $t->{twig_keep_encoding} || $output_filter) { $t->set_output_filter( _encoding_filter( $encoding || '')); } $t->{output_encoding}= $encoding; return $t; } sub xml_version { return $_[0]->{twig_xmldecl}->{version} if( $_[0]->{twig_xmldecl}); } sub set_xml_version { my( $t, $version)= @_; $t->{twig_xmldecl} ||={}; $t->{twig_xmldecl}->{version}= $version; return $t; } sub standalone { return $_[0]->{twig_xmldecl}->{standalone} if( $_[0]->{twig_xmldecl}); } sub set_standalone { my( $t, $standalone)= @_; $t->{twig_xmldecl} ||={}; $t->set_xml_version( "1.0") unless( $t->xml_version); $t->{twig_xmldecl}->{standalone}= $standalone; return $t; } # SAX methods sub toSAX1 { _croak( "cannot use toSAX1 while parsing (use flush_toSAX1)") if (defined $_[0]->{twig_parser}); shift(@_)->_toSAX(@_, \&XML::Twig::Elt::_start_tag_data_SAX1, \&XML::Twig::Elt::_end_tag_data_SAX1 ); } sub toSAX2 { _croak( "cannot use toSAX2 while parsing (use flush_toSAX2)") if (defined $_[0]->{twig_parser}); shift(@_)->_toSAX(@_, \&XML::Twig::Elt::_start_tag_data_SAX2, \&XML::Twig::Elt::_end_tag_data_SAX2 ); } sub _toSAX { my( $t, $handler, $start_tag_data, $end_tag_data) = @_; if( my $start_document = $handler->can( 'start_document')) { $start_document->( $handler); } $t->_prolog_toSAX( $handler); if( $t->root) { $t->root->_toSAX( $handler, $start_tag_data, $end_tag_data) ; } if( my $end_document = $handler->can( 'end_document')) { $end_document->( $handler); } } sub flush_toSAX1 { shift(@_)->_flush_toSAX(@_, \&XML::Twig::Elt::_start_tag_data_SAX1, \&XML::Twig::Elt::_end_tag_data_SAX1 ); } sub flush_toSAX2 { shift(@_)->_flush_toSAX(@_, \&XML::Twig::Elt::_start_tag_data_SAX2, \&XML::Twig::Elt::_end_tag_data_SAX2 ); } sub _flush_toSAX { my( $t, $handler, $start_tag_data, $end_tag_data)= @_; # the "real" last element processed, as _twig_end has closed it my $last_elt; if( $t->{twig_current}) { $last_elt= $t->{twig_current}->_last_child; } else { $last_elt= $t->{twig_root}; } my $elt= $t->{twig_root}; unless( $elt->_flushed) { # init unless already done (ie root has been flushed) if( my $start_document = $handler->can( 'start_document')) { $start_document->( $handler); } # flush the DTD $t->_prolog_toSAX( $handler) } while( $elt) { my $next_elt; if( $last_elt && $last_elt->in( $elt)) { unless( $elt->_flushed) { # just output the front tag if( my $start_element = $handler->can( 'start_element')) { if( my $tag_data= $start_tag_data->( $elt)) { $start_element->( $handler, $tag_data); } } $elt->_set_flushed; } $next_elt= $elt->_first_child; } else { # an element before the last one or the last one, $next_elt= $elt->_next_sibling; $elt->_toSAX( $handler, $start_tag_data, $end_tag_data); $elt->delete; last if( $last_elt && ($elt == $last_elt)); } $elt= $next_elt; } if( !$t->{twig_parsing}) { if( my $end_document = $handler->can( 'end_document')) { $end_document->( $handler); } } } sub _prolog_toSAX { my( $t, $handler)= @_; $t->_xmldecl_toSAX( $handler); $t->_DTD_toSAX( $handler); } sub _xmldecl_toSAX { my( $t, $handler)= @_; my $decl= $t->{twig_xmldecl}; my $data= { Version => $decl->{version}, Encoding => $decl->{encoding}, Standalone => $decl->{standalone}, }; if( my $xml_decl= $handler->can( 'xml_decl')) { $xml_decl->( $handler, $data); } } sub _DTD_toSAX { my( $t, $handler)= @_; my $doctype= $t->{twig_doctype}; return unless( $doctype); my $data= { Name => $doctype->{name}, PublicId => $doctype->{pub}, SystemId => $doctype->{sysid}, }; if( my $start_dtd= $handler->can( 'start_dtd')) { $start_dtd->( $handler, $data); } # I should call code to export the internal subset here if( my $end_dtd= $handler->can( 'end_dtd')) { $end_dtd->( $handler); } } # input/output filters sub latin1 { local $SIG{__DIE__}; if( _use( 'Encode')) { return encode_convert( 'ISO-8859-15'); } elsif( _use( 'Text::Iconv')) { return iconv_convert( 'ISO-8859-15'); } elsif( _use( 'Unicode::Map8') && _use( 'Unicode::String')) { return unicode_convert( 'ISO-8859-15'); } else { return \®exp2latin1; } } sub _encoding_filter { { local $SIG{__DIE__}; my $encoding= $_[1] || $_[0]; if( _use( 'Encode')) { my $sub= encode_convert( $encoding); return $sub; } elsif( _use( 'Text::Iconv')) { return iconv_convert( $encoding); } elsif( _use( 'Unicode::Map8') && _use( 'Unicode::String')) { return unicode_convert( $encoding); } } _croak( "Encode, Text::Iconv or Unicode::Map8 and Unicode::String need to be installed in order to use encoding options"); } # shamelessly lifted from XML::TyePYX (works only with XML::Parse 2.27) sub regexp2latin1 { my $text=shift; $text=~s{([\xc0-\xc3])(.)}{ my $hi = ord($1); my $lo = ord($2); chr((($hi & 0x03) <<6) | ($lo & 0x3F)) }ge; return $text; } sub html_encode { _use( 'HTML::Entities') or croak "cannot use html_encode: missing HTML::Entities"; return HTML::Entities::encode_entities($_[0] ); } sub safe_encode { my $str= shift; if( $perl_version < 5.008) { # the no utf8 makes the regexp work in 5.6 no utf8; # = perl 5.6 $str =~ s{([\xC0-\xDF].|[\xE0-\xEF]..|[\xF0-\xFF]...)} {_XmlUtf8Decode($1)}egs; } else { $str= encode( ascii => $str, $FB_HTMLCREF); } return $str; } sub safe_encode_hex { my $str= shift; if( $perl_version < 5.008) { # the no utf8 makes the regexp work in 5.6 no utf8; # = perl 5.6 $str =~ s{([\xC0-\xDF].|[\xE0-\xEF]..|[\xF0-\xFF]...)} {_XmlUtf8Decode($1, 1)}egs; } else { $str= encode( ascii => $str, $FB_XMLCREF); } return $str; } # this one shamelessly lifted from XML::DOM # does NOT work on 5.8.0 sub _XmlUtf8Decode { my ($str, $hex) = @_; my $len = length ($str); my $n; if ($len == 2) { my @n = unpack "C2", $str; $n = (($n[0] & 0x3f) << 6) + ($n[1] & 0x3f); } elsif ($len == 3) { my @n = unpack "C3", $str; $n = (($n[0] & 0x1f) << 12) + (($n[1] & 0x3f) << 6) + ($n[2] & 0x3f); } elsif ($len == 4) { my @n = unpack "C4", $str; $n = (($n[0] & 0x0f) << 18) + (($n[1] & 0x3f) << 12) + (($n[2] & 0x3f) << 6) + ($n[3] & 0x3f); } elsif ($len == 1) # just to be complete... { $n = ord ($str); } else { croak "bad value [$str] for _XmlUtf8Decode"; } my $char= $hex ? sprintf ("&#x%x;", $n) : "&#$n;"; return $char; } sub unicode_convert { my $enc= $_[1] ? $_[1] : $_[0]; # so the method can be called on the twig or directly _use( 'Unicode::Map8') or croak "Unicode::Map8 not available, needed for encoding filter: $!"; _use( 'Unicode::String') or croak "Unicode::String not available, needed for encoding filter: $!"; import Unicode::String qw(utf8); my $sub= eval qq{ { $NO_WARNINGS; my \$cnv; BEGIN { \$cnv= Unicode::Map8->new(\$enc) or croak "Can't create converter to \$enc"; } sub { return \$cnv->to8 (utf8(\$_[0])->ucs2); } } }; unless( $sub) { croak $@; } return $sub; } sub iconv_convert { my $enc= $_[1] ? $_[1] : $_[0]; # so the method can be called on the twig or directly _use( 'Text::Iconv') or croak "Text::Iconv not available, needed for encoding filter: $!"; my $sub= eval qq{ { $NO_WARNINGS; my \$cnv; BEGIN { \$cnv = Text::Iconv->new( 'utf8', \$enc) or croak "Can't create iconv converter to \$enc"; } sub { return \$cnv->convert( \$_[0]); } } }; unless( $sub) { if( $@=~ m{^Unsupported conversion: Invalid argument}) { croak "Unsupported encoding: $enc"; } else { croak $@; } } return $sub; } sub encode_convert { my $enc= $_[1] ? $_[1] : $_[0]; # so the method can be called on the twig or directly my $sub= eval qq{sub { $NO_WARNINGS; return encode( "$enc", \$_[0]); } }; croak "can't create Encode-based filter: $@" unless( $sub); return $sub; } # XML::XPath compatibility sub getRootNode { return $_[0]; } sub getParentNode { return undef; } sub getChildNodes { my @children= ($_[0]->root); return wantarray ? @children : \@children; } sub _weakrefs { return $weakrefs; } sub _set_weakrefs { $weakrefs=shift() || 0; XML::Twig::Elt::set_destroy()if ! $weakrefs; } # for testing purposes sub _dump { my $t= shift; my $dump=''; $dump="document\n"; # should dump twig level data here if( $t->root) { $dump .= $t->root->_dump( @_); } return $dump; } 1; ###################################################################### package XML::Twig::Entity_list; ###################################################################### *isa= *UNIVERSAL::isa; sub new { my $class = shift; my $self={ entities => {}, updated => 0}; bless $self, $class; return $self; } sub add_new_ent { my $ent_list= shift; my $ent= XML::Twig::Entity->new( @_); $ent_list->add( $ent); return $ent_list; } sub _add_list { my( $ent_list, $to_add)= @_; my $ents_to_add= $to_add->{entities}; return $ent_list unless( $ents_to_add && %$ents_to_add); @{$ent_list->{entities}}{keys %$ents_to_add}= values %$ents_to_add; $ent_list->{updated}=1; return $ent_list; } sub add { my( $ent_list, $ent)= @_; $ent_list->{entities}->{$ent->{name}}= $ent; $ent_list->{updated}=1; return $ent_list; } sub ent { my( $ent_list, $ent_name)= @_; return $ent_list->{entities}->{$ent_name}; } # can be called with an entity or with an entity name sub delete { my $ent_list= shift; if( isa( ref $_[0], 'XML::Twig::Entity')) { # the second arg is an entity my $ent= shift; delete $ent_list->{entities}->{$ent->{name}}; } else { # the second arg was not entity, must be a string then my $name= shift; delete $ent_list->{entities}->{$name}; } $ent_list->{updated}=1; return $ent_list; } sub print { my ($ent_list, $fh)= @_; my $old_select= defined $fh ? select $fh : undef; foreach my $ent_name ( sort keys %{$ent_list->{entities}}) { my $ent= $ent_list->{entities}->{$ent_name}; # we have to test what the entity is or un-defined entities can creep in if( isa( $ent, 'XML::Twig::Entity')) { $ent->print(); } } select $old_select if( defined $old_select); return $ent_list; } sub text { my ($ent_list)= @_; return join "\n", map { $ent_list->{entities}->{$_}->text} sort keys %{$ent_list->{entities}}; } # return the list of entity names sub entity_names { my $ent_list= shift; return (sort keys %{$ent_list->{entities}}) ; } sub list { my ($ent_list)= @_; return map { $ent_list->{entities}->{$_} } sort keys %{$ent_list->{entities}}; } 1; ###################################################################### package XML::Twig::Entity; ###################################################################### #*isa= *UNIVERSAL::isa; sub new { my( $class, $name, $val, $sysid, $pubid, $ndata, $param)= @_; $class= ref( $class) || $class; my $self={}; $self->{name} = $name; $self->{val} = $val if( defined $val ); $self->{sysid} = $sysid if( defined $sysid); $self->{pubid} = $pubid if( defined $pubid); $self->{ndata} = $ndata if( defined $ndata); $self->{param} = $param if( defined $param); bless $self, $class; return $self; } sub name { return $_[0]->{name}; } sub val { return $_[0]->{val}; } sub sysid { return defined( $_[0]->{sysid}) ? $_[0]->{sysid} : ''; } sub pubid { return defined( $_[0]->{pubid}) ? $_[0]->{pubid} : ''; } sub ndata { return defined( $_[0]->{ndata}) ? $_[0]->{ndata} : ''; } sub param { return defined( $_[0]->{param}) ? $_[0]->{param} : ''; } sub print { my ($ent, $fh)= @_; my $text= $ent->text; if( $fh) { print $fh $text . "\n"; } else { print $text . "\n"; } } sub sprint { my ($ent)= @_; return $ent->text; } sub text { my ($ent)= @_; #warn "text called: '", $ent->_dump, "'\n"; return '' if( !$ent->{name}); my @tokens; push @tokens, '<!ENTITY'; push @tokens, '%' if( $ent->{param}); push @tokens, $ent->{name}; if( defined $ent->{val} && !defined( $ent->{sysid}) && !defined($ent->{pubid}) ) { push @tokens, _quoted_val( $ent->{val}); } elsif( defined $ent->{sysid}) { push @tokens, 'PUBLIC', _quoted_val( $ent->{pubid}) if( $ent->{pubid}); push @tokens, 'SYSTEM' unless( $ent->{pubid}); push @tokens, _quoted_val( $ent->{sysid}); push @tokens, 'NDATA', $ent->{ndata} if( $ent->{ndata}); } return join( ' ', @tokens) . '>'; } sub _quoted_val { my $q= $_[0]=~ m{"} ? q{'} : q{"}; return qq{$q$_[0]$q}; } sub _dump { my( $ent)= @_; return join( " - ", map { "$_ => '$ent->{$_}'" } grep { defined $ent->{$_} } sort keys %$ent); } 1; ###################################################################### package XML::Twig::Notation_list; ###################################################################### *isa= *UNIVERSAL::isa; sub new { my $class = shift; my $self={ notations => {}, updated => 0}; bless $self, $class; return $self; } sub add_new_notation { my $notation_list= shift; my $notation= XML::Twig::Notation->new( @_); $notation_list->add( $notation); return $notation_list; } sub _add_list { my( $notation_list, $to_add)= @_; my $notations_to_add= $to_add->{notations}; return $notation_list unless( $notations_to_add && %$notations_to_add); @{$notation_list->{notations}}{keys %$notations_to_add}= values %$notations_to_add; $notation_list->{updated}=1; return $notation_list; } sub add { my( $notation_list, $notation)= @_; $notation_list->{notations}->{$notation->{name}}= $notation; $notation_list->{updated}=1; return $notation_list; } sub notation { my( $notation_list, $notation_name)= @_; return $notation_list->{notations}->{$notation_name}; } # can be called with an notation or with an notation name sub delete { my $notation_list= shift; if( isa( ref $_[0], 'XML::Twig::Notation')) { # the second arg is an notation my $notation= shift; delete $notation_list->{notations}->{$notation->{name}}; } else { # the second arg was not notation, must be a string then my $name= shift; delete $notation_list->{notations}->{$name}; } $notation_list->{updated}=1; return $notation_list; } sub print { my ($notation_list, $fh)= @_; my $old_select= defined $fh ? select $fh : undef; foreach my $notation_name ( sort keys %{$notation_list->{notations}}) { my $notation= $notation_list->{notations}->{$notation_name}; # we have to test what the notation is or un-defined notations can creep in if( isa( $notation, 'XML::Twig::Notation')) { $notation->print(); } } select $old_select if( defined $old_select); return $notation_list; } sub text { my ($notation_list)= @_; return join "\n", map { $notation_list->{notations}->{$_}->text} sort keys %{$notation_list->{notations}}; } # return the list of notation names sub notation_names { my $notation_list= shift; return (sort keys %{$notation_list->{notations}}) ; } sub list { my ($notation_list)= @_; return map { $notation_list->{notations}->{$_} } sort keys %{$notation_list->{notations}}; } 1; ###################################################################### package XML::Twig::Notation; ###################################################################### #*isa= *UNIVERSAL::isa; BEGIN { *sprint= *text; } sub new { my( $class, $name, $base, $sysid, $pubid)= @_; $class= ref( $class) || $class; my $self={}; $self->{name} = $name; $self->{base} = $base if( defined $base ); $self->{sysid} = $sysid if( defined $sysid); $self->{pubid} = $pubid if( defined $pubid); bless $self, $class; return $self; } sub name { return $_[0]->{name}; } sub base { return $_[0]->{base}; } sub sysid { return $_[0]->{sysid}; } sub pubid { return $_[0]->{pubid}; } sub print { my ($notation, $fh)= @_; my $text= $notation->text; if( $fh) { print $fh $text . "\n"; } else { print $text . "\n"; } } sub text { my ($notation)= @_; return '' if( !$notation->{name}); my @tokens; push @tokens, '<!NOTATION'; push @tokens, $notation->{name}; push @tokens, ( 'PUBLIC', _quoted_val( $notation->{pubid} ) ) if $notation->{pubid}; push @tokens, ( 'SYSTEM') if ! $notation->{pubid} && $notation->{sysid}; push @tokens, (_quoted_val( $notation->{sysid}) ) if $notation->{sysid}; return join( ' ', @tokens) . '>'; } sub _quoted_val { my $q= $_[0]=~ m{"} ? q{'} : q{"}; return qq{$q$_[0]$q}; } sub _dump { my( $notation)= @_; return join( " - ", map { "$_ => '$notation->{$_}'" } grep { defined $notation->{$_} } sort keys %$notation); } 1; ###################################################################### package XML::Twig::Elt; ###################################################################### use Carp; *isa= *UNIVERSAL::isa; my $CDATA_START = "<![CDATA["; my $CDATA_END = "]]>"; my $PI_START = "<?"; my $PI_END = "?>"; my $COMMENT_START = "<!--"; my $COMMENT_END = "-->"; my $XMLNS_URI = 'http://www.w3.org/2000/xmlns/'; BEGIN { # set some aliases for methods *tag = *gi; *name = *gi; *set_tag = *set_gi; *set_name = *set_gi; *find_nodes = *get_xpath; # as in XML::DOM *findnodes = *get_xpath; # as in XML::LibXML *field = *first_child_text; *trimmed_field = *first_child_trimmed_text; *is_field = *contains_only_text; *is = *passes; *matches = *passes; *has_child = *first_child; *has_children = *first_child; *all_children_pass = *all_children_are; *all_children_match= *all_children_are; *getElementsByTagName= *descendants; *find_by_tag_name= *descendants_or_self; *unwrap = *erase; *inner_xml = *xml_string; *outer_xml = *sprint; *add_class = *add_to_class; *first_child_is = *first_child_matches; *last_child_is = *last_child_matches; *next_sibling_is = *next_sibling_matches; *prev_sibling_is = *prev_sibling_matches; *next_elt_is = *next_elt_matches; *prev_elt_is = *prev_elt_matches; *parent_is = *parent_matches; *child_is = *child_matches; *inherited_att = *inherit_att; *sort_children_by_value= *sort_children_on_value; *has_atts= *att_nb; # imports from XML::Twig *_is_fh= *XML::Twig::_is_fh; # XML::XPath compatibility *string_value = *text; *toString = *sprint; *getName = *gi; *getRootNode = *twig; *getNextSibling = *_next_sibling; *getPreviousSibling = *_prev_sibling; *isElementNode = *is_elt; *isTextNode = *is_text; *isPI = *is_pi; *isPINode = *is_pi; *isProcessingInstructionNode= *is_pi; *isComment = *is_comment; *isCommentNode = *is_comment; *getTarget = *target; *getFirstChild = *_first_child; *getLastChild = *_last_child; # try using weak references # test whether we can use weak references { local $SIG{__DIE__}; if( eval 'require Scalar::Util' && defined( &Scalar::Util::weaken) ) { import Scalar::Util qw(weaken); } elsif( eval 'require WeakRef') { import WeakRef; } } } # can be called as XML::Twig::Elt->new( [[$gi, $atts, [@content]]) # - gi is an optional gi given to the element # - $atts is a hashref to attributes for the element # - @content is an optional list of text and elements that will # be inserted under the element sub new { my $class= shift; $class= ref $class || $class; my $elt = {}; bless ($elt, $class); return $elt unless @_; if( @_ == 1 && $_[0]=~ m{^\s*<}) { return $class->parse( @_); } # if a gi is passed then use it my $gi= shift; $elt->set_gi( $gi); my $atts= ref $_[0] eq 'HASH' ? shift : undef; if( $atts && defined $atts->{$CDATA}) { delete $atts->{$CDATA}; my $cdata= $class->new( $CDATA => @_); return $class->new( $gi, $atts, $cdata); } if( $gi eq $PCDATA) { if( grep { ref $_ } @_) { croak "element $PCDATA can only be created from text"; } $elt->_set_pcdata( join '', @_); } elsif( $gi eq $ENT) { $elt->set_ent( shift); } elsif( $gi eq $CDATA) { if( grep { ref $_ } @_) { croak "element $CDATA can only be created from text"; } $elt->_set_cdata( join '', @_); } elsif( $gi eq $COMMENT) { if( grep { ref $_ } @_) { croak "element $COMMENT can only be created from text"; } $elt->_set_comment( join '', @_); } elsif( $gi eq $PI) { if( grep { ref $_ } @_) { croak "element $PI can only be created from text"; } $elt->_set_pi( shift, join '', @_); } else { # the rest of the arguments are the content of the element if( @_) { $elt->set_content( @_); } else { $elt->set_empty( 1); } } if( $atts) { # the attribute hash can be used to pass the asis status if( defined $atts->{$ASIS}) { $elt->set_asis( $atts->{$ASIS} ); delete $atts->{$ASIS}; } if( defined $atts->{$EMPTY}) { $elt->set_empty( $atts->{$EMPTY}); delete $atts->{$EMPTY}; } if( keys %$atts) { $elt->set_atts( $atts); } $elt->_set_id( $atts->{$ID}) if( $atts->{$ID}); } return $elt; } # optimized version of $elt->new( PCDATA, $text); sub _new_pcdata { my $class= $_[0]; $class= ref $class || $class; my $elt = {}; bless $elt, $class; $elt->set_gi( $PCDATA); $elt->_set_pcdata( $_[1]); return $elt; } # this function creates an XM:::Twig::Elt from a string # it is quite clumsy at the moment, as it just creates a # new twig then returns its root # there might also be memory leaks there # additional arguments are passed to new XML::Twig sub parse { my $class= shift; if( ref( $class)) { $class= ref( $class); } my $string= shift; my %args= @_; my $t= XML::Twig->new(%args); $t->parse( $string); my $elt= $t->root; # clean-up the node delete $elt->{twig}; # get rid of the twig data delete $elt->{twig_current}; # better get rid of this too if( $t->{twig_id_list}) { $elt->{twig_id_list}= $t->{twig_id_list}; } $elt->cut; undef $t->{twig_root}; return $elt; } sub set_inner_xml { my( $elt, $xml, @args)= @_; my $new_elt= $elt->parse( "<dummy>$xml</dummy>", @args); $elt->cut_children; $new_elt->paste_first_child( $elt); $new_elt->erase; return $elt; } sub set_outer_xml { my( $elt, $xml, @args)= @_; my $new_elt= $elt->parse( "<dummy>$xml</dummy>", @args); $elt->cut_children; $new_elt->replace( $elt); $new_elt->erase; return $new_elt; } sub set_inner_html { my( $elt, $html)= @_; my $t= XML::Twig->new->parse_html( "<html>$html</html>"); my $new_elt= $t->root; if( $elt->tag eq 'head') { $new_elt->first_child( 'head')->unwrap; $new_elt->first_child( 'body')->cut; } elsif( $elt->tag ne 'html') { $new_elt->first_child( 'head')->cut; $new_elt->first_child( 'body')->unwrap; } $new_elt->cut; $elt->cut_children; $new_elt->paste_first_child( $elt); $new_elt->erase; return $elt; } sub set_gi { my ($elt, $gi)= @_; unless( defined $XML::Twig::gi2index{$gi}) { # new gi, create entries in %gi2index and @index2gi push @XML::Twig::index2gi, $gi; $XML::Twig::gi2index{$gi}= $#XML::Twig::index2gi; } $elt->{gi}= $XML::Twig::gi2index{$gi}; return $elt; } sub gi { return $XML::Twig::index2gi[$_[0]->{gi}]; } sub local_name { my $elt= shift; return _local_name( $elt->gi); } sub ns_prefix { my $elt= shift; return _ns_prefix( $elt->gi); } # namespace prefix for any qname (can be used for elements or attributes) sub _ns_prefix { my $qname= shift; if( $qname=~ m{^([^:]*):}) { return $1; } else { return( ''); } # should it be '' ? } # local name for any qname (can be used for elements or attributes) sub _local_name { my $qname= shift; (my $local= $qname)=~ s{^[^:]*:}{}; return $local; } #sub get_namespace sub namespace ## no critic (Subroutines::ProhibitNestedSubs); { my $elt= shift; my $prefix= defined $_[0] ? shift() : $elt->ns_prefix; my $ns_att= $prefix ? "xmlns:$prefix" : "xmlns"; my $expanded= $DEFAULT_NS{$prefix} || $elt->_inherit_att_through_cut( $ns_att) || ''; return $expanded; } sub declare_missing_ns ## no critic (Subroutines::ProhibitNestedSubs); { my $root= shift; my %missing_prefix; my $map= $root->_current_ns_prefix_map; foreach my $prefix (keys %$map) { my $prefix_att= $prefix eq '#default' ? 'xmlns' : "xmlns:$prefix"; if( ! $root->att( $prefix_att)) { $root->set_att( $prefix_att => $map->{$prefix}); } } return $root; } sub _current_ns_prefix_map { my( $elt)= shift; my $map; while( $elt) { foreach my $att ($elt->att_names) { my $prefix= $att eq 'xmlns' ? '#default' : $att=~ m{^xmlns:(.*)$} ? $1 : next ; if( ! exists $map->{$prefix}) { $map->{$prefix}= $elt->att( $att); } } $elt= $elt->_parent || $elt->former_parent; } return $map; } sub set_ns_decl { my( $elt, $uri, $prefix)= @_; my $ns_att= $prefix ? "xmlns:$prefix" : 'xmlns'; $elt->set_att( $ns_att => $uri); return $elt; } sub set_ns_as_default { my( $root, $uri)= @_; my @ns_decl_to_remove; foreach my $elt ($root->descendants_or_self) { if( $elt->_ns_prefix && $elt->namespace eq $uri) { $elt->set_tag( $elt->local_name); } # store any namespace declaration for that uri foreach my $ns_decl (grep { $_=~ m{xmlns(:|$)} && $elt->att( $_) eq $uri } $elt->att_names) { push @ns_decl_to_remove, [$elt, $ns_decl]; } } $root->set_ns_decl( $uri); # now remove the ns declarations (if done earlier then descendants of an element with the ns declaration # are not considered being in the namespace foreach my $ns_decl_to_remove ( @ns_decl_to_remove) { my( $elt, $ns_decl)= @$ns_decl_to_remove; $elt->del_att( $ns_decl); } return $root; } # return #ELT for an element and #PCDATA... for others sub get_type { my $gi_nb= $_[0]->{gi}; # the number, not the string return $ELT if( $gi_nb >= $XML::Twig::SPECIAL_GI); return $_[0]->gi; } # return the gi if it's a "real" element, 0 otherwise sub is_elt { if( $_[0]->{gi} >= $XML::Twig::SPECIAL_GI) { return $_[0]->gi; } else { return 0; } } sub is_pcdata { my $elt= shift; return (exists $elt->{'pcdata'}); } sub is_cdata { my $elt= shift; return (exists $elt->{'cdata'}); } sub is_pi { my $elt= shift; return (exists $elt->{'target'}); } sub is_comment { my $elt= shift; return (exists $elt->{'comment'}); } sub is_ent { my $elt= shift; return (exists $elt->{ent} || $elt->{ent_name}); } sub is_text { my $elt= shift; return (exists( $elt->{'pcdata'}) || (exists $elt->{'cdata'})); } sub is_empty { return $_[0]->{empty} || 0; } sub set_empty { $_[0]->{empty}= defined( $_[1]) ? $_[1] : 1; return $_[0]; } sub set_not_empty { delete $_[0]->{empty} if( $_[0]->is_empty); return $_[0]; } sub set_asis { my $elt=shift; foreach my $descendant ($elt, $elt->_descendants ) { $descendant->{asis}= 1; if( $descendant->is_cdata) { $descendant->set_gi( $PCDATA); $descendant->_set_pcdata( $descendant->cdata); } } return $elt; } sub set_not_asis { my $elt=shift; foreach my $descendant ($elt, $elt->descendants) { delete $descendant->{asis} if $descendant->{asis};} return $elt; } sub is_asis { return $_[0]->{asis}; } sub closed { my $elt= shift; my $t= $elt->twig || return; my $curr_elt= $t->{twig_current}; return 1 unless( $curr_elt); return $curr_elt->in( $elt); } sub set_pcdata { my( $elt, $pcdata)= @_; if( $elt->_extra_data_in_pcdata) { _try_moving_extra_data( $elt, $pcdata); } $elt->{pcdata}= $pcdata; return $elt; } sub _extra_data_in_pcdata { return $_[0]->{extra_data_in_pcdata}; } sub _set_extra_data_in_pcdata { $_[0]->{extra_data_in_pcdata}= $_[1]; return $_[0]; } sub _del_extra_data_in_pcdata { delete $_[0]->{extra_data_in_pcdata}; return $_[0]; } sub _unshift_extra_data_in_pcdata { my $e= shift; $e->{extra_data_in_pcdata}||=[]; unshift @{$e->{extra_data_in_pcdata}}, { text => shift(), offset => shift() }; } sub _push_extra_data_in_pcdata { my $e= shift; $e->{extra_data_in_pcdata}||=[]; push @{$e->{extra_data_in_pcdata}}, { text => shift(), offset => shift() }; } sub _extra_data_before_end_tag { return $_[0]->{extra_data_before_end_tag} || ''; } sub _set_extra_data_before_end_tag { $_[0]->{extra_data_before_end_tag}= $_[1]; return $_[0]} sub _del_extra_data_before_end_tag { delete $_[0]->{extra_data_before_end_tag}; return $_[0]} sub _prefix_extra_data_before_end_tag { my( $elt, $data)= @_; if($elt->{extra_data_before_end_tag}) { $elt->{extra_data_before_end_tag}= $data . $elt->{extra_data_before_end_tag}; } else { $elt->{extra_data_before_end_tag}= $data; } return $elt; } # internal, in cases where we know there is no extra_data (inlined anyway!) sub _set_pcdata { $_[0]->{pcdata}= $_[1]; } # try to figure out if we can keep the extra_data around sub _try_moving_extra_data { my( $elt, $modified)=@_; my $initial= $elt->{pcdata}; my $cpis= $elt->_extra_data_in_pcdata; if( (my $offset= index( $modified, $initial)) != -1) { # text has been added foreach (@$cpis) { $_->{offset}+= $offset; } } elsif( ($offset= index( $initial, $modified)) != -1) { # text has been cut my $len= length( $modified); foreach my $cpi (@$cpis) { $cpi->{offset} -= $offset; } $elt->_set_extra_data_in_pcdata( [ grep { $_->{offset} >= 0 && $_->{offset} < $len } @$cpis ]); } else { _match_extra_data_words( $elt, $initial, $modified) || _match_extra_data_chars( $elt, $initial, $modified) || $elt->_del_extra_data_in_pcdata; } } sub _match_extra_data_words { my( $elt, $initial, $modified)= @_; my @initial= split /\b/, $initial; my @modified= split /\b/, $modified; return _match_extra_data( $elt, length( $initial), \@initial, \@modified); } sub _match_extra_data_chars { my( $elt, $initial, $modified)= @_; my @initial= split //, $initial; my @modified= split //, $modified; return _match_extra_data( $elt, length( $initial), \@initial, \@modified); } sub _match_extra_data { my( $elt, $length, $initial, $modified)= @_; my $cpis= $elt->_extra_data_in_pcdata; if( @$initial <= @$modified) { my( $ok, $positions, $offsets)= _pos_offset( $initial, $modified); if( $ok) { my $offset=0; my $pos= shift @$positions; foreach my $cpi (@$cpis) { while( $cpi->{offset} >= $pos) { $offset= shift @$offsets; $pos= shift @$positions || $length +1; } $cpi->{offset} += $offset; } return 1; } } else { my( $ok, $positions, $offsets)= _pos_offset( $modified, $initial); if( $ok) { #print STDERR "pos: ", join( ':', @$positions), "\n", # "offset: ", join( ':', @$offsets), "\n"; my $offset=0; my $pos= shift @$positions; my $prev_pos= 0; foreach my $cpi (@$cpis) { while( $cpi->{offset} >= $pos) { $offset= shift @$offsets; $prev_pos= $pos; $pos= shift @$positions || $length +1; } $cpi->{offset} -= $offset; if( $cpi->{offset} < $prev_pos) { delete $cpi->{text}; } } $elt->_set_extra_data_in_pcdata( [ grep { exists $_->{text} } @$cpis ]); return 1; } } return 0; } sub _pos_offset { my( $short, $long)= @_; my( @pos, @offset); my( $s_length, $l_length)=(0,0); while (@$short) { my $s_word= shift @$short; my $l_word= shift @$long; if( $s_word ne $l_word) { while( @$long && $s_word ne $l_word) { $l_length += length( $l_word); $l_word= shift @$long; } if( !@$long && $s_word ne $l_word) { return 0; } push @pos, $s_length; push @offset, $l_length - $s_length; } my $length= length( $s_word); $s_length += $length; $l_length += $length; } return( 1, \@pos, \@offset); } sub append_pcdata { $_[0]->{'pcdata'}.= $_[1]; return $_[0]; } sub pcdata { return $_[0]->{pcdata}; } sub append_extra_data { $_[0]->{extra_data}.= $_[1]; return $_[0]; } sub set_extra_data { $_[0]->{extra_data}= $_[1]; return $_[0]; } sub extra_data { return $_[0]->{extra_data} || ''; } sub set_target { my( $elt, $target)= @_; $elt->{target}= $target; return $elt; } sub target { return $_[0]->{target}; } sub set_data { $_[0]->{'data'}= $_[1]; return $_[0]; } sub data { return $_[0]->{data}; } sub set_pi { my $elt= shift; unless( $elt->{gi} == $XML::Twig::gi2index{$PI}) { $elt->cut_children; $elt->set_gi( $PI); } return $elt->_set_pi( @_); } sub _set_pi { $_[0]->set_target( $_[1]); $_[0]->set_data( $_[2]); return $_[0]; } sub pi_string { my $string= $PI_START . $_[0]->target; my $data= $_[0]->data; if( defined( $data) && $data ne '') { $string .= " $data"; } $string .= $PI_END ; return $string; } sub set_comment { my $elt= shift; unless( $elt->{gi} == $XML::Twig::gi2index{$COMMENT}) { $elt->cut_children; $elt->set_gi( $COMMENT); } $elt->_set_comment( $_[0]); return $elt; } sub _set_comment { $_[0]->{comment}= $_[1]; return $_[0]; } sub comment { return $_[0]->{comment}; } sub comment_string { return $COMMENT_START . _comment_escaped_string( $_[0]->comment) . $COMMENT_END; } # comments cannot start or end with sub _comment_escaped_string { my( $c)= @_; $c=~ s{^-}{ -}; $c=~ s{-$}{- }; $c=~ s{--}{- -}g; return $c; } sub set_ent { $_[0]->{ent}= $_[1]; return $_[0]; } sub ent { return $_[0]->{ent}; } sub ent_name { return substr( $_[0]->ent, 1, -1);} sub set_cdata { my $elt= shift; unless( $elt->{gi} == $XML::Twig::gi2index{$CDATA}) { $elt->cut_children; $elt->insert_new_elt( first_child => $CDATA, @_); return $elt; } $elt->_set_cdata( $_[0]); return $_[0]; } sub _set_cdata { $_[0]->{cdata}= $_[1]; return $_[0]; } sub append_cdata { $_[0]->{cdata}.= $_[1]; return $_[0]; } sub cdata { return $_[0]->{cdata}; } sub contains_only_text { my $elt= shift; return 0 unless $elt->is_elt; foreach my $child ($elt->_children) { return 0 if $child->is_elt; } return $elt; } sub contains_only { my( $elt, $exp)= @_; my @children= $elt->_children; foreach my $child (@children) { return 0 unless $child->is( $exp); } return @children || 1; } sub contains_a_single { my( $elt, $exp)= @_; my $child= $elt->_first_child or return 0; return 0 unless $child->passes( $exp); return 0 if( $child->_next_sibling); return $child; } sub root { my $elt= shift; while( $elt->_parent) { $elt= $elt->_parent; } return $elt; } sub _root_through_cut { my $elt= shift; while( $elt->_parent || $elt->former_parent) { $elt= $elt->_parent || $elt->former_parent; } return $elt; } sub twig { my $elt= shift; my $root= $elt->root; return $root->{twig}; } sub _twig_through_cut { my $elt= shift; my $root= $elt->_root_through_cut; return $root->{twig}; } # used for navigation # returns undef or the element, depending on whether $elt passes $cond # $cond can be # - empty: the element passes the condition # - ELT ('#ELT'): the element passes the condition if it is a "real" element # - TEXT ('#TEXT'): the element passes if it is a CDATA or PCDATA element # - a string with an XPath condition (only a subset of XPath is actually # supported). # - a regexp: the element passes if its gi matches the regexp # - a code ref: the element passes if the code, applied on the element, # returns true my %cond_cache; # expression => coderef sub reset_cond_cache { %cond_cache=(); } { sub _install_cond { my $cond= shift; my $test; my $init=''; my $original_cond= $cond; my $not= ($cond=~ s{^\s*!}{}) ? '!' : ''; if( ref $cond eq 'CODE') { return $cond; } if( ref $cond eq 'Regexp') { $test = qq{(\$_[0]->gi=~ /$cond/)}; } else { my @tests; while( $cond) { # the condition is a string if( $cond=~ s{$ELT$SEP}{}) { push @tests, qq{\$_[0]->is_elt}; } elsif( $cond=~ s{$TEXT$SEP}{}) { push @tests, qq{\$_[0]->is_text}; } elsif( $cond=~ s{^\s*($REG_TAG_PART)$SEP}{}) { push @tests, _gi_test( $1); } elsif( $cond=~ s{^\s*($REG_REGEXP)$SEP}{}) { # /regexp/ push @tests, qq{ \$_[0]->gi=~ $1 }; } elsif( $cond=~ s{^\s*($REG_TAG_PART)?\s* # $1 \[\s*(-?)\s*(\d+)\s*\] # [$2] $SEP}{}xo ) { my( $gi, $neg, $index)= ($1, $2, $3); my $siblings= $neg ? q{$_[0]->_next_siblings} : q{$_[0]->_prev_siblings}; if( $gi && ($gi ne '*')) #{ $test= qq{((\$_[0]->gi eq "$gi") && (scalar( grep { \$_->gi eq "$gi" } $siblings) + 1 == $index))}; } { push @tests, _and( _gi_test( $gi), qq{ (scalar( grep { \$_->gi eq "$gi" } $siblings) + 1 == $index)}); } else { push @tests, qq{(scalar( $siblings) + 1 == $index)}; } } elsif( $cond=~ s{^\s*($REG_TAG_PART?)\s*($REG_PREDICATE)$SEP}{}) { my( $gi, $predicate)= ( $1, $2); push @tests, _and( _gi_test( $gi), _parse_predicate_in_step( $predicate)); } elsif( $cond=~ s{^\s*($REG_NAKED_PREDICATE)$SEP}{}) { push @tests, _parse_predicate_in_step( $1); } else { croak "wrong navigation condition '$original_cond' ($@)"; } } $test= @tests > 1 ? '(' . join( '||', map { "($_)" } @tests) . ')' : $tests[0]; } #warn "init: '$init' - test: '$test'\n"; my $sub= qq{sub { $NO_WARNINGS; $init; return $not($test) ? \$_[0] : undef; } }; my $s= eval $sub; #warn "cond: $cond\n$sub\n"; if( $@) { croak "wrong navigation condition '$original_cond' ($@);" } return $s; } sub _gi_test { my( $full_gi)= @_; # optimize if the gi exists, including the case where the gi includes a dot my $index= $XML::Twig::gi2index{$full_gi}; if( $index) { return qq{\$_[0]->{gi} == $index}; } my( $gi, $class, $id)= $full_gi=~ m{^(.*?)(?:[.]([^.]*)|[#](.*))?$}; my $gi_test=''; if( $gi && $gi ne '*' ) { # 2 options, depending on whether the gi exists in gi2index # start optimization my $index= $XML::Twig::gi2index{$gi}; if( $index) { # the gi exists, use its index as a faster shortcut $gi_test = qq{\$_[0]->{gi} == $index}; } else # end optimization { # it does not exist (but might be created later), compare the strings $gi_test = qq{ \$_[0]->gi eq "$gi"}; } } else { $gi_test= 1; } my $class_test=''; #warn "class: '$class'"; if( $class) { $class_test = qq{ defined( \$_[0]->{att}->{class}) && \$_[0]->{att}->{class}=~ m{\\b$class\\b} }; } my $id_test=''; #warn "id: '$id'"; if( $id) { $id_test = qq{ defined( \$_[0]->{att}->{$ID}) && \$_[0]->{att}->{$ID} eq '$id' }; } #warn "gi_test: '$gi_test' - class_test: '$class_test' returning ", _and( $gi_test, $class_test); return _and( $gi_test, $class_test, $id_test); } # input: the original predicate sub _parse_predicate_in_step { my $cond= shift; my %PERL_ALPHA_TEST= ( '=' => ' eq ', '!=' => ' ne ', '>' => ' gt ', '>=' => ' ge ', '<' => ' lt ', '<=' => ' le '); $cond=~ s{^\s*\[\s*}{}; $cond=~ s{\s*\]\s*$}{}; $cond=~ s{( ($REG_STRING|$REG_REGEXP) # strings or regexps |\@($REG_TAG_NAME)(?=\s*(?:[><=!]|!~|=~)) # @att (followed by a comparison operator) |\@($REG_TAG_NAME) # @att (not followed by a comparison operator) |=~|!~ # matching operators |([><]=?|=|!=)(?=\s*[\d+-]) # test before a number |([><]=?|=|!=) # test, other cases |($REG_FUNCTION) # no arg functions # this bit is a mess, but it is the only solution with this half-baked parser |((?:string|text)\(\s*$REG_TAG_NAME\s*\)\s*$REG_MATCH\s*$REG_REGEXP) # string( child) =~ /regexp/ |((?:string|text)\(\s*$REG_TAG_NAME\s*\)\s*!?=\s*$REG_VALUE) # string( child) = "value" (or !=) |((?:string|text)\(\s*$REG_TAG_NAME\s*\)\s*[<>]=?\s*$REG_VALUE) # string( child) > "value" |(and|or) )} { my( $token, $string, $att, $bare_att, $num_test, $alpha_test, $func, $string_regexp, $string_eq, $string_test, $and_or) = ( $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11); if( defined $string) { $token } elsif( $att) { "( \$_[0]->{att} && exists( \$_[0]->{att}->{'$att'}) && \$_[0]->{att}->{'$att'})"; } elsif( $bare_att) { "(\$_[0]->{att} && defined( \$_[0]->{att}->{'$bare_att'}))"; } elsif( $num_test && ($num_test eq '=') ) { "==" } # others tests are unchanged elsif( $alpha_test) { $PERL_ALPHA_TEST{$alpha_test} } elsif( $func && $func=~ m{^(?:string|text)}) { "\$_[0]->text"; } elsif( $string_regexp && $string_regexp =~ m{(?:string|text)\(\s*($REG_TAG_NAME)\s*\)\s*($REG_MATCH)\s*($REG_REGEXP)}) { "(XML::Twig::_first_n { (\$_->gi eq '$1') && (\$_->text $2 $3) } 1, \$_[0]->_children)"; } elsif( $string_eq && $string_eq =~ m{(?:string|text)\(\s*($REG_TAG_NAME)\s*\)\s*(!?=)\s*($REG_VALUE)}) {"(XML::Twig::_first_n { (\$_->gi eq '$1') && (\$_->text $PERL_ALPHA_TEST{$2} $3) } 1, \$_[0]->_children)"; } elsif( $string_test && $string_test =~ m{(?:string|text)\(\s*($REG_TAG_NAME)\s*\)\s*([<>]=?)\s*($REG_VALUE)}) { "(XML::Twig::_first_n { (\$_->gi eq '$1') && (\$_->text $2 $3) } 1, \$_[0]->_children)"; } elsif( $and_or) { $and_or eq 'and' ? '&&' : '||' ; } else { $token; } }gexs; return "($cond)"; } sub _op { my $op= shift; if( $op eq '=') { $op= 'eq'; } elsif( $op eq '!=') { $op= 'ne'; } return $op; } sub passes { my( $elt, $cond)= @_; return $elt unless $cond; my $sub= ($cond_cache{$cond} ||= _install_cond( $cond)); return $sub->( $elt); } } sub set_parent { $_[0]->{parent}= $_[1]; if( $XML::Twig::weakrefs) { weaken( $_[0]->{parent}); } } sub parent { my $elt= shift; my $cond= shift || return $elt->_parent; do { $elt= $elt->_parent || return; } until ( $elt->passes( $cond)); return $elt; } sub set_first_child { $_[0]->{'first_child'}= $_[1]; } sub first_child { my $elt= shift; my $cond= shift || return $elt->_first_child; my $child= $elt->_first_child; my $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond)); while( $child && !$test_cond->( $child)) { $child= $child->_next_sibling; } return $child; } sub _first_child { return $_[0]->{first_child}; } sub _last_child { return $_[0]->{last_child}; } sub _next_sibling { return $_[0]->{next_sibling}; } sub _prev_sibling { return $_[0]->{prev_sibling}; } sub _parent { return $_[0]->{parent}; } sub _next_siblings { my $elt= shift; my @siblings; while( $elt= $elt->{next_sibling}) { push @siblings, $elt; } return @siblings; } sub _prev_siblings { my $elt= shift; my @siblings; while( $elt= $elt->{prev_sibling}) { push @siblings, $elt; } return @siblings; } # sets a field # arguments $record, $cond, @content sub set_field { my $record = shift; my $cond = shift; my $child= $record->first_child( $cond); if( $child) { $child->set_content( @_); } else { if( $cond=~ m{^\s*($REG_TAG_NAME)}) { my $gi= $1; $child= $record->insert_new_elt( last_child => $gi, @_); } else { croak "can't create a field name from $cond"; } } return $child; } sub set_last_child { $_[0]->{'last_child'}= $_[1]; delete $_->[0]->{empty}; if( $XML::Twig::weakrefs) { weaken( $_[0]->{'last_child'}); } } sub last_child { my $elt= shift; my $cond= shift || return $elt->_last_child; my $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond)); my $child= $elt->_last_child; while( $child && !$test_cond->( $child) ) { $child= $child->_prev_sibling; } return $child } sub set_prev_sibling { $_[0]->{'prev_sibling'}= $_[1]; if( $XML::Twig::weakrefs) { weaken( $_[0]->{'prev_sibling'}); } } sub prev_sibling { my $elt= shift; my $cond= shift || return $elt->_prev_sibling; my $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond)); my $sibling= $elt->_prev_sibling; while( $sibling && !$test_cond->( $sibling) ) { $sibling= $sibling->_prev_sibling; } return $sibling; } sub set_next_sibling { $_[0]->{'next_sibling'}= $_[1]; } sub next_sibling { my $elt= shift; my $cond= shift || return $elt->_next_sibling; my $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond)); my $sibling= $elt->_next_sibling; while( $sibling && !$test_cond->( $sibling) ) { $sibling= $sibling->_next_sibling; } return $sibling; } # methods dealing with the class attribute, convenient if you work with xhtml sub class { $_[0]->{att}->{class}; } # lvalue version of class. separate from class to avoid problem like RT# sub lclass :lvalue # > perl 5.5 { $_[0]->{att}->{class}; } sub set_class { my( $elt, $class)= @_; $elt->set_att( class => $class); } # adds a class to an element sub add_to_class { my( $elt, $new_class)= @_; return $elt unless $new_class; my $class= $elt->class; my %class= $class ? map { $_ => 1 } split /\s+/, $class : (); $class{$new_class}= 1; $elt->set_class( join( ' ', sort keys %class)); } sub remove_class { my( $elt, $class_to_remove)= @_; return $elt unless $class_to_remove; my $class= $elt->class; my %class= $class ? map { $_ => 1 } split /\s+/, $class : (); delete $class{$class_to_remove}; $elt->set_class( join( ' ', sort keys %class)); } sub att_to_class { my( $elt, $att)= @_; $elt->set_class( $elt->att( $att)); } sub add_att_to_class { my( $elt, $att)= @_; $elt->add_to_class( $elt->att( $att)); } sub move_att_to_class { my( $elt, $att)= @_; $elt->add_to_class( $elt->att( $att)); $elt->del_att( $att); } sub tag_to_class { my( $elt)= @_; $elt->set_class( $elt->tag); } sub add_tag_to_class { my( $elt)= @_; $elt->add_to_class( $elt->tag); } sub set_tag_class { my( $elt, $new_tag)= @_; $elt->add_tag_to_class; $elt->set_tag( $new_tag); } sub tag_to_span { my( $elt)= @_; $elt->set_class( $elt->tag) unless( $elt->tag eq 'span' && $elt->class); # set class to span unless it would mean replacing it with span $elt->set_tag( 'span'); } sub tag_to_div { my( $elt)= @_; $elt->set_class( $elt->tag) unless( $elt->tag eq 'div' && $elt->class); # set class to div unless it would mean replacing it with div $elt->set_tag( 'div'); } sub in_class { my( $elt, $class)= @_; my $elt_class= $elt->class; return unless( defined $elt_class); return $elt->class=~ m{(?:^|\s)\Q$class\E(?:\s|$)} ? $elt : 0; } # get or set all attributes # argument can be a hash or a hashref sub set_atts { my $elt= shift; my %atts; tie %atts, 'Tie::IxHash' if( keep_atts_order()); %atts= @_ == 1 ? %{$_[0]} : @_; $elt->{att}= \%atts; if( exists $atts{$ID}) { $elt->_set_id( $atts{$ID}); } return $elt; } sub atts { return $_[0]->{att}; } sub att_names { return (sort keys %{$_[0]->{att}}); } sub del_atts { $_[0]->{att}={}; return $_[0]; } # get or set a single attribute (set works for several atts) sub set_att { my $elt= shift; if( $_[0] && ref( $_[0]) && !$_[1]) { croak "improper call to set_att, usage is \$elt->set_att( att1 => 'val1', att2 => 'val2',...)"; } unless( $elt->{att}) { $elt->{att}={}; tie %{$elt->{att}}, 'Tie::IxHash' if( keep_atts_order()); } while(@_) { my( $att, $val)= (shift, shift); $elt->{att}->{$att}= $val; if( $att eq $ID) { $elt->_set_id( $val); } } return $elt; } sub att { $_[0]->{att}->{$_[1]}; } # lvalue version of att. separate from class to avoid problem like RT# sub latt :lvalue # > perl 5.5 { $_[0]->{att}->{$_[1]}; } sub del_att { my $elt= shift; while( @_) { delete $elt->{'att'}->{shift()}; } return $elt; } sub att_exists { return exists $_[0]->{att}->{$_[1]}; } # delete an attribute from all descendants of an element sub strip_att { my( $elt, $att)= @_; $_->del_att( $att) foreach ($elt->descendants_or_self( qq{*[\@$att]})); return $elt; } sub change_att_name { my( $elt, $old_name, $new_name)= @_; my $value= $elt->att( $old_name); return $elt unless( defined $value); $elt->del_att( $old_name) ->set_att( $new_name => $value); return $elt; } sub lc_attnames { my $elt= shift; foreach my $att ($elt->att_names) { if( $att ne lc $att) { $elt->change_att_name( $att, lc $att); } } return $elt; } sub set_twig_current { $_[0]->{twig_current}=1; } sub del_twig_current { delete $_[0]->{twig_current}; } # get or set the id attribute sub set_id { my( $elt, $id)= @_; $elt->del_id() if( exists $elt->{att}->{$ID}); $elt->set_att($ID, $id); $elt->_set_id( $id); return $elt; } # only set id, does not update the attribute value sub _set_id { my( $elt, $id)= @_; my $t= $elt->twig || $elt; $t->{twig_id_list}->{$id}= $elt; if( $XML::Twig::weakrefs) { weaken( $t->{twig_id_list}->{$id}); } return $elt; } sub id { return $_[0]->{att}->{$ID}; } # methods used to add ids to elements that don't have one BEGIN { my $id_nb = "0001"; my $id_seed = "twig_id_"; sub set_id_seed ## no critic (Subroutines::ProhibitNestedSubs); { $id_seed= $_[1]; $id_nb=1; } sub add_id ## no critic (Subroutines::ProhibitNestedSubs); { my $elt= shift; if( defined $elt->id) { return $elt->id; } else { my $id= $_[0] && ref( $_[0]) && isa( $_[0], 'CODE') ? $_[0]->( $elt) : $id_seed . $id_nb++; $elt->set_id( $id); return $id; } } } # delete the id attribute and remove the element from the id list sub del_id { my $elt= shift; if( ! exists $elt->{att}->{$ID}) { return $elt }; my $id= $elt->{att}->{$ID}; delete $elt->{att}->{$ID}; my $t= shift || $elt->twig; unless( $t) { return $elt; } if( exists $t->{twig_id_list}->{$id}) { delete $t->{twig_id_list}->{$id}; } return $elt; } # return the list of children sub children { my $elt= shift; my @children; my $child= $elt->first_child( @_); while( $child) { push @children, $child; $child= $child->next_sibling( @_); } return @children; } sub _children { my $elt= shift; my @children=(); my $child= $elt->_first_child(); while( $child) { push @children, $child; $child= $child->_next_sibling; } return @children; } sub children_copy { my $elt= shift; my @children; my $child= $elt->first_child( @_); while( $child) { push @children, $child->copy; $child= $child->next_sibling( @_); } return @children; } sub children_count { my $elt= shift; my $cond= shift; my $count=0; my $child= $elt->_first_child; while( $child) { $count++ if( $child->passes( $cond)); $child= $child->_next_sibling; } return $count; } sub children_text { my $elt= shift; return wantarray() ? map { $_->text} $elt->children( @_) : join( '', map { $_->text} $elt->children( @_) ) ; } sub children_trimmed_text { my $elt= shift; return wantarray() ? map { $_->trimmed_text} $elt->children( @_) : join( '', map { $_->trimmed_text} $elt->children( @_) ) ; } sub all_children_are { my( $parent, $cond)= @_; foreach my $child ($parent->_children) { return 0 unless( $child->passes( $cond)); } return $parent; } sub ancestors { my( $elt, $cond)= @_; my @ancestors; while( $elt->_parent) { $elt= $elt->_parent; push @ancestors, $elt if( $elt->passes( $cond)); } return @ancestors; } sub ancestors_or_self { my( $elt, $cond)= @_; my @ancestors; while( $elt) { push @ancestors, $elt if( $elt->passes( $cond)); $elt= $elt->_parent; } return @ancestors; } sub _ancestors { my( $elt, $include_self)= @_; my @ancestors= $include_self ? ($elt) : (); while( $elt= $elt->_parent) { push @ancestors, $elt; } return @ancestors; } sub inherit_att { my $elt= shift; my $att= shift; my %tags= map { ($_, 1) } @_; do { if( (defined $elt->att( $att)) && ( !%tags || $tags{$elt->gi}) ) { return $elt->att( $att); } } while( $elt= $elt->_parent); return undef; } sub _inherit_att_through_cut { my $elt= shift; my $att= shift; my %tags= map { ($_, 1) } @_; do { if( (defined $elt->att( $att)) && ( !%tags || $tags{$elt->gi}) ) { return $elt->att( $att); } } while( $elt= $elt->_parent || $elt->former_parent); return undef; } sub current_ns_prefixes { my $elt= shift; my %prefix; $prefix{''}=1 if( $elt->namespace( '')); while( $elt) { my @ns= grep { !m{^xml} } map { m{^([^:]+):} } ($elt->gi, $elt->att_names); $prefix{$_}=1 foreach (@ns); $elt= $elt->_parent; } return (sort keys %prefix); } # kinda counter-intuitive actually: # the next element is found by looking for the next open tag after from the # current one, which is the first child, if it exists, or the next sibling # or the first next sibling of an ancestor # optional arguments are: # - $subtree_root: a reference to an element, when the next element is not # within $subtree_root anymore then next_elt returns undef # - $cond: a condition, next_elt returns the next element matching the condition sub next_elt { my $elt= shift; my $subtree_root= 0; $subtree_root= shift if( ref( $_[0]) && isa( $_[0], 'XML::Twig::Elt')); my $cond= shift; my $next_elt; my $ind; # optimization my $test_cond; if( $cond) # optimization { unless( defined( $ind= $XML::Twig::gi2index{$cond}) ) # optimization { $test_cond= ($cond_cache{$cond} ||= _install_cond( $cond)); } # optimization } # optimization do { if( $next_elt= $elt->_first_child) { # simplest case: the elt has a child } elsif( $next_elt= $elt->_next_sibling) { # no child but a next sibling (just check we stay within the subtree) # case where elt is subtree_root, is empty and has a sibling return undef if( $subtree_root && ($elt == $subtree_root)); } else { # case where the element has no child and no next sibling: # get the first next sibling of an ancestor, checking subtree_root # case where elt is subtree_root, is empty and has no sibling return undef if( $subtree_root && ($elt == $subtree_root)); $next_elt= $elt->_parent || return undef; until( $next_elt->_next_sibling) { return undef if( $subtree_root && ($subtree_root == $next_elt)); $next_elt= $next_elt->_parent || return undef; } return undef if( $subtree_root && ($subtree_root == $next_elt)); $next_elt= $next_elt->_next_sibling; } $elt= $next_elt; # just in case we need to loop } until( ! defined $elt || ! defined $cond || (defined $ind && ($elt->{gi} eq $ind)) # optimization || (defined $test_cond && ($test_cond->( $elt))) ); return $elt; } # return the next_elt within the element # just call next_elt with the element as first and second argument sub first_descendant { return $_[0]->next_elt( @_); } # get the last descendant, # then return the element found or call prev_elt with the condition sub last_descendant { my( $elt, $cond)= @_; my $last_descendant= $elt->_last_descendant; if( !$cond || $last_descendant->matches( $cond)) { return $last_descendant; } else { return $last_descendant->prev_elt( $elt, $cond); } } # no argument allowed here, just go down the last_child recursively sub _last_descendant { my $elt= shift; while( my $child= $elt->_last_child) { $elt= $child; } return $elt; } # counter-intuitive too: # the previous element is found by looking # for the first open tag backwards from the current one # it's the last descendant of the previous sibling # if it exists, otherwise it's simply the parent sub prev_elt { my $elt= shift; my $subtree_root= 0; if( defined $_[0] and (ref( $_[0]) && isa( $_[0], 'XML::Twig::Elt'))) { $subtree_root= shift ; return undef if( $elt == $subtree_root); } my $cond= shift; # get prev elt my $prev_elt; do { return undef if( $elt == $subtree_root); if( $prev_elt= $elt->_prev_sibling) { while( $prev_elt->_last_child) { $prev_elt= $prev_elt->_last_child; } } else { $prev_elt= $elt->_parent || return undef; } $elt= $prev_elt; # in case we need to loop } until( $elt->passes( $cond)); return $elt; } sub _following_elt { my( $elt)= @_; while( $elt && !$elt->next_sibling) { $elt= $elt->parent; } return $elt ? $elt->next_sibling : undef; } sub following_elt { my( $elt, $cond)= @_; $elt= $elt->_following_elt || return undef; return $elt if( !$cond || $elt->matches( $cond)); return $elt->next_elt( $cond); } sub following_elts { my( $elt, $cond)= @_; if( !$cond) { undef $cond; } my $following= $elt->following_elt( $cond); if( $following) { my @followings= $following; while( $following= $following->next_elt( $cond)) { push @followings, $following; } return( @followings); } else { return (); } } sub _preceding_elt { my( $elt)= @_; while( $elt && !$elt->_prev_sibling) { $elt= $elt->parent; } return $elt ? $elt->_prev_sibling->_last_descendant : undef; } sub preceding_elt { my( $elt, $cond)= @_; $elt= $elt->_preceding_elt || return undef; return $elt if( !$cond || $elt->matches( $cond)); return $elt->prev_elt( $cond); } sub preceding_elts { my( $elt, $cond)= @_; if( !$cond) { undef $cond; } my $preceding= $elt->preceding_elt( $cond); if( $preceding) { my @precedings= $preceding; while( $preceding= $preceding->prev_elt( $cond)) { push @precedings, $preceding; } return( @precedings); } else { return (); } } # used in get_xpath sub _self { my( $elt, $cond)= @_; return $cond ? $elt->matches( $cond) : $elt; } sub next_n_elt { my $elt= shift; my $offset= shift || return undef; foreach (1..$offset) { $elt= $elt->next_elt( @_) || return undef; } return $elt; } # checks whether $elt is included in $ancestor, returns 1 in that case sub in { my ($elt, $ancestor)= @_; if( ref( $ancestor) && isa( $ancestor, 'XML::Twig::Elt')) { # element while( $elt= $elt->_parent) { return $elt if( $elt == $ancestor); } } else { # condition while( $elt= $elt->_parent) { return $elt if( $elt->matches( $ancestor)); } } return 0; } sub first_child_text { my $elt= shift; my $dest=$elt->first_child(@_) or return ''; return $dest->text; } sub fields { my $elt= shift; return map { $elt->field( $_) } @_; } sub first_child_trimmed_text { my $elt= shift; my $dest=$elt->first_child(@_) or return ''; return $dest->trimmed_text; } sub first_child_matches { my $elt= shift; my $dest= $elt->_first_child or return undef; return $dest->passes( @_); } sub last_child_text { my $elt= shift; my $dest=$elt->last_child(@_) or return ''; return $dest->text; } sub last_child_trimmed_text { my $elt= shift; my $dest=$elt->last_child(@_) or return ''; return $dest->trimmed_text; } sub last_child_matches { my $elt= shift; my $dest= $elt->_last_child or return undef; return $dest->passes( @_); } sub child_text { my $elt= shift; my $dest=$elt->child(@_) or return ''; return $dest->text; } sub child_trimmed_text { my $elt= shift; my $dest=$elt->child(@_) or return ''; return $dest->trimmed_text; } sub child_matches { my $elt= shift; my $nb= shift; my $dest= $elt->child( $nb) or return undef; return $dest->passes( @_); } sub prev_sibling_text { my $elt= shift; my $dest= $elt->_prev_sibling(@_) or return ''; return $dest->text; } sub prev_sibling_trimmed_text { my $elt= shift; my $dest= $elt->_prev_sibling(@_) or return ''; return $dest->trimmed_text; } sub prev_sibling_matches { my $elt= shift; my $dest= $elt->_prev_sibling or return undef; return $dest->passes( @_); } sub next_sibling_text { my $elt= shift; my $dest= $elt->next_sibling(@_) or return ''; return $dest->text; } sub next_sibling_trimmed_text { my $elt= shift; my $dest= $elt->next_sibling(@_) or return ''; return $dest->trimmed_text; } sub next_sibling_matches { my $elt= shift; my $dest= $elt->_next_sibling or return undef; return $dest->passes( @_); } sub prev_elt_text { my $elt= shift; my $dest= $elt->prev_elt(@_) or return ''; return $dest->text; } sub prev_elt_trimmed_text { my $elt= shift; my $dest= $elt->prev_elt(@_) or return ''; return $dest->trimmed_text; } sub prev_elt_matches { my $elt= shift; my $dest= $elt->prev_elt or return undef; return $dest->passes( @_); } sub next_elt_text { my $elt= shift; my $dest= $elt->next_elt(@_) or return ''; return $dest->text; } sub next_elt_trimmed_text { my $elt= shift; my $dest= $elt->next_elt(@_) or return ''; return $dest->trimmed_text; } sub next_elt_matches { my $elt= shift; my $dest= $elt->next_elt or return undef; return $dest->passes( @_); } sub parent_text { my $elt= shift; my $dest= $elt->parent(@_) or return ''; return $dest->text; } sub parent_trimmed_text { my $elt= shift; my $dest= $elt->parent(@_) or return ''; return $dest->trimmed_text; } sub parent_matches { my $elt= shift; my $dest= $elt->_parent or return undef; return $dest->passes( @_); } sub is_first_child { my $elt= shift; my $parent= $elt->parent or return 0; my $first_child= $parent->first_child( @_) or return 0; return ($first_child == $elt) ? $elt : 0; } sub is_last_child { my $elt= shift; my $parent= $elt->parent or return 0; my $last_child= $parent->last_child( @_) or return 0; return ($last_child == $elt) ? $elt : 0; } # returns the depth level of the element # if 2 parameter are used then counts the 2cd element name in the # ancestors list sub level { my( $elt, $cond)= @_; my $level=0; my $name=shift || ''; while( $elt= $elt->_parent) { $level++ if( !$cond || $elt->matches( $cond)); } return $level; } # checks whether $elt has an ancestor that satisfies $cond, returns the ancestor sub in_context { my ($elt, $cond, $level)= @_; $level= -1 unless( $level) ; # $level-- will never hit 0 while( $level) { $elt= $elt->_parent or return 0; if( $elt->matches( $cond)) { return $elt; } $level--; } return 0; } sub _descendants { my( $subtree_root, $include_self)= @_; my @descendants= $include_self ? ($subtree_root) : (); my $elt= $subtree_root; my $next_elt; MAIN: while( 1) { if( $next_elt= $elt->_first_child) { # simplest case: the elt has a child } elsif( $next_elt= $elt->_next_sibling) { # no child but a next sibling (just check we stay within the subtree) # case where elt is subtree_root, is empty and has a sibling last MAIN if( $elt == $subtree_root); } else { # case where the element has no child and no next sibling: # get the first next sibling of an ancestor, checking subtree_root # case where elt is subtree_root, is empty and has no sibling last MAIN if( $elt == $subtree_root); # backtrack until we find a parent with a next sibling $next_elt= $elt->_parent || last; until( $next_elt->_next_sibling) { last MAIN if( $subtree_root == $next_elt); $next_elt= $next_elt->_parent || last MAIN; } last MAIN if( $subtree_root == $next_elt); $next_elt= $next_elt->_next_sibling; } $elt= $next_elt || last MAIN; push @descendants, $elt; } return @descendants; } sub descendants { my( $subtree_root, $cond)= @_; my @descendants=(); my $elt= $subtree_root; # this branch is pure optimization for speed: if $cond is a gi replace it # by the index of the gi and loop here # start optimization my $ind; if( !$cond || ( defined ( $ind= $XML::Twig::gi2index{$cond})) ) { my $next_elt; while( 1) { if( $next_elt= $elt->_first_child) { # simplest case: the elt has a child } elsif( $next_elt= $elt->_next_sibling) { # no child but a next sibling (just check we stay within the subtree) # case where elt is subtree_root, is empty and has a sibling last if( $subtree_root && ($elt == $subtree_root)); } else { # case where the element has no child and no next sibling: # get the first next sibling of an ancestor, checking subtree_root # case where elt is subtree_root, is empty and has no sibling last if( $subtree_root && ($elt == $subtree_root)); # backtrack until we find a parent with a next sibling $next_elt= $elt->_parent || last undef; until( $next_elt->_next_sibling) { last if( $subtree_root && ($subtree_root == $next_elt)); $next_elt= $next_elt->_parent || last; } last if( $subtree_root && ($subtree_root == $next_elt)); $next_elt= $next_elt->_next_sibling; } $elt= $next_elt || last; push @descendants, $elt if( !$cond || ($elt->{gi} eq $ind)); } } else # end optimization { # branch for a complex condition: use the regular (slow but simple) way while( $elt= $elt->next_elt( $subtree_root, $cond)) { push @descendants, $elt; } } return @descendants; } sub descendants_or_self { my( $elt, $cond)= @_; my @descendants= $elt->passes( $cond) ? ($elt) : (); push @descendants, $elt->descendants( $cond); return @descendants; } sub sibling { my $elt= shift; my $nb= shift; if( $nb > 0) { foreach( 1..$nb) { $elt= $elt->next_sibling( @_) or return undef; } } elsif( $nb < 0) { foreach( 1..(-$nb)) { $elt= $elt->prev_sibling( @_) or return undef; } } else # $nb == 0 { return $elt->passes( $_[0]); } return $elt; } sub sibling_text { my $elt= sibling( @_); return $elt ? $elt->text : undef; } sub child { my $elt= shift; my $nb= shift; if( $nb >= 0) { $elt= $elt->first_child( @_) or return undef; foreach( 1..$nb) { $elt= $elt->next_sibling( @_) or return undef; } } else { $elt= $elt->last_child( @_) or return undef; foreach( 2..(-$nb)) { $elt= $elt->prev_sibling( @_) or return undef; } } return $elt; } sub prev_siblings { my $elt= shift; my @siblings=(); while( $elt= $elt->prev_sibling( @_)) { unshift @siblings, $elt; } return @siblings; } sub siblings { my $elt= shift; return grep { $_ ne $elt } $elt->parent->children( @_); } sub pos { my $elt= shift; return 0 if ($_[0] && !$elt->matches( @_)); my $pos=1; $pos++ while( $elt= $elt->prev_sibling( @_)); return $pos; } sub next_siblings { my $elt= shift; my @siblings=(); while( $elt= $elt->next_sibling( @_)) { push @siblings, $elt; } return @siblings; } # used by get_xpath: parses the xpath expression and generates a sub that performs the # search { my %axis2method; BEGIN { %axis2method= ( child => 'children', descendant => 'descendants', 'descendant-or-self' => 'descendants_or_self', parent => 'parent_is', ancestor => 'ancestors', 'ancestor-or-self' => 'ancestors_or_self', 'following-sibling' => 'next_siblings', 'preceding-sibling' => 'prev_siblings', following => 'following_elts', preceding => 'preceding_elts', self => '_self', ); } sub _install_xpath { my( $xpath_exp, $type)= @_; my $original_exp= $xpath_exp; my $sub= 'my $elt= shift; my @results;'; # grab the root if expression starts with a / if( $xpath_exp=~ s{^/}{}) { $sub .= '@results= ($elt->twig) || croak "cannot use an XPath query starting with a / on a node not attached to a whole twig";'; } elsif( $xpath_exp=~ s{^\./}{}) { $sub .= '@results= ($elt);'; } else { $sub .= '@results= ($elt);'; } #warn "xpath_exp= '$xpath_exp'\n"; while( $xpath_exp && $xpath_exp=~s{^\s*(/?) # the xxx=~/regexp/ is a pain as it includes / (\s*(?:(?:($REG_AXIS)::)?(\*|$REG_TAG_PART|\.\.|\.)\s*)?($REG_PREDICATE_ALT*) ) (/|$)}{}xo) { my( $wildcard, $sub_exp, $axis, $gi, $predicates)= ($1, $2, $3, $4, $5); if( $axis && ! $gi) { _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp"); } # grab a parent if( $sub_exp eq '..') { _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp") if( $wildcard); $sub .= '@results= map { $_->parent} @results;'; } # test the element itself elsif( $sub_exp=~ m{^\.(.*)$}s) { $sub .= "\@results= grep { \$_->matches( q{$1}) } \@results;" } # grab children else { if( !$axis) { $axis= $wildcard ? 'descendant' : 'child'; } if( !$gi or $gi eq '*') { $gi=''; } my $function; # "special" predicates, that return just one element if( $predicates && ($predicates =~ m{^\s*\[\s*((-\s*)?\d+)\s*\]\s*$})) { # [<nb>] my $offset= $1; $offset-- if( $offset > 0); $function= $axis eq 'descendant' ? "next_n_elt( $offset, '$gi')" : $axis eq 'child' ? "child( $offset, '$gi')" : _croak_and_doublecheck_xpath( $original_exp, "error [$1] not supported along axis '$axis'") ; $sub .= "\@results= grep { \$_ } map { \$_->$function } \@results;" } elsif( $predicates && ($predicates =~ m{^\s*\[\s*last\s*\(\s*\)\s*\]\s*$}) ) { # last() _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp, usage of // and last() not supported") if( $wildcard); $sub .= "\@results= map { \$_->last_child( '$gi') } \@results;"; } else { # follow the axis #warn "axis: '$axis' - method: '$axis2method{$axis}' - gi: '$gi'\n"; my $follow_axis= " \$_->$axis2method{$axis}( '$gi')"; my $step= $follow_axis; # now filter using the predicate while( $predicates=~ s{^\s*($REG_PREDICATE_ALT)\s*}{}o) { my $pred= $1; $pred=~ s{^\s*\[\s*}{}; $pred=~ s{\s*\]\s*$}{}; my $test=""; my $pos; if( $pred=~ m{^(-?\s*\d+)$}) { my $pos= $1; if( $step=~ m{^\s*grep(.*) (\$_->\w+\(\s*'[^']*'\s*\))}) { $step= "XML::Twig::_first_n $1 $pos, $2"; } else { if( $pos > 0) { $pos--; } $step= "($step)[$pos]"; } #warn "number predicate '$pos' - generated step '$step'\n"; } else { my $syntax_error=0; do { if( $pred =~ s{^string\(\s*\)\s*=\s*($REG_STRING)\s*}{}o) # string()="string" pred { $test .= "\$_->text eq $1"; } elsif( $pred =~ s{^string\(\s*\)\s*!=\s*($REG_STRING)\s*}{}o) # string()!="string" pred { $test .= "\$_->text ne $1"; } if( $pred =~ s{^string\(\s*\)\s*=\s*($REG_NUMBER)\s*}{}o) # string()=<number> pred { $test .= "\$_->text eq $1"; } elsif( $pred =~ s{^string\(\s*\)\s*!=\s*($REG_NUMBER)\s*}{}o) # string()!=<number> pred { $test .= "\$_->text ne $1"; } elsif( $pred =~ s{^string\(\s*\)\s*(>|<|>=|<=)\s*($REG_NUMBER)\s*}{}o) # string()!=<number> pred { $test .= "\$_->text $1 $2"; } elsif( $pred =~ s{^string\(\s*\)\s*($REG_MATCH)\s*($REG_REGEXP)\s*}{}o) # string()=~/regex/ pred { my( $match, $regexp)= ($1, $2); $test .= "\$_->text $match $regexp"; } elsif( $pred =~ s{^string\(\s*\)\s*}{}o) # string() pred { $test .= "\$_->text"; } elsif( $pred=~ s{^@($REG_TAG_NAME)\s*($REG_OP)\s*($REG_STRING|$REG_NUMBER)}{}o) # @att="val" pred { my( $att, $oper, $val)= ($1, _op( $2), $3); $test .= qq{((defined \$_->att("$att")) && (\$_->att( "$att") $oper $val))}; } elsif( $pred =~ s{^@($REG_TAG_NAME)\s*($REG_MATCH)\s*($REG_REGEXP)\s*}{}o) # @att=~/regex/ pred XXX { my( $att, $match, $regexp)= ($1, $2, $3); $test .= qq{((defined \$_->att("$att")) && (\$_->att("$att") $match $regexp))};; } elsif( $pred=~ s{^@($REG_TAG_NAME)\s*}{}o) # @att pred { $test .= qq{(defined \$_->att("$1"))}; } elsif( $pred=~ s{^\s*(?:not|!)\s*@($REG_TAG_NAME)\s*}{}o) # not @att pred { $test .= qq{((\$_->is_elt) && (not defined \$_->att("$1")))}; } elsif( $pred=~ s{^\s*([()])}{}) # ( or ) (just add to the test) { $test .= qq{$1}; } elsif( $pred=~ s{^\s*(and|or)\s*}{}) { $test .= lc " $1 "; } else { $syntax_error=1; } } while( !$syntax_error && $pred); _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp at $pred") if( $pred); $step= " grep { $test } $step "; } } #warn "step: '$step'"; $sub .= "\@results= grep defined, map { $step } \@results;"; } } } if( $xpath_exp) { _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp around $xpath_exp"); } $sub .= q{return XML::Twig::_unique_elts( @results); }; #warn "generated: '$sub'\n"; my $s= eval "sub { $NO_WARNINGS; $sub }"; if( $@) { _croak_and_doublecheck_xpath( $original_exp, "error in xpath expression $original_exp ($@);") } return( $s); } } sub _croak_and_doublecheck_xpath { my $xpath_expression= shift; my $mess= join( "\n", @_); if( $XML::Twig::XPath::VERSION || 0) { my $check_twig= XML::Twig::XPath->new; if( eval { $check_twig->{twig_xp}->_parse( $xpath_expression) }) { $mess .= "\nthe expression is a valid XPath statement, and you are using XML::Twig::XPath, but" . "\nyou are using either 'find_nodes' or 'get_xpath' where the method you likely wanted" . "\nto use is 'findnodes', which is the only one that uses the full XPath engine\n"; } } croak $mess; } { # extremely elaborate caching mechanism my %xpath; # xpath_expression => subroutine_code; sub get_xpath { my( $elt, $xpath_exp, $offset)= @_; my $sub= ($xpath{$xpath_exp} ||= _install_xpath( $xpath_exp)); return $sub->( $elt) unless( defined $offset); my @res= $sub->( $elt); return $res[$offset]; } } sub findvalues { my $elt= shift; return map { $_->text } $elt->get_xpath( @_); } sub findvalue { my $elt= shift; return join '', map { $_->text } $elt->get_xpath( @_); } # XML::XPath compatibility sub getElementById { return $_[0]->twig->elt_id( $_[1]); } sub getChildNodes { my @children= $_[0]->_children; return wantarray ? @children : \@children; } sub _flushed { return $_[0]->{flushed}; } sub _set_flushed { $_[0]->{flushed}=1; } sub _del_flushed { delete $_[0]->{flushed}; } sub cut { my $elt= shift; my( $parent, $prev_sibling, $next_sibling); $parent= $elt->_parent; if( ! $parent && $elt->is_elt) { # are we cutting the root? my $t= $elt->{twig}; if( $t && ! $t->{twig_parsing}) { delete $t->{twig_root}; delete $elt->{twig}; return $elt; } # cutt`ing the root else { return; } # cutting an orphan, returning $elt would break backward compatibility } # save the old links, that'll make it easier for some loops foreach my $link ( qw(parent prev_sibling next_sibling) ) { $elt->{former}->{$link}= $elt->{$link}; if( $XML::Twig::weakrefs) { weaken( $elt->{former}->{$link}); } } # if we cut the current element then its parent becomes the current elt if( $elt->{twig_current}) { my $twig_current= $elt->_parent; $elt->twig->{twig_current}= $twig_current; $twig_current->set_twig_current; $elt->del_twig_current; } if( $parent->_first_child && $parent->_first_child == $elt) { $parent->set_first_child( $elt->_next_sibling); # cutting can make the parent empty if( ! $parent->_first_child) { $parent->set_empty( 1); } } if( $parent->_last_child && $parent->_last_child == $elt) { $parent->set_last_child( $elt->_prev_sibling); } if( $prev_sibling= $elt->_prev_sibling) { $prev_sibling->set_next_sibling( $elt->_next_sibling); } if( $next_sibling= $elt->_next_sibling) { $next_sibling->set_prev_sibling( $elt->_prev_sibling); } $elt->set_parent( undef); $elt->set_prev_sibling( undef); $elt->set_next_sibling( undef); # merge 2 (now) consecutive text nodes if they are of the same type # (type can be PCDATA or CDATA) if( $prev_sibling && $next_sibling && $prev_sibling->is_text && ( $prev_sibling->gi eq $next_sibling->gi)) { $prev_sibling->merge_text( $next_sibling); } return $elt; } sub former_next_sibling { return $_[0]->{former}->{next_sibling}; } sub former_prev_sibling { return $_[0]->{former}->{prev_sibling}; } sub former_parent { return $_[0]->{former}->{parent}; } sub cut_children { my( $elt, $exp)= @_; my @children= $elt->children( $exp); foreach (@children) { $_->cut; } if( ! $elt->has_children) { $elt->set_empty( 1); } return @children; } sub cut_descendants { my( $elt, $exp)= @_; my @descendants= $elt->descendants( $exp); foreach ($elt->descendants( $exp)) { $_->cut; } if( ! $elt->has_children) { $elt->set_empty( 1); } return @descendants; } sub erase { my $elt= shift; #you cannot erase the current element if( $elt->{twig_current}) { croak "trying to erase an element before it has been completely parsed"; } if( my $parent= $elt->_parent) { # normal case $elt->_move_extra_data_after_erase; my @children= $elt->_children; if( @children) { # elt has children, move them up # the first child may need to be merged with a previous text my $first_child= shift @children; $first_child->move( before => $elt); my $prev= $first_child->prev_sibling; if( $prev && $prev->is_text && ($first_child->gi eq $prev->gi) ) { $prev->merge_text( $first_child); } # move the rest of the children foreach my $child (@children) { $child->move( before => $elt); } # now the elt had no child, delete it $elt->delete; # now see if we need to merge the last child with the next element my $last_child= $children[-1] || $first_child; # if no last child, then it's also the first child my $next= $last_child->next_sibling; if( $next && $next->is_text && ($last_child->gi eq $next->gi) ) { $last_child->merge_text( $next); } # if parsing and have now a PCDATA text, mark so we can normalize later on if need be if( $parent->{twig_current} && $last_child->is_text) { $parent->{twig_to_be_normalized}=1; } } else { # no children, just cut the elt $elt->delete; } } else { # trying to erase the root (of a twig or of a cut/new element) my @children= $elt->_children; unless( @children == 1) { croak "can only erase an element with no parent if it has a single child"; } $elt->_move_extra_data_after_erase; my $child= shift @children; $child->set_parent( undef); my $twig= $elt->twig; $twig->set_root( $child); } return $elt; } sub _move_extra_data_after_erase { my( $elt)= @_; # extra_data if( my $extra_data= $elt->{extra_data}) { my $target= $elt->_first_child || $elt->_next_sibling; if( $target) { if( $target->is( $ELT)) { $target->set_extra_data( $extra_data . ($target->extra_data || '')); } elsif( $target->is( $TEXT)) { $target->_unshift_extra_data_in_pcdata( $extra_data, 0); } # TO CHECK } else { my $parent= $elt->parent; # always exists or the erase cannot be performed $parent->_prefix_extra_data_before_end_tag( $extra_data); } } # extra_data_before_end_tag if( my $extra_data= $elt->_extra_data_before_end_tag) { if( my $target= $elt->_next_sibling) { if( $target->is( $ELT)) { $target->set_extra_data( $extra_data . ($target->extra_data || '')); } elsif( $target->is( $TEXT)) { $target->_unshift_extra_data_in_pcdata( $extra_data, 0); } } elsif( my $parent= $elt->parent) { $parent->_prefix_extra_data_before_end_tag( $extra_data); } } return $elt; } BEGIN { my %method= ( before => \&paste_before, after => \&paste_after, first_child => \&paste_first_child, last_child => \&paste_last_child, within => \&paste_within, ); # paste elt somewhere around ref # pos can be first_child (default), last_child, before, after or within sub paste ## no critic (Subroutines::ProhibitNestedSubs); { my $elt= shift; if( $elt->_parent) { croak "cannot paste an element that belongs to a tree"; } my $pos; my $ref; if( ref $_[0]) { $pos= 'first_child'; croak "wrong argument order in paste, should be $_[1] first" if($_[1]); } else { $pos= shift; } if( my $method= $method{$pos}) { unless( ref( $_[0]) && isa( $_[0], 'XML::Twig::Elt')) { if( ! defined( $_[0])) { croak "missing target in paste"; } elsif( ! ref( $_[0])) { croak "wrong target type in paste (not a reference), should be XML::Twig::Elt or a subclass"; } else { my $ref= ref $_[0]; croak "wrong target type in paste: '$ref', should be XML::Twig::Elt or a subclass"; } } $ref= $_[0]; # check here so error message lists the caller file/line if( !$ref->_parent && ($pos=~ m{^(before|after)$}) && !$elt->is_pi && !$elt->is_comment) { croak "cannot paste $1 root"; } $elt->$method( @_); } else { croak "tried to paste in wrong position '$pos', allowed positions " . " are 'first_child', 'last_child', 'before', 'after' and " . "'within'"; } if( (my $ids= $elt->{twig_id_list}) && (my $t= $ref->twig) ) { $t->{twig_id_list}||={}; foreach my $id (keys %$ids) { $t->{twig_id_list}->{$id}= $ids->{$id}; if( $XML::Twig::weakrefs) { weaken( $t->{twig_id_list}->{$id}); } } } return $elt; } sub paste_before { my( $elt, $ref)= @_; my( $parent, $prev_sibling, $next_sibling ); # trying to paste before an orphan (root or detached wlt) unless( $ref->_parent) { if( my $t= $ref->twig) { if( $elt->is_comment || $elt->is_pi) # we can still do this { $t->_add_cpi_outside_of_root( leading_cpi => $elt); return; } else { croak "cannot paste before root"; } } else { croak "cannot paste before an orphan element"; } } $parent= $ref->_parent; $prev_sibling= $ref->_prev_sibling; $next_sibling= $ref; $elt->set_parent( $parent); if( $parent->_first_child == $ref) { $parent->set_first_child( $elt); } if( $prev_sibling) { $prev_sibling->set_next_sibling( $elt); } $elt->set_prev_sibling( $prev_sibling); $next_sibling->set_prev_sibling( $elt); $elt->set_next_sibling( $ref); return $elt; } sub paste_after { my( $elt, $ref)= @_; my( $parent, $prev_sibling, $next_sibling ); # trying to paste after an orphan (root or detached wlt) unless( $ref->_parent) { if( my $t= $ref->twig) { if( $elt->is_comment || $elt->is_pi) # we can still do this { $t->_add_cpi_outside_of_root( trailing_cpi => $elt); return; } else { croak "cannot paste after root"; } } else { croak "cannot paste after an orphan element"; } } $parent= $ref->_parent; $prev_sibling= $ref; $next_sibling= $ref->_next_sibling; $elt->set_parent( $parent); if( $parent->_last_child== $ref) { $parent->set_last_child( $elt); } $prev_sibling->set_next_sibling( $elt); $elt->set_prev_sibling( $prev_sibling); if( $next_sibling) { $next_sibling->set_prev_sibling( $elt); } $elt->set_next_sibling( $next_sibling); return $elt; } sub paste_first_child { my( $elt, $ref)= @_; my( $parent, $prev_sibling, $next_sibling ); $parent= $ref; $next_sibling= $ref->_first_child; $elt->set_parent( $parent); $parent->set_first_child( $elt); unless( $parent->_last_child) { $parent->set_last_child( $elt); } $elt->set_prev_sibling( undef); if( $next_sibling) { $next_sibling->set_prev_sibling( $elt); } $elt->set_next_sibling( $next_sibling); return $elt; } sub paste_last_child { my( $elt, $ref)= @_; my( $parent, $prev_sibling, $next_sibling ); $parent= $ref; $prev_sibling= $ref->_last_child; $elt->set_parent( $parent); $parent->set_last_child( $elt); unless( $parent->_first_child) { $parent->set_first_child( $elt); } $elt->set_prev_sibling( $prev_sibling); if( $prev_sibling) { $prev_sibling->set_next_sibling( $elt); } $elt->set_next_sibling( undef); return $elt; } sub paste_within { my( $elt, $ref, $offset)= @_; my $text= $ref->is_text ? $ref : $ref->next_elt( $TEXT, $ref); my $new= $text->split_at( $offset); $elt->paste_before( $new); return $elt; } } # load an element into a structure similar to XML::Simple's sub simplify { my $elt= shift; # normalize option names my %options= @_; %options= map { my ($key, $val)= ($_, $options{$_}); $key=~ s{(\w)([A-Z])}{$1_\L$2}g; $key => $val } keys %options; # check options my @allowed_options= qw( keyattr forcearray noattr content_key var var_regexp variables var_attr group_tags forcecontent normalise_space normalize_space ); my %allowed_options= map { $_ => 1 } @allowed_options; foreach my $option (keys %options) { carp "invalid option $option\n" unless( $allowed_options{$option}); } $options{normalise_space} ||= $options{normalize_space} || 0; $options{content_key} ||= 'content'; if( $options{content_key}=~ m{^-}) { # need to remove the - and to activate extra folding $options{content_key}=~ s{^-}{}; $options{extra_folding}= 1; } else { $options{extra_folding}= 0; } $options{forcearray} ||=0; if( isa( $options{forcearray}, 'ARRAY')) { my %forcearray_tags= map { $_ => 1 } @{$options{forcearray}}; $options{forcearray_tags}= \%forcearray_tags; $options{forcearray}= 0; } $options{keyattr} ||= ['name', 'key', 'id']; if( ref $options{keyattr} eq 'ARRAY') { foreach my $keyattr (@{$options{keyattr}}) { my( $prefix, $att)= ($keyattr=~ m{^([+-])?(.*)}); $prefix ||= ''; $options{key_for_all}->{$att}= 1; $options{remove_key_for_all}->{$att}=1 unless( $prefix eq '+'); $options{prefix_key_for_all}->{$att}=1 if( $prefix eq '-'); } } elsif( ref $options{keyattr} eq 'HASH') { while( my( $elt, $keyattr)= each %{$options{keyattr}}) { my( $prefix, $att)= ($keyattr=~ m{^([+-])?(.*)}); $prefix ||=''; $options{key_for_elt}->{$elt}= $att; $options{remove_key_for_elt}->{"$elt#$att"}=1 unless( $prefix); $options{prefix_key_for_elt}->{"$elt#$att"}=1 if( $prefix eq '-'); } } $options{var}||= $options{var_attr}; # for compat with XML::Simple if( $options{var}) { $options{var_values}= {}; } else { $options{var}=''; } if( $options{variables}) { $options{var}||= 1; $options{var_values}= $options{variables}; } if( $options{var_regexp} and !$options{var}) { warn "var option not used, var_regexp option ignored\n"; } $options{var_regexp} ||= '\$\{?(\w+)\}?'; $elt->_simplify( \%options); } sub _simplify { my( $elt, $options)= @_; my $data; my $gi= $elt->gi; my @children= $elt->_children; my %atts= $options->{noattr} || !$elt->atts ? () : %{$elt->atts}; my $nb_atts= keys %atts; my $nb_children= $elt->children_count + $nb_atts; my %nb_children; foreach (@children) { $nb_children{$_->tag}++; } foreach (keys %atts) { $nb_children{$_}++; } my $arrays; # tag => array where elements are stored # store children foreach my $child (@children) { if( $child->is_text) { # generate with a content key my $text= $elt->_text_with_vars( $options); if( $options->{normalise_space} >= 2) { $text= _normalize_space( $text); } if( $options->{force_content} || $nb_atts || (scalar @children > 1) ) { $data->{$options->{content_key}}= $text; } else { $data= $text; } } else { # element with sub-elements my $child_gi= $child->gi; my $child_data= $child->_simplify( $options); # first see if we need to simplify further the child data # simplify because of grouped tags if( my $grouped_tag= $options->{group_tags}->{$child_gi}) { # check that the child data is a hash with a single field unless( (ref( $child_data) eq 'HASH') && (keys %$child_data == 1) && defined ( my $grouped_child_data= $child_data->{$grouped_tag}) ) { croak "error in grouped tag $child_gi"; } else { $child_data= $grouped_child_data; } } # simplify because of extra folding if( $options->{extra_folding}) { if( (ref( $child_data) eq 'HASH') && (keys %$child_data == 1) && defined( my $content= $child_data->{$options->{content_key}}) ) { $child_data= $content; } } if( my $keyatt= $child->_key_attr( $options)) { # simplify element with key my $key= $child->att( $keyatt); if( $options->{normalise_space} >= 1) { $key= _normalize_space( $key); } $data->{$child_gi}->{$key}= $child_data; } elsif( $options->{forcearray} || $options->{forcearray_tags}->{$child_gi} || ( $nb_children{$child_gi} > 1) ) { # simplify element to store in an array if( defined $child_data && $child_data ne "" ) { $data->{$child_gi} ||= []; push @{$data->{$child_gi}}, $child_data; } else { $data->{$child_gi}= [{}]; } } else { # simplify element to store as a hash field $data->{$child_gi}=$child_data; $data->{$child_gi}= defined $child_data && $child_data ne "" ? $child_data : {}; } } } # store atts # TODO: deal with att that already have an element by that name foreach my $att (keys %atts) { # do not store if the att is a key that needs to be removed if( $options->{remove_key_for_all}->{$att} || $options->{remove_key_for_elt}->{"$gi#$att"} ) { next; } my $att_text= $options->{var} ? _replace_vars_in_text( $atts{$att}, $options) : $atts{$att} ; if( $options->{normalise_space} >= 2) { $att_text= _normalize_space( $att_text); } if( $options->{prefix_key_for_all}->{$att} || $options->{prefix_key_for_elt}->{"$gi#$att"} ) { # prefix the att $data->{"-$att"}= $att_text; } else { # normal case $data->{$att}= $att_text; } } return $data; } sub _key_attr { my( $elt, $options)=@_; return if( $options->{noattr}); if( $options->{key_for_all}) { foreach my $att ($elt->att_names) { if( $options->{key_for_all}->{$att}) { return $att; } } } elsif( $options->{key_for_elt}) { if( my $key_for_elt= $options->{key_for_elt}->{$elt->gi} ) { return $key_for_elt if( defined( $elt->att( $key_for_elt))); } } return; } sub _text_with_vars { my( $elt, $options)= @_; my $text; if( $options->{var}) { $text= _replace_vars_in_text( $elt->text, $options); $elt->_store_var( $options); } else { $text= $elt->text; } return $text; } sub _normalize_space { my $text= shift; $text=~ s{\s+}{ }sg; $text=~ s{^\s}{}; $text=~ s{\s$}{}; return $text; } sub att_nb { return 0 unless( my $atts= $_[0]->atts); return scalar keys %$atts; } sub has_no_atts { return 1 unless( my $atts= $_[0]->atts); return scalar keys %$atts ? 0 : 1; } sub _replace_vars_in_text { my( $text, $options)= @_; $text=~ s{($options->{var_regexp})} { if( defined( my $value= $options->{var_values}->{$2})) { $value } else { warn "unknown variable $2\n"; $1 } }gex; return $text; } sub _store_var { my( $elt, $options)= @_; if( defined (my $var_name= $elt->att( $options->{var}))) { $options->{var_values}->{$var_name}= $elt->text; } } # split a text element at a given offset sub split_at { my( $elt, $offset)= @_; my $text_elt= $elt->is_text ? $elt : $elt->first_child( $TEXT) || return ''; my $string= $text_elt->text; my $left_string= substr( $string, 0, $offset); my $right_string= substr( $string, $offset); $text_elt->set_pcdata( $left_string); my $new_elt= $elt->new( $elt->gi, $right_string); $new_elt->paste( after => $elt); return $new_elt; } # split an element or its text descendants into several, in place # all elements (new and untouched) are returned sub split { my $elt= shift; my @text_chunks; my @result; if( $elt->is_text) { @text_chunks= ($elt); } else { @text_chunks= $elt->descendants( $TEXT); } foreach my $text_chunk (@text_chunks) { push @result, $text_chunk->_split( 1, @_); } return @result; } # split an element or its text descendants into several, in place # created elements (those which match the regexp) are returned sub mark { my $elt= shift; my @text_chunks; my @result; if( $elt->is_text) { @text_chunks= ($elt); } else { @text_chunks= $elt->descendants( $TEXT); } foreach my $text_chunk (@text_chunks) { push @result, $text_chunk->_split( 0, @_); } return @result; } # split a single text element # return_all defines what is returned: if it is true # only returns the elements created by matches in the split regexp # otherwise all elements (new and untouched) are returned { sub _split { my $elt= shift; my $return_all= shift; my $regexp= shift; my @tags; while( @_) { my $tag= shift(); if( ref $_[0]) { push @tags, { tag => $tag, atts => shift }; } else { push @tags, { tag => $tag }; } } unless( @tags) { @tags= { tag => $elt->_parent->gi }; } my @result; # the returned list of elements my $text= $elt->text; my $gi= $elt->gi; # 2 uses: if split matches then the first substring reuses $elt # once a split has occurred then the last match needs to be put in # a new element my $previous_match= 0; while( my( $pre_match, @matches)= $text=~ /^(.*?)$regexp(.*)$/gcs) { $text= pop @matches; if( $previous_match) { # match, not the first one, create a new text ($gi) element _utf8_ify( $pre_match) if( $] < 5.010); $elt= $elt->insert_new_elt( after => $gi, $pre_match); push @result, $elt if( $return_all); } else { # first match in $elt, re-use $elt for the first sub-string _utf8_ify( $pre_match) if( $] < 5.010); $elt->set_text( $pre_match); $previous_match++; # store the fact that there was a match push @result, $elt if( $return_all); } # now deal with matches captured in the regexp if( @matches) { # match, with capture my $i=0; foreach my $match (@matches) { # create new element, text is the match _utf8_ify( $match) if( $] < 5.010); my $tag = _repl_match( $tags[$i]->{tag}, @matches) || '#PCDATA'; my $atts = \%{$tags[$i]->{atts}} || {}; my %atts= map { _repl_match( $_, @matches) => _repl_match( $atts->{$_}, @matches) } keys %$atts; $elt= $elt->insert_new_elt( after => $tag, \%atts, $match); push @result, $elt; $i= ($i + 1) % @tags; } } else { # match, no captures my $tag = $tags[0]->{tag}; my $atts = \%{$tags[0]->{atts}} || {}; $elt= $elt->insert_new_elt( after => $tag, $atts); push @result, $elt; } } if( $previous_match && $text) { # there was at least 1 match, and there is text left after the match $elt= $elt->insert_new_elt( after => $gi, $text); } push @result, $elt if( $return_all); return @result; # return all elements } sub _repl_match { my( $val, @matches)= @_; $val=~ s{\$(\d+)}{$matches[$1-1]}g; return $val; } # evil hack needed as sometimes my $encode_is_loaded=0; # so we only load Encode once sub _utf8_ify { if( $perl_version >= 5.008 and $perl_version < 5.010 and !_keep_encoding()) { unless( $encode_is_loaded) { require Encode; import Encode; $encode_is_loaded++; } Encode::_utf8_on( $_[0]); # the flag should be set but is not } } } { my %replace_sub; # cache for complex expressions (expression => sub) sub subs_text { my( $elt, $regexp, $replace)= @_; my $replacement_string; my $is_string= _is_string( $replace); my @parents; foreach my $text_elt ($elt->descendants_or_self( $TEXT)) { if( $is_string) { my $text= $text_elt->text; $text=~ s{$regexp}{ _replace_var( $replace, $1, $2, $3, $4, $5, $6, $7, $8, $9)}egx; $text_elt->set_text( $text); } else { no utf8; # = perl 5.6 my $replace_sub= ( $replace_sub{$replace} ||= _install_replace_sub( $replace)); my $text= $text_elt->text; my $pos=0; # used to skip text that was previously matched my $found_hit; while( my( $pre_match_string, $match_string, @var)= ($text=~ m{(.*?)($regexp)}sg)) { $found_hit=1; my $match_start = length( $pre_match_string); my $match = $match_start ? $text_elt->split_at( $match_start + $pos) : $text_elt; my $match_length = length( $match_string); my $post_match = $match->split_at( $match_length); $replace_sub->( $match, @var); # go to next $text_elt= $post_match; $text= $post_match->text; if( $found_hit) { push @parents, $text_elt->parent unless $parents[-1] && $parents[-1]== $text_elt->parent; } } } } foreach my $parent (@parents) { $parent->normalize; } return $elt; } sub _is_string { return ($_[0]=~ m{&e[ln]t}) ? 0: 1 } sub _replace_var { my( $string, @var)= @_; unshift @var, undef; $string=~ s{\$(\d)}{$var[$1]}g; return $string; } sub _install_replace_sub { my $replace_exp= shift; my @item= split m{(&e[ln]t\s*\([^)]*\))}, $replace_exp; my $sub= q{ my( $match, @var)= @_; my $new; my $last_inserted=$match;}; my( $gi, $exp); foreach my $item (@item) { next if ! length $item; if( $item=~ m{^&elt\s*\(([^)]*)\)}) { $exp= $1; } elsif( $item=~ m{^&ent\s*\(\s*([^\s)]*)\s*\)}) { $exp= " '#ENT' => $1"; } else { $exp= qq{ '#PCDATA' => "$item"}; } $exp=~ s{\$(\d)}{my $i= $1-1; "\$var[$i]"}eg; # replace references to matches $sub.= qq{ \$new= \$match->new( $exp); }; $sub .= q{ $new->paste( after => $last_inserted); $last_inserted=$new;}; } $sub .= q{ $match->delete; }; #$sub=~ s/;/;\n/g; warn "subs: $sub"; my $coderef= eval "sub { $NO_WARNINGS; $sub }"; if( $@) { croak( "invalid replacement expression $replace_exp: ",$@); } return $coderef; } } sub merge_text { my( $e1, $e2)= @_; croak "invalid merge: can only merge 2 elements" unless( isa( $e2, 'XML::Twig::Elt')); croak "invalid merge: can only merge 2 text elements" unless( $e1->is_text && $e2->is_text && ($e1->gi eq $e2->gi)); my $t1_length= length( $e1->text); $e1->set_text( $e1->text . $e2->text); if( my $extra_data_in_pcdata= $e2->_extra_data_in_pcdata) { foreach my $data (@$extra_data_in_pcdata) { $e1->_push_extra_data_in_pcdata( $data->{text}, $data->{offset} + $t1_length); } } $e2->delete; return $e1; } sub merge { my( $e1, $e2)= @_; my @e2_children= $e2->_children; if( $e1->_last_child && $e1->_last_child->is_pcdata && @e2_children && $e2_children[0]->is_pcdata ) { my $t1_length= length( $e1->_last_child->{pcdata}); my $child1= $e1->_last_child; my $child2= shift @e2_children; $child1->{pcdata} .= $child2->{pcdata}; my $extra_data= $e1->_extra_data_before_end_tag . $e2->extra_data; if( $extra_data) { $e1->_del_extra_data_before_end_tag; $child1->_push_extra_data_in_pcdata( $extra_data, $t1_length); } if( my $extra_data_in_pcdata= $child2->_extra_data_in_pcdata) { foreach my $data (@$extra_data_in_pcdata) { $child1->_push_extra_data_in_pcdata( $data->{text}, $data->{offset} + $t1_length); } } if( my $extra_data_before_end_tag= $e2->_extra_data_before_end_tag) { $e1->_set_extra_data_before_end_tag( $extra_data_before_end_tag); } } foreach my $e (@e2_children) { $e->move( last_child => $e1); } $e2->delete; return $e1; } # recursively copy an element and returns the copy (can be huge and long) sub copy { my $elt= shift; my $copy= $elt->new( $elt->gi); if( $elt->extra_data) { $copy->set_extra_data( $elt->extra_data); } if( $elt->_extra_data_before_end_tag) { $copy->_set_extra_data_before_end_tag( $elt->_extra_data_before_end_tag); } if( $elt->is_asis) { $copy->set_asis; } if( $elt->is_pcdata) { $copy->set_pcdata( $elt->pcdata); if( $elt->_extra_data_in_pcdata) { $copy->_set_extra_data_in_pcdata( $elt->_extra_data_in_pcdata); } } elsif( $elt->is_cdata) { $copy->_set_cdata( $elt->cdata); if( $elt->_extra_data_in_pcdata) { $copy->_set_extra_data_in_pcdata( $elt->_extra_data_in_pcdata); } } elsif( $elt->is_pi) { $copy->_set_pi( $elt->target, $elt->data); } elsif( $elt->is_comment) { $copy->_set_comment( $elt->comment); } elsif( $elt->is_ent) { $copy->set_ent( $elt->ent); } else { my @children= $elt->_children; if( my $atts= $elt->atts) { my %atts; tie %atts, 'Tie::IxHash' if (keep_atts_order()); %atts= %{$atts}; # we want to do a real copy of the attributes $copy->set_atts( \%atts); } foreach my $child (@children) { my $child_copy= $child->copy; $child_copy->paste( 'last_child', $copy); } } # save links to the original location, which can be convenient and is used for namespace resolution foreach my $link ( qw(parent prev_sibling next_sibling) ) { $copy->{former}->{$link}= $elt->{$link}; if( $XML::Twig::weakrefs) { weaken( $copy->{former}->{$link}); } } $copy->set_empty( $elt->is_empty); return $copy; } sub delete { my $elt= shift; $elt->cut; $elt->DESTROY unless $XML::Twig::weakrefs; return undef; } sub __destroy { my $elt= shift; return if( $XML::Twig::weakrefs); my $t= shift || $elt->twig; # optional argument, passed in recursive calls foreach( @{[$elt->_children]}) { $_->DESTROY( $t); } # the id reference needs to be destroyed # lots of tests to avoid warnings during the cleanup phase $elt->del_id( $t) if( $ID && $t && defined( $elt->{att}) && exists( $elt->{att}->{$ID})); if( $elt->{former}) { foreach (keys %{$elt->{former}}) { delete $elt->{former}->{$_}; } delete $elt->{former}; } foreach (qw( keys %$elt)) { delete $elt->{$_}; } undef $elt; } BEGIN { sub set_destroy { if( $XML::Twig::weakrefs) { undef *DESTROY } else { *DESTROY= *__destroy; } } set_destroy(); } # ignores the element sub ignore { my $elt= shift; my $t= $elt->twig; $t->ignore( $elt, @_); } BEGIN { my $pretty = 0; my $quote = '"'; my $INDENT = ' '; my $empty_tag_style = 0; my $remove_cdata = 0; my $keep_encoding = 0; my $expand_external_entities = 0; my $keep_atts_order = 0; my $do_not_escape_amp_in_atts = 0; my $WRAP = '80'; my $REPLACED_ENTS = qq{&<}; my ($NSGMLS, $NICE, $INDENTED, $INDENTEDCT, $INDENTEDC, $WRAPPED, $RECORD1, $RECORD2, $INDENTEDA)= (1..9); my %KEEP_TEXT_TAG_ON_ONE_LINE= map { $_ => 1 } ( $INDENTED, $INDENTEDCT, $INDENTEDC, $INDENTEDA, $WRAPPED); my %WRAPPED = map { $_ => 1 } ( $WRAPPED, $INDENTEDA, $INDENTEDC); my %pretty_print_style= ( none => 0, # no added \n nsgmls => $NSGMLS, # nsgmls-style, \n in tags # below this line styles are UNSAFE (the generated XML can be well-formed but invalid) nice => $NICE, # \n after open/close tags except when the # element starts with text indented => $INDENTED, # nice plus idented indented_close_tag => $INDENTEDCT, # nice plus idented indented_c => $INDENTEDC, # slightly more compact than indented (closing # tags are on the same line) wrapped => $WRAPPED, # text is wrapped at column record_c => $RECORD1, # for record-like data (compact) record => $RECORD2, # for record-like data (not so compact) indented_a => $INDENTEDA, # nice, indented, and with attributes on separate # lines as the nsgmls style, as well as wrapped # lines - to make the xml friendly to line-oriented tools cvs => $INDENTEDA, # alias for indented_a ); my ($HTML, $EXPAND)= (1..2); my %empty_tag_style= ( normal => 0, # <tag/> html => $HTML, # <tag /> xhtml => $HTML, # <tag /> expand => $EXPAND, # <tag></tag> ); my %quote_style= ( double => '"', single => "'", # smart => "smart", ); my $xml_space_preserve; # set when an element includes xml:space="preserve" my $output_filter; # filters the entire output (including < and >) my $output_text_filter; # filters only the text part (tag names, attributes, pcdata) my $replaced_ents= $REPLACED_ENTS; # returns those pesky "global" variables so you can switch between twigs sub global_state ## no critic (Subroutines::ProhibitNestedSubs); { return { pretty => $pretty, quote => $quote, indent => $INDENT, empty_tag_style => $empty_tag_style, remove_cdata => $remove_cdata, keep_encoding => $keep_encoding, expand_external_entities => $expand_external_entities, output_filter => $output_filter, output_text_filter => $output_text_filter, keep_atts_order => $keep_atts_order, do_not_escape_amp_in_atts => $do_not_escape_amp_in_atts, wrap => $WRAP, replaced_ents => $replaced_ents, }; } # restores the global variables sub set_global_state { my $state= shift; $pretty = $state->{pretty}; $quote = $state->{quote}; $INDENT = $state->{indent}; $empty_tag_style = $state->{empty_tag_style}; $remove_cdata = $state->{remove_cdata}; $keep_encoding = $state->{keep_encoding}; $expand_external_entities = $state->{expand_external_entities}; $output_filter = $state->{output_filter}; $output_text_filter = $state->{output_text_filter}; $keep_atts_order = $state->{keep_atts_order}; $do_not_escape_amp_in_atts = $state->{do_not_escape_amp_in_atts}; $WRAP = $state->{wrap}; $replaced_ents = $state->{replaced_ents}, } # sets global state to defaults sub init_global_state { set_global_state( { pretty => 0, quote => '"', indent => $INDENT, empty_tag_style => 0, remove_cdata => 0, keep_encoding => 0, expand_external_entities => 0, output_filter => undef, output_text_filter => undef, keep_atts_order => undef, do_not_escape_amp_in_atts => 0, wrap => $WRAP, replaced_ents => $REPLACED_ENTS, }); } # set the pretty_print style (in $pretty) and returns the old one # can be called from outside the package with 2 arguments (elt, style) # or from inside with only one argument (style) # the style can be either a string (one of the keys of %pretty_print_style # or a number (presumably an old value saved) sub set_pretty_print { my $style= lc( defined $_[1] ? $_[1] : $_[0]); # so we cover both cases my $old_pretty= $pretty; if( $style=~ /^\d+$/) { croak "invalid pretty print style $style" unless( $style < keys %pretty_print_style); $pretty= $style; } else { croak "invalid pretty print style '$style'" unless( exists $pretty_print_style{$style}); $pretty= $pretty_print_style{$style}; } if( $WRAPPED{$pretty} ) { XML::Twig::_use( 'Text::Wrap') or croak( "Text::Wrap not available, cannot use style $style"); } return $old_pretty; } sub _pretty_print { return $pretty; } # set the empty tag style (in $empty_tag_style) and returns the old one # can be called from outside the package with 2 arguments (elt, style) # or from inside with only one argument (style) # the style can be either a string (one of the keys of %empty_tag_style # or a number (presumably an old value saved) sub set_empty_tag_style { my $style= lc( defined $_[1] ? $_[1] : $_[0]); # so we cover both cases my $old_style= $empty_tag_style; if( $style=~ /^\d+$/) { croak "invalid empty tag style $style" unless( $style < keys %empty_tag_style); $empty_tag_style= $style; } else { croak "invalid empty tag style '$style'" unless( exists $empty_tag_style{$style}); $empty_tag_style= $empty_tag_style{$style}; } return $old_style; } sub _pretty_print_styles { return (sort { $pretty_print_style{$a} <=> $pretty_print_style{$b} || $a cmp $b } keys %pretty_print_style); } sub set_quote { my $style= $_[1] || $_[0]; my $old_quote= $quote; croak "invalid quote '$style'" unless( exists $quote_style{$style}); $quote= $quote_style{$style}; return $old_quote; } sub set_remove_cdata { my $new_value= defined $_[1] ? $_[1] : $_[0]; my $old_value= $remove_cdata; $remove_cdata= $new_value; return $old_value; } sub set_indent { my $new_value= defined $_[1] ? $_[1] : $_[0]; my $old_value= $INDENT; $INDENT= $new_value; return $old_value; } sub set_wrap { my $new_value= defined $_[1] ? $_[1] : $_[0]; my $old_value= $WRAP; $WRAP= $new_value; return $old_value; } sub set_keep_encoding { my $new_value= defined $_[1] ? $_[1] : $_[0]; my $old_value= $keep_encoding; $keep_encoding= $new_value; return $old_value; } sub set_replaced_ents { my $new_value= defined $_[1] ? $_[1] : $_[0]; my $old_value= $replaced_ents; $replaced_ents= $new_value; return $old_value; } sub do_not_escape_gt { my $old_value= $replaced_ents; $replaced_ents= q{&<}; # & needs to be first return $old_value; } sub escape_gt { my $old_value= $replaced_ents; $replaced_ents= qq{&<>}; # & needs to be first return $old_value; } sub _keep_encoding { return $keep_encoding; } # so I can use elsewhere in the module sub set_do_not_escape_amp_in_atts { my $new_value= defined $_[1] ? $_[1] : $_[0]; my $old_value= $do_not_escape_amp_in_atts; $do_not_escape_amp_in_atts= $new_value; return $old_value; } sub output_filter { return $output_filter; } sub output_text_filter { return $output_text_filter; } sub set_output_filter { my $new_value= defined $_[1] ? $_[1] : $_[0]; # can be called in object/non-object mode # if called in object mode with no argument, the filter is undefined if( isa( $new_value, 'XML::Twig::Elt') || isa( $new_value, 'XML::Twig')) { undef $new_value; } my $old_value= $output_filter; if( !$new_value || isa( $new_value, 'CODE') ) { $output_filter= $new_value; } elsif( $new_value eq 'latin1') { $output_filter= XML::Twig::latin1(); } elsif( $XML::Twig::filter{$new_value}) { $output_filter= $XML::Twig::filter{$new_value}; } else { croak "invalid output filter '$new_value'"; } return $old_value; } sub set_output_text_filter { my $new_value= defined $_[1] ? $_[1] : $_[0]; # can be called in object/non-object mode # if called in object mode with no argument, the filter is undefined if( isa( $new_value, 'XML::Twig::Elt') || isa( $new_value, 'XML::Twig')) { undef $new_value; } my $old_value= $output_text_filter; if( !$new_value || isa( $new_value, 'CODE') ) { $output_text_filter= $new_value; } elsif( $new_value eq 'latin1') { $output_text_filter= XML::Twig::latin1(); } elsif( $XML::Twig::filter{$new_value}) { $output_text_filter= $XML::Twig::filter{$new_value}; } else { croak "invalid output text filter '$new_value'"; } return $old_value; } sub set_expand_external_entities { my $new_value= defined $_[1] ? $_[1] : $_[0]; my $old_value= $expand_external_entities; $expand_external_entities= $new_value; return $old_value; } sub set_keep_atts_order { my $new_value= defined $_[1] ? $_[1] : $_[0]; my $old_value= $keep_atts_order; $keep_atts_order= $new_value; return $old_value; } sub keep_atts_order { return $keep_atts_order; } # so I can use elsewhere in the module my %html_empty_elt; BEGIN { %html_empty_elt= map { $_ => 1} qw( base meta link hr br param img area input col); } sub start_tag { my( $elt, $option)= @_; return if( $elt->{gi} < $XML::Twig::SPECIAL_GI); my $extra_data= $elt->{extra_data} || ''; my $gi= $elt->gi; my $att= $elt->{att}; # should be $elt->atts, optimized into a pure hash look-up my $ns_map= $att ? $att->{'#original_gi'} : ''; if( $ns_map) { $gi= _restore_original_prefix( $ns_map, $gi); } $gi=~ s{^#default:}{}; # remove default prefix if( $output_text_filter) { $gi= $output_text_filter->( $gi); } # get the attribute and their values my $att_sep = $pretty==$NSGMLS ? "\n" : $pretty==$INDENTEDA ? "\n" . $INDENT x ($elt->level+1) . ' ' : ' ' ; my $replace_in_att_value= $replaced_ents . "$quote\t\r\n"; if( $option->{escape_gt} && $replaced_ents !~ m{>}) { $replace_in_att_value.= '>'; } my $tag; my @att_names= grep { !_is_private_name( $_) } $keep_atts_order ? keys %{$att} : sort keys %{$att}; if( @att_names) { my $atts= join $att_sep, map { my $output_att_name= $ns_map ? _restore_original_prefix( $ns_map, $_) : $_; if( $output_text_filter) { $output_att_name= $output_text_filter->( $output_att_name); } $output_att_name . '=' . $quote . _att_xml_string( $att->{$_}, $replace_in_att_value) . $quote } @att_names ; if( $pretty==$INDENTEDA && @att_names == 1) { $att_sep= ' '; } $tag= "<$gi$att_sep$atts"; } else { $tag= "<$gi"; } $tag .= "\n" if($pretty==$NSGMLS); # force empty if suitable HTML tag, otherwise use the value from the input tree if( ($empty_tag_style eq $HTML) && !$elt->_first_child && !$elt->_extra_data_before_end_tag && $html_empty_elt{$gi}) { $elt->{empty}= 1; } my $empty= defined $elt->{empty} ? $elt->{empty} : $elt->_first_child ? 0 : 1; $tag .= (!$elt->{empty} || $elt->_extra_data_before_end_tag) ? '>' # element has content : (($empty_tag_style eq $HTML) && $html_empty_elt{$gi}) ? ' />' # html empty element # cvs-friendly format : ( $pretty == $INDENTEDA && @att_names > 1) ? "\n" . $INDENT x $elt->level . "/>" : ( $pretty == $INDENTEDA && @att_names == 1) ? " />" : $empty_tag_style ? "></" . $elt->gi . ">" # $empty_tag_style is $HTML or $EXPAND : '/>' ; if( $elt->_is_private) { $tag= ''; } #warn "TRACE: ", $tag,": ", Encode::is_utf8( $tag) ? "has flag" : "FLAG NOT SET"; unless( $pretty) { return defined( $extra_data) ? $extra_data . $tag : $tag; } my $prefix=''; my $return=''; # '' or \n is to be printed before the tag my $indent=0; # number of indents before the tag if( $pretty==$RECORD1) { my $level= $elt->level; $return= "\n" if( $level < 2); $indent= 1 if( $level == 1); } elsif( $pretty==$RECORD2) { $return= "\n"; $indent= $elt->level; } elsif( $pretty==$NICE) { my $parent= $elt->_parent; unless( !$parent || $parent->{contains_text}) { $return= "\n"; } $elt->{contains_text}= 1 if( ($parent && $parent->{contains_text}) || $elt->contains_text); } elsif( $KEEP_TEXT_TAG_ON_ONE_LINE{$pretty}) { my $parent= $elt->_parent; unless( !$parent || $parent->{contains_text}) { $return= "\n"; $indent= $elt->level; } $elt->{contains_text}= 1 if( ($parent && $parent->{contains_text}) || $elt->contains_text); } if( $return || $indent) { # check for elements in which spaces should be kept my $t= $elt->twig; return $extra_data . $tag if( $xml_space_preserve); if( $t && $t->{twig_keep_spaces_in}) { foreach my $ancestor ($elt->ancestors) { return $extra_data . $tag if( $t->{twig_keep_spaces_in}->{$ancestor->gi}) } } $prefix= $INDENT x $indent; if( $extra_data) { $extra_data=~ s{\s+$}{}; $extra_data=~ s{^\s+}{}; $extra_data= $prefix . $extra_data . $return; } } return $return . $extra_data . $prefix . $tag; } sub end_tag { my $elt= shift; return '' if( ($elt->{gi}<$XML::Twig::SPECIAL_GI) || ($elt->is_empty && !$elt->_extra_data_before_end_tag) ); my $tag= "<"; my $gi= $elt->gi; if( my $map= $elt->att( '#original_gi')) { $gi= _restore_original_prefix( $map, $gi); } $gi=~ s{^#default:}{}; # remove default prefix if( $output_text_filter) { $gi= $output_text_filter->( $elt->gi); } $tag .= "/$gi>"; $tag = ($elt->_extra_data_before_end_tag || '') . $tag; if( $elt->_is_private) { $tag= ''; } return $tag unless $pretty; my $prefix=''; my $return=0; # 1 if a \n is to be printed before the tag my $indent=0; # number of indents before the tag if( $pretty==$RECORD1) { $return= 1 if( $elt->level == 0); } elsif( $pretty==$RECORD2) { unless( $elt->contains_text) { $return= 1 ; $indent= $elt->level; } } elsif( $pretty==$NICE) { my $parent= $elt->_parent; if( ( ($parent && !$parent->{contains_text}) || !$parent ) && ( !$elt->{contains_text} && ($elt->{has_flushed_child} || $elt->_first_child()) ) ) { $return= 1; } } elsif( $KEEP_TEXT_TAG_ON_ONE_LINE{$pretty}) { my $parent= $elt->_parent; if( ( ($parent && !$parent->{contains_text}) || !$parent ) && ( !$elt->{contains_text} && ($elt->{has_flushed_child} || $elt->_first_child()) ) ) { $return= 1; $indent= $elt->level; } } if( $return || $indent) { # check for elements in which spaces should be kept my $t= $elt->twig; return $tag if( $xml_space_preserve); if( $t && $t->{twig_keep_spaces_in}) { foreach my $ancestor ($elt, $elt->ancestors) { return $tag if( $t->{twig_keep_spaces_in}->{$ancestor->gi}) } } if( $return) { $prefix= ($pretty== $INDENTEDCT) ? "\n$INDENT" : "\n"; } $prefix.= $INDENT x $indent; } # add a \n at the end of the document (after the root element) $tag .= "\n" unless( $elt->parent); return $prefix . $tag; } sub _restore_original_prefix { my( $map, $name)= @_; my $prefix= _ns_prefix( $name); if( my $original_prefix= $map->{$prefix}) { if( $original_prefix eq '#default') { $name=~ s{^$prefix:}{}; } else { $name=~ s{^$prefix(?=:)}{$original_prefix}; } } return $name; } # buffer used to hold the text to print/sprint, to avoid passing it back and forth between methods my @sprint; # $elt is an element to print # $fh is an optional filehandle to print to # $pretty is an optional value, if true a \n is printed after the < of the # opening tag sub print { my $elt= shift; my $fh= _is_fh( $_[0]) ? shift : undef; my $old_select= defined $fh ? select $fh : undef; print $elt->sprint( @_); select $old_select if( defined $old_select); } # those next 2 methods need to be refactored, they are copies of the same methods in XML::Twig sub print_to_file { my( $elt, $filename)= (shift, shift); my $out_fh; open( $out_fh, ">$filename") or _croak( "cannot create file $filename: $!"); # < perl 5.8 my $mode= $keep_encoding ? '>' : '>:utf8'; # >= perl 5.8 open( $out_fh, $mode, $filename) or _croak( "cannot create file $filename: $!"); # >= perl 5.8 $elt->print( $out_fh, @_); close $out_fh; return $elt; } # probably only works on *nix (at least the chmod bit) # first print to a temporary file, then rename that file to the desired file name, then change permissions # to the original file permissions (or to the current umask) sub safe_print_to_file { my( $elt, $filename)= (shift, shift); my $perm= -f $filename ? (stat $filename)[2] & 07777 : ~umask() ; XML::Twig::_use( 'File::Temp') || croak "need File::Temp to use safe_print_to_file\n"; XML::Twig::_use( 'File::Basename') || croak "need File::Basename to use safe_print_to_file\n"; my $tmpdir= File::Basename::dirname( $filename); my( $fh, $tmpfilename) = File::Temp::tempfile( DIR => $tmpdir); $elt->print_to_file( $tmpfilename, @_); rename( $tmpfilename, $filename) or unlink $tmpfilename && _croak( "cannot move temporary file to $filename: $!"); chmod $perm, $filename; return $elt; } # same as print but does not output the start tag if the element # is marked as flushed sub flush { my $elt= shift; my $up_to= $_[0] && isa( $_[0], 'XML::Twig::Elt') ? shift : $elt; $elt->twig->flush_up_to( $up_to, @_); } sub purge { my $elt= shift; my $up_to= $_[0] && isa( $_[0], 'XML::Twig::Elt') ? shift : $elt; $elt->twig->purge_up_to( $up_to, @_); } sub _flush { my $elt= shift; my $pretty; my $fh= _is_fh( $_[0]) ? shift : undef; my $old_select= defined $fh ? select $fh : undef; my $old_pretty= defined ($pretty= shift) ? set_pretty_print( $pretty) : undef; $xml_space_preserve= 1 if( ($elt->inherit_att( 'xml:space') || '') eq 'preserve'); $elt->__flush(); $xml_space_preserve= 0; select $old_select if( defined $old_select); set_pretty_print( $old_pretty) if( defined $old_pretty); } sub __flush { my $elt= shift; if( $elt->{gi} >= $XML::Twig::SPECIAL_GI) { my $preserve= ($elt->att( 'xml:space') || '') eq 'preserve'; $xml_space_preserve++ if $preserve; unless( $elt->_flushed) { print $elt->start_tag(); } # flush the children my @children= $elt->_children; foreach my $child (@children) { $child->_flush( $pretty); $child->_set_flushed; } if( ! $elt->{end_tag_flushed}) { print $elt->end_tag; $elt->{end_tag_flushed}=1; $elt->_set_flushed; } $xml_space_preserve-- if $preserve; # used for pretty printing if( my $parent= $elt->parent) { $parent->{has_flushed_child}= 1; } } else # text or special element { my $text; if( $elt->is_pcdata) { $text= $elt->pcdata_xml_string; if( my $parent= $elt->parent) { $parent->{contains_text}= 1; } } elsif( $elt->is_cdata) { $text= $elt->cdata_string; if( my $parent= $elt->parent) { $parent->{contains_text}= 1; } } elsif( $elt->is_pi) { $text= $elt->pi_string; } elsif( $elt->is_comment) { $text= $elt->comment_string; } elsif( $elt->is_ent) { $text= $elt->ent_string; } print $output_filter ? $output_filter->( $text) : $text; } } sub xml_text { my( $elt, @options)= @_; if( @options && grep { lc( $_) eq 'no_recurse' } @options) { return $elt->xml_text_only; } my $string=''; if( ($elt->{gi} >= $XML::Twig::SPECIAL_GI) ) { # sprint the children my $child= $elt->_first_child || ''; while( $child) { $string.= $child->xml_text; } continue { $child= $child->_next_sibling; } } elsif( $elt->is_pcdata) { $string .= $output_filter ? $output_filter->($elt->pcdata_xml_string) : $elt->pcdata_xml_string; } elsif( $elt->is_cdata) { $string .= $output_filter ? $output_filter->($elt->cdata_string) : $elt->cdata_string; } elsif( $elt->is_ent) { $string .= $elt->ent_string; } return $string; } sub xml_text_only { return join '', map { $_->xml_text if( $_->is_text || $_->is_ent) } $_[0]->_children; } # same as print but except... it does not print but rather returns the string # if the second parameter is set then only the content is returned, not the # start and end tags of the element (but the tags of the included elements are # returned) sub sprint { my $elt= shift; my( $old_pretty, $old_empty_tag_style); if( $_[0]) { if( isa( $_[0], 'HASH')) { # "proper way, using a hashref for options my %args= XML::Twig::_normalize_args( %{shift()}); if( defined $args{PrettyPrint}) { $old_pretty = set_pretty_print( $args{PrettyPrint}); } if( defined $args{EmptyTags}) { $old_empty_tag_style = set_empty_tag_style( $args{EmptyTags}); } } else { # "old" way, just using the option name my @other_opt; foreach my $opt (@_) { if( exists $pretty_print_style{$opt}) { $old_pretty = set_pretty_print( $opt); } elsif( exists $empty_tag_style{$opt}) { $old_empty_tag_style = set_empty_tag_style( $opt); } else { push @other_opt, $opt; } } @_= @other_opt; } } $xml_space_preserve= 1 if( ($elt->inherit_att( 'xml:space') || '') eq 'preserve'); @sprint=(); $elt->_sprint( @_); my $sprint= join( '', @sprint); if( $output_filter) { $sprint= $output_filter->( $sprint); } if( ( ($pretty== $WRAPPED) || ($pretty==$INDENTEDC)) && !$xml_space_preserve) { $sprint= _wrap_text( $sprint); } $xml_space_preserve= 0; if( defined $old_pretty) { set_pretty_print( $old_pretty); } if( defined $old_empty_tag_style) { set_empty_tag_style( $old_empty_tag_style); } return $sprint; } sub _wrap_text { my( $string)= @_; my $wrapped; foreach my $line (split /\n/, $string) { my( $initial_indent)= $line=~ m{^(\s*)}; my $wrapped_line= Text::Wrap::wrap( '', $initial_indent . $INDENT, $line) . "\n"; # fix glitch with Text::wrap when the first line is long and does not include spaces # the first line ends up being too short by 2 chars, but we'll have to live with it! $wrapped_line=~ s{^ +\n }{}s; # this prefix needs to be removed $wrapped .= $wrapped_line; } return $wrapped; } sub _sprint { my $elt= shift; my $no_tag= shift || 0; # in case there's some comments or PI's piggybacking if( $elt->{gi} >= $XML::Twig::SPECIAL_GI) { my $preserve= ($elt->att( 'xml:space') || '') eq 'preserve'; $xml_space_preserve++ if $preserve; push @sprint, $elt->start_tag unless( $no_tag); # sprint the children my $child= $elt->_first_child; while( $child) { $child->_sprint; $child= $child->_next_sibling; } push @sprint, $elt->end_tag unless( $no_tag); $xml_space_preserve-- if $preserve; } else { push @sprint, $elt->{extra_data} if( $elt->{extra_data}) ; if( $elt->is_pcdata) { push @sprint, $elt->pcdata_xml_string; } elsif( $elt->is_cdata) { push @sprint, $elt->cdata_string; } elsif( $elt->is_pi) { if( ($pretty >= $INDENTED) && !$elt->parent->{contains_text}) { push @sprint, "\n" . $INDENT x $elt->level; } push @sprint, $elt->pi_string; } elsif( $elt->is_comment) { if( ($pretty >= $INDENTED) && !$elt->parent->{contains_text}) { push @sprint, "\n" . $INDENT x $elt->level; } push @sprint, $elt->comment_string; } elsif( $elt->is_ent) { push @sprint, $elt->ent_string; } } return; } # just a shortcut to $elt->sprint( 1) sub xml_string { my $elt= shift; isa( $_[0], 'HASH') ? $elt->sprint( shift(), 1) : $elt->sprint( 1); } sub pcdata_xml_string { my $elt= shift; if( defined( my $string= $elt->{pcdata}) ) { if( ! $elt->_extra_data_in_pcdata) { $string=~ s/([$replaced_ents])/$XML::Twig::base_ent{$1}/g unless( !$replaced_ents || $keep_encoding || $elt->{asis}); $string=~ s{\Q]]>}{]]>}g; } else { _gen_mark( $string); # used by _(un)?protect_extra_data foreach my $data (reverse @{$elt->_extra_data_in_pcdata}) { my $substr= substr( $string, $data->{offset}); if( $keep_encoding || $elt->{asis}) { substr( $string, $data->{offset}, 0, $data->{text}); } else { substr( $string, $data->{offset}, 0, _protect_extra_data( $data->{text})); } } unless( $keep_encoding || $elt->{asis}) { $string=~ s{([$replaced_ents])}{$XML::Twig::base_ent{$1}}g ; $string=~ s{\Q]]>}{]]>}g; _unprotect_extra_data( $string); } } return $output_text_filter ? $output_text_filter->( $string) : $string; } else { return ''; } } { my $mark; my( %char2ent, %ent2char); BEGIN { %char2ent= ( '<' => 'lt', '&' => 'amp', '>' => 'gt'); %ent2char= map { $char2ent{$_} => $_ } keys %char2ent; } # generate a unique mark (a string) not found in the string, # used to mark < and & in the extra data sub _gen_mark { $mark="AAAA"; $mark++ while( index( $_[0], $mark) > -1); return $mark; } sub _protect_extra_data { my( $extra_data)= @_; $extra_data=~ s{([<&>])}{:$mark:$char2ent{$1}:}g; return $extra_data; } sub _unprotect_extra_data { $_[0]=~ s{:$mark:(\w+):}{$ent2char{$1}}g; } } sub cdata_string { my $cdata= $_[0]->cdata; unless( defined $cdata) { return ''; } if( $remove_cdata) { $cdata=~ s/([$replaced_ents])/$XML::Twig::base_ent{$1}/g; } else { $cdata= $CDATA_START . $cdata . $CDATA_END; } return $cdata; } sub att_xml_string { my $elt= shift; my $att= shift; my $replace= $replaced_ents . "$quote\n\r\t"; if($_[0] && $_[0]->{escape_gt} && ($replace!~ m{>}) ) { $replace .='>'; } if( defined (my $string= $elt->{att}->{$att})) { return _att_xml_string( $string, $replace); } else { return ''; } } # escaped xml string for an attribute value sub _att_xml_string { my( $string, $escape)= @_; if( !defined( $string)) { return ''; } if( $keep_encoding) { $string=~ s{$quote}{$XML::Twig::base_ent{$quote}}g; } else { if( $do_not_escape_amp_in_atts) { $escape=~ s{^.}{}; # seems like the most backward compatible way to remove & from the list $string=~ s{([$escape])}{$XML::Twig::base_ent{$1}}g; $string=~ s{&(?!(\w+|#\d+|[xX][0-9a-fA-F]+);)}{&}g; # dodgy: escape & that do not start an entity } else { $string=~ s{([$escape])}{$XML::Twig::base_ent{$1}}g; $string=~ s{\Q]]>}{]]>}g; } } return $output_text_filter ? $output_text_filter->( $string) : $string; } sub ent_string { my $ent= shift; my $ent_text= $ent->{ent}; my( $t, $el, $ent_string); if( $expand_external_entities && ($t= $ent->twig) && ($el= $t->entity_list) && ($ent_string= $el->{entities}->{$ent->ent_name}->{val}) ) { return $ent_string; } else { return $ent_text; } } # returns just the text, no tags, for an element sub text { my( $elt, @options)= @_; if( @options && grep { lc( $_) eq 'no_recurse' } @options) { return $elt->text_only; } my $sep = (@options && grep { lc( $_) eq 'sep' } @options) ? ' ' : ''; my $string; if( $elt->is_pcdata) { return $elt->pcdata . $sep; } elsif( $elt->is_cdata) { return $elt->cdata . $sep; } elsif( $elt->is_pi) { return $elt->pi_string . $sep; } elsif( $elt->is_comment) { return $elt->comment . $sep; } elsif( $elt->is_ent) { return $elt->ent . $sep ; } my $child= $elt->_first_child ||''; while( $child) { my $child_text= $child->text( @options); $string.= defined( $child_text) ? $sep . $child_text : ''; } continue { $child= $child->_next_sibling; } unless( defined $string) { $string=''; } return $output_text_filter ? $output_text_filter->( $string) : $string; } sub text_only { return join '', map { $_->text if( $_->is_text || $_->is_ent) } $_[0]->_children; } sub trimmed_text { my $elt= shift; my $text= $elt->text( @_); $text=~ s{\s+}{ }sg; $text=~ s{^\s*}{}; $text=~ s{\s*$}{}; return $text; } sub trim { my( $elt)= @_; my $pcdata= $elt->first_descendant( $TEXT); (my $pcdata_text= $pcdata->text)=~ s{^\s+}{}s; $pcdata->set_text( $pcdata_text); $pcdata= $elt->last_descendant( $TEXT); ($pcdata_text= $pcdata->text)=~ s{\s+$}{}; $pcdata->set_text( $pcdata_text); foreach my $pcdata ($elt->descendants( $TEXT)) { ($pcdata_text= $pcdata->text)=~ s{\s+}{ }g; $pcdata->set_text( $pcdata_text); } return $elt; } # remove cdata sections (turns them into regular pcdata) in an element sub remove_cdata { my $elt= shift; foreach my $cdata ($elt->descendants_or_self( $CDATA)) { if( $keep_encoding) { my $data= $cdata->cdata; $data=~ s{([&<"'])}{$XML::Twig::base_ent{$1}}g; $cdata->set_pcdata( $data); } else { $cdata->set_pcdata( $cdata->cdata); } $cdata->set_gi( $PCDATA); undef $cdata->{cdata}; } } sub _is_private { return _is_private_name( $_[0]->gi); } sub _is_private_name { return $_[0]=~ m{^#(?!default:)}; } } # end of block containing package globals ($pretty_print, $quotes, keep_encoding...) # merges consecutive #PCDATAs in am element sub normalize { my( $elt)= @_; my @descendants= $elt->descendants( $PCDATA); while( my $desc= shift @descendants) { if( ! length $desc->{pcdata}) { $desc->delete; next; } while( @descendants && $desc->_next_sibling && $desc->_next_sibling== $descendants[0]) { my $to_merge= shift @descendants; $desc->merge_text( $to_merge); } } return $elt; } # SAX export methods sub toSAX1 { _toSAX(@_, \&_start_tag_data_SAX1, \&_end_tag_data_SAX1); } sub toSAX2 { _toSAX(@_, \&_start_tag_data_SAX2, \&_end_tag_data_SAX2); } sub _toSAX { my( $elt, $handler, $start_tag_data, $end_tag_data)= @_; if( $elt->{gi} >= $XML::Twig::SPECIAL_GI) { my $data= $start_tag_data->( $elt); _start_prefix_mapping( $elt, $handler, $data); if( $data && (my $start_element = $handler->can( 'start_element'))) { unless( $elt->_flushed) { $start_element->( $handler, $data); } } foreach my $child ($elt->_children) { $child->_toSAX( $handler, $start_tag_data, $end_tag_data); } if( (my $data= $end_tag_data->( $elt)) && (my $end_element = $handler->can( 'end_element')) ) { $end_element->( $handler, $data); } _end_prefix_mapping( $elt, $handler); } else # text or special element { if( $elt->is_pcdata && (my $characters= $handler->can( 'characters'))) { $characters->( $handler, { Data => $elt->pcdata }); } elsif( $elt->is_cdata) { if( my $start_cdata= $handler->can( 'start_cdata')) { $start_cdata->( $handler); } if( my $characters= $handler->can( 'characters')) { $characters->( $handler, {Data => $elt->cdata }); } if( my $end_cdata= $handler->can( 'end_cdata')) { $end_cdata->( $handler); } } elsif( ($elt->is_pi) && (my $pi= $handler->can( 'processing_instruction'))) { $pi->( $handler, { Target =>$elt->target, Data => $elt->data }); } elsif( ($elt->is_comment) && (my $comment= $handler->can( 'comment'))) { $comment->( $handler, { Data => $elt->comment }); } elsif( ($elt->is_ent)) { if( my $se= $handler->can( 'skipped_entity')) { $se->( $handler, { Name => $elt->ent_name }); } elsif( my $characters= $handler->can( 'characters')) { if( defined $elt->ent_string) { $characters->( $handler, {Data => $elt->ent_string}); } else { $characters->( $handler, {Data => $elt->ent_name}); } } } } } sub _start_tag_data_SAX1 { my( $elt)= @_; my $name= $elt->gi; return if( $elt->_is_private); my $attributes={}; my $atts= $elt->atts; while( my( $att, $value)= each %$atts) { $attributes->{$att}= $value unless( _is_private_name( $att)); } my $data= { Name => $name, Attributes => $attributes}; return $data; } sub _end_tag_data_SAX1 { my( $elt)= @_; return if( $elt->_is_private); return { Name => $elt->gi }; } sub _start_tag_data_SAX2 { my( $elt)= @_; my $data={}; my $name= $elt->gi; return if( $elt->_is_private); $data->{Name} = $name; $data->{Prefix} = $elt->ns_prefix; $data->{LocalName} = $elt->local_name; $data->{NamespaceURI} = $elt->namespace; # save a copy of the data so we can re-use it for the end tag my %sax2_data= %$data; $elt->{twig_elt_SAX2_data}= \%sax2_data; # add the attributes $data->{Attributes}= $elt->_atts_to_SAX2; return $data; } sub _atts_to_SAX2 { my $elt= shift; my $SAX2_atts= {}; foreach my $att (keys %{$elt->atts}) { next if( _is_private_name( $att)); my $SAX2_att={}; $SAX2_att->{Name} = $att; $SAX2_att->{Prefix} = _ns_prefix( $att); $SAX2_att->{LocalName} = _local_name( $att); $SAX2_att->{NamespaceURI} = $elt->namespace( $SAX2_att->{Prefix}); $SAX2_att->{Value} = $elt->att( $att); my $SAX2_att_name= "{$SAX2_att->{NamespaceURI}}$SAX2_att->{LocalName}"; $SAX2_atts->{$SAX2_att_name}= $SAX2_att; } return $SAX2_atts; } sub _start_prefix_mapping { my( $elt, $handler, $data)= @_; if( my $start_prefix_mapping= $handler->can( 'start_prefix_mapping') and my @new_prefix_mappings= grep { /^\{[^}]*\}xmlns/ || /^\{$XMLNS_URI\}/ } keys %{$data->{Attributes}} ) { foreach my $prefix (@new_prefix_mappings) { my $prefix_string= $data->{Attributes}->{$prefix}->{LocalName}; if( $prefix_string eq 'xmlns') { $prefix_string=''; } my $prefix_data= { Prefix => $prefix_string, NamespaceURI => $data->{Attributes}->{$prefix}->{Value} }; $start_prefix_mapping->( $handler, $prefix_data); $elt->{twig_end_prefix_mapping} ||= []; push @{$elt->{twig_end_prefix_mapping}}, $prefix_string; } } } sub _end_prefix_mapping { my( $elt, $handler)= @_; if( my $end_prefix_mapping= $handler->can( 'end_prefix_mapping')) { foreach my $prefix (@{$elt->{twig_end_prefix_mapping}}) { $end_prefix_mapping->( $handler, { Prefix => $prefix} ); } } } sub _end_tag_data_SAX2 { my( $elt)= @_; return if( $elt->_is_private); return $elt->{twig_elt_SAX2_data}; } sub contains_text { my $elt= shift; my $child= $elt->_first_child; while ($child) { return 1 if( $child->is_text || $child->is_ent); $child= $child->_next_sibling; } return 0; } # creates a single pcdata element containing the text as child of the element # options: # - force_pcdata: when set to a true value forces the text to be in a #PCDATA # even if the original element was a #CDATA sub set_text { my( $elt, $string, %option)= @_; if( $elt->gi eq $PCDATA) { return $elt->set_pcdata( $string); } elsif( $elt->gi eq $CDATA) { if( $option{force_pcdata}) { $elt->set_gi( $PCDATA); $elt->_set_cdata(''); return $elt->set_pcdata( $string); } else { $elt->_set_cdata( $string); return $string; } } elsif( $elt->contains_a_single( $PCDATA) ) { # optimized so we have a slight chance of not losing embedded comments and pi's $elt->_first_child->set_pcdata( $string); return $elt; } foreach my $child (@{[$elt->_children]}) { $child->delete; } my $pcdata= $elt->_new_pcdata( $string); $pcdata->paste( $elt); $elt->set_not_empty; return $elt; } # set the content of an element from a list of strings and elements sub set_content { my $elt= shift; return $elt unless defined $_[0]; # attributes can be given as a hash (passed by ref) if( ref $_[0] eq 'HASH') { my $atts= shift; $elt->del_atts; # usually useless but better safe than sorry $elt->set_atts( $atts); return $elt unless defined $_[0]; } # check next argument for #EMPTY if( !(ref $_[0]) && ($_[0] eq $EMPTY) ) { $elt->set_empty; return $elt; } # case where we really want to do a set_text, the element is '#PCDATA' # or contains a single PCDATA and we only want to add text in it if( ($elt->gi eq $PCDATA || $elt->contains_a_single( $PCDATA)) && (@_ == 1) && !( ref $_[0])) { $elt->set_text( $_[0]); return $elt; } elsif( ($elt->gi eq $CDATA) && (@_ == 1) && !( ref $_[0])) { $elt->_set_cdata( $_[0]); return $elt; } # delete the children foreach my $child (@{[$elt->_children]}) { $child->delete; } if( @_) { $elt->set_not_empty; } foreach my $child (@_) { if( ref( $child) && isa( $child, 'XML::Twig::Elt')) { # argument is an element $child->paste( 'last_child', $elt); } else { # argument is a string if( (my $pcdata= $elt->_last_child) && $elt->_last_child->is_pcdata) { # previous child is also pcdata: just concatenate $pcdata->set_pcdata( $pcdata->pcdata . $child) } else { # previous child is not a string: create a new pcdata element $pcdata= $elt->_new_pcdata( $child); $pcdata->paste( 'last_child', $elt); } } } return $elt; } # inserts an element (whose gi is given) as child of the element # all children of the element are now children of the new element # returns the new element sub insert { my ($elt, @args)= @_; # first cut the children my @children= $elt->_children; foreach my $child (@children) { $child->cut; } # insert elements while( my $gi= shift @args) { my $new_elt= $elt->new( $gi); # add attributes if needed if( defined( $args[0]) && ( isa( $args[0], 'HASH')) ) { $new_elt->set_atts( shift @args); } # paste the element $new_elt->paste( $elt); $elt->set_not_empty; $elt= $new_elt; } # paste back the children foreach my $child (@children) { $child->paste( 'last_child', $elt); } return $elt; } # insert a new element # $elt->insert_new_element( $opt_position, $gi, $opt_atts_hash, @opt_content); # the element is created with the same syntax as new # position is the same as in paste, first_child by default sub insert_new_elt { my $elt= shift; my $position= $_[0]; if( ($position eq 'before') || ($position eq 'after') || ($position eq 'first_child') || ($position eq 'last_child')) { shift; } else { $position= 'first_child'; } my $new_elt= $elt->new( @_); $new_elt->paste( $position, $elt); #if( defined $new_elt->id) { $new_elt->set_id( $new_elt->id); } return $new_elt; } # wraps an element in elements which gi's are given as arguments # $elt->wrap_in( 'td', 'tr', 'table') wraps the element as a single # cell in a table for example # returns the new element sub wrap_in { my $elt= shift; while( my $gi = shift @_) { my $new_elt = $elt->new( $gi); if( $elt->{twig_current}) { my $t= $elt->twig; $t->{twig_current}= $new_elt; $elt->del_twig_current; $new_elt->set_twig_current; } if( my $parent= $elt->_parent) { $new_elt->set_parent( $parent); if( $parent->_first_child == $elt) { $parent->set_first_child( $new_elt); } if( $parent->_last_child == $elt) { $parent->set_last_child( $new_elt); } } else { # wrapping the root my $twig= $elt->twig; if( $twig && $twig->root && ($twig->root eq $elt) ) { $twig->set_root( $new_elt); } } if( my $prev_sibling= $elt->_prev_sibling) { $new_elt->set_prev_sibling( $prev_sibling); $prev_sibling->set_next_sibling( $new_elt); } if( my $next_sibling= $elt->_next_sibling) { $new_elt->set_next_sibling( $next_sibling); $next_sibling->set_prev_sibling( $new_elt); } $new_elt->set_first_child( $elt); $new_elt->set_last_child( $elt); $elt->set_parent( $new_elt); $elt->set_prev_sibling( undef); $elt->set_next_sibling( undef); # add the attributes if the next argument is a hash ref if( defined( $_[0]) && (isa( $_[0], 'HASH')) ) { $new_elt->set_atts( shift @_); } $elt= $new_elt; } return $elt; } sub replace { my( $elt, $ref)= @_; if( $elt->_parent) { $elt->cut; } if( my $parent= $ref->_parent) { $elt->set_parent( $parent); if( $parent->_first_child == $ref) { $parent->set_first_child( $elt); } if( $parent->_last_child == $ref) { $parent->set_last_child( $elt) ; } } elsif( $ref->twig && $ref == $ref->twig->root) { $ref->twig->set_root( $elt); } if( my $prev_sibling= $ref->_prev_sibling) { $elt->set_prev_sibling( $prev_sibling); $prev_sibling->set_next_sibling( $elt); } if( my $next_sibling= $ref->_next_sibling) { $elt->set_next_sibling( $next_sibling); $next_sibling->set_prev_sibling( $elt); } $ref->set_parent( undef); $ref->set_prev_sibling( undef); $ref->set_next_sibling( undef); return $ref; } sub replace_with { my $ref= shift; my $elt= shift; $elt->replace( $ref); foreach my $new_elt (reverse @_) { $new_elt->paste( after => $elt); } return $elt; } # move an element, same syntax as paste, except the element is first cut sub move { my $elt= shift; $elt->cut; $elt->paste( @_); return $elt; } # adds a prefix to an element, creating a pcdata child if needed sub prefix { my ($elt, $prefix, $option)= @_; my $asis= ($option && ($option eq 'asis')) ? 1 : 0; if( $elt->is_pcdata && (($asis && $elt->{asis}) || (!$asis && ! $elt->{asis})) ) { $elt->set_pcdata( $prefix . $elt->pcdata); } elsif( $elt->_first_child && $elt->_first_child->is_pcdata && ( ($asis && $elt->_first_child->{asis}) || (!$asis && ! $elt->_first_child->{asis})) ) { $elt->_first_child->set_pcdata( $prefix . $elt->_first_child->pcdata); } else { my $new_elt= $elt->_new_pcdata( $prefix); my $pos= $elt->is_pcdata ? 'before' : 'first_child'; $new_elt->paste( $pos => $elt); if( $asis) { $new_elt->set_asis; } } return $elt; } # adds a suffix to an element, creating a pcdata child if needed sub suffix { my ($elt, $suffix, $option)= @_; my $asis= ($option && ($option eq 'asis')) ? 1 : 0; if( $elt->is_pcdata && (($asis && $elt->{asis}) || (!$asis && ! $elt->{asis})) ) { $elt->set_pcdata( $elt->pcdata . $suffix); } elsif( $elt->_last_child && $elt->_last_child->is_pcdata && ( ($asis && $elt->_last_child->{asis}) || (!$asis && ! $elt->_last_child->{asis})) ) { $elt->_last_child->set_pcdata( $elt->_last_child->pcdata . $suffix); } else { my $new_elt= $elt->_new_pcdata( $suffix); my $pos= $elt->is_pcdata ? 'after' : 'last_child'; $new_elt->paste( $pos => $elt); if( $asis) { $new_elt->set_asis; } } return $elt; } # create a path to an element ('/root/.../gi) sub path { my $elt= shift; my @context= ( $elt, $elt->ancestors); return "/" . join( "/", reverse map {$_->gi} @context); } sub xpath { my $elt= shift; my $xpath; foreach my $ancestor (reverse $elt->ancestors_or_self) { my $gi= $ancestor->gi; $xpath.= "/$gi"; my $index= $ancestor->prev_siblings( $gi) + 1; unless( ($index == 1) && !$ancestor->next_sibling( $gi)) { $xpath.= "[$index]"; } } return $xpath; } # methods used mainly by wrap_children # return a string with the # for an element <foo><elt att="val">...</elt><elt2/><elt>...</elt></foo> # returns '<elt att="val"><elt2><elt>' sub _stringify_struct { my( $elt, %opt)= @_; my $string=''; my $pretty_print= set_pretty_print( 'none'); foreach my $child ($elt->_children) { $child->add_id; $string .= $child->start_tag( { escape_gt => 1 }) ||''; } set_pretty_print( $pretty_print); return $string; } # wrap a series of elements in a new one sub _wrap_range { my $elt= shift; my $gi= shift; my $atts= isa( $_[0], 'HASH') ? shift : undef; my $range= shift; # the string with the tags to wrap my $t= $elt->twig; # get the tags to wrap my @to_wrap; while( $range=~ m{<\w+\s+[^>]*id=("[^"]*"|'[^']*')[^>]*>}g) { push @to_wrap, $t->elt_id( substr( $1, 1, -1)); } return '' unless @to_wrap; my $to_wrap= shift @to_wrap; my %atts= %$atts; my $new_elt= $to_wrap->wrap_in( $gi, \%atts); $_->move( last_child => $new_elt) foreach (@to_wrap); return ''; } # wrap children matching a regexp in a new element sub wrap_children { my( $elt, $regexp, $gi, $atts)= @_; $atts ||={}; my $elt_as_string= $elt->_stringify_struct; # stringify the elt structure $regexp=~ s{(<[^>]*>)}{_match_expr( $1)}eg; # in the regexp, replace gi's by the proper regexp $elt_as_string=~ s{($regexp)}{$elt->_wrap_range( $gi, $atts, $1)}eg; # then do the actual replace return $elt; } sub _match_expr { my $tag= shift; my( $gi, %atts)= XML::Twig::_parse_start_tag( $tag); return _match_tag( $gi, %atts); } sub _match_tag { my( $elt, %atts)= @_; my $string= "<$elt\\b"; foreach my $key (sort keys %atts) { my $val= qq{\Q$atts{$key}\E}; $string.= qq{[^>]*$key=(?:"$val"|'$val')}; } $string.= qq{[^>]*>}; return "(?:$string)"; } sub field_to_att { my( $elt, $cond, $att)= @_; $att ||= $cond; my $child= $elt->first_child( $cond) or return undef; $elt->set_att( $att => $child->text); $child->cut; return $elt; } sub att_to_field { my( $elt, $att, $tag)= @_; $tag ||= $att; my $child= $elt->insert_new_elt( first_child => $tag, $elt->att( $att)); $elt->del_att( $att); return $elt; } # sort children methods sub sort_children_on_field { my $elt = shift; my $field = shift; my $get_key= sub { return $_[0]->field( $field) }; return $elt->sort_children( $get_key, @_); } sub sort_children_on_att { my $elt = shift; my $att = shift; my $get_key= sub { return $_[0]->att( $att) }; return $elt->sort_children( $get_key, @_); } sub sort_children_on_value { my $elt = shift; #my $get_key= eval qq{ sub { $NO_WARNINGS; return \$_[0]->text } }; my $get_key= \&text; return $elt->sort_children( $get_key, @_); } sub sort_children { my( $elt, $get_key, %opt)=@_; $opt{order} ||= 'normal'; $opt{type} ||= 'alpha'; my( $par_a, $par_b)= ($opt{order} eq 'reverse') ? qw( b a) : qw ( a b) ; my $op= ($opt{type} eq 'numeric') ? '<=>' : 'cmp' ; my @children= $elt->cut_children; if( $opt{type} eq 'numeric') { @children= map { $_->[1] } sort { $a->[0] <=> $b->[0] } map { [ $get_key->( $_), $_] } @children; } elsif( $opt{type} eq 'alpha') { @children= map { $_->[1] } sort { $a->[0] cmp $b->[0] } map { [ $get_key->( $_), $_] } @children; } else { croak "wrong sort type '$opt{type}', should be either 'alpha' or 'numeric'"; } @children= reverse @children if( $opt{order} eq 'reverse'); $elt->set_content( @children); } # comparison methods sub before { my( $a, $b)=@_; if( $a->cmp( $b) == -1) { return 1; } else { return 0; } } sub after { my( $a, $b)=@_; if( $a->cmp( $b) == 1) { return 1; } else { return 0; } } sub lt { my( $a, $b)=@_; return 1 if( $a->cmp( $b) == -1); return 0; } sub le { my( $a, $b)=@_; return 1 unless( $a->cmp( $b) == 1); return 0; } sub gt { my( $a, $b)=@_; return 1 if( $a->cmp( $b) == 1); return 0; } sub ge { my( $a, $b)=@_; return 1 unless( $a->cmp( $b) == -1); return 0; } sub cmp { my( $a, $b)=@_; # easy cases return 0 if( $a == $b); return 1 if( $a->in($b)); # a in b => a starts after b return -1 if( $b->in($a)); # b in a => a starts before b # ancestors does not include the element itself my @a_pile= ($a, $a->ancestors); my @b_pile= ($b, $b->ancestors); # the 2 elements are not in the same twig return undef unless( $a_pile[-1] == $b_pile[-1]); # find the first non common ancestors (they are siblings) my $a_anc= pop @a_pile; my $b_anc= pop @b_pile; while( $a_anc == $b_anc) { $a_anc= pop @a_pile; $b_anc= pop @b_pile; } # from there move left and right and figure out the order my( $a_prev, $a_next, $b_prev, $b_next)= ($a_anc, $a_anc, $b_anc, $b_anc); while() { $a_prev= $a_prev->_prev_sibling || return( -1); return 1 if( $a_prev == $b_next); $a_next= $a_next->_next_sibling || return( 1); return -1 if( $a_next == $b_prev); $b_prev= $b_prev->_prev_sibling || return( 1); return -1 if( $b_prev == $a_next); $b_next= $b_next->_next_sibling || return( -1); return 1 if( $b_next == $a_prev); } } sub _dump { my( $elt, $option)= @_; my $atts = defined $option->{atts} ? $option->{atts} : 1; my $extra = defined $option->{extra} ? $option->{extra} : 0; my $short_text = defined $option->{short_text} ? $option->{short_text} : 40; my $sp= '| '; my $indent= $sp x $elt->level; my $indent_sp= ' ' x $elt->level; my $dump=''; if( $elt->is_elt) { $dump .= $indent . '|-' . $elt->gi; if( $atts && (my @atts= $elt->att_names) ) { $dump .= ' ' . join( ' ', map { qq{$_="} . $elt->att( $_) . qq{"} } @atts); } $dump .= "\n"; if( $extra) { $dump .= $elt->_dump_extra_data( $indent, $indent_sp, $short_text); } $dump .= join( "", map { $_->_dump( $option) } $elt->_children); } else { if( $elt->is_pcdata) { $dump .= "$indent|-PCDATA: '" . _short_text( $elt->pcdata, $short_text) . "'\n" } elsif( $elt->is_ent) { $dump .= "$indent|-ENTITY: '" . _short_text( $elt->ent, $short_text) . "'\n" } elsif( $elt->is_cdata) { $dump .= "$indent|-CDATA: '" . _short_text( $elt->cdata, $short_text) . "'\n" } elsif( $elt->is_comment) { $dump .= "$indent|-COMMENT: '" . _short_text( $elt->comment_string, $short_text) . "'\n" } elsif( $elt->is_pi) { $dump .= "$indent|-PI: '" . $elt->target . "' - '" . _short_text( $elt->data, $short_text) . "'\n" } if( $extra) { $dump .= $elt->_dump_extra_data( $indent, $indent_sp, $short_text); } } return $dump; } sub _dump_extra_data { my( $elt, $indent, $indent_sp, $short_text)= @_; my $dump=''; if( $elt->extra_data) { my $extra_data = $indent . "|-- (cpi before) '" . _short_text( $elt->extra_data, $short_text) . "'"; $extra_data=~ s{\n}{$indent_sp}g; $dump .= $extra_data . "\n"; } if( $elt->_extra_data_in_pcdata) { foreach my $data ( @{$elt->_extra_data_in_pcdata}) { my $extra_data = $indent . "|-- (cpi offset $data->{offset}) '" . _short_text( $data->{text}, $short_text) . "'"; $extra_data=~ s{\n}{$indent_sp}g; $dump .= $extra_data . "\n"; } } if( $elt->_extra_data_before_end_tag) { my $extra_data = $indent . "|-- (cpi end) '" . _short_text( $elt->_extra_data_before_end_tag, $short_text) . "'"; $extra_data=~ s{\n}{$indent_sp}g; $dump .= $extra_data . "\n"; } return $dump; } sub _short_text { my( $string, $length)= @_; if( !$length || (length( $string) < $length) ) { return $string; } my $l1= (length( $string) -5) /2; my $l2= length( $string) - ($l1 + 5); return substr( $string, 0, $l1) . ' ... ' . substr( $string, -$l2); } sub _and { return _join_defined( ' && ', @_); } sub _join_defined { return join( shift(), grep { $_ } @_); } 1; __END__ =head1 NAME XML::Twig - A perl module for processing huge XML documents in tree mode. =head1 SYNOPSIS Note that this documentation is intended as a reference to the module. Complete docs, including a tutorial, examples, an easier to use HTML version, a quick reference card and a FAQ are available at L<http://www.xmltwig.org/xmltwig> Small documents (loaded in memory as a tree): my $twig=XML::Twig->new(); # create the twig $twig->parsefile( 'doc.xml'); # build it my_process( $twig); # use twig methods to process it $twig->print; # output the twig Huge documents (processed in combined stream/tree mode): # at most one div will be loaded in memory my $twig=XML::Twig->new( twig_handlers => { title => sub { $_->set_tag( 'h2') }, # change title tags to h2 # $_ is the current element para => sub { $_->set_tag( 'p') }, # change para to p hidden => sub { $_->delete; }, # remove hidden elements list => \&my_list_process, # process list elements div => sub { $_[0]->flush; }, # output and free memory }, pretty_print => 'indented', # output will be nicely formatted empty_tags => 'html', # outputs <empty_tag /> ); $twig->parsefile( 'my_big.xml'); sub my_list_process { my( $twig, $list)= @_; # ... } See L<XML::Twig 101|/XML::Twig 101> for other ways to use the module, as a filter for example. =encoding utf8 # > perl 5.10.0 =head1 DESCRIPTION This module provides a way to process XML documents. It is build on top of C<XML::Parser>. The module offers a tree interface to the document, while allowing you to output the parts of it that have been completely processed. It allows minimal resource (CPU and memory) usage by building the tree only for the parts of the documents that need actual processing, through the use of the C<L<twig_roots> > and C<L<twig_print_outside_roots> > options. The C<L<finish> > and C<L<finish_print> > methods also help to increase performances. XML::Twig tries to make simple things easy so it tries its best to takes care of a lot of the (usually) annoying (but sometimes necessary) features that come with XML and XML::Parser. =head1 TOOLS XML::Twig comes with a few command-line utilities: =head2 xml_pp - xml pretty-printer XML pretty printer using XML::Twig =head2 xml_grep - grep XML files looking for specific elements C<xml_grep> does a grep on XML files. Instead of using regular expressions it uses XPath expressions (in fact the subset of XPath supported by XML::Twig). =head2 xml_split - cut a big XML file into smaller chunks C<xml_split> takes a (presumably big) XML file and split it in several smaller files, based on various criteria (level in the tree, size or an XPath expression) =head2 xml_merge - merge back XML files split with xml_split C<xml_merge> takes several xml files that have been split using C<xml_split> and recreates a single file. =head2 xml_spellcheck - spellcheck XML files C<xml_spellcheck> lets you spell check the content of an XML file. It extracts the text (the content of elements and optionally of attributes), call a spell checker on it and then recreates the XML document. =head1 XML::Twig 101 XML::Twig can be used either on "small" XML documents (that fit in memory) or on huge ones, by processing parts of the document and outputting or discarding them once they are processed. =head2 Loading an XML document and processing it my $t= XML::Twig->new(); $t->parse( '<d><title>titlep 1p 2'); my $root= $t->root; $root->set_tag( 'html'); # change doc to html $title= $root->first_child( 'title'); # get the title $title->set_tag( 'h1'); # turn it into h1 my @para= $root->children( 'para'); # get the para children foreach my $para (@para) { $para->set_tag( 'p'); } # turn them into p $t->print; # output the document Other useful methods include: L: C<< $elt->att( 'foo') >> return the C attribute for an element, L : C<< $elt->set_att( foo => "bar") >> sets the C attribute to the C value, L: C<< $elt->next_sibling >> return the next sibling in the document (in the example C<< $title->next_sibling >> is the first C, you can also (and actually should) use C<< $elt->next_sibling( 'para') >> to get it The document can also be transformed through the use of the L, L, L and L methods: C<< $title->cut; $title->paste( after => $p); >> for example And much, much more, see L. =head2 Processing an XML document chunk by chunk One of the strengths of XML::Twig is that it let you work with files that do not fit in memory (BTW storing an XML document in memory as a tree is quite memory-expensive, the expansion factor being often around 10). To do this you can define handlers, that will be called once a specific element has been completely parsed. In these handlers you can access the element and process it as you see fit, using the navigation and the cut-n-paste methods, plus lots of convenient ones like C >. Once the element is completely processed you can then C > it, which will output it and free the memory. You can also C > it if you don't need to output it (if you are just extracting some data from the document for example). The handler will be called again once the next relevant element has been parsed. my $t= XML::Twig->new( twig_handlers => { section => \§ion, para => sub { $_->set_tag( 'p'); } }, ); $t->parsefile( 'doc.xml'); # the handler is called once a section is completely parsed, ie when # the end tag for section is found, it receives the twig itself and # the element (including all its sub-elements) as arguments sub section { my( $t, $section)= @_; # arguments for all twig_handlers $section->set_tag( 'div'); # change the tag name # let's use the attribute nb as a prefix to the title my $title= $section->first_child( 'title'); # find the title my $nb= $title->att( 'nb'); # get the attribute $title->prefix( "$nb - "); # easy isn't it? $section->flush; # outputs the section and frees memory } There is of course more to it: you can trigger handlers on more elaborate conditions than just the name of the element, C
for example. my $t= XML::Twig->new( twig_handlers => { 'section/title' => sub { $_->print } } ) ->parsefile( 'doc.xml'); Here C<< sub { $_->print } >> simply prints the current element (C<$_> is aliased to the element in the handler). You can also trigger a handler on a test on an attribute: my $t= XML::Twig->new( twig_handlers => { 'section[@level="1"]' => sub { $_->print } } ); ->parsefile( 'doc.xml'); You can also use C > to process an element as soon as the start tag is found. Besides C > you can also use C >, =head2 Processing just parts of an XML document The twig_roots mode builds only the required sub-trees from the document Anything outside of the twig roots will just be ignored: my $t= XML::Twig->new( # the twig will include just the root and selected titles twig_roots => { 'section/title' => \&print_n_purge, 'annex/title' => \&print_n_purge } ); $t->parsefile( 'doc.xml'); sub print_n_purge { my( $t, $elt)= @_; print $elt->text; # print the text (including sub-element texts) $t->purge; # frees the memory } You can use that mode when you want to process parts of a documents but are not interested in the rest and you don't want to pay the price, either in time or memory, to build the tree for the it. =head2 Building an XML filter You can combine the C and the C options to build filters, which let you modify selected elements and will output the rest of the document as is. This would convert prices in $ to prices in Euro in a document: my $t= XML::Twig->new( twig_roots => { 'price' => \&convert, }, # process prices twig_print_outside_roots => 1, # print the rest ); $t->parsefile( 'doc.xml'); sub convert { my( $t, $price)= @_; my $currency= $price->att( 'currency'); # get the currency if( $currency eq 'USD') { $usd_price= $price->text; # get the price # %rate is just a conversion table my $euro_price= $usd_price * $rate{usd2euro}; $price->set_text( $euro_price); # set the new price $price->set_att( currency => 'EUR'); # don't forget this! } $price->print; # output the price } =head2 XML::Twig and various versions of Perl, XML::Parser and expat: XML::Twig is a lot more sensitive to variations in versions of perl, XML::Parser and expat than to the OS, so this should cover some reasonable configurations. The "recommended configuration" is perl 5.8.3+ (for good Unicode support), XML::Parser 2.31+ and expat 1.95.5+ See L for the CPAN testers reports on XML::Twig, which list all tested configurations. An Atom feed of the CPAN Testers results is available at L Finally: =over 4 =item XML::Twig does B work with expat 1.95.4 =item XML::Twig only works with XML::Parser 2.27 in perl 5.6.* Note that I can't compile XML::Parser 2.27 anymore, so I can't guarantee that it still works =item XML::Parser 2.28 does not really work =back When in doubt, upgrade expat, XML::Parser and Scalar::Util Finally, for some optional features, XML::Twig depends on some additional modules. The complete list, which depends somewhat on the version of Perl that you are running, is given by running C =head1 Simplifying XML processing =over 4 =item Whitespaces Whitespaces that look non-significant are discarded, this behaviour can be controlled using the C >, C > and C > options. =item Encoding You can specify that you want the output in the same encoding as the input (provided you have valid XML, which means you have to specify the encoding either in the document or when you create the Twig object) using the C > option You can also use C> to convert the internal UTF-8 format to the required encoding. =item Comments and Processing Instructions (PI) Comments and PI's can be hidden from the processing, but still appear in the output (they are carried by the "real" element closer to them) =item Pretty Printing XML::Twig can output the document pretty printed so it is easier to read for us humans. =item Surviving an untimely death XML parsers are supposed to react violently when fed improper XML. XML::Parser just dies. XML::Twig provides the C > and the C > methods which wrap the parse in an eval and return either the parsed twig or 0 in case of failure. =item Private attributes Attributes with a name starting with # (illegal in XML) will not be output, so you can safely use them to store temporary values during processing. Note that you can store anything in a private attribute, not just text, it's just a regular Perl variable, so a reference to an object or a huge data structure is perfectly fine. =back =head1 CLASSES XML::Twig uses a very limited number of classes. The ones you are most likely to use are C> of course, which represents a complete XML document, including the document itself (the root of the document itself is C>), its handlers, its input or output filters... The other main class is C>, which models an XML element. Element here has a very wide definition: it can be a regular element, or but also text, with an element C> of C<#PCDATA> (or C<#CDATA>), an entity (tag is C<#ENT>), a Processing Instruction (C<#PI>), a comment (C<#COMMENT>). Those are the 2 commonly used classes. You might want to look the C> option if you want to subclass C. Attributes are just attached to their parent element, they are not objects per se. (Please use the provided methods C> and C> to access them, if you access them as a hash, then your code becomes implementation dependent and might break in the future). Other classes that are seldom used are C> and C>. If you use C> instead of C, elements are then created as C> =head1 METHODS =head2 XML::Twig A twig is a subclass of XML::Parser, so all XML::Parser methods can be called on a twig object, including parse and parsefile. C on the other hand cannot be used, see C > =over 4 =item new This is a class method, the constructor for XML::Twig. Options are passed as keyword value pairs. Recognized options are the same as XML::Parser, plus some (in fact a lot!) XML::Twig specifics. New Options: =over 4 =item twig_handlers This argument consists of a hash C<{ expression => \&handler}> where expression is a an I (+ some others). XPath expressions are limited to using the child and descendant axis (indeed you can't specify an axis), and predicates cannot be nested. You can use the C, or C<< string() >> function (except in C triggers). Additionally you can use regexps (/ delimited) to match attribute and string values. Examples: foo foo/bar foo//bar /foo/bar /foo//bar /foo/bar[@att1 = "val1" and @att2 = "val2"]/baz[@a >= 1] foo[string()=~ /^duh!+/] /foo[string(bar)=~ /\d+/]/baz[@att != 3] #CDATA can be used to call a handler for a CDATA section. #COMMENT can be used to call a handler for comments Some additional (non-XPath) expressions are also provided for convenience: =over 4 =item processing instructions C<'?'> or C<'#PI'> triggers the handler for any processing instruction, and C<< '?' >> or C<< '#PI ' >> triggers a handler for processing instruction with the given target( ex: C<'#PI xml-stylesheet'>). =item level() Triggers the handler on any element at that level in the tree (root is level 1) =item _all_ Triggers the handler for B elements in the tree =item _default_ Triggers the handler for each element that does NOT have any other handler. =back Expressions are evaluated against the input document. Which means that even if you have changed the tag of an element (changing the tag of a parent element from a handler for example) the change will not impact the expression evaluation. There is an exception to this: "private" attributes (which name start with a '#', and can only be created during the parsing, as they are not valid XML) are checked against the current twig. Handlers are triggered in fixed order, sorted by their type (xpath expressions first, then regexps, then level), then by whether they specify a full path (starting at the root element) or not, then by number of steps in the expression, then number of predicates, then number of tests in predicates. Handlers where the last step does not specify a step (C) are triggered after other XPath handlers. Finally C<_all_> handlers are triggered last. B: once a handler has been triggered if it returns 0 then no other handler is called, except a C<_all_> handler which will be called anyway. If a handler returns a true value and other handlers apply, then the next applicable handler will be called. Repeat, rinse, lather..; The exception to that rule is when the C> option is set, in which case only the first handler will be called. Note that it might be a good idea to explicitly return a short true value (like 1) from handlers: this ensures that other applicable handlers are called even if the last statement for the handler happens to evaluate to false. This might also speedup the code by avoiding the result of the last statement of the code to be copied and passed to the code managing handlers. It can really pay to have 1 instead of a long string returned. When the closing tag for an element is parsed the corresponding handler is called, with 2 arguments: the twig and the C >. The twig includes the document tree that has been built so far, the element is the complete sub-tree for the element. B. C<$_> is also set to the element, so it is easy to write inline handlers like para => sub { $_->set_tag( 'p'); } Text is stored in elements whose tag name is #PCDATA (due to mixed content, text and sub-element in an element there is no way to store the text as just an attribute of the enclosing element, this is similar to the DOM model). B: if you have used purge or flush on the twig the element might not be complete, some of its children might have been entirely flushed or purged, and the start tag might even have been printed (by C) already, so changing its tag might not give the expected result. =item twig_roots This argument let's you build the tree only for those elements you are interested in. Example: my $t= XML::Twig->new( twig_roots => { title => 1, subtitle => 1}); $t->parsefile( file); my $t= XML::Twig->new( twig_roots => { 'section/title' => 1}); $t->parsefile( file); return a twig containing a document including only C and C<subtitle> elements, as children of the root element. You can use I<generic_attribute_condition>, I<attribute_condition>, I<full_path>, I<partial_path>, I<tag>, I<tag_regexp>, I<_default_> and I<_all_> to trigger the building of the twig. I<string_condition> and I<regexp_condition> cannot be used as the content of the element, and the string, have not yet been parsed when the condition is checked. B<WARNING>: path are checked for the document. Even if the C<twig_roots> option is used they will be checked against the full document tree, not the virtual tree created by XML::Twig B<WARNING>: twig_roots elements should NOT be nested, that would hopelessly confuse XML::Twig ;--( Note: you can set handlers (twig_handlers) using twig_roots Example: my $t= XML::Twig->new( twig_roots => { title => sub { $_[1]->print;}, subtitle => \&process_subtitle } ); $t->parsefile( file); =item twig_print_outside_roots To be used in conjunction with the C<twig_roots> argument. When set to a true value this will print the document outside of the C<twig_roots> elements. Example: my $t= XML::Twig->new( twig_roots => { title => \&number_title }, twig_print_outside_roots => 1, ); $t->parsefile( file); { my $nb; sub number_title { my( $twig, $title); $nb++; $title->prefix( "$nb "); $title->print; } } This example prints the document outside of the title element, calls C<number_title> for each C<title> element, prints it, and then resumes printing the document. The twig is built only for the C<title> elements. If the value is a reference to a file handle then the document outside the C<twig_roots> elements will be output to this file handle: open( my $out, '>', 'out_file.xml') or die "cannot open out file.xml out_file:$!"; my $t= XML::Twig->new( twig_roots => { title => \&number_title }, # default output to $out twig_print_outside_roots => $out, ); { my $nb; sub number_title { my( $twig, $title); $nb++; $title->prefix( "$nb "); $title->print( $out); # you have to print to \*OUT here } } =item start_tag_handlers A hash C<{ expression => \&handler}>. Sets element handlers that are called when the element is open (at the end of the XML::Parser C<Start> handler). The handlers are called with 2 params: the twig and the element. The element is empty at that point, its attributes are created though. You can use I<generic_attribute_condition>, I<attribute_condition>, I<full_path>, I<partial_path>, I<tag>, I<tag_regexp>, I<_default_> and I<_all_> to trigger the handler. I<string_condition> and I<regexp_condition> cannot be used as the content of the element, and the string, have not yet been parsed when the condition is checked. The main uses for those handlers are to change the tag name (you might have to do it as soon as you find the open tag if you plan to C<flush> the twig at some point in the element, and to create temporary attributes that will be used when processing sub-element with C<twig_hanlders>. B<Note>: C<start_tag> handlers can be called outside of C<twig_roots> if this argument is used. Since the element object is not built, in this case handlers are called with the following arguments: C<$t> (the twig), C<$tag> (the tag of the element) and C<%att> (a hash of the attributes of the element). If the C<twig_print_outside_roots> argument is also used, if the last handler called returns a C<true> value, then the start tag will be output as it appeared in the original document, if the handler returns a C<false> value then the start tag will B<not> be printed (so you can print a modified string yourself for example). Note that you can use the L<ignore> method in C<start_tag_handlers> (and only there). =item end_tag_handlers A hash C<{ expression => \&handler}>. Sets element handlers that are called when the element is closed (at the end of the XML::Parser C<End> handler). The handlers are called with 2 params: the twig and the tag of the element. I<twig_handlers> are called when an element is completely parsed, so why have this redundant option? There is only one use for C<end_tag_handlers>: when using the C<twig_roots> option, to trigger a handler for an element B<outside> the roots. It is for example very useful to number titles in a document using nested sections: my @no= (0); my $no; my $t= XML::Twig->new( start_tag_handlers => { section => sub { $no[$#no]++; $no= join '.', @no; push @no, 0; } }, twig_roots => { title => sub { $_->prefix( $no); $_->print; } }, end_tag_handlers => { section => sub { pop @no; } }, twig_print_outside_roots => 1 ); $t->parsefile( $file); Using the C<end_tag_handlers> argument without C<twig_roots> will result in an error. =item do_not_chain_handlers If this option is set to a true value, then only one handler will be called for each element, even if several satisfy the condition Note that the C<_all_> handler will still be called regardless =item ignore_elts This option lets you ignore elements when building the twig. This is useful in cases where you cannot use C<twig_roots> to ignore elements, for example if the element to ignore is a sibling of elements you are interested in. Example: my $twig= XML::Twig->new( ignore_elts => { elt => 'discard' }); $twig->parsefile( 'doc.xml'); This will build the complete twig for the document, except that all C<elt> elements (and their children) will be left out. The keys in the hash are triggers, limited to the same subset as C<L<start_tag_handlers>>. The values can be C<discard>, to discard the element, C<print>, to output the element as-is, C<string> to store the text of the ignored element(s), including markup, in a field of the twig: C<< $t->{twig_buffered_string} >> or a reference to a scalar, in which case the text of the ignored element(s), including markup, will be stored in the scalar. Any other value will be treated as C<discard>. =item char_handler A reference to a subroutine that will be called every time C<PCDATA> is found. The subroutine receives the string as argument, and returns the modified string: # WE WANT ALL STRINGS IN UPPER CASE sub my_char_handler { my( $text)= @_; $text= uc( $text); return $text; } =item elt_class The name of a class used to store elements. this class should inherit from C<XML::Twig::Elt> (and by default it is C<XML::Twig::Elt>). This option is used to subclass the element class and extend it with new methods. This option is needed because during the parsing of the XML, elements are created by C<XML::Twig>, without any control from the user code. =item keep_atts_order Setting this option to a true value causes the attribute hash to be tied to a C<Tie::IxHash> object. This means that C<Tie::IxHash> needs to be installed for this option to be available. It also means that the hash keeps its order, so you will get the attributes in order. This allows outputting the attributes in the same order as they were in the original document. =item keep_encoding This is a (slightly?) evil option: if the XML document is not UTF-8 encoded and you want to keep it that way, then setting keep_encoding will use theC<Expat> original_string method for character, thus keeping the original encoding, as well as the original entities in the strings. See the C<t/test6.t> test file to see what results you can expect from the various encoding options. B<WARNING>: if the original encoding is multi-byte then attribute parsing will be EXTREMELY unsafe under any Perl before 5.6, as it uses regular expressions which do not deal properly with multi-byte characters. You can specify an alternate function to parse the start tags with the C<parse_start_tag> option (see below) B<WARNING>: this option is NOT used when parsing with XML::Parser non-blocking parser (C<parse_start>, C<parse_more>, C<parse_done> methods) which you probably should not use with XML::Twig anyway as they are totally untested! =item output_encoding This option generates an output_filter using C<Encode>, C<Text::Iconv> or C<Unicode::Map8> and C<Unicode::Strings>, and sets the encoding in the XML declaration. This is the easiest way to deal with encodings, if you need more sophisticated features, look at C<output_filter> below =item output_filter This option is used to convert the character encoding of the output document. It is passed either a string corresponding to a predefined filter or a subroutine reference. The filter will be called every time a document or element is processed by the "print" functions (C<print>, C<sprint>, C<flush>). Pre-defined filters: =over 4 =item latin1 uses either C<Encode>, C<Text::Iconv> or C<Unicode::Map8> and C<Unicode::String> or a regexp (which works only with XML::Parser 2.27), in this order, to convert all characters to ISO-8859-15 (usually latin1 is synonym to ISO-8859-1, but in practice it seems that ISO-8859-15, which includes the euro sign, is more useful and probably what most people want). =item html does the same conversion as C<latin1>, plus encodes entities using C<HTML::Entities> (oddly enough you will need to have HTML::Entities installed for it to be available). This should only be used if the tags and attribute names themselves are in US-ASCII, or they will be converted and the output will not be valid XML any more =item safe converts the output to ASCII (US) only plus I<character entities> (C<&#nnn;>) this should be used only if the tags and attribute names themselves are in US-ASCII, or they will be converted and the output will not be valid XML any more =item safe_hex same as C<safe> except that the character entities are in hex (C<&#xnnn;>) =item encode_convert ($encoding) Return a subref that can be used to convert utf8 strings to C<$encoding>). Uses C<Encode>. my $conv = XML::Twig::encode_convert( 'latin1'); my $t = XML::Twig->new(output_filter => $conv); =item iconv_convert ($encoding) this function is used to create a filter subroutine that will be used to convert the characters to the target encoding using C<Text::Iconv> (which needs to be installed, look at the documentation for the module and for the C<iconv> library to find out which encodings are available on your system, C<iconv -l> should give you a list of available encodings) my $conv = XML::Twig::iconv_convert( 'latin1'); my $t = XML::Twig->new(output_filter => $conv); =item unicode_convert ($encoding) this function is used to create a filter subroutine that will be used to convert the characters to the target encoding using C<Unicode::Strings> and C<Unicode::Map8> (which need to be installed, look at the documentation for the modules to find out which encodings are available on your system) my $conv = XML::Twig::unicode_convert( 'latin1'); my $t = XML::Twig->new(output_filter => $conv); =back The C<text> and C<att> methods do not use the filter, so their result are always in unicode. Those predeclared filters are based on subroutines that can be used by themselves (as C<XML::Twig::foo>). =over 4 =item html_encode ($string) Use C<HTML::Entities> to encode a utf8 string =item safe_encode ($string) Use either a regexp (perl < 5.8) or C<Encode> to encode non-ascii characters in the string in C<< &#<nnnn>; >> format =item safe_encode_hex ($string) Use either a regexp (perl < 5.8) or C<Encode> to encode non-ascii characters in the string in C<< &#x<nnnn>; >> format =item regexp2latin1 ($string) Use a regexp to encode a utf8 string into latin 1 (ISO-8859-1). Does not work with Perl 5.8.0! =back =item output_text_filter same as output_filter, except it doesn't apply to the brackets and quotes around attribute values. This is useful for all filters that could change the tagging, basically anything that does not just change the encoding of the output. C<html>, C<safe> and C<safe_hex> are better used with this option. =item input_filter This option is similar to C<output_filter> except the filter is applied to the characters before they are stored in the twig, at parsing time. =item remove_cdata Setting this option to a true value will force the twig to output CDATA sections as regular (escaped) PCDATA =item parse_start_tag If you use the C<keep_encoding> option then this option can be used to replace the default parsing function. You should provide a coderef (a reference to a subroutine) as the argument, this subroutine takes the original tag (given by XML::Parser::Expat C<original_string()> method) and returns a tag and the attributes in a hash (or in a list attribute_name/attribute value). =item no_xxe prevents external entities to be parsed. This is a security feature, in case the input XML cannot be trusted. With this option set to a true value defining external entities in the document will cause the parse to fail. This prevents an entity like C<< <!ENTITY xxe PUBLIC "bar" "/etc/passwd"> >> to make the password fiel available in the document. =item expand_external_ents When this option is used external entities (that are defined) are expanded when the document is output using "print" functions such as C<L<print> >, C<L<sprint> >, C<L<flush> > and C<L<xml_string> >. Note that in the twig the entity will be stored as an element with a tag 'C<#ENT>', the entity will not be expanded there, so you might want to process the entities before outputting it. If an external entity is not available, then the parse will fail. A special case is when the value of this option is -1. In that case a missing entity will not cause the parser to die, but its C<name>, C<sysid> and C<pubid> will be stored in the twig as C<< $twig->{twig_missing_system_entities} >> (a reference to an array of hashes { name => <name>, sysid => <sysid>, pubid => <pubid> }). Yes, this is a bit of a hack, but it's useful in some cases. =item load_DTD If this argument is set to a true value, C<parse> or C<parsefile> on the twig will load the DTD information. This information can then be accessed through the twig, in a C<DTD_handler> for example. This will load even an external DTD. Default and fixed values for attributes will also be filled, based on the DTD. Note that to do this the module will generate a temporary file in the current directory. If this is a problem let me know and I will add an option to specify an alternate directory. See L<DTD Handling> for more information =item DTD_base <path_to_DTD_directory> If the DTD is in a different directory, looks for it there, useful to make up somewhat for the lack of catalog suport in C<expat>. You still need a SYSTEM declaration =item DTD_handler Set a handler that will be called once the doctype (and the DTD) have been loaded, with 2 arguments, the twig and the DTD. =item no_prolog Does not output a prolog (XML declaration and DTD) =item id This optional argument gives the name of an attribute that can be used as an ID in the document. Elements whose ID is known can be accessed through the elt_id method. id defaults to 'id'. See C<L<BUGS> > =item discard_spaces If this optional argument is set to a true value then spaces are discarded when they look non-significant: strings containing only spaces and at least one line feed are discarded. This argument is set to true by default. The exact algorithm to drop spaces is: strings including only spaces (perl \s) and at least one \n right before an open or close tag are dropped. =item discard_all_spaces If this argument is set to a true value, spaces are discarded more aggressively than with C<discard_spaces>: strings not including a \n are also dropped. This option is appropriate for data-oriented XML. =item keep_spaces If this optional argument is set to a true value then all spaces in the document are kept, and stored as C<PCDATA>. B<Warning>: adding this option can result in changes in the twig generated: space that was previously discarded might end up in a new text element. see the difference by calling the following code with 0 and 1 as arguments: perl -MXML::Twig -e'print XML::Twig->new( keep_spaces => shift)->parse( "<d> \n<e/></d>")->_dump' C<keep_spaces> and C<discard_spaces> cannot be both set. =item discard_spaces_in This argument sets C<keep_spaces> to true but will cause the twig builder to discard spaces in the elements listed. The syntax for using this argument is: XML::Twig->new( discard_spaces_in => [ 'elt1', 'elt2']); =item keep_spaces_in This argument sets C<discard_spaces> to true but will cause the twig builder to keep spaces in the elements listed. The syntax for using this argument is: XML::Twig->new( keep_spaces_in => [ 'elt1', 'elt2']); B<Warning>: adding this option can result in changes in the twig generated: space that was previously discarded might end up in a new text element. =item pretty_print Set the pretty print method, amongst 'C<none>' (default), 'C<nsgmls>', 'C<nice>', 'C<indented>', 'C<indented_c>', 'C<indented_a>', 'C<indented_close_tag>', 'C<cvs>', 'C<wrapped>', 'C<record>' and 'C<record_c>' pretty_print formats: =over 4 =item none The document is output as one ling string, with no line breaks except those found within text elements =item nsgmls Line breaks are inserted in safe places: that is within tags, between a tag and an attribute, between attributes and before the > at the end of a tag. This is quite ugly but better than C<none>, and it is very safe, the document will still be valid (conforming to its DTD). This is how the SGML parser C<sgmls> splits documents, hence the name. =item nice This option inserts line breaks before any tag that does not contain text (so element with textual content are not broken as the \n is the significant). B<WARNING>: this option leaves the document well-formed but might make it invalid (not conformant to its DTD). If you have elements declared as <!ELEMENT foo (#PCDATA|bar)> then a C<foo> element including a C<bar> one will be printed as <foo> <bar>bar is just pcdata</bar> </foo> This is invalid, as the parser will take the line break after the C<foo> tag as a sign that the element contains PCDATA, it will then die when it finds the C<bar> tag. This may or may not be important for you, but be aware of it! =item indented Same as C<nice> (and with the same warning) but indents elements according to their level =item indented_c Same as C<indented> but a little more compact: the closing tags are on the same line as the preceding text =item indented_close_tag Same as C<indented> except that the closing tag is also indented, to line up with the tags within the element =item idented_a This formats XML files in a line-oriented version control friendly way. The format is described in L<http://tinyurl.com/2kwscq> (that's an Oracle document with an insanely long URL). Note that to be totaly conformant to the "spec", the order of attributes should not be changed, so if they are not already in alphabetical order you will need to use the C<L<keep_atts_order>> option. =item cvs Same as C<L<idented_a>>. =item wrapped Same as C<indented_c> but lines are wrapped using L<Text::Wrap::wrap>. The default length for lines is the default for C<$Text::Wrap::columns>, and can be changed by changing that variable. =item record This is a record-oriented pretty print, that display data in records, one field per line (which looks a LOT like C<indented>) =item record_c Stands for record compact, one record per line =back =item empty_tags Set the empty tag display style ('C<normal>', 'C<html>' or 'C<expand>'). C<normal> outputs an empty tag 'C<< <tag/> >>', C<html> adds a space 'C<< <tag /> >>' for elements that can be empty in XHTML and C<expand> outputs 'C<< <tag></tag> >>' =item quote Set the quote character for attributes ('C<single>' or 'C<double>'). =item escape_gt By default XML::Twig does not escape the character > in its output, as it is not mandated by the XML spec. With this option on, > will be replaced by C<>> =item comments Set the way comments are processed: 'C<drop>' (default), 'C<keep>' or 'C<process>' Comments processing options: =over 4 =item drop drops the comments, they are not read, nor printed to the output =item keep comments are loaded and will appear on the output, they are not accessible within the twig and will not interfere with processing though B<Note>: comments in the middle of a text element such as <p>text <!-- comment --> more text --></p> are kept at their original position in the text. Using Ë"print" methods like C<print> or C<sprint> will return the comments in the text. Using C<text> or C<field> on the other hand will not. Any use of C<set_pcdata> on the C<#PCDATA> element (directly or through other methods like C<set_content>) will delete the comment(s). =item process comments are loaded in the twig and will be treated as regular elements (their C<tag> is C<#COMMENT>) this can interfere with processing if you expect C<< $elt->first_child >> to be an element but find a comment there. Validation will not protect you from this as comments can happen anywhere. You can use C<< $elt->first_child( 'tag') >> (which is a good habit anyway) to get where you want. Consider using C<process> if you are outputting SAX events from XML::Twig. =back =item pi Set the way processing instructions are processed: 'C<drop>', 'C<keep>' (default) or 'C<process>' Note that you can also set PI handlers in the C<twig_handlers> option: '?' => \&handler '?target' => \&handler 2 The handlers will be called with 2 parameters, the twig and the PI element if C<pi> is set to C<process>, and with 3, the twig, the target and the data if C<pi> is set to C<keep>. Of course they will not be called if C<pi> is set to C<drop>. If C<pi> is set to C<keep> the handler should return a string that will be used as-is as the PI text (it should look like "C< <?target data?> >" or '' if you want to remove the PI), Only one handler will be called, C<?target> or C<?> if no specific handler for that target is available. =item map_xmlns This option is passed a hashref that maps uri's to prefixes. The prefixes in the document will be replaced by the ones in the map. The mapped prefixes can (actually have to) be used to trigger handlers, navigate or query the document. Here is an example: my $t= XML::Twig->new( map_xmlns => {'http://www.w3.org/2000/svg' => "svg"}, twig_handlers => { 'svg:circle' => sub { $_->set_att( r => 20) } }, pretty_print => 'indented', ) ->parse( '<doc xmlns:gr="http://www.w3.org/2000/svg"> <gr:circle cx="10" cy="90" r="10"/> </doc>' ) ->print; This will output: <doc xmlns:svg="http://www.w3.org/2000/svg"> <svg:circle cx="10" cy="90" r="20"/> </doc> =item keep_original_prefix When used with C<L<map_xmlns>> this option will make C<XML::Twig> use the original namespace prefixes when outputting a document. The mapped prefix will still be used for triggering handlers and in navigation and query methods. my $t= XML::Twig->new( map_xmlns => {'http://www.w3.org/2000/svg' => "svg"}, twig_handlers => { 'svg:circle' => sub { $_->set_att( r => 20) } }, keep_original_prefix => 1, pretty_print => 'indented', ) ->parse( '<doc xmlns:gr="http://www.w3.org/2000/svg"> <gr:circle cx="10" cy="90" r="10"/> </doc>' ) ->print; This will output: <doc xmlns:gr="http://www.w3.org/2000/svg"> <gr:circle cx="10" cy="90" r="20"/> </doc> =item original_uri ($prefix) called within a handler, this will return the uri bound to the namespace prefix in the original document. =item index ($arrayref or $hashref) This option creates lists of specific elements during the parsing of the XML. It takes a reference to either a list of triggering expressions or to a hash name => expression, and for each one generates the list of elements that match the expression. The list can be accessed through the C<L<index>> method. example: # using an array ref my $t= XML::Twig->new( index => [ 'div', 'table' ]) ->parsefile( "foo.xml"); my $divs= $t->index( 'div'); my $first_div= $divs->[0]; my $last_table= $t->index( table => -1); # using a hashref to name the indexes my $t= XML::Twig->new( index => { email => 'a[@href=~/^ \s*mailto:/]'}) ->parsefile( "foo.xml"); my $last_emails= $t->index( email => -1); Note that the index is not maintained after the parsing. If elements are deleted, renamed or otherwise hurt during processing, the index is NOT updated. (changing the id element OTOH will update the index) =item att_accessors <list of attribute names> creates methods that give direct access to attribute: my $t= XML::Twig->new( att_accessors => [ 'href', 'src']) ->parsefile( $file); my $first_href= $t->first_elt( 'img')->src; # same as ->att( 'src') $t->first_elt( 'img')->src( 'new_logo.png') # changes the attribute value =item elt_accessors creates methods that give direct access to the first child element (in scalar context) or the list of elements (in list context): the list of accessors to create can be given 1 2 different ways: in an array, or in a hash alias => expression my $t= XML::Twig->new( elt_accessors => [ 'head']) ->parsefile( $file); my $title_text= $t->root->head->field( 'title'); # same as $title_text= $t->root->first_child( 'head')->field( 'title'); my $t= XML::Twig->new( elt_accessors => { warnings => 'p[@class="warning"]', d2 => 'div[2]'}, ) ->parsefile( $file); my $body= $t->first_elt( 'body'); my @warnings= $body->warnings; # same as $body->children( 'p[@class="warning"]'); my $s2= $body->d2; # same as $body->first_child( 'div[2]') =item field_accessors creates methods that give direct access to the first child element text: my $t= XML::Twig->new( field_accessors => [ 'h1']) ->parsefile( $file); my $div_title_text= $t->first_elt( 'div')->title; # same as $title_text= $t->first_elt( 'div')->field( 'title'); =item use_tidy set this option to use HTML::Tidy instead of HTML::TreeBuilder to convert HTML to XML. HTML, especially real (real "crap") HTML found in the wild, so depending on the data, one module or the other does a better job at the conversion. Also, HTML::Tidy can be a bit difficult to install, so XML::Twig offers both option. TIMTOWTDI =item output_html_doctype when using HTML::TreeBuilder to convert HTML, this option causes the DOCTYPE declaration to be output, which may be important for some legacy browsers. Without that option the DOCTYPE definition is NOT output. Also if the definition is completely wrong (ie not easily parsable), it is not output either. =back B<Note>: I _HATE_ the Java-like name of arguments used by most XML modules. So in pure TIMTOWTDI fashion all arguments can be written either as C<UglyJavaLikeName> or as C<readable_perl_name>: C<twig_print_outside_roots> or C<TwigPrintOutsideRoots> (or even C<twigPrintOutsideRoots> {shudder}). XML::Twig normalizes them before processing them. =item parse ( $source) The C<$source> parameter should either be a string containing the whole XML document, or it should be an open C<IO::Handle> (aka a filehandle). A die call is thrown if a parse error occurs. Otherwise it will return the twig built by the parse. Use C<safe_parse> if you want the parsing to return even when an error occurs. If this method is called as a class method (C<< XML::Twig->parse( $some_xml_or_html) >>) then an XML::Twig object is created, using the parameters except the last one (eg C<< XML::Twig->parse( pretty_print => 'indented', $some_xml_or_html) >>) and C<L<xparse>> is called on it. Note that when parsing a filehandle, the handle should NOT be open with an encoding (ie open with C<open( my $in, '<', $filename)>. The file will be parsed by C<expat>, so specifying the encoding actually causes problems for the parser (as in: it can crash it, see https://rt.cpan.org/Ticket/Display.html?id=78877). For parsing a file it is actually recommended to use C<parsefile> on the file name, instead of <parse> on the open file. =item parsestring This is just an alias for C<parse> for backwards compatibility. =item parsefile (FILE [, OPT => OPT_VALUE [...]]) Open C<FILE> for reading, then call C<parse> with the open handle. The file is closed no matter how C<parse> returns. A C<die> call is thrown if a parse error occurs. Otherwise it will return the twig built by the parse. Use C<safe_parsefile> if you want the parsing to return even when an error occurs. =item parsefile_inplace ( $file, $optional_extension) Parse and update a file "in place". It does this by creating a temp file, selecting it as the default for print() statements (and methods), then parsing the input file. If the parsing is successful, then the temp file is moved to replace the input file. If an extension is given then the original file is backed-up (the rules for the extension are the same as the rule for the -i option in perl). =item parsefile_html_inplace ( $file, $optional_extension) Same as parsefile_inplace, except that it parses HTML instead of XML =item parseurl ($url $optional_user_agent) Gets the data from C<$url> and parse it. The data is piped to the parser in chunks the size of the XML::Parser::Expat buffer, so memory consumption and hopefully speed are optimal. For most (read "small") XML it is probably as efficient (and easier to debug) to just C<get> the XML file and then parse it as a string. use XML::Twig; use LWP::Simple; my $twig= XML::Twig->new(); $twig->parse( LWP::Simple::get( $URL )); or use XML::Twig; my $twig= XML::Twig->nparse( $URL); If the C<$optional_user_agent> argument is used then it is used, otherwise a new one is created. =item safe_parse ( SOURCE [, OPT => OPT_VALUE [...]]) This method is similar to C<parse> except that it wraps the parsing in an C<eval> block. It returns the twig on success and 0 on failure (the twig object also contains the parsed twig). C<$@> contains the error message on failure. Note that the parsing still stops as soon as an error is detected, there is no way to keep going after an error. =item safe_parsefile (FILE [, OPT => OPT_VALUE [...]]) This method is similar to C<parsefile> except that it wraps the parsing in an C<eval> block. It returns the twig on success and 0 on failure (the twig object also contains the parsed twig) . C<$@> contains the error message on failure Note that the parsing still stops as soon as an error is detected, there is no way to keep going after an error. =item safe_parseurl ($url $optional_user_agent) Same as C<parseurl> except that it wraps the parsing in an C<eval> block. It returns the twig on success and 0 on failure (the twig object also contains the parsed twig) . C<$@> contains the error message on failure =item parse_html ($string_or_fh) parse an HTML string or file handle (by converting it to XML using HTML::TreeBuilder, which needs to be available). This works nicely, but some information gets lost in the process: newlines are removed, and (at least on the version I use), comments get an extra CDATA section inside ( <!-- foo --> becomes <!-- <![CDATA[ foo ]]> --> =item parsefile_html ($file) parse an HTML file (by converting it to XML using HTML::TreeBuilder, which needs to be available, or HTML::Tidy if the C<use_tidy> option was used). The file is loaded completely in memory and converted to XML before being parsed. this method is to be used with caution though, as it doesn't know about the file encoding, it is usually better to use C<L<parse_html>>, which gives you a chance to open the file with the proper encoding layer. =item parseurl_html ($url $optional_user_agent) parse an URL as html the same way C<L<parse_html>> does =item safe_parseurl_html ($url $optional_user_agent) Same as C<L<parseurl_html>>> except that it wraps the parsing in an C<eval> block. It returns the twig on success and 0 on failure (the twig object also contains the parsed twig) . C<$@> contains the error message on failure =item safe_parsefile_html ($file $optional_user_agent) Same as C<L<parsefile_html>>> except that it wraps the parsing in an C<eval> block. It returns the twig on success and 0 on failure (the twig object also contains the parsed twig) . C<$@> contains the error message on failure =item safe_parse_html ($string_or_fh) Same as C<L<parse_html>> except that it wraps the parsing in an C<eval> block. It returns the twig on success and 0 on failure (the twig object also contains the parsed twig) . C<$@> contains the error message on failure =item xparse ($thing_to_parse) parse the C<$thing_to_parse>, whether it is a filehandle, a string, an HTML file, an HTML URL, an URL or a file. Note that this is mostly a convenience method for one-off scripts. For example files that end in '.htm' or '.html' are parsed first as XML, and if this fails as HTML. This is certainly not the most efficient way to do this in general. =item nparse ($optional_twig_options, $thing_to_parse) create a twig with the C<$optional_options>, and parse the C<$thing_to_parse>, whether it is a filehandle, a string, an HTML file, an HTML URL, an URL or a file. Examples: XML::Twig->nparse( "file.xml"); XML::Twig->nparse( error_context => 1, "file://file.xml"); =item nparse_pp ($optional_twig_options, $thing_to_parse) same as C<L<nparse>> but also sets the C<pretty_print> option to C<indented>. =item nparse_e ($optional_twig_options, $thing_to_parse) same as C<L<nparse>> but also sets the C<error_context> option to 1. =item nparse_ppe ($optional_twig_options, $thing_to_parse) same as C<L<nparse>> but also sets the C<pretty_print> option to C<indented> and the C<error_context> option to 1. =item parser This method returns the C<expat> object (actually the XML::Parser::Expat object) used during parsing. It is useful for example to call XML::Parser::Expat methods on it. To get the line of a tag for example use C<< $t->parser->current_line >>. =item setTwigHandlers ($handlers) Set the twig_handlers. C<$handlers> is a reference to a hash similar to the one in the C<twig_handlers> option of new. All previous handlers are unset. The method returns the reference to the previous handlers. =item setTwigHandler ($exp $handler) Set a single twig_handler for elements matching C<$exp>. C<$handler> is a reference to a subroutine. If the handler was previously set then the reference to the previous handler is returned. =item setStartTagHandlers ($handlers) Set the start_tag handlers. C<$handlers> is a reference to a hash similar to the one in the C<start_tag_handlers> option of new. All previous handlers are unset. The method returns the reference to the previous handlers. =item setStartTagHandler ($exp $handler) Set a single start_tag handlers for elements matching C<$exp>. C<$handler> is a reference to a subroutine. If the handler was previously set then the reference to the previous handler is returned. =item setEndTagHandlers ($handlers) Set the end_tag handlers. C<$handlers> is a reference to a hash similar to the one in the C<end_tag_handlers> option of new. All previous handlers are unset. The method returns the reference to the previous handlers. =item setEndTagHandler ($exp $handler) Set a single end_tag handlers for elements matching C<$exp>. C<$handler> is a reference to a subroutine. If the handler was previously set then the reference to the previous handler is returned. =item setTwigRoots ($handlers) Same as using the C<L<twig_roots>> option when creating the twig =item setCharHandler ($exp $handler) Set a C<char_handler> =item setIgnoreEltsHandler ($exp) Set a C<ignore_elt> handler (elements that match C<$exp> will be ignored =item setIgnoreEltsHandlers ($exp) Set all C<ignore_elt> handlers (previous handlers are replaced) =item dtd Return the dtd (an L<XML::Twig::DTD> object) of a twig =item xmldecl Return the XML declaration for the document, or a default one if it doesn't have one =item doctype Return the doctype for the document =item doctype_name returns the doctype of the document from the doctype declaration =item system_id returns the system value of the DTD of the document from the doctype declaration =item public_id returns the public doctype of the document from the doctype declaration =item internal_subset returns the internal subset of the DTD =item dtd_text Return the DTD text =item dtd_print Print the DTD =item model ($tag) Return the model (in the DTD) for the element C<$tag> =item root Return the root element of a twig =item set_root ($elt) Set the root of a twig =item first_elt ($optional_condition) Return the first element matching C<$optional_condition> of a twig, if no condition is given then the root is returned =item last_elt ($optional_condition) Return the last element matching C<$optional_condition> of a twig, if no condition is given then the last element of the twig is returned =item elt_id ($id) Return the element whose C<id> attribute is $id =item getEltById Same as C<L<elt_id>> =item index ($index_name, $optional_index) If the C<$optional_index> argument is present, return the corresponding element in the index (created using the C<index> option for C<XML::Twig->new>) If the argument is not present, return an arrayref to the index =item normalize merge together all consecutive pcdata elements in the document (if for example you have turned some elements into pcdata using C<L<erase>>, this will give you a "clean" document in which there all text elements are as long as possible). =item encoding This method returns the encoding of the XML document, as defined by the C<encoding> attribute in the XML declaration (ie it is C<undef> if the attribute is not defined) =item set_encoding This method sets the value of the C<encoding> attribute in the XML declaration. Note that if the document did not have a declaration it is generated (with an XML version of 1.0) =item xml_version This method returns the XML version, as defined by the C<version> attribute in the XML declaration (ie it is C<undef> if the attribute is not defined) =item set_xml_version This method sets the value of the C<version> attribute in the XML declaration. If the declaration did not exist it is created. =item standalone This method returns the value of the C<standalone> declaration for the document =item set_standalone This method sets the value of the C<standalone> attribute in the XML declaration. Note that if the document did not have a declaration it is generated (with an XML version of 1.0) =item set_output_encoding Set the C<encoding> "attribute" in the XML declaration =item set_doctype ($name, $system, $public, $internal) Set the doctype of the element. If an argument is C<undef> (or not present) then its former value is retained, if a false ('' or 0) value is passed then the former value is deleted; =item entity_list Return the entity list of a twig =item entity_names Return the list of all defined entities =item entity ($entity_name) Return the entity =item notation_list Return the notation list of a twig =item notation_names Return the list of all defined notations =item notation ($notation_name) Return the notation =item change_gi ($old_gi, $new_gi) Performs a (very fast) global change. All elements C<$old_gi> are now C<$new_gi>. This is a bit dangerous though and should be avoided if < possible, as the new tag might be ignored in subsequent processing. See C<L<BUGS> > =item flush ($optional_filehandle, %options) Flushes a twig up to (and including) the current element, then deletes all unnecessary elements from the tree that's kept in memory. C<flush> keeps track of which elements need to be open/closed, so if you flush from handlers you don't have to worry about anything. Just keep flushing the twig every time you're done with a sub-tree and it will come out well-formed. After the whole parsing don't forget toC<flush> one more time to print the end of the document. The doctype and entity declarations are also printed. flush take an optional filehandle as an argument. If you use C<flush> at any point during parsing, the document will be flushed one last time at the end of the parsing, to the proper filehandle. options: use the C<update_DTD> option if you have updated the (internal) DTD and/or the entity list and you want the updated DTD to be output The C<pretty_print> option sets the pretty printing of the document. Example: $t->flush( Update_DTD => 1); $t->flush( $filehandle, pretty_print => 'indented'); $t->flush( \*FILE); =item flush_up_to ($elt, $optional_filehandle, %options) Flushes up to the C<$elt> element. This allows you to keep part of the tree in memory when you C<flush>. options: see flush. =item purge Does the same as a C<flush> except it does not print the twig. It just deletes all elements that have been completely parsed so far. =item purge_up_to ($elt) Purges up to the C<$elt> element. This allows you to keep part of the tree in memory when you C<purge>. =item print ($optional_filehandle, %options) Prints the whole document associated with the twig. To be used only AFTER the parse. options: see C<flush>. =item print_to_file ($filename, %options) Prints the whole document associated with the twig to file C<$filename>. To be used only AFTER the parse. options: see C<flush>. =item safe_print_to_file ($filename, %options) Prints the whole document associated with the twig to file C<$filename>. This variant, which probably only works on *nix prints to a temp file, then move the temp file to overwrite the original file. This is a bit safer when 2 processes an potentiallywrite the same file: only the last one will succeed, but the file won't be corruted. I often use this for cron jobs, so testing the code doesn't interfere with the cron job running at the same time. options: see C<flush>. =item sprint Return the text of the whole document associated with the twig. To be used only AFTER the parse. options: see C<flush>. =item trim Trim the document: gets rid of initial and trailing spaces, and replaces multiple spaces by a single one. =item toSAX1 ($handler) Send SAX events for the twig to the SAX1 handler C<$handler> =item toSAX2 ($handler) Send SAX events for the twig to the SAX2 handler C<$handler> =item flush_toSAX1 ($handler) Same as flush, except that SAX events are sent to the SAX1 handler C<$handler> instead of the twig being printed =item flush_toSAX2 ($handler) Same as flush, except that SAX events are sent to the SAX2 handler C<$handler> instead of the twig being printed =item ignore This method should be called during parsing, usually in C<start_tag_handlers>. It causes the element to be skipped during the parsing: the twig is not built for this element, it will not be accessible during parsing or after it. The element will not take up any memory and parsing will be faster. Note that this method can also be called on an element. If the element is a parent of the current element then this element will be ignored (the twig will not be built any more for it and what has already been built will be deleted). =item set_pretty_print ($style) Set the pretty print method, amongst 'C<none>' (default), 'C<nsgmls>', 'C<nice>', 'C<indented>', C<indented_c>, 'C<wrapped>', 'C<record>' and 'C<record_c>' B<WARNING:> the pretty print style is a B<GLOBAL> variable, so once set it's applied to B<ALL> C<print>'s (and C<sprint>'s). Same goes if you use XML::Twig with C<mod_perl> . This should not be a problem as the XML that's generated is valid anyway, and XML processors (as well as HTML processors, including browsers) should not care. Let me know if this is a big problem, but at the moment the performance/cleanliness trade-off clearly favors the global approach. =item set_empty_tag_style ($style) Set the empty tag display style ('C<normal>', 'C<html>' or 'C<expand>'). As with C<L<set_pretty_print>> this sets a global flag. C<normal> outputs an empty tag 'C<< <tag/> >>', C<html> adds a space 'C<< <tag /> >>' for elements that can be empty in XHTML and C<expand> outputs 'C<< <tag></tag> >>' =item set_remove_cdata ($flag) set (or unset) the flag that forces the twig to output CDATA sections as regular (escaped) PCDATA =item print_prolog ($optional_filehandle, %options) Prints the prolog (XML declaration + DTD + entity declarations) of a document. options: see C<L<flush>>. =item prolog ($optional_filehandle, %options) Return the prolog (XML declaration + DTD + entity declarations) of a document. options: see C<L<flush>>. =item finish Call Expat C<finish> method. Unsets all handlers (including internal ones that set context), but expat continues parsing to the end of the document or until it finds an error. It should finish up a lot faster than with the handlers set. =item finish_print Stops twig processing, flush the twig and proceed to finish printing the document as fast as possible. Use this method when modifying a document and the modification is done. =item finish_now Stops twig processing, does not finish parsing the document (which could actually be not well-formed after the point where C<finish_now> is called). Execution resumes after the C<Lparse>> or C<L<parsefile>> call. The content of the twig is what has been parsed so far (all open elements at the time C<finish_now> is called are considered closed). =item set_expand_external_entities Same as using the C<L<expand_external_ents>> option when creating the twig =item set_input_filter Same as using the C<L<input_filter>> option when creating the twig =item set_keep_atts_order Same as using the C<L<keep_atts_order>> option when creating the twig =item set_keep_encoding Same as using the C<L<keep_encoding>> option when creating the twig =item escape_gt usually XML::Twig does not escape > in its output. Using this option makes it replace > by > =item do_not_escape_gt reverts XML::Twig behavior to its default of not escaping > in its output. =item set_output_filter Same as using the C<L<output_filter>> option when creating the twig =item set_output_text_filter Same as using the C<L<output_text_filter>> option when creating the twig =item add_stylesheet ($type, @options) Adds an external stylesheet to an XML document. Supported types and options: =over 4 =item xsl option: the url of the stylesheet Example: $t->add_stylesheet( xsl => "xsl_style.xsl"); will generate the following PI at the beginning of the document: <?xml-stylesheet type="text/xsl" href="xsl_style.xsl"?> =item css option: the url of the stylesheet =item active_twig a class method that returns the last processed twig, so you don't necessarily need the object to call methods on it. =back =item Methods inherited from XML::Parser::Expat A twig inherits all the relevant methods from XML::Parser::Expat. These methods can only be used during the parsing phase (they will generate a fatal error otherwise). Inherited methods are: =over 4 =item depth Returns the size of the context list. =item in_element Returns true if NAME is equal to the name of the innermost cur†rently opened element. If namespace processing is being used and you want to check against a name that may be in a namespace, then use the generate_ns_name method to create the NAME argument. =item within_element Returns the number of times the given name appears in the context list. If namespace processing is being used and you want to check against a name that may be in a namespace, then use the gener†ate_ns_name method to create the NAME argument. =item context Returns a list of element names that represent open elements, with the last one being the innermost. Inside start and end tag han†dlers, this will be the tag of the parent element. =item current_line Returns the line number of the current position of the parse. =item current_column Returns the column number of the current position of the parse. =item current_byte Returns the current position of the parse. =item position_in_context Returns a string that shows the current parse position. LINES should be an integer >= 0 that represents the number of lines on either side of the current parse line to place into the returned string. =item base ([NEWBASE]) Returns the current value of the base for resolving relative URIs. If NEWBASE is supplied, changes the base to that value. =item current_element Returns the name of the innermost currently opened element. Inside start or end handlers, returns the parent of the element associated with those tags. =item element_index Returns an integer that is the depth-first visit order of the cur†rent element. This will be zero outside of the root element. For example, this will return 1 when called from the start handler for the root element start tag. =item recognized_string Returns the string from the document that was recognized in order to call the current handler. For instance, when called from a start handler, it will give us the start-tag string. The string is encoded in UTF-8. This method doesn't return a meaningful string inside declaration handlers. =item original_string Returns the verbatim string from the document that was recognized in order to call the current handler. The string is in the original document encoding. This method doesn't return a meaningful string inside declaration handlers. =item xpcroak Concatenate onto the given message the current line number within the XML document plus the message implied by ErrorContext. Then croak with the formed message. =item xpcarp Concatenate onto the given message the current line number within the XML document plus the message implied by ErrorContext. Then carp with the formed message. =item xml_escape(TEXT [, CHAR [, CHAR ...]]) Returns TEXT with markup characters turned into character entities. Any additional characters provided as arguments are also turned into character references where found in TEXT. (this method is broken on some versions of expat/XML::Parser) =back =item path ( $optional_tag) Return the element context in a form similar to XPath's short form: 'C</root/tag1/../tag>' =item get_xpath ( $optional_array_ref, $xpath, $optional_offset) Performs a C<get_xpath> on the document root (see <Elt|"Elt">) If the C<$optional_array_ref> argument is used the array must contain elements. The C<$xpath> expression is applied to each element in turn and the result is union of all results. This way a first query can be refined in further steps. =item find_nodes ( $optional_array_ref, $xpath, $optional_offset) same as C<get_xpath> =item findnodes ( $optional_array_ref, $xpath, $optional_offset) same as C<get_xpath> (similar to the XML::LibXML method) =item findvalue ( $optional_array_ref, $xpath, $optional_offset) Return the C<join> of all texts of the results of applying C<L<get_xpath>> to the node (similar to the XML::LibXML method) =item findvalues ( $optional_array_ref, $xpath, $optional_offset) Return an array of all texts of the results of applying C<L<get_xpath>> to the node =item subs_text ($regexp, $replace) subs_text does text substitution on the whole document, similar to perl's C< s///> operator. =item dispose Useful only if you don't have C<Scalar::Util> or C<WeakRef> installed. Reclaims properly the memory used by an XML::Twig object. As the object has circular references it never goes out of scope, so if you want to parse lots of XML documents then the memory leak becomes a problem. Use C<< $twig->dispose >> to clear this problem. =item att_accessors (list_of_attribute_names) A convenience method that creates l-valued accessors for attributes. So C<< $twig->create_accessors( 'foo') >> will create a C<foo> method that can be called on elements: $elt->foo; # equivalent to $elt->att( 'foo'); $elt->foo( 'bar'); # equivalent to $elt->set_att( foo => 'bar'); The methods are l-valued only under those perl's that support this feature (5.6 and above) =item create_accessors (list_of_attribute_names) Same as att_accessors =item elt_accessors (list_of_attribute_names) A convenience method that creates accessors for elements. So C<< $twig->create_accessors( 'foo') >> will create a C<foo> method that can be called on elements: $elt->foo; # equivalent to $elt->first_child( 'foo'); =item field_accessors (list_of_attribute_names) A convenience method that creates accessors for element values (C<field>). So C<< $twig->create_accessors( 'foo') >> will create a C<foo> method that can be called on elements: $elt->foo; # equivalent to $elt->field( 'foo'); =item set_do_not_escape_amp_in_atts An evil method, that I only document because Test::Pod::Coverage complaints otherwise, but really, you don't want to know about it. =back =head2 XML::Twig::Elt =over 4 =item new ($optional_tag, $optional_atts, @optional_content) The C<tag> is optional (but then you can't have a content ), the C<$optional_atts> argument is a reference to a hash of attributes, the content can be just a string or a list of strings and element. A content of 'C<#EMPTY>' creates an empty element; Examples: my $elt= XML::Twig::Elt->new(); my $elt= XML::Twig::Elt->new( para => { align => 'center' }); my $elt= XML::Twig::Elt->new( para => { align => 'center' }, 'foo'); my $elt= XML::Twig::Elt->new( br => '#EMPTY'); my $elt= XML::Twig::Elt->new( 'para'); my $elt= XML::Twig::Elt->new( para => 'this is a para'); my $elt= XML::Twig::Elt->new( para => $elt3, 'another para'); The strings are not parsed, the element is not attached to any twig. B<WARNING>: if you rely on ID's then you will have to set the id yourself. At this point the element does not belong to a twig yet, so the ID attribute is not known so it won't be stored in the ID list. Note that C<#COMMENT>, C<#PCDATA> or C<#CDATA> are valid tag names, that will create text elements. To create an element C<foo> containing a CDATA section: my $foo= XML::Twig::Elt->new( '#CDATA' => "content of the CDATA section") ->wrap_in( 'foo'); An attribute of '#CDATA', will create the content of the element as CDATA: my $elt= XML::Twig::Elt->new( 'p' => { '#CDATA' => 1}, 'foo < bar'); creates an element <p><![CDATA[foo < bar]]></> =item parse ($string, %args) Creates an element from an XML string. The string is actually parsed as a new twig, then the root of that twig is returned. The arguments in C<%args> are passed to the twig. As always if the parse fails the parser will die, so use an eval if you want to trap syntax errors. As obviously the element does not exist beforehand this method has to be called on the class: my $elt= parse XML::Twig::Elt( "<a> string to parse, with <sub/> <elements>, actually tons of </elements> h</a>"); =item set_inner_xml ($string) Sets the content of the element to be the tree created from the string =item set_inner_html ($string) Sets the content of the element, after parsing the string with an HTML parser (HTML::Parser) =item set_outer_xml ($string) Replaces the element with the tree created from the string =item print ($optional_filehandle, $optional_pretty_print_style) Prints an entire element, including the tags, optionally to a C<$optional_filehandle>, optionally with a C<$pretty_print_style>. The print outputs XML data so base entities are escaped. =item print_to_file ($filename, %options) Prints the element to file C<$filename>. options: see C<flush>. =item sprint ($elt, $optional_no_enclosing_tag) Return the xml string for an entire element, including the tags. If the optional second argument is true then only the string inside the element is returned (the start and end tag for $elt are not). The text is XML-escaped: base entities (& and < in text, & < and " in attribute values) are turned into entities. =item gi Return the gi of the element (the gi is the C<generic identifier> the tag name in SGML parlance). C<tag> and C<name> are synonyms of C<gi>. =item tag Same as C<L<gi>> =item name Same as C<L<tag>> =item set_gi ($tag) Set the gi (tag) of an element =item set_tag ($tag) Set the tag (=C<L<tag>>) of an element =item set_name ($name) Set the name (=C<L<tag>>) of an element =item root Return the root of the twig in which the element is contained. =item twig Return the twig containing the element. =item parent ($optional_condition) Return the parent of the element, or the first ancestor matching the C<$optional_condition> =item first_child ($optional_condition) Return the first child of the element, or the first child matching the C<$optional_condition> =item has_child ($optional_condition) Return the first child of the element, or the first child matching the C<$optional_condition> (same as L<first_child>) =item has_children ($optional_condition) Return the first child of the element, or the first child matching the C<$optional_condition> (same as L<first_child>) =item first_child_text ($optional_condition) Return the text of the first child of the element, or the first child matching the C<$optional_condition> If there is no first_child then returns ''. This avoids getting the child, checking for its existence then getting the text for trivial cases. Similar methods are available for the other navigation methods: =over 4 =item last_child_text =item prev_sibling_text =item next_sibling_text =item prev_elt_text =item next_elt_text =item child_text =item parent_text =back All this methods also exist in "trimmed" variant: =over 4 =item first_child_trimmed_text =item last_child_trimmed_text =item prev_sibling_trimmed_text =item next_sibling_trimmed_text =item prev_elt_trimmed_text =item next_elt_trimmed_text =item child_trimmed_text =item parent_trimmed_text =back =item field ($condition) Same method as C<first_child_text> with a different name =item fields ($condition_list) Return the list of field (text of first child matching the conditions), missing fields are returned as the empty string. Same method as C<first_child_text> with a different name =item trimmed_field ($optional_condition) Same method as C<first_child_trimmed_text> with a different name =item set_field ($condition, $optional_atts, @list_of_elt_and_strings) Set the content of the first child of the element that matches C<$condition>, the rest of the arguments is the same as for C<L<set_content>> If no child matches C<$condition> _and_ if C<$condition> is a valid XML element name, then a new element by that name is created and inserted as the last child. =item first_child_matches ($optional_condition) Return the element if the first child of the element (if it exists) passes the C<$optional_condition> C<undef> otherwise if( $elt->first_child_matches( 'title')) ... is equivalent to if( $elt->first_child && $elt->first_child->passes( 'title')) C<first_child_is> is an other name for this method Similar methods are available for the other navigation methods: =over 4 =item last_child_matches =item prev_sibling_matches =item next_sibling_matches =item prev_elt_matches =item next_elt_matches =item child_matches =item parent_matches =back =item is_first_child ($optional_condition) returns true (the element) if the element is the first child of its parent (optionally that satisfies the C<$optional_condition>) =item is_last_child ($optional_condition) returns true (the element) if the element is the last child of its parent (optionally that satisfies the C<$optional_condition>) =item prev_sibling ($optional_condition) Return the previous sibling of the element, or the previous sibling matching C<$optional_condition> =item next_sibling ($optional_condition) Return the next sibling of the element, or the first one matching C<$optional_condition>. =item next_elt ($optional_elt, $optional_condition) Return the next elt (optionally matching C<$optional_condition>) of the element. This is defined as the next element which opens after the current element opens. Which usually means the first child of the element. Counter-intuitive as it might look this allows you to loop through the whole document by starting from the root. The C<$optional_elt> is the root of a subtree. When the C<next_elt> is out of the subtree then the method returns undef. You can then walk a sub-tree with: my $elt= $subtree_root; while( $elt= $elt->next_elt( $subtree_root)) { # insert processing code here } =item prev_elt ($optional_condition) Return the previous elt (optionally matching C<$optional_condition>) of the element. This is the first element which opens before the current one. It is usually either the last descendant of the previous sibling or simply the parent =item next_n_elt ($offset, $optional_condition) Return the C<$offset>-th element that matches the C<$optional_condition> =item following_elt Return the following element (as per the XPath following axis) =item preceding_elt Return the preceding element (as per the XPath preceding axis) =item following_elts Return the list of following elements (as per the XPath following axis) =item preceding_elts Return the list of preceding elements (as per the XPath preceding axis) =item children ($optional_condition) Return the list of children (optionally which matches C<$optional_condition>) of the element. The list is in document order. =item children_count ($optional_condition) Return the number of children of the element (optionally which matches C<$optional_condition>) =item children_text ($optional_condition) In array context, returns an array containing the text of children of the element (optionally which matches C<$optional_condition>) In scalar context, returns the concatenation of the text of children of the element =item children_trimmed_text ($optional_condition) In array context, returns an array containing the trimmed text of children of the element (optionally which matches C<$optional_condition>) In scalar context, returns the concatenation of the trimmed text of children of the element =item children_copy ($optional_condition) Return a list of elements that are copies of the children of the element, optionally which matches C<$optional_condition> =item descendants ($optional_condition) Return the list of all descendants (optionally which matches C<$optional_condition>) of the element. This is the equivalent of the C<getElementsByTagName> of the DOM (by the way, if you are really a DOM addict, you can use C<getElementsByTagName> instead) =item getElementsByTagName ($optional_condition) Same as C<L<descendants>> =item find_by_tag_name ($optional_condition) Same as C<L<descendants>> =item descendants_or_self ($optional_condition) Same as C<L<descendants>> except that the element itself is included in the list if it matches the C<$optional_condition> =item first_descendant ($optional_condition) Return the first descendant of the element that matches the condition =item last_descendant ($optional_condition) Return the last descendant of the element that matches the condition =item ancestors ($optional_condition) Return the list of ancestors (optionally matching C<$optional_condition>) of the element. The list is ordered from the innermost ancestor to the outermost one NOTE: the element itself is not part of the list, in order to include it you will have to use ancestors_or_self =item ancestors_or_self ($optional_condition) Return the list of ancestors (optionally matching C<$optional_condition>) of the element, including the element (if it matches the condition>). The list is ordered from the innermost ancestor to the outermost one =item passes ($condition) Return the element if it passes the C<$condition> =item att ($att) Return the value of attribute C<$att> or C<undef> =item latt ($att) Return the value of attribute C<$att> or C<undef> this method is an lvalue, so you can do C<< $elt->latt( 'foo')= 'bar' >> or C<< $elt->latt( 'foo')++; >> =item set_att ($att, $att_value) Set the attribute of the element to the given value You can actually set several attributes this way: $elt->set_att( att1 => "val1", att2 => "val2"); =item del_att ($att) Delete the attribute for the element You can actually delete several attributes at once: $elt->del_att( 'att1', 'att2', 'att3'); =item att_exists ($att) Returns true if the attribute C<$att> exists for the element, false otherwise =item cut Cut the element from the tree. The element still exists, it can be copied or pasted somewhere else, it is just not attached to the tree anymore. Note that the "old" links to the parent, previous and next siblings can still be accessed using the former_* methods =item former_next_sibling Returns the former next sibling of a cut node (or undef if the node has not been cut) This makes it easier to write loops where you cut elements: my $child= $parent->first_child( 'achild'); while( $child->att( 'cut')) { $child->cut; $child= $child->former_next_sibling; } =item former_prev_sibling Returns the former previous sibling of a cut node (or undef if the node has not been cut) =item former_parent Returns the former parent of a cut node (or undef if the node has not been cut) =item cut_children ($optional_condition) Cut all the children of the element (or all of those which satisfy the C<$optional_condition>). Return the list of children =item cut_descendants ($optional_condition) Cut all the descendants of the element (or all of those which satisfy the C<$optional_condition>). Return the list of descendants =item copy ($elt) Return a copy of the element. The copy is a "deep" copy: all sub-elements of the element are duplicated. =item paste ($optional_position, $ref) Paste a (previously C<cut> or newly generated) element. Die if the element already belongs to a tree. Note that the calling element is pasted: $child->paste( first_child => $existing_parent); $new_sibling->paste( after => $this_sibling_is_already_in_the_tree); or my $new_elt= XML::Twig::Elt->new( tag => $content); $new_elt->paste( $position => $existing_elt); Example: my $t= XML::Twig->new->parse( 'doc.xml') my $toc= $t->root->new( 'toc'); $toc->paste( $t->root); # $toc is pasted as first child of the root foreach my $title ($t->findnodes( '/doc/section/title')) { my $title_toc= $title->copy; # paste $title_toc as the last child of toc $title_toc->paste( last_child => $toc) } Position options: =over 4 =item first_child (default) The element is pasted as the first child of C<$ref> =item last_child The element is pasted as the last child of C<$ref> =item before The element is pasted before C<$ref>, as its previous sibling. =item after The element is pasted after C<$ref>, as its next sibling. =item within In this case an extra argument, C<$offset>, should be supplied. The element will be pasted in the reference element (or in its first text child) at the given offset. To achieve this the reference element will be split at the offset. =back Note that you can call directly the underlying method: =over 4 =item paste_before =item paste_after =item paste_first_child =item paste_last_child =item paste_within =back =item move ($optional_position, $ref) Move an element in the tree. This is just a C<cut> then a C<paste>. The syntax is the same as C<paste>. =item replace ($ref) Replaces an element in the tree. Sometimes it is just not possible toC<cut> an element then C<paste> another in its place, so C<replace> comes in handy. The calling element replaces C<$ref>. =item replace_with (@elts) Replaces the calling element with one or more elements =item delete Cut the element and frees the memory. =item prefix ($text, $optional_option) Add a prefix to an element. If the element is a C<PCDATA> element the text is added to the pcdata, if the elements first child is a C<PCDATA> then the text is added to it's pcdata, otherwise a new C<PCDATA> element is created and pasted as the first child of the element. If the option is C<asis> then the prefix is added asis: it is created in a separate C<PCDATA> element with an C<asis> property. You can then write: $elt1->prefix( '<b>', 'asis'); to create a C<< <b> >> in the output of C<print>. =item suffix ($text, $optional_option) Add a suffix to an element. If the element is a C<PCDATA> element the text is added to the pcdata, if the elements last child is a C<PCDATA> then the text is added to it's pcdata, otherwise a new PCDATA element is created and pasted as the last child of the element. If the option is C<asis> then the suffix is added asis: it is created in a separate C<PCDATA> element with an C<asis> property. You can then write: $elt2->suffix( '</b>', 'asis'); =item trim Trim the element in-place: spaces at the beginning and at the end of the element are discarded and multiple spaces within the element (or its descendants) are replaced by a single space. Note that in some cases you can still end up with multiple spaces, if they are split between several elements: <doc> text <b> hah! </b> yep</doc> gets trimmed to <doc>text <b> hah! </b> yep</doc> This is somewhere in between a bug and a feature. =item normalize merge together all consecutive pcdata elements in the element (if for example you have turned some elements into pcdata using C<L<erase>>, this will give you a "clean" element in which there all text fragments are as long as possible). =item simplify (%options) Return a data structure suspiciously similar to XML::Simple's. Options are identical to XMLin options, see XML::Simple doc for more details (or use DATA::dumper or YAML to dump the data structure) B<Note>: there is no magic here, if you write C<< $twig->parsefile( $file )->simplify(); >> then it will load the entire document in memory. I am afraid you will have to put some work into it to get just the bits you want and discard the rest. Look at the synopsis or the XML::Twig 101 section at the top of the docs for more information. =over 4 =item content_key =item forcearray =item keyattr =item noattr =item normalize_space aka normalise_space =item variables (%var_hash) %var_hash is a hash { name => value } This option allows variables in the XML to be expanded when the file is read. (there is no facility for putting the variable names back if you regenerate XML using XMLout). A 'variable' is any text of the form ${name} (or $name) which occurs in an attribute value or in the text content of an element. If 'name' matches a key in the supplied hashref, ${name} will be replaced with the corresponding value from the hashref. If no matching key is found, the variable will not be replaced. =item var_att ($attribute_name) This option gives the name of an attribute that will be used to create variables in the XML: <dirs> <dir name="prefix">/usr/local</dir> <dir name="exec_prefix">$prefix/bin</dir> </dirs> use C<< var => 'name' >> to get $prefix replaced by /usr/local in the generated data structure By default variables are captured by the following regexp: /$(\w+)/ =item var_regexp (regexp) This option changes the regexp used to capture variables. The variable name should be in $1 =item group_tags { grouping tag => grouped tag, grouping tag 2 => grouped tag 2...} Option used to simplify the structure: elements listed will not be used. Their children will be, they will be considered children of the element parent. If the element is: <config host="laptop.xmltwig.org"> <server>localhost</server> <dirs> <dir name="base">/home/mrodrigu/standards</dir> <dir name="tools">$base/tools</dir> </dirs> <templates> <template name="std_def">std_def.templ</template> <template name="dummy">dummy</template> </templates> </config> Then calling simplify with C<< group_tags => { dirs => 'dir', templates => 'template'} >> makes the data structure be exactly as if the start and end tags for C<dirs> and C<templates> were not there. A YAML dump of the structure base: '/home/mrodrigu/standards' host: laptop.xmltwig.org server: localhost template: - std_def.templ - dummy.templ tools: '$base/tools' =back =item split_at ($offset) Split a text (C<PCDATA> or C<CDATA>) element in 2 at C<$offset>, the original element now holds the first part of the string and a new element holds the right part. The new element is returned If the element is not a text element then the first text child of the element is split =item split ( $optional_regexp, $tag1, $atts1, $tag2, $atts2...) Split the text descendants of an element in place, the text is split using the C<$regexp>, if the regexp includes () then the matched separators will be wrapped in elements. C<$1> is wrapped in $tag1, with attributes C<$atts1> if C<$atts1> is given (as a hashref), C<$2> is wrapped in $tag2... if $elt is C<< <p>tati tata <b>tutu tati titi</b> tata tati tata</p> >> $elt->split( qr/(ta)ti/, 'foo', {type => 'toto'} ) will change $elt to <p><foo type="toto">ta</foo> tata <b>tutu <foo type="toto">ta</foo> titi</b> tata <foo type="toto">ta</foo> tata</p> The regexp can be passed either as a string or as C<qr//> (perl 5.005 and later), it defaults to \s+ just as the C<split> built-in (but this would be quite a useless behaviour without the C<$optional_tag> parameter) C<$optional_tag> defaults to PCDATA or CDATA, depending on the initial element type The list of descendants is returned (including un-touched original elements and newly created ones) =item mark ( $regexp, $optional_tag, $optional_attribute_ref) This method behaves exactly as L<split>, except only the newly created elements are returned =item wrap_children ( $regexp_string, $tag, $optional_attribute_hashref) Wrap the children of the element that match the regexp in an element C<$tag>. If $optional_attribute_hashref is passed then the new element will have these attributes. The $regexp_string includes tags, within pointy brackets, as in C<< <title><para>+ >> and the usual Perl modifiers (+*?...). Tags can be further qualified with attributes: C<< <para type="warning" classif="cosmic_secret">+ >>. The values for attributes should be xml-escaped: C<< <candy type="M&Ms">* >> (C<E<lt>>, C<&> B<C<E<gt>>> and C<"> should be escaped). Note that elements might get extra C<id> attributes in the process. See L<add_id>. Use L<strip_att> to remove unwanted id's. Here is an example: If the element C<$elt> has the following content: <elt> <p>para 1</p> <l_l1_1>list 1 item 1 para 1</l_l1_1> <l_l1>list 1 item 1 para 2</l_l1> <l_l1_n>list 1 item 2 para 1 (only para)</l_l1_n> <l_l1_n>list 1 item 3 para 1</l_l1_n> <l_l1>list 1 item 3 para 2</l_l1> <l_l1>list 1 item 3 para 3</l_l1> <l_l1_1>list 2 item 1 para 1</l_l1_1> <l_l1>list 2 item 1 para 2</l_l1> <l_l1_n>list 2 item 2 para 1 (only para)</l_l1_n> <l_l1_n>list 2 item 3 para 1</l_l1_n> <l_l1>list 2 item 3 para 2</l_l1> <l_l1>list 2 item 3 para 3</l_l1> </elt> Then the code $elt->wrap_children( q{<l_l1_1><l_l1>*} , li => { type => "ul1" }); $elt->wrap_children( q{<l_l1_n><l_l1>*} , li => { type => "ul" }); $elt->wrap_children( q{<li type="ul1"><li type="ul">+}, "ul"); $elt->strip_att( 'id'); $elt->strip_att( 'type'); $elt->print; will output: <elt> <p>para 1</p> <ul> <li> <l_l1_1>list 1 item 1 para 1</l_l1_1> <l_l1>list 1 item 1 para 2</l_l1> </li> <li> <l_l1_n>list 1 item 2 para 1 (only para)</l_l1_n> </li> <li> <l_l1_n>list 1 item 3 para 1</l_l1_n> <l_l1>list 1 item 3 para 2</l_l1> <l_l1>list 1 item 3 para 3</l_l1> </li> </ul> <ul> <li> <l_l1_1>list 2 item 1 para 1</l_l1_1> <l_l1>list 2 item 1 para 2</l_l1> </li> <li> <l_l1_n>list 2 item 2 para 1 (only para)</l_l1_n> </li> <li> <l_l1_n>list 2 item 3 para 1</l_l1_n> <l_l1>list 2 item 3 para 2</l_l1> <l_l1>list 2 item 3 para 3</l_l1> </li> </ul> </elt> =item subs_text ($regexp, $replace) subs_text does text substitution, similar to perl's C< s///> operator. C<$regexp> must be a perl regexp, created with the C<qr> operator. C<$replace> can include C<$1, $2>... from the C<$regexp>. It can also be used to create element and entities, by using C<< &elt( tag => { att => val }, text) >> (similar syntax as C<L<new>>) and C<< &ent( name) >>. Here is a rather complex example: $elt->subs_text( qr{(?<!do not )link to (http://([^\s,]*))}, 'see &elt( a =>{ href => $1 }, $2)' ); This will replace text like I<link to http://www.xmltwig.org> by I<< see <a href="www.xmltwig.org">www.xmltwig.org</a> >>, but not I<do not link to...> Generating entities (here replacing spaces with  ): $elt->subs_text( qr{ }, '&ent( " ")'); or, using a variable: my $ent=" "; $elt->subs_text( qr{ }, "&ent( '$ent')"); Note that the substitution is always global, as in using the C<g> modifier in a perl substitution, and that it is performed on all text descendants of the element. B<Bug>: in the C<$regexp>, you can only use C<\1>, C<\2>... if the replacement expression does not include elements or attributes. eg $t->subs_text( qr/((t[aiou])\2)/, '$2'); # ok, replaces toto, tata, titi, tutu by to, ta, ti, tu $t->subs_text( qr/((t[aiou])\2)/, '&elt(p => $1)' ); # NOK, does not find toto... =item add_id ($optional_coderef) Add an id to the element. The id is an attribute, C<id> by default, see the C<id> option for XML::Twig C<new> to change it. Use an id starting with C<#> to get an id that's not output by L<print>, L<flush> or L<sprint>, yet that allows you to use the L<elt_id> method to get the element easily. If the element already has an id, no new id is generated. By default the method create an id of the form C<< twig_id_<nnnn> >>, where C<< <nnnn> >> is a number, incremented each time the method is called successfully. =item set_id_seed ($prefix) by default the id generated by C<L<add_id>> is C<< twig_id_<nnnn> >>, C<set_id_seed> changes the prefix to C<$prefix> and resets the number to 1 =item strip_att ($att) Remove the attribute C<$att> from all descendants of the element (including the element) Return the element =item change_att_name ($old_name, $new_name) Change the name of the attribute from C<$old_name> to C<$new_name>. If there is no attribute C<$old_name> nothing happens. =item lc_attnames Lower cases the name all the attributes of the element. =item sort_children_on_value( %options) Sort the children of the element in place according to their text. All children are sorted. Return the element, with its children sorted. C<%options> are type : numeric | alpha (default: alpha) order : normal | reverse (default: normal) Return the element, with its children sorted =item sort_children_on_att ($att, %options) Sort the children of the element in place according to attribute C<$att>. C<%options> are the same as for C<sort_children_on_value> Return the element. =item sort_children_on_field ($tag, %options) Sort the children of the element in place, according to the field C<$tag> (the text of the first child of the child with this tag). C<%options> are the same as for C<sort_children_on_value>. Return the element, with its children sorted =item sort_children( $get_key, %options) Sort the children of the element in place. The C<$get_key> argument is a reference to a function that returns the sort key when passed an element. For example: $elt->sort_children( sub { $_[0]->att( "nb") + $_[0]->text }, type => 'numeric', order => 'reverse' ); =item field_to_att ($cond, $att) Turn the text of the first sub-element matched by C<$cond> into the value of attribute C<$att> of the element. If C<$att> is omitted then C<$cond> is used as the name of the attribute, which makes sense only if C<$cond> is a valid element (and attribute) name. The sub-element is then cut. =item att_to_field ($att, $tag) Take the value of attribute C<$att> and create a sub-element C<$tag> as first child of the element. If C<$tag> is omitted then C<$att> is used as the name of the sub-element. =item get_xpath ($xpath, $optional_offset) Return a list of elements satisfying the C<$xpath>. C<$xpath> is an XPATH-like expression. A subset of the XPATH abbreviated syntax is covered: tag tag[1] (or any other positive number) tag[last()] tag[@att] (the attribute exists for the element) tag[@att="val"] tag[@att=~ /regexp/] tag[att1="val1" and att2="val2"] tag[att1="val1" or att2="val2"] tag[string()="toto"] (returns tag elements which text (as per the text method) is toto) tag[string()=~/regexp/] (returns tag elements which text (as per the text method) matches regexp) expressions can start with / (search starts at the document root) expressions can start with . (search starts at the current element) // can be used to get all descendants instead of just direct children * matches any tag So the following examples from the F<XPath recommendationL<http://www.w3.org/TR/xpath.html#path-abbrev>> work: para selects the para element children of the context node * selects all element children of the context node para[1] selects the first para child of the context node para[last()] selects the last para child of the context node */para selects all para grandchildren of the context node /doc/chapter[5]/section[2] selects the second section of the fifth chapter of the doc chapter//para selects the para element descendants of the chapter element children of the context node //para selects all the para descendants of the document root and thus selects all para elements in the same document as the context node //olist/item selects all the item elements in the same document as the context node that have an olist parent .//para selects the para element descendants of the context node .. selects the parent of the context node para[@type="warning"] selects all para children of the context node that have a type attribute with value warning employee[@secretary and @assistant] selects all the employee children of the context node that have both a secretary attribute and an assistant attribute The elements will be returned in the document order. If C<$optional_offset> is used then only one element will be returned, the one with the appropriate offset in the list, starting at 0 Quoting and interpolating variables can be a pain when the Perl syntax and the XPATH syntax collide, so use alternate quoting mechanisms like q or qq (I like q{} and qq{} myself). Here are some more examples to get you started: my $p1= "p1"; my $p2= "p2"; my @res= $t->get_xpath( qq{p[string( "$p1") or string( "$p2")]}); my $a= "a1"; my @res= $t->get_xpath( qq{//*[@att="$a"]}); my $val= "a1"; my $exp= qq{//p[ \@att='$val']}; # you need to use \@ or you will get a warning my @res= $t->get_xpath( $exp); Note that the only supported regexps delimiters are / and that you must backslash all / in regexps AND in regular strings. XML::Twig does not provide natively full XPATH support, but you can use C<L<XML::Twig::XPath>> to get C<findnodes> to use C<XML::XPath> as the XPath engine, with full coverage of the spec. C<L<XML::Twig::XPath>> to get C<findnodes> to use C<XML::XPath> as the XPath engine, with full coverage of the spec. =item find_nodes same asC<get_xpath> =item findnodes same as C<get_xpath> =item text @optional_options Return a string consisting of all the C<PCDATA> and C<CDATA> in an element, without any tags. The text is not XML-escaped: base entities such as C<&> and C<< < >> are not escaped. The 'C<no_recurse>' option will only return the text of the element, not of any included sub-elements (same as C<L<text_only>>). =item text_only Same as C<L<text>> except that the text returned doesn't include the text of sub-elements. =item trimmed_text Same as C<text> except that the text is trimmed: leading and trailing spaces are discarded, consecutive spaces are collapsed =item set_text ($string) Set the text for the element: if the element is a C<PCDATA>, just set its text, otherwise cut all the children of the element and create a single C<PCDATA> child for it, which holds the text. =item merge ($elt2) Move the content of C<$elt2> within the element =item insert ($tag1, [$optional_atts1], $tag2, [$optional_atts2],...) For each tag in the list inserts an element C<$tag> as the only child of the element. The element gets the optional attributes inC<< $optional_atts<n>. >> All children of the element are set as children of the new element. The upper level element is returned. $p->insert( table => { border=> 1}, 'tr', 'td') put C<$p> in a table with a visible border, a single C<tr> and a single C<td> and return the C<table> element: <p><table border="1"><tr><td>original content of p</td></tr></table></p> =item wrap_in (@tag) Wrap elements in C<@tag> as the successive ancestors of the element, returns the new element. C<< $elt->wrap_in( 'td', 'tr', 'table') >> wraps the element as a single cell in a table for example. Optionally each tag can be followed by a hashref of attributes, that will be set on the wrapping element: $elt->wrap_in( p => { class => "advisory" }, div => { class => "intro", id => "div_intro" }); =item insert_new_elt ($opt_position, $tag, $opt_atts_hashref, @opt_content) Combines a C<L<new> > and a C<L<paste> >: creates a new element using C<$tag>, C<$opt_atts_hashref >and C<@opt_content> which are arguments similar to those for C<new>, then paste it, using C<$opt_position> or C<'first_child'>, relative to C<$elt>. Return the newly created element =item erase Erase the element: the element is deleted and all of its children are pasted in its place. =item set_content ( $optional_atts, @list_of_elt_and_strings) ( $optional_atts, '#EMPTY') Set the content for the element, from a list of strings and elements. Cuts all the element children, then pastes the list elements as the children. This method will create a C<PCDATA> element for any strings in the list. The C<$optional_atts> argument is the ref of a hash of attributes. If this argument is used then the previous attributes are deleted, otherwise they are left untouched. B<WARNING>: if you rely on ID's then you will have to set the id yourself. At this point the element does not belong to a twig yet, so the ID attribute is not known so it won't be stored in the ID list. A content of 'C<#EMPTY>' creates an empty element; =item namespace ($optional_prefix) Return the URI of the namespace that C<$optional_prefix> or the element name belongs to. If the name doesn't belong to any namespace, C<undef> is returned. =item local_name Return the local name (without the prefix) for the element =item ns_prefix Return the namespace prefix for the element =item current_ns_prefixes Return a list of namespace prefixes valid for the element. The order of the prefixes in the list has no meaning. If the default namespace is currently bound, '' appears in the list. =item inherit_att ($att, @optional_tag_list) Return the value of an attribute inherited from parent tags. The value returned is found by looking for the attribute in the element then in turn in each of its ancestors. If the C<@optional_tag_list> is supplied only those ancestors whose tag is in the list will be checked. =item all_children_are ($optional_condition) return 1 if all children of the element pass the C<$optional_condition>, 0 otherwise =item level ($optional_condition) Return the depth of the element in the twig (root is 0). If C<$optional_condition> is given then only ancestors that match the condition are counted. B<WARNING>: in a tree created using the C<twig_roots> option this will not return the level in the document tree, level 0 will be the document root, level 1 will be the C<twig_roots> elements. During the parsing (in a C<twig_handler>) you can use the C<depth> method on the twig object to get the real parsing depth. =item in ($potential_parent) Return true if the element is in the potential_parent (C<$potential_parent> is an element) =item in_context ($cond, $optional_level) Return true if the element is included in an element which passes C<$cond> optionally within C<$optional_level> levels. The returned value is the including element. =item pcdata Return the text of a C<PCDATA> element or C<undef> if the element is not C<PCDATA>. =item pcdata_xml_string Return the text of a C<PCDATA> element or undef if the element is not C<PCDATA>. The text is "XML-escaped" ('&' and '<' are replaced by '&' and '<') =item set_pcdata ($text) Set the text of a C<PCDATA> element. This method does not check that the element is indeed a C<PCDATA> so usually you should use C<L<set_text>> instead. =item append_pcdata ($text) Add the text at the end of a C<PCDATA> element. =item is_cdata Return 1 if the element is a C<CDATA> element, returns 0 otherwise. =item is_text Return 1 if the element is a C<CDATA> or C<PCDATA> element, returns 0 otherwise. =item cdata Return the text of a C<CDATA> element or C<undef> if the element is not C<CDATA>. =item cdata_string Return the XML string of a C<CDATA> element, including the opening and closing markers. =item set_cdata ($text) Set the text of a C<CDATA> element. =item append_cdata ($text) Add the text at the end of a C<CDATA> element. =item remove_cdata Turns all C<CDATA> sections in the element into regular C<PCDATA> elements. This is useful when converting XML to HTML, as browsers do not support CDATA sections. =item extra_data Return the extra_data (comments and PI's) attached to an element =item set_extra_data ($extra_data) Set the extra_data (comments and PI's) attached to an element =item append_extra_data ($extra_data) Append extra_data to the existing extra_data before the element (if no previous extra_data exists then it is created) =item set_asis Set a property of the element that causes it to be output without being XML escaped by the print functions: if it contains C<< a < b >> it will be output as such and not as C<< a < b >>. This can be useful to create text elements that will be output as markup. Note that all C<PCDATA> descendants of the element are also marked as having the property (they are the ones that are actually impacted by the change). If the element is a C<CDATA> element it will also be output asis, without the C<CDATA> markers. The same goes for any C<CDATA> descendant of the element =item set_not_asis Unsets the C<asis> property for the element and its text descendants. =item is_asis Return the C<asis> property status of the element ( 1 or C<undef>) =item closed Return true if the element has been closed. Might be useful if you are somewhere in the tree, during the parse, and have no idea whether a parent element is completely loaded or not. =item get_type Return the type of the element: 'C<#ELT>' for "real" elements, or 'C<#PCDATA>', 'C<#CDATA>', 'C<#COMMENT>', 'C<#ENT>', 'C<#PI>' =item is_elt Return the tag if the element is a "real" element, or 0 if it is C<PCDATA>, C<CDATA>... =item contains_only_text Return 1 if the element does not contain any other "real" element =item contains_only ($exp) Return the list of children if all children of the element match the expression C<$exp> if( $para->contains_only( 'tt')) { ... } =item contains_a_single ($exp) If the element contains a single child that matches the expression C<$exp> returns that element. Otherwise returns 0. =item is_field same as C<contains_only_text> =item is_pcdata Return 1 if the element is a C<PCDATA> element, returns 0 otherwise. =item is_ent Return 1 if the element is an entity (an unexpanded entity) element, return 0 otherwise. =item is_empty Return 1 if the element is empty, 0 otherwise =item set_empty Flags the element as empty. No further check is made, so if the element is actually not empty the output will be messed. The only effect of this method is that the output will be C<< <tag att="value""/> >>. =item set_not_empty Flags the element as not empty. if it is actually empty then the element will be output as C<< <tag att="value""></tag> >> =item is_pi Return 1 if the element is a processing instruction (C<#PI>) element, return 0 otherwise. =item target Return the target of a processing instruction =item set_target ($target) Set the target of a processing instruction =item data Return the data part of a processing instruction =item set_data ($data) Set the data of a processing instruction =item set_pi ($target, $data) Set the target and data of a processing instruction =item pi_string Return the string form of a processing instruction (C<< <?target data?> >>) =item is_comment Return 1 if the element is a comment (C<#COMMENT>) element, return 0 otherwise. =item set_comment ($comment_text) Set the text for a comment =item comment Return the content of a comment (just the text, not the C<< <!-- >> and C<< --> >>) =item comment_string Return the XML string for a comment (C<< <!-- comment --> >>) Note that an XML comment cannot start or end with a '-', or include '--' (http://www.w3.org/TR/2008/REC-xml-20081126/#sec-comments), if that is the case (because you have created the comment yourself presumably, as it could not be in the input XML), then a space will be inserted before an initial '-', after a trailing one or between two '-' in the comment (which could presumably mangle javascript "hidden" in an XHTML comment); =item set_ent ($entity) Set an (non-expanded) entity (C<#ENT>). C<$entity>) is the entity text (C<&ent;>) =item ent Return the entity for an entity (C<#ENT>) element (C<&ent;>) =item ent_name Return the entity name for an entity (C<#ENT>) element (C<ent>) =item ent_string Return the entity, either expanded if the expanded version is available, or non-expanded (C<&ent;>) otherwise =item child ($offset, $optional_condition) Return the C<$offset>-th child of the element, optionally the C<$offset>-th child that matches C<$optional_condition>. The children are treated as a list, so C<< $elt->child( 0) >> is the first child, while C<< $elt->child( -1) >> is the last child. =item child_text ($offset, $optional_condition) Return the text of a child or C<undef> if the sibling does not exist. Arguments are the same as child. =item last_child ($optional_condition) Return the last child of the element, or the last child matching C<$optional_condition> (ie the last of the element children matching the condition). =item last_child_text ($optional_condition) Same as C<first_child_text> but for the last child. =item sibling ($offset, $optional_condition) Return the next or previous C<$offset>-th sibling of the element, or the C<$offset>-th one matching C<$optional_condition>. If C<$offset> is negative then a previous sibling is returned, if $offset is positive then a next sibling is returned. C<$offset=0> returns the element if there is no condition or if the element matches the condition>, C<undef> otherwise. =item sibling_text ($offset, $optional_condition) Return the text of a sibling or C<undef> if the sibling does not exist. Arguments are the same as C<sibling>. =item prev_siblings ($optional_condition) Return the list of previous siblings (optionally matching C<$optional_condition>) for the element. The elements are ordered in document order. =item next_siblings ($optional_condition) Return the list of siblings (optionally matching C<$optional_condition>) following the element. The elements are ordered in document order. =item siblings ($optional_condition) Return the list of siblings (optionally matching C<$optional_condition>) of the element (excluding the element itself). The elements are ordered in document order. =item pos ($optional_condition) Return the position of the element in the children list. The first child has a position of 1 (as in XPath). If the C<$optional_condition> is given then only siblings that match the condition are counted. If the element itself does not match the condition then 0 is returned. =item atts Return a hash ref containing the element attributes =item set_atts ({ att1=>$att1_val, att2=> $att2_val... }) Set the element attributes with the hash ref supplied as the argument. The previous attributes are lost (ie the attributes set by C<set_atts> replace all of the attributes of the element). You can also pass a list instead of a hashref: C<< $elt->set_atts( att1 => 'val1',...) >> =item del_atts Deletes all the element attributes. =item att_nb Return the number of attributes for the element =item has_atts Return true if the element has attributes (in fact return the number of attributes, thus being an alias to C<L<att_nb>> =item has_no_atts Return true if the element has no attributes, false (0) otherwise =item att_names return a list of the attribute names for the element =item att_xml_string ($att, $options) Return the attribute value, where '&', '<' and quote (" or the value of the quote option at twig creation) are XML-escaped. The options are passed as a hashref, setting C<escape_gt> to a true value will also escape '>' ($elt( 'myatt', { escape_gt => 1 }); =item set_id ($id) Set the C<id> attribute of the element to the value. See C<L<elt_id> > to change the id attribute name =item id Gets the id attribute value =item del_id ($id) Deletes the C<id> attribute of the element and remove it from the id list for the document =item class Return the C<class> attribute for the element (methods on the C<class> attribute are quite convenient when dealing with XHTML, or plain XML that will eventually be displayed using CSS) =item lclass same as class, except that this method is an lvalue, so you can do C<< $elt->lclass= "foo" >> =item set_class ($class) Set the C<class> attribute for the element to C<$class> =item add_class ($class) Add C<$class> to the element C<class> attribute: the new class is added only if it is not already present. Note that classes are then sorted alphabetically, so the C<class> attribute can be changed even if the class is already there =item remove_class ($class) Remove C<$class> from the element C<class> attribute. Note that classes are then sorted alphabetically, so the C<class> attribute can be changed even if the class is already there =item add_to_class ($class) alias for add_class =item att_to_class ($att) Set the C<class> attribute to the value of attribute C<$att> =item add_att_to_class ($att) Add the value of attribute C<$att> to the C<class> attribute of the element =item move_att_to_class ($att) Add the value of attribute C<$att> to the C<class> attribute of the element and delete the attribute =item tag_to_class Set the C<class> attribute of the element to the element tag =item add_tag_to_class Add the element tag to its C<class> attribute =item set_tag_class ($new_tag) Add the element tag to its C<class> attribute and sets the tag to C<$new_tag> =item in_class ($class) Return true (C<1>) if the element is in the class C<$class> (if C<$class> is one of the tokens in the element C<class> attribute) =item tag_to_span Change the element tag tp C<span> and set its class to the old tag =item tag_to_div Change the element tag tp C<div> and set its class to the old tag =item DESTROY Frees the element from memory. =item start_tag Return the string for the start tag for the element, including the C<< /> >> at the end of an empty element tag =item end_tag Return the string for the end tag of an element. For an empty element, this returns the empty string (''). =item xml_string @optional_options Equivalent to C<< $elt->sprint( 1) >>, returns the string for the entire element, excluding the element's tags (but nested element tags are present) The 'C<no_recurse>' option will only return the text of the element, not of any included sub-elements (same as C<L<xml_text_only>>). =item inner_xml Another synonym for xml_string =item outer_xml An other synonym for sprint =item xml_text Return the text of the element, encoded (and processed by the current C<L<output_filter>> or C<L<output_encoding>> options, without any tag. =item xml_text_only Same as C<L<xml_text>> except that the text returned doesn't include the text of sub-elements. =item set_pretty_print ($style) Set the pretty print method, amongst 'C<none>' (default), 'C<nsgmls>', 'C<nice>', 'C<indented>', 'C<record>' and 'C<record_c>' pretty_print styles: =over 4 =item none the default, no C<\n> is used =item nsgmls nsgmls style, with C<\n> added within tags =item nice adds C<\n> wherever possible (NOT SAFE, can lead to invalid XML) =item indented same as C<nice> plus indents elements (NOT SAFE, can lead to invalid XML) =item record table-oriented pretty print, one field per line =item record_c table-oriented pretty print, more compact than C<record>, one record per line =back =item set_empty_tag_style ($style) Set the method to output empty tags, amongst 'C<normal>' (default), 'C<html>', and 'C<expand>', C<normal> outputs an empty tag 'C<< <tag/> >>', C<html> adds a space 'C<< <tag /> >>' for elements that can be empty in XHTML and C<expand> outputs 'C<< <tag></tag> >>' =item set_remove_cdata ($flag) set (or unset) the flag that forces the twig to output CDATA sections as regular (escaped) PCDATA =item set_indent ($string) Set the indentation for the indented pretty print style (default is 2 spaces) =item set_quote ($quote) Set the quotes used for attributes. can be 'C<double>' (default) or 'C<single>' =item cmp ($elt) Compare the order of the 2 elements in a twig. C<$a> is the <A>..</A> element, C<$b> is the <B>...</B> element document $a->cmp( $b) <A> ... </A> ... <B> ... </B> -1 <A> ... <B> ... </B> ... </A> -1 <B> ... </B> ... <A> ... </A> 1 <B> ... <A> ... </A> ... </B> 1 $a == $b 0 $a and $b not in the same tree undef =item before ($elt) Return 1 if C<$elt> starts before the element, 0 otherwise. If the 2 elements are not in the same twig then return C<undef>. if( $a->cmp( $b) == -1) { return 1; } else { return 0; } =item after ($elt) Return 1 if $elt starts after the element, 0 otherwise. If the 2 elements are not in the same twig then return C<undef>. if( $a->cmp( $b) == -1) { return 1; } else { return 0; } =item other comparison methods =over 4 =item lt =item le =item gt =item ge =back =item path Return the element context in a form similar to XPath's short form: 'C</root/tag1/../tag>' =item xpath Return a unique XPath expression that can be used to find the element again. It looks like C</doc/sect[3]/title>: unique elements do not have an index, the others do. =item flush flushes the twig up to the current element (strictly equivalent to C<< $elt->root->flush >>) =item private methods Low-level methods on the twig: =over 4 =item set_parent ($parent) =item set_first_child ($first_child) =item set_last_child ($last_child) =item set_prev_sibling ($prev_sibling) =item set_next_sibling ($next_sibling) =item set_twig_current =item del_twig_current =item twig_current =item contains_text =back Those methods should not be used, unless of course you find some creative and interesting, not to mention useful, ways to do it. =back =head2 cond Most of the navigation functions accept a condition as an optional argument The first element (or all elements for C<L<children> > or C<L<ancestors> >) that passes the condition is returned. The condition is a single step of an XPath expression using the XPath subset defined by C<L<get_xpath>>. Additional conditions are: The condition can be =over 4 =item #ELT return a "real" element (not a PCDATA, CDATA, comment or pi element) =item #TEXT return a PCDATA or CDATA element =item regular expression return an element whose tag matches the regexp. The regexp has to be created with C<qr//> (hence this is available only on perl 5.005 and above) =item code reference applies the code, passing the current element as argument, if the code returns true then the element is returned, if it returns false then the code is applied to the next candidate. =back =head2 XML::Twig::XPath XML::Twig implements a subset of XPath through the C<L<get_xpath>> method. If you want to use the whole XPath power, then you can use C<XML::Twig::XPath> instead. In this case C<XML::Twig> uses C<XML::XPath> to execute XPath queries. You will of course need C<XML::XPath> installed to be able to use C<XML::Twig::XPath>. See L<XML::XPath> for more information. The methods you can use are: =over 4 =item findnodes ($path) return a list of nodes found by C<$path>. =item findnodes_as_string ($path) return the nodes found reproduced as XML. The result is not guaranteed to be valid XML though. =item findvalue ($path) return the concatenation of the text content of the result nodes =back In order for C<XML::XPath> to be used as the XPath engine the following methods are included in C<XML::Twig>: in XML::Twig =over 4 =item getRootNode =item getParentNode =item getChildNodes =back in XML::Twig::Elt =over 4 =item string_value =item toString =item getName =item getRootNode =item getNextSibling =item getPreviousSibling =item isElementNode =item isTextNode =item isPI =item isPINode =item isProcessingInstructionNode =item isComment =item isCommentNode =item getTarget =item getChildNodes =item getElementById =back =head2 XML::Twig::XPath::Elt The methods you can use are the same as on C<XML::Twig::XPath> elements: =over 4 =item findnodes ($path) return a list of nodes found by C<$path>. =item findnodes_as_string ($path) return the nodes found reproduced as XML. The result is not guaranteed to be valid XML though. =item findvalue ($path) return the concatenation of the text content of the result nodes =back =head2 XML::Twig::Entity_list =over 4 =item new Create an entity list. =item add ($ent) Add an entity to an entity list. =item add_new_ent ($name, $val, $sysid, $pubid, $ndata, $param) Create a new entity and add it to the entity list =item delete ($ent or $tag). Delete an entity (defined by its name or by the Entity object) from the list. =item print ($optional_filehandle) Print the entity list. =item list Return the list as an array =back =head2 XML::Twig::Entity =over 4 =item new ($name, $val, $sysid, $pubid, $ndata, $param) Same arguments as the Entity handler for XML::Parser. =item print ($optional_filehandle) Print an entity declaration. =item name Return the name of the entity =item val Return the value of the entity =item sysid Return the system id for the entity (for NDATA entities) =item pubid Return the public id for the entity (for NDATA entities) =item ndata Return true if the entity is an NDATA entity =item param Return true if the entity is a parameter entity =item text Return the entity declaration text. =back =head2 XML::Twig::Notation_list =over 4 =item new Create an notation list. =item add ($notation) Add an notation to an notation list. =item add_new_notation ($name, $base, $sysid, $pubid) Create a new notation and add it to the notation list =item delete ($notation or $tag). Delete an notation (defined by its name or by the Notation object) from the list. =item print ($optional_filehandle) Print the notation list. =item list Return the list as an array =back =head2 XML::Twig::Notation =over 4 =item new ($name, $base, $sysid, $pubid) Same argumnotations as the Notation handler for XML::Parser. =item print ($optional_filehandle) Print an notation declaration. =item name Return the name of the notation =item base Return the base to be used for resolving a relative URI =item sysid Return the system id for the notation =item pubid Return the public id for the notation =item text Return the notation declaration text. =back =head1 EXAMPLES Additional examples (and a complete tutorial) can be found on the F<XML::Twig PageL<http://www.xmltwig.org/xmltwig/>> To figure out what flush does call the following script with an XML file and an element name as arguments use XML::Twig; my ($file, $elt)= @ARGV; my $t= XML::Twig->new( twig_handlers => { $elt => sub {$_[0]->flush; print "\n[flushed here]\n";} }); $t->parsefile( $file, ErrorContext => 2); $t->flush; print "\n"; =head1 NOTES =head2 Subclassing XML::Twig Useful methods: =over 4 =item elt_class In order to subclass C<XML::Twig> you will probably need to subclass also C<L<XML::Twig::Elt>>. Use the C<elt_class> option when you create the C<XML::Twig> object to get the elements created in a different class (which should be a subclass of C<XML::Twig::Elt>. =item add_options If you inherit C<XML::Twig> new method but want to add more options to it you can use this method to prevent XML::Twig to issue warnings for those additional options. =back =head2 DTD Handling There are 3 possibilities here. They are: =over 4 =item No DTD No doctype, no DTD information, no entity information, the world is simple... =item Internal DTD The XML document includes an internal DTD, and maybe entity declarations. If you use the load_DTD option when creating the twig the DTD information and the entity declarations can be accessed. The DTD and the entity declarations will be C<flush>'ed (or C<print>'ed) either as is (if they have not been modified) or as reconstructed (poorly, comments are lost, order is not kept, due to it's content this DTD should not be viewed by anyone) if they have been modified. You can also modify them directly by changing the C<< $twig->{twig_doctype}->{internal} >> field (straight from XML::Parser, see the C<Doctype> handler doc) =item External DTD The XML document includes a reference to an external DTD, and maybe entity declarations. If you use the C<load_DTD> when creating the twig the DTD information and the entity declarations can be accessed. The entity declarations will be C<flush>'ed (or C<print>'ed) either as is (if they have not been modified) or as reconstructed (badly, comments are lost, order is not kept). You can change the doctype through the C<< $twig->set_doctype >> method and print the dtd through the C<< $twig->dtd_text >> or C<< $twig->dtd_print >> methods. If you need to modify the entity list this is probably the easiest way to do it. =back =head2 Flush Remember that element handlers are called when the element is CLOSED, so if you have handlers for nested elements the inner handlers will be called first. It makes it for example trickier than it would seem to number nested sections (or clauses, or divs), as the titles in the inner sections are handled before the outer sections. =head1 BUGS =over 4 =item segfault during parsing This happens when parsing huge documents, or lots of small ones, with a version of Perl before 5.16. This is due to a bug in the way weak references are handled in Perl itself. The fix is either to upgrade to Perl 5.16 or later (C<perlbrew> is a great tool to manage several installations of perl on the same machine). An other, NOT RECOMMENDED, way of fixing the problem, is to switch off weak references by writing C<XML::Twig::_set_weakrefs( 0);> at the top of the code. This is totally unsupported, and may lead to other problems though, =item entity handling Due to XML::Parser behaviour, non-base entities in attribute values disappear if they are not declared in the document: C<att="val&ent;"> will be turned into C<< att => val >>, unless you use the C<keep_encoding> argument to C<< XML::Twig->new >> =item DTD handling The DTD handling methods are quite bugged. No one uses them and it seems very difficult to get them to work in all cases, including with several slightly incompatible versions of XML::Parser and of libexpat. Basically you can read the DTD, output it back properly, and update entities, but not much more. So use XML::Twig with standalone documents, or with documents referring to an external DTD, but don't expect it to properly parse and even output back the DTD. =item memory leak If you use a REALLY old Perl (5.005!) and a lot of twigs you might find that you leak quite a lot of memory (about 2Ks per twig). You can use the C<L<dispose> > method to free that memory after you are done. If you create elements the same thing might happen, use the C<L<delete>> method to get rid of them. Alternatively installing the C<Scalar::Util> (or C<WeakRef>) module on a version of Perl that supports it (>5.6.0) will get rid of the memory leaks automagically. =item ID list The ID list is NOT updated when elements are cut or deleted. =item change_gi This method will not function properly if you do: $twig->change_gi( $old1, $new); $twig->change_gi( $old2, $new); $twig->change_gi( $new, $even_newer); =item sanity check on XML::Parser method calls XML::Twig should really prevent calls to some XML::Parser methods, especially the C<setHandlers> method. =item pretty printing Pretty printing (at least using the 'C<indented>' style) is hard to get right! Only elements that belong to the document will be properly indented. Printing elements that do not belong to the twig makes it impossible for XML::Twig to figure out their depth, and thus their indentation level. Also there is an unavoidable bug when using C<flush> and pretty printing for elements with mixed content that start with an embedded element: <elt><b>b</b>toto<b>bold</b></elt> will be output as <elt> <b>b</b>toto<b>bold</b></elt> if you flush the twig when you find the C<< <b> >> element =back =head1 Globals These are the things that can mess up calling code, especially if threaded. They might also cause problem under mod_perl. =over 4 =item Exported constants Whether you want them or not you get them! These are subroutines to use as constant when creating or testing elements PCDATA return '#PCDATA' CDATA return '#CDATA' PI return '#PI', I had the choice between PROC and PI :--( =item Module scoped values: constants these should cause no trouble: %base_ent= ( '>' => '>', '<' => '<', '&' => '&', "'" => ''', '"' => '"', ); CDATA_START = "<![CDATA["; CDATA_END = "]]>"; PI_START = "<?"; PI_END = "?>"; COMMENT_START = "<!--"; COMMENT_END = "-->"; pretty print styles ( $NSGMLS, $NICE, $INDENTED, $INDENTED_C, $WRAPPED, $RECORD1, $RECORD2)= (1..7); empty tag output style ( $HTML, $EXPAND)= (1..2); =item Module scoped values: might be changed Most of these deal with pretty printing, so the worst that can happen is probably that XML output does not look right, but is still valid and processed identically by XML processors. C<$empty_tag_style> can mess up HTML bowsers though and changing C<$ID> would most likely create problems. $pretty=0; # pretty print style $quote='"'; # quote for attributes $INDENT= ' '; # indent for indented pretty print $empty_tag_style= 0; # how to display empty tags $ID # attribute used as an id ('id' by default) =item Module scoped values: definitely changed These 2 variables are used to replace tags by an index, thus saving some space when creating a twig. If they really cause you too much trouble, let me know, it is probably possible to create either a switch or at least a version of XML::Twig that does not perform this optimization. %gi2index; # tag => index @index2gi; # list of tags =back If you need to manipulate all those values, you can use the following methods on the XML::Twig object: =over 4 =item global_state Return a hashref with all the global variables used by XML::Twig The hash has the following fields: C<pretty>, C<quote>, C<indent>, C<empty_tag_style>, C<keep_encoding>, C<expand_external_entities>, C<output_filter>, C<output_text_filter>, C<keep_atts_order> =item set_global_state ($state) Set the global state, C<$state> is a hashref =item save_global_state Save the current global state =item restore_global_state Restore the previously saved (using C<Lsave_global_state>> state =back =head1 TODO =over 4 =item SAX handlers Allowing XML::Twig to work on top of any SAX parser =item multiple twigs are not well supported A number of twig features are just global at the moment. These include the ID list and the "tag pool" (if you use C<change_gi> then you change the tag for ALL twigs). A future version will try to support this while trying not to be to hard on performance (at least when a single twig is used!). =back =head1 AUTHOR Michel Rodriguez <mirod@cpan.org> =head1 LICENSE This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Bug reports should be sent using: F<RT L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=XML-Twig>> Comments can be sent to mirod@cpan.org The XML::Twig page is at L<http://www.xmltwig.org/xmltwig/> It includes the development version of the module, a slightly better version of the documentation, examples, a tutorial and a: F<Processing XML efficiently with Perl and XML::Twig: L<http://www.xmltwig.org/xmltwig/tutorial/index.html>> =head1 SEE ALSO Complete docs, including a tutorial, examples, an easier to use HTML version of the docs, a quick reference card and a FAQ are available at L<http://www.xmltwig.org/xmltwig/> git repository at L<http://github.com/mirod/xmltwig> L<XML::Parser>, L<XML::Parser::Expat>, L<XML::XPath>, L<Encode>, L<Text::Iconv>, L<Scalar::Utils> =head2 Alternative Modules XML::Twig is not the only XML::Processing module available on CPAN (far from it!). The main alternative I would recommend is L<XML::LibXML>. Here is a quick comparison of the 2 modules: XML::LibXML, actually C<libxml2> on which it is based, sticks to the standards, and implements a good number of them in a rather strict way: XML, XPath, DOM, RelaxNG, I must be forgetting a couple (XInclude?). It is fast and rather frugal memory-wise. XML::Twig is older: when I started writing it XML::Parser/expat was the only game in town. It implements XML and that's about it (plus a subset of XPath, and you can use XML::Twig::XPath if you have XML::XPathEngine installed for full support). It is slower and requires more memory for a full tree than XML::LibXML. On the plus side (yes, there is a plus side!) it lets you process a big document in chunks, and thus let you tackle documents that couldn't be loaded in memory by XML::LibXML, and it offers a lot (and I mean a LOT!) of higher-level methods, for everything, from adding structure to "low-level" XML, to shortcuts for XHTML conversions and more. It also DWIMs quite a bit, getting comments and non-significant whitespaces out of the way but preserving them in the output for example. As it does not stick to the DOM, is also usually leads to shorter code than in XML::LibXML. Beyond the pure features of the 2 modules, XML::LibXML seems to be preferred by "XML-purists", while XML::Twig seems to be more used by Perl Hackers who have to deal with XML. As you have noted, XML::Twig also comes with quite a lot of docs, but I am sure if you ask for help about XML::LibXML here or on Perlmonks you will get answers. Note that it is actually quite hard for me to compare the 2 modules: on one hand I know XML::Twig inside-out and I can get it to do pretty much anything I need to (or I improve it ;--), while I have a very basic knowledge of XML::LibXML. So feature-wise, I'd rather use XML::Twig ;--). On the other hand, I am painfully aware of some of the deficiencies, potential bugs and plain ugly code that lurk in XML::Twig, even though you are unlikely to be affected by them (unless for example you need to change the DTD of a document programmatically), while I haven't looked much into XML::LibXML so it still looks shinny and clean to me. That said, if you need to process a document that is too big to fit memory and XML::Twig is too slow for you, my reluctant advice would be to use "bare" XML::Parser. It won't be as easy to use as XML::Twig: basically with XML::Twig you trade some speed (depending on what you do from a factor 3 to... none) for ease-of-use, but it will be easier IMHO than using SAX (albeit not standard), and at this point a LOT faster (see the last test in L<http://www.xmltwig.org/article/simple_benchmark/>). =cut �����������������������������������������������������������������������������������������������������������������������������������������������������������XML-Twig-3.52/tools/��������������������������������������������������������������������������������0000755�0001750�0001750�00000000000�13015347632�014421� 5����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������XML-Twig-3.52/tools/xml_split/����������������������������������������������������������������������0000755�0001750�0001750�00000000000�13015347632�016434� 5����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������XML-Twig-3.52/tools/xml_split/xml_split�������������������������������������������������������������0000755�0001750�0001750�00000052216�13015053270�020373� 0����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/perl -w # $Id: /xmltwig/trunk/tools/xml_split/xml_split 17 2007-06-04T11:57:10.366292Z mrodrigu $ use strict; use XML::Twig; use FindBin qw( $RealBin $RealScript); use Getopt::Std; import xml_split::state::parser; import xml_split::state::twig; undef $Getopt::Std::STANDARD_HELP_VERSION; $Getopt::Std::STANDARD_HELP_VERSION=1; # to stop processing after --help or --version use vars qw( $VERSION $USAGE); $VERSION= "0.06"; $USAGE= "xml_split [-l <level> [-s <size> | -g <nb_grouped>] | -c <cond>] [-b <base>] [-n <nb>] [-e <ext>] [-p <plugin>] [-I <plugin_dir>] [-i] [-d] [-v] [-h] [-m] [-V] <files>\n"; { # main block my $opt={}; getopts('l:c:b:g:n:e:p:is:dvhmV', $opt); # defaults $opt->{n} ||= 2; # number of digits used for creating parts $opt->{I} ||= ($ENV{HOME} || '') . "/.xml_split"; if( $opt->{h}) { die $USAGE, "\n"; } if( $opt->{m}) { exec "pod2text $RealBin/$RealScript"; } if( $opt->{V}) { print "xml_split version $VERSION\n"; exit; } my %factor=( ' ' => 1, K => 1000, M => 1_000_000, G => 1_000_000_000); if( $opt->{s}) { if( $opt->{c}) { die "cannot use -c and -s at the same time\n"; } if( $opt->{s}=~ m{^\s*(\d+)\s*(G[bo]?|M[bo]?|K[bo]?\s*)?$}i) { my( $size, $unit)= ($1, uc substr( $2 || ' ', 0, 1)); $opt->{s}= $size * $factor{$unit}; } else { die "invalid size (should be in Kb, Mb or Gb): '$opt->{s}'\n"; } } if( $opt->{g}) { die "cannot use -g and -s at the same time\n" if( $opt->{s}); die "cannot use -g and -c at the same time\n" if( $opt->{c}); $opt->{l} ||= 1; } elsif( $opt->{c}) { die "cannot use -l and -c at the same time\n" if( $opt->{l}); } else { $opt->{l} ||= 1; $opt->{c}= "level( $opt->{l})"; } my $options= { cond => $opt->{c}, base => $opt->{b}, nb_digits => $opt->{n}, ext => $opt->{e}, plugin => $opt->{p}, no_pi => $opt->{d}, verbose => $opt->{v}, xinclude => $opt->{i} ? 1 : 0, }; my $state; if( my $plugin= $opt->{p}) { if( $plugin!~ m{^[\w:.-]+$}) { die "wrong plugin name '$plugin' (only word characters are allowed in plugin names)\n"; } push @INC, $opt->{I}; eval { require $plugin }; if( $@) { die "cannot find plugin '$plugin': $!"; } import $plugin; $state= $plugin->new( $options); } if( $opt->{s}) { $state||= xml_split::state::parser->new( $options); $state->{level} = $opt->{l}; $state->{size} = $opt->{s}; $state->{current_size}=0; $state->{handlers}= { Start => \&parser_start_tag_size, End => \&parser_end_tag_size , Default => \&parser_default_size}; warn "using XML::Parser\n" if( $opt->{v}); split_with_parser( $state, @ARGV); } elsif( $opt->{g}) { $state||= xml_split::state::parser->new( $options); $state->{level}= $opt->{l}; $state->{group}= $opt->{g}; $state->{handlers}= { Start => \&parser_start_tag_grouped, End => \&parser_end_tag_grouped , Default => \&parser_default_grouped}; warn "using XML::Parser\n" if( $opt->{v}); split_with_parser( $state, @ARGV); } elsif( $opt->{l}) { $state||= xml_split::state::parser->new( $options); $state->{level}= $opt->{l}; $state->{handlers}= { Start => \&parser_start_tag_level, End => \&parser_end_tag_level , Default => \&parser_default_level}; warn "using XML::Parser\n" if( $opt->{v}); split_with_parser( $state, @ARGV); } else { $state||= xml_split::state::twig->new( $options); split_with_twig( $state, @ARGV); } exit; } sub split_with_twig { my( $state, @files)= @_; if( !@files) { $state->{base} ||= 'out'; $state->{ext} ||= '.xml'; my $twig_options= twig_options( $state); my $t= XML::Twig->new( %$twig_options, $state); $state->{twig}= $t; $t->parse( \*STDIN); end_file( $t, $state); } else { foreach my $file (@files) { unless( $state->{base}) { $state->{seq_nb}=0; } my( $base, $ext)= ($file=~ m{^(.*?)(\.\w+)?$}); $state->{base} ||= $base; $state->{ext} ||= $ext || '.xml'; my $twig_options= twig_options( $state); my $t= XML::Twig->new( %$twig_options); $state->{twig}= $t; $t->parsefile( $file); end_file( $t, $state); } } } sub split_with_parser { my( $state, @files)= @_; if( !@files) { $state->{base} ||= 'out'; $state->{ext} ||= '.xml'; my $parser_options= parser_options( $state); my $p= XML::Parser->new( %$parser_options); $state->{parser}= $p; $p->parse( \*STDIN); } else { foreach my $file (@files) { unless( $state->{base}) { $state->{seq_nb}=0; } my( $base, $ext)= ($file=~ m{^(.*?)(\.\w+)?$}); $state->{base} ||= $base; $state->{ext} ||= $ext || '.xml'; my $parser_options= parser_options( $state); my $p= XML::Parser->new( %$parser_options); $state->{parser}= $p; $p->parsefile( $file); } } } sub parser_options { my( $state)= @_; # prepare output to the main document unless( $state->{no_pi}) { my $file_name= $state->main_file_name(); # main file name warn "generating main file $file_name\n" if( $state->{verbose}); open( my $out, '>', $file_name) or die "cannot create main file '$file_name': $!"; $state->{main_fh}= $out; $state->{current_fh}= $out; } my $handlers= { Start => sub { $state->{handlers}->{Start}->( $state, shift( @_)); }, End => sub { $state->{handlers}->{End}->( $state, shift( @_)); }, Default => sub { $state->{handlers}->{Default}->( $state, shift( @_)); }, XMLDecl => sub { parser_declaration( $state, @_); }, }; return { Handlers => $handlers }; } ################################################################################### # # # handlers for the -l option # # # ################################################################################### sub parser_start_tag_level { my( $state, $p)= @_; if( $p->depth == $state->{level}) { $state->{seq_nb}++; my $file_name= $state->file_name; # prepare chunk file warn "generating $file_name\n" if( $state->{verbose}); open( my $out, '>', $file_name) or die "cannot create output file '$file_name': $!"; $state->{current_fh}= $out; if( $state->{xml_declaration}) { print {$state->{current_fh}} $state->{xml_declaration}, "\n"; } # output pi unless( $state->{no_pi}) { print {$state->{main_fh}} $state->include( $file_name) ; } } print {$state->{current_fh}} $p->original_string if( $state->{current_fh}); } sub parser_end_tag_level { my( $state, $p)= @_; print {$state->{current_fh}} $p->original_string if( $state->{current_fh}); if( $p->depth == $state->{level}) { unless( $state->{current_fh} == $state->{main_fh}) { close $state->{current_fh}; $state->{current_fh}= $state->{main_fh}; } } } sub parser_default_level { my( $state, $p)= @_; print {$state->{current_fh}} $p->original_string if( $state->{current_fh}); } ################################################################################### # # # handlers for the -s option # # # ################################################################################### sub parser_start_tag_size { my( $state, $p)= @_; if( $p->depth == $state->{level} && !$state->{current_size}) { $state->{seq_nb}++; my $file_name= $state->file_name; # prepare chunk file warn "generating $file_name\n" if( $state->{verbose}); open( my $out, '>', $file_name) or die "cannot create output file '$file_name': $!"; $state->{current_fh}= $out; print {$state->{current_fh}} qq{$state->{xml_declaration}\n} if $state->{xml_declaration}; print {$state->{current_fh}} qq{<xml_split:root xmlns:xml_split="http://xmltwig.com/xml_split">\n}; # output pi unless( $state->{no_pi}) { print {$state->{main_fh}} $state->include( $file_name) ; } $state->{store_size}=1; } my $original_string= $p->original_string; $state->{current_size} += length( $original_string) if( $state->{store_size}); print {$state->{current_fh}} $original_string if( $state->{current_fh}); } sub parser_end_tag_size { my( $state, $p)= @_; my $original_string= $p->original_string; $state->{current_size} += length( $original_string) if( $state->{store_size}); if( $p->depth == $state->{level} && $state->{current_size} > $state->{size}) { print {$state->{current_fh}} $original_string if( $state->{current_fh}); end_file_with_size( $state); } else { if($p->depth < $state->{level}) { end_file_with_size( $state); } print {$state->{current_fh}} $p->original_string if( $state->{current_fh}); } } sub end_file_with_size { my( $state)= @_; unless( $state->{current_fh} == $state->{main_fh}) { print {$state->{current_fh}} qq{\n</xml_split:root>\n}; close $state->{current_fh}; $state->{current_size}=0; $state->{store_size}=0; $state->{current_fh}= $state->{main_fh}; } } sub parser_default_size { my( $state, $p)= @_; my $string= $p->original_string; if( $state->{store_size}) { $state->{current_size} += length( $string); if( $p->depth < $state->{level}) { end_file_with_size( $state); } } print {$state->{current_fh}} $string if( $state->{current_fh}); } ################################################################################### # # # handlers for the -g option # # # ################################################################################### sub parser_start_tag_grouped { my( $state, $p)= @_; if( $p->depth == $state->{level}) { if( !$state->{current_nb}) { $state->{seq_nb}++; my $file_name= $state->file_name; # prepare chunk file warn "generating $file_name\n" if( $state->{verbose}); open( my $out, '>', $file_name) or die "cannot create output file '$file_name': $!"; $state->{current_fh}= $out; print {$state->{current_fh}} join( "\n", grep { $_ } ( $state->{xml_declaration}, qq{<xml_split:root xmlns:xml_split="http://xmltwig.com/xml_split">\n } ) ); # output pi unless( $state->{no_pi}) { print {$state->{main_fh}} $state->include( $file_name) ; } } } print {$state->{current_fh}} $p->original_string if( $state->{current_fh}); } sub parser_end_tag_grouped { my( $state, $p)= @_; if( $p->depth == $state->{level}) { print {$state->{current_fh}} $p->original_string if( $state->{current_fh}); $state->{current_nb}++; if( $state->{current_nb} == $state->{group}) { end_file_grouped( $state); } } else { if($p->depth < $state->{level}) { end_file_grouped( $state, { no_nl => 1 }); } print {$state->{current_fh}} $p->original_string if( $state->{current_fh}); } } sub end_file_grouped { my( $state, $options)= @_; print {$state->{current_fh}} qq{\n} unless( $options->{no_nl}); unless( $state->{current_fh} == $state->{main_fh}) { print {$state->{current_fh}} qq{</xml_split:root>\n}; close $state->{current_fh}; $state->{current_nb}=0; $state->{current_fh}= $state->{main_fh}; } } sub parser_default_grouped { my( $state, $p)= @_; print {$state->{current_fh}} $p->original_string if( $state->{current_fh}); } sub char_parser { my( $state, $p)=( shift, shift); print {$state->{current_fh}} $_[0] if( $state->{current_fh}); } sub parser_declaration { my( $state, $p, $version, $encoding, $standalone)= @_; $state->{xml_declaration}= $p->recognized_string || ''; print {$state->{main_fh}} $state->{xml_declaration}; # avoid calling original_string if not needed #if( !$state->{xml_declaration} || $state->{xml_declaration}=~ m{encoding\s*=\s*["']utf-?8["']}i) # { $state->{utf8_encoded}=1; # $p->setHandlers( Char => \&char_parser); # } } sub twig_options { my( $state)= @_; # base options, ensures maximum fidelity to the original document my $twig_options= { keep_encoding => 1, keep_spaces => 1 }; # prepare output to the main document unless( $state->{no_pi}) { my $file_name= $state->main_file_name(); # main file name warn "generating main file $file_name\n" if( $state->{verbose}); open( my $out, '>', $file_name) or die "cannot create main file '$file_name': $!"; $state->{out}= $out; $twig_options->{twig_print_outside_roots}= $out; $twig_options->{start_tag_handlers}= { $state->{cond} => sub { $_->set_att( '#in_fragment' => 1); } }; } $twig_options->{twig_roots}= { $state->{cond} => sub { dump_elt( @_, $state); } }; return $twig_options; } sub dump_elt { my( $t, $elt, $state)= @_; $state->{seq_nb}++; $state->{elt}= $elt; my $file_name= $state->file_name; warn "generating $file_name\n" if( $state->{verbose}); my $fragment= XML::Twig->new(); $fragment->{twig_xmldecl} = $t->{twig_xmldecl}; $fragment->{twig_doctype} = $t->{twig_doctype}; $fragment->{twig_dtd} = $t->{twig_dtd}; if( !$state->{no_pis}) { # if we are still within a fragment, just replace the element by the PI # otherwise print it to the main document my $include= $state->include( $file_name); $elt->del_att( '#in_fragment'); if( $elt->inherited_att( '#in_fragment')) { $elt->parent( '*[@#in_fragment="1"]')->set_att( '#has_subdocs' => 1); $include->replace( $elt); } else { $elt->cut; $include->print( $state->{out}); } } else { $elt->cut; } $fragment->set_root( $elt); open( my $out, '>', $file_name) or die "cannot create output file '$file_name': $!"; #if( $state->{xml_declaration}) { warn "c1"; print {$out} $state->{xml_declaration}, "\n"; } #if( $fragment->{xml_decl}) { warn "c2"; print {$out} $fragment->xml_decl, "\n"; } $fragment->set_keep_encoding( 1); $fragment->print( $out); close $out; } sub end_file { my( $t, $state)= @_; unless( $state->{no_pi}) { close $state->{out}; } } # for Getop::Std sub HELP_MESSAGE { return $USAGE; } sub VERSION_MESSAGE { return $VERSION; } package xml_split::state; sub new { my( $ref, $options)= @_; my $state= bless $options, $ref; $state->{seq_nb}=0; return $state; } sub file_name { my( $state)= @_; my $nb= sprintf( "%0$state->{nb_digits}d", $state->{seq_nb}); my $file_name= "$state->{base}-$nb$state->{ext}"; $file_name =~ s{\\}{/}g; return $file_name; } sub main_file_name { my( $state)= @_; my $nb= sprintf( "%0$state->{nb_digits}d", 0); my $file_name= "$state->{base}-$nb$state->{ext}"; return $file_name; } 1; ################################################################################### # # # state when using XML::Parser # # # ################################################################################### package xml_split::state::parser; import xml_split::state; use base 'xml_split::state'; sub include { my( $state, $file_name)= @_; if( $state->{xinclude}) { return qq{<xi:include href="$file_name" />}; } else { return qq{<?merge subdocs = 0 :$file_name?>}; } } 1; ################################################################################### # # # state when using XML::Twig # # # ################################################################################### package xml_split::state::twig; import xml_split::state; use base 'xml_split::state'; sub include { my( $state, $file_name)= @_; my $include; my $subdocs= $state->{elt}->att( '#has_subdocs') || 0; if( $state->{xinclude}) { $include= XML::Twig::Elt->new( 'xi:include', { href => $file_name }); if( $subdocs) { $include->set_att( subdocs => 1); } } else { $include= XML::Twig::Elt->new( '#PI') ->set_pi( merge => " subdocs = $subdocs :$file_name"); } return $include; } 1; package main; __END__ =head1 NAME xml_split - cut a big XML file into smaller chunks =head1 DESCRIPTION C<xml_split> takes a (presumably big) XML file and split it in several smaller files. The memory used is the memory needed for the biggest chunk (ie memory is reused for each new chunk). It can split at a given level in the tree (the default, splits children of the root), or on a condition (using the subset of XPath understood by XML::Twig, so C<section> or C</doc/section>). Each generated file is replaced by a processing instruction that will allow C<xml_merge> to rebuild the original document. The processing instruction format is C<< <?merge subdocs=[01] :<filename> ?> >> File names are <file>-<nb>.xml, with <file>-00.xml holding the main document. =head1 OPTIONS =over 4 =item -l <level> level to cut at: 1 generates a file for each child of the root, 2 for each grand child defaults to 1 =item -c <condition> generate a file for each element that passes the condition xml_split -c <section> will put each C<section> element in its own file (nested sections are handled too) Note that at the moment this option is a lot slower than using C<-l> =item -s <size> generates files of (approximately) <size>. The content of each file is enclosed in a new element (C<xml_split::root>), so it's well-formed XML. The size can be given in bytes, Kb, Mb or Gb. =item -g <nb> groups <nb> elements in a single file. The content of each file is enclosed in a new element (C<xml_split::root>), so it's well-formed XML. =item -b <name> base name for the output, files will be named <base>-<nb><.ext> <nb> is a sequence number, see below C<--nb_digits> <ext> is an extension, see below C<--extension> defaults to the original file name (if available) or C<out> (if input comes from the standard input) =item -n <nb> number of digits in the sequence number for each file if more digits than <nb> are needed, then they are used: if C<--nb_digits 2> is used and 112 files are generated they will be named C<< <file>-01.xml >> to C<< <file>-112.xml >> defaults to 2 =item -e <ext> extension to use for generated files defaults to the original file extension or C<.xml> =item -i use XInclude elements instead of Processing Instructions to mark where sub files need to be included =item -v verbose output Note that this option can slow down processing considerably (by an order of magnitude) when generating lots of small documents =item -V outputs version and exit =item -h short help =item -m man (requires pod2text to be in the path) =back =head1 EXAMPLES xml_split foo.xml # split at level 1 xml_split -l 2 foo.xml # split at level 2 xml_split -c section foo.xml # a file is generated for each section element # nested sections are split properly =head1 SEE ALSO XML::Twig, xml_merge =head1 TODO =over 4 =item optimize the code any idea welcome! I have already implemented most of what I thought would improve performances. =item provide other methods that PIs to keep merge information XInclude is a good candidate (alpha support added in 0.04). using entities, which would seem the natural way to do it, doesn't work, as they make it impossible to have both the main document and the sub docs to be well-formed if the sub docs include sub-sub docs (you can't have entity declarations in an entity) =back =head1 AUTHOR Michel Rodriguez <mirod@cpan.org> =head1 LICENSE This tool is free software; you can redistribute it and/or modify it under the same terms as Perl itself. ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������XML-Twig-3.52/tools/xml_merge/����������������������������������������������������������������������0000755�0001750�0001750�00000000000�13015347632�016400� 5����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������XML-Twig-3.52/tools/xml_merge/xml_merge�������������������������������������������������������������0000755�0001750�0001750�00000007625�12732215763�020323� 0����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/perl -w # $Id: /xmltwig/trunk/tools/xml_merge/xml_merge 12 2007-04-22T06:04:54.627880Z mrodrigu $ use strict; use XML::Twig; use FindBin qw( $RealBin $RealScript); use Getopt::Std; $Getopt::Std::STANDARD_HELP_VERSION=1; # twice to prevent warning with 5.6.1 (I know it's dumb!) $Getopt::Std::STANDARD_HELP_VERSION=1; # to stop processing after --help or --version use vars qw( $VERSION $USAGE); $VERSION= "0.02"; $USAGE= "xml_merge [-o <output_file>] [-i] [-v] [-h] [-m] [-V] [file]\n"; { # main block my $opt={}; getopts('o:ivhmV', $opt); if( $opt->{h}) { die $USAGE, "\n"; } if( $opt->{m}) { exec "pod2text $RealBin/$RealScript"; } if( $opt->{V}) { print "xml_merge version $VERSION\n"; exit; } if( $opt->{o}) { open( my $out, '>', $opt->{o}) or die "cannot create $opt->{o}: $!"; $opt->{fh}= $out; # used to set twig_print_outside_roots } else { $opt->{fh}= 1; } # this way twig_print_outside_roots outputs to STDOUT $opt->{subdocs} = 1; $opt->{file} = $ARGV[0]; $opt->{twig_roots}= $opt->{i} ? { 'xi:include' => sub { $opt->{file}= $_->att( 'href'); if( $_->att( 'subdocs')) { merge( $opt); } else { spit( $opt); } }, } : { '?merge' => sub { $opt= parse( $_->data, $opt); if( $opt->{subdocs}) { merge( $opt); } else { spit( $opt); } }, } ; merge( $opt); if( $opt->{v}) { warn "done\n"; } } sub merge { my( $opt)= @_; my $t= XML::Twig->new( keep_encoding => 1, keep_spaces => 1, twig_roots => $opt->{twig_roots}, twig_print_outside_roots => $opt->{fh}, ); if( $opt->{v} && $opt->{file}) { warn "merging $opt->{file} (parsing)\n"; } if( $opt->{file}) { $t->parsefile( $opt->{file}); } else { $t->parse( \*STDIN); } } sub spit { my( $opt)= @_; if( $opt->{v} && $opt->{file}) { warn "merging $opt->{file} (no parsing)\n"; } open( my $in, '<', $opt->{file}) or die "cannot open sub document '$opt->{file}': $!"; while( <$in>) { next if( m{^\Q<?xml version} || m{^\s*</?xml_split:root}); if( $opt->{o}) { print {$opt->{fh}} $_; } else { print $_; } } close $in; } # data is the pi data, # (ugly) format is keyword1 = val1 : keyword2 = val2 ... : filename # ex: subdoc = 1 : file-01.xml sub parse { my( $data, $opt)= @_; while( $data=~ s{^\s*(\S+)\s*=\s*(\S+)\s*:\s*}{}) { $opt->{$1}= $2; } $opt->{file}= $data; return $opt; } # for Getop::Std sub HELP_MESSAGE { return $USAGE; } sub VERSION_MESSAGE { return $VERSION; } __END__ =head1 NAME xml_merge - merge back XML files split with C<xml_split> =head1 DESCRIPTION C<xml_merge> takes several xml files that have been split using C<xml_split> and recreates a single file. =head1 OPTIONS =over 4 =item -o <output_file> unless this option is used the program output goes to STDOUT =item -i the files use XInclude instead of processing instructions (they were created using the C<-i> option in C<xml_split>) =item -v verbose output =item -V outputs version and exit =item -h short help =item -m man (requires pod2text to be in the path) =back =head1 EXAMPLES xml_merge foo-00.xml # output to stdout xml_merge -o foo.xml foo-00.xml # output to foo.xml =head1 SEE ALSO XML::Twig, xml_split =head1 TODO/BUGS =head1 AUTHOR Michel Rodriguez <mirod@cpan.org> =head1 LICENSE This tool is free software; you can redistribute it and/or modify it under the same terms as Perl itself. �����������������������������������������������������������������������������������������������������������XML-Twig-3.52/tools/xml_pp/�������������������������������������������������������������������������0000755�0001750�0001750�00000000000�13015347632�015720� 5����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������XML-Twig-3.52/tools/xml_pp/xml_pp�������������������������������������������������������������������0000755�0001750�0001750�00000017517�13015053270�017150� 0����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/perl -w # $Id: /xmltwig/trunk/tools/xml_pp/xml_pp 32 2008-01-18T13:11:52.128782Z mrodrigu $ use strict; use XML::Twig; use File::Temp qw/tempfile/; use File::Basename qw/dirname/; my @styles= XML::Twig->_pretty_print_styles; # from XML::Twig my $styles= join '|', @styles; # for usage my %styles= map { $_ => 1} @styles; # to check option my $DEFAULT_STYLE= 'indented'; my $USAGE= "usage: $0 [-v] [-i<extension>] [-s ($styles)] [-p <tag(s)>] [-e <encoding>] [-l] [-f <file>] [<files>]"; # because of the -i.bak option I don't think I can use one of the core # option processing modules, so it's custom handling and no clusterization :--( my %opt= process_options(); # changes @ARGV my @twig_options=( pretty_print => $opt{style}, error_context => 1, ); if( $opt{preserve_space_in}) { push @twig_options, keep_spaces_in => $opt{preserve_space_in};} if( $opt{encoding}) { push @twig_options, output_encoding => $opt{encoding}; } else { push @twig_options, keep_encoding => 1; } # in normal (ie not -l) mode tags are output as soon as possible push @twig_options, twig_handlers => { _all_ => sub { $_[0]->flush } } unless( $opt{load}); if( @ARGV) { foreach my $file (@ARGV) { print STDERR "$file\n" if( $opt{verbose}); my $t= XML::Twig->new( @twig_options); my $tempfile; if( $opt{in_place}) { (undef, $tempfile)= tempfile( DIR => dirname( $file)) or die "cannot create tempfile for $file: $!\n" ; open( PP_OUTPUT, ">$tempfile") or die "cannot create tempfile $tempfile: $!"; select PP_OUTPUT; } $t= $t->safe_parsefile( $file); if( $t) { if( $opt{load}) { $t->print; } select STDOUT; if( $opt{in_place}) { close PP_OUTPUT; my $mode= mode( $file); if( $opt{backup}) { my $backup= backup( $file, $opt{backup}); rename( $file, $backup) or die "cannot create backup file $backup: $!"; } rename( $tempfile, $file) or die "cannot overwrite file $file: $!"; if( $mode ne mode( $file)) { chmod $mode, $file or die "cannot set $file mode to $mode: $!"; } } } else { if( defined $tempfile) { unlink $tempfile or die "cannot unlink temp file $tempfile: $!"; } die $@; } } } else { my $t= XML::Twig->new( @twig_options); $t->parse( \*STDIN); if( $opt{load}) { $t->print; } } sub mode { my( $file)= @_; return (stat($file))[2]; } sub process_options { my %opt; while( @ARGV && ($ARGV[0]=~ m{^-}) ) { my $opt= shift @ARGV; if( ($opt eq '-v') || ($opt eq '--verbose') ) { die $USAGE if( $opt{verbose}); $opt{verbose}= 1; } elsif( ($opt eq '-s') || ($opt eq '--style') ) { die $USAGE if( $opt{style}); $opt{style}= shift @ARGV; die $USAGE unless( $styles{$opt{style}}); } elsif( ($opt=~ m{^-i(.*)$}) || ($opt=~ m{^--in_place(.*)$}) ) { die $USAGE if( $opt{in_place}); $opt{in_place}= 1; $opt{backup}= $1 ||''; } elsif( ($opt eq '-p') || ($opt eq '--preserve') ) { my $tags= shift @ARGV; my @tags= split /\s+/, $tags; $opt{preserve_space_in} ||= []; push @{$opt{preserve_space_in}}, @tags; } elsif( ($opt eq '-e') || ($opt eq '--encoding') ) { die $USAGE if( $opt{encoding}); $opt{encoding}= shift @ARGV; } elsif( ($opt eq '-l') || ($opt eq '--load')) { die $USAGE if( $opt{load}); $opt{load}=1; } elsif( ($opt eq '-f') || ($opt eq '--files') ) { my $file= shift @ARGV; push @ARGV, files_from( $file); } elsif( ($opt eq '-h') || ($opt eq '--help')) { system "pod2text", $0; exit; } elsif( $opt eq '--') { last; } else { die $USAGE; } } $opt{style} ||= $DEFAULT_STYLE; return %opt; } # get the list of files (one per line) from a file sub files_from { my $file= shift; open( FILES, "<$file") or die "cannot open file $file: $!"; my @files; while( <FILES>) { chomp; push @files, $_; } close FILES; return @files; } sub backup { my( $file, $extension)= @_; my $backup; if( $extension=~ m{\*}) { ($backup= $extension)=~ s{\*}{$file}g; } else { $backup= $file.$extension; } return $backup; } __END__ =head1 NAME xml_pp - xml pretty-printer =head1 SYNOPSYS xml_pp [options] [<files>] =head1 DESCRIPTION XML pretty printer using XML::Twig =head1 OPTIONS =over 4 =item -i[<extension>] edits the file(s) in place, if an extension is provided (no space between C<-i> and the extension) then the original file is backed-up with that extension The rules for the extension are the same as Perl's (see perldoc perlrun): if the extension includes no "*" then it is appended to the original file name, If the extension does contain one or more "*" characters, then each "*" is replaced with the current filename. =item -s <style> the style to use for pretty printing: none, nsgmls, nice, indented, record, or record_c (see XML::Twig docs for the exact description of those styles), 'indented' by default =item -p <tag(s)> preserves white spaces in tags. You can use several C<-p> options or quote the tags if you need more than one =item -e <encoding> use XML::Twig output_encoding (based on Text::Iconv or Unicode::Map8 and Unicode::String) to set the output encoding. By default the original encoding is preserved. If this option is used the XML declaration is updated (and created if there was none). Make sure that the encoding is supported by the parser you use if you want to be able to process the pretty_printed file (XML::Parser does not support 'latin1' for example, you have to use 'iso-8859-1') =item -l loads the documents in memory instead of outputting them as they are being parsed. This prevents a bug (see L<BUGS|bugs>) but uses more memory =item -f <file> read the list of files to process from <file>, one per line =item -v verbose (list the current file being processed) =item -- stop argument processing (to process files that start with -) =item -h display help =back =head1 EXAMPLES xml_pp foo.xml > foo_pp.xml # pretty print foo.xml xml_pp < foo.xml > foo_pp.xml # pretty print from standard input xml_pp -v -i.bak *.xml # pretty print .xml files, with backups xml_pp -v -i'orig_*' *.xml # backups are named orig_<filename> xml_pp -i -p pre foo.xhtml # preserve spaces in pre tags xml_pp -i.bak -p 'pre code' foo.xml # preserve spaces in pre and code tags xml_pp -i.bak -p pre -p code foo.xml # same xml_pp -i -s record mydb_export.xml # pretty print using the record style xml_pp -e utf8 -i foo.xml # output will be in utf8 xml_pp -e iso-8859-1 -i foo.xml # output will be in iso-8859-1 xml_pp -v -i.bak -f lof # pretty print in place files from lof xml_pp -- -i.xml # pretty print the -i.xml file xml_pp -l foo.xml # loads the entire file in memory # before pretty printing it xml_pp -h # display help =head1 BUGS Elements with mixed content that start with an embedded element get an extra \n <elt><b>b</b>toto<b>bold</b></elt> will be output as <elt> <b>b</b>toto<b>bold</b></elt> Using the C<-l> option solves this bug (but uses more memory) =head1 TODO update XML::Twig to use Encode with perl 5.8.0 =head1 AUTHOR Michel Rodriguez <mirod@xmltwig.com> ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������XML-Twig-3.52/tools/xml_spellcheck/�����������������������������������������������������������������0000755�0001750�0001750�00000000000�13015347632�017416� 5����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������XML-Twig-3.52/tools/xml_spellcheck/xml_spellcheck���������������������������������������������������0000755�0001750�0001750�00000014551�12732215763�022353� 0����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/perl -w use strict; use XML::Twig; use Getopt::Long; use Pod::Usage; use File::Temp qw{tempfile}; my $DEFAULT_SC = 'aspell -c'; my $DEFAULT_PP = 'indented'; my $DEFAULT_EXT= '.bak'; my $VERSION="0.02"; my ( $spellchecker, $ext, $attributes, $exclude_elements, $include_elements, $pretty_print, $version, $help, $man); GetOptions( 'spellchecker=s' => \$spellchecker, 'backup-extension=s' => \$ext, 'attributes' => \$attributes, 'exclude_elements=s' => \$exclude_elements, 'include_elements=s' => \$include_elements, 'pretty_print:s' => \$pretty_print, 'version' => \$version, 'help' => \$help, 'man' => \$man, ) or pod2usage(-verbose => 1, -exitval => -1); pod2usage( -verbose => 1, -exitval => 0) if $help; pod2usage( -verbose => 2, -exitval => 0) if $man; if( $version) { print "$0 version $VERSION\n"; exit;} # option processing $spellchecker ||= $DEFAULT_SC; $ext ||= $DEFAULT_EXT; if( $exclude_elements && $include_elements) { die "cannot use both --exclude-elements and --include-elements\n"; } if( defined $pretty_print and !$pretty_print) { $pretty_print= $DEFAULT_PP; } my %twig_options; my( %include_elements); if( $exclude_elements) { my @exclude_elts = split /\s+/, $exclude_elements; my %start_tag_handlers= map { $_ => \&exclude_elt } @exclude_elts; $twig_options{start_tag_handlers}= \%start_tag_handlers; } if( $include_elements) { my @include_elts = split /\s+/, $include_elements; my %start_tag_handlers= map { $_ => \&include_elt } @include_elts; $twig_options{start_tag_handlers}= \%start_tag_handlers; } $twig_options{pretty_print}= $pretty_print if( $pretty_print); foreach my $file (@ARGV) { my $id=0; my $id2elt={}; # id => element my( $tmp_fh, $tmp_file) = tempfile( "xml_spellcheck_XXXX", SUFFIX => '.txt' ); my $t= XML::Twig->new( keep_encoding =>1, %twig_options,); $t->parsefile( $file); foreach my $elt ($t->descendants( '#TEXT')) { if( (!$include_elements and !$exclude_elements) or ($include_elements and $elt->inherit_att( '#include')) or ($exclude_elements and !$elt->inherit_att( '#exclude')) ) { $id++; process_text( $t, $elt, $id, $id2elt, $tmp_fh) } } close $tmp_fh; system( "$spellchecker $tmp_file") ==0 or die "$spellchecker $tmp_file failed: $?"; open( $tmp_fh, "<$tmp_file") or die "cannot open temp file $tmp_file: $!"; while( <$tmp_fh>) { chomp; my( $id, $text)= split /:/, $_, 2; my $wrap= $id2elt->{$id}; $text=~ s{<\\n>}{\n}g; my $text_elt= $wrap->first_child or die "internal error 100\n"; if( $text_elt->gi eq '#PCDATA') { $text_elt->set_pcdata( $text); } elsif( $text_elt->gi eq '#CDATA') { $text_elt->set_cdata( $text); } else { die "internal error 101\n"; } $wrap->erase; } close $tmp_fh; rename( $file, "$file$ext") or die "cannot save backup file $file$ext: $!"; open( FILE, ">$file") or die "cannot save spell checked file $file: $!"; $t->print( \*FILE); close FILE; } sub include_elt { $_->set_att( '#include' => 1) ; } sub exclude_elt { $_->set_att( '#exclude' => 1) ; } sub process_text { my( $t, $elt, $id, $id2elt, $tmp_fh)= @_; my $wrap= $elt->wrap_in( '#SC'); #$wrap->set_att( '#ID' => $id); $id2elt->{$id}= $wrap; my $text= $elt->text; $text=~ s{\n}{<\\n>}g; print $tmp_fh "$id:$text\n"; } __END__ =head1 NAME xml_spellcheck - spellcheck XML files =head1 SYNOPSIS xml_spellcheck [options] <files> =head1 DESCRIPTION xml_spellcheck lets you spell check the content of an XML file. It extracts the text (the content of elements and optionally of attributes), call a spell checker on it and then recreates the XML document. =head1 OPTIONS Note that all options can be abbreviated to the first letter =over 4 =item --conf <configuration_file> Gets the options from a configuration file. NOT IMPLEMENTED YET. =item --spellchecker <spellchecker> The command to use for spell checking, including any option By default C<aspell -c> is used =item --backup-extension <extension> By default the original file is saved with a C<.bak> extension. This option changes the extension =item --attributes Spell check attribute content. By default attribute values are NOT spell checked. NOT YET IMPLEMENTED =item --exclude_elements <list_of_excluded_elements> A list of elements that should not be spell checked =item --include_elements <list_of_included_elements> A list of elements that should be spell checked (by default all elements are spell checked). C<--exclude_elements> and C<--include_elements> are mutually exclusive =item --pretty_print <optional_pretty_print_style> A pretty print style for the document, as defined in XML::Twig. If the option is provided without a value then the C<indented> style is used =item --version Dislay the tool version and exit =item --help Display help message and exit =item --man Display longer help message and exit =back =head1 EXAMPLES =head1 BUGS =head1 TODO =over 4 =item --conf option =item --attribute option =back =head1 PRE-REQUISITE XML::Twig, Getopt::Long, Pod::Usage, File::Temp XML::Twig requires XML::Parser. =head1 SEE ALSO XML::Twig =head1 COPYRIGHT AND DISCLAIMER This program is Copyright 2003 by Michel Rodriguez This program is free software; you can redistribute it and/or modify it under the terms of the Perl Artistic License or the GNU General Public License as published by the Free Software Foundation either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MER- CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. If you do not have a copy of the GNU General Public License write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. =head1 AUTHOR Michel Rodriguez <mirod@xmltwig.com> xml_spellcheck is available at http://www.xmltwig.com/xmltwig/ �������������������������������������������������������������������������������������������������������������������������������������������������������XML-Twig-3.52/tools/xml_grep/�����������������������������������������������������������������������0000755�0001750�0001750�00000000000�13015347632�016236� 5����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������XML-Twig-3.52/tools/xml_grep/xml_grep���������������������������������������������������������������0000755�0001750�0001750�00000032522�13015053270�017775� 0����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/perl -w use strict; use Getopt::Long; use Pod::Usage; use XML::Twig; my $VERSION="0.9"; # options (all used globally in the script) my( $help, $man, @roots, @paths, $files, $count, $nb_results, $nb_results_per_file, $encoding, @exclude, $wrap, $nowrap, $descr, $group, $pretty_print, $version, $text_only, $date, $html, $tidy, $add_ns, $verbose, $strict ); # used to check if the wrapping tags need to be output my $results = 0; my $file_results = 0; # first process the case where the user provides only # an xpath expression and a list of files if( @ARGV && ($ARGV[0] !~ m{^-}) ) { splice( @ARGV, 0, 0, '--group_by_file', 'file', '--pretty_print', 'indented', '--cond'); } GetOptions( 'help' => \$help, 'man' => \$man, 'Version' => \$version, 'cond=s' => \@paths, 'exclude|v=s' => \@exclude, 'root=s' => \@roots, 'files' => \$files, 'count' => \$count, 'nb_results=i' => \$nb_results, 'by_file=i' => \$nb_results_per_file, 'encoding=s' => \$encoding, 'wrap:s' => \$wrap, 'nowrap' => \$nowrap, 'descr:s' => \$descr, 'group_by_file:s' => \$group, 'pretty_print:s' => \$pretty_print, 'text_only' => \$text_only, 'date!' => \$date, 'strict' => \$strict, 'html' => \$html, 'tidy' => \$tidy, 'add_ns' => \$add_ns, 'verbose' => \$verbose, ) or pod2usage(2); pod2usage(1) if $help; pod2usage(-exitstatus => 0, -verbose => 2) if $man; if( $version) { warn "$0 version $VERSION\n"; exit; } binmode STDOUT, ':utf8'; # case where options are given, but no root or path, assume the # first arg is a path if( !@roots and !@paths and !@exclude and @ARGV) { @paths= shift @ARGV; } unless( @roots or @paths or @exclude or $files) { pod2usage(1); exit; } if( ($files or $count) and !@paths) { pod2usage(1); exit; } if( ($files or $count) and (@roots or $encoding or defined( $wrap) or defined( $group) or defined( $pretty_print))) { pod2usage(1); exit; } if( $files and !@ARGV) { pod2usage(1); exit; } if( !$files and !$count and @paths and !@roots) { @roots= @paths; @paths=(); } $date=1 unless( defined $date); # defaults for optional arguments to options $group = 'file' if( defined $group and !$group); $pretty_print = 'indented' if( defined $pretty_print and !$pretty_print); if( $nowrap) { $wrap=''; } elsif( !defined( $wrap) and (@roots or @paths)) { $wrap= 'xml_grep'; } if( !defined( $descr) and (@roots or @paths)) { if( $date) { $date= localtime(); $descr = qq{version="$VERSION" date="$date"} } else { $descr = qq{version="$VERSION"}; } } # some globals my $current_file; my $count_file = 0; my $count_total = 0; my $nb_results_left_in_current_file=0; # will be used to create the twig my %options; if( $count) { my $twig_roots={}; my $twig_root= sub { $count_file++; $_[0]->purge; }; foreach my $path (@paths) { $twig_roots->{$path}= $twig_root; } $options{twig_roots}= $twig_roots; } elsif( @exclude) { # general options $nowrap=1; # twig options $options{twig_print_outside_roots} = 1; my $root_handlers={}; foreach my $exclude (@exclude) { $root_handlers->{$exclude}= sub { }; } $options{twig_roots}= $root_handlers; } else { create_regular_handlers( \%options, \@roots, \@paths); } if( $tidy) { $html= 1; $options{use_tidy}= 1; } $options{pretty_print} = $pretty_print if( $pretty_print); $options{output_encoding} = $encoding if( $encoding); my $t= create_twig( %options); if( @ARGV) { foreach my $file (@ARGV) { $current_file= $file; if( $nb_results_per_file) { $nb_results_left_in_current_file= $nb_results_per_file; } if( $verbose) { warn "parsing '$file'\n"; } my $ok= $html && ($current_file=~ m{^(http|ftp|file)://}) ? $t->safe_parseurl_html( $file) : ($current_file=~ m{^(http|ftp|file)://}) ? $t->safe_parseurl( $file) : $html ? $t->safe_parsefile_html( $file) : $t->safe_parsefile( $file); if( !$ok) { if( $@ =~ m{XMLGREP: FOUND}) { # in files mode print $current_file, "\n"; $nb_results--; exit unless( $nb_results); } elsif( $@ =~ m{^XMLGREP: NB_RESULT_REACHED}) { print file_result_end() if( $group && $file_results); print result_end() if( $results); exit; } else { $@ ||= 'unknown cause'; if( $strict) { die $@; } warn $@; if( !$count) { print "<!-- error parsing file '$file' -->\n"; } } } if( $count) { print "$current_file: $count_file\n"; $count_total += $count_file; $count_file=0; } elsif( @roots) { print file_result_end() if( $file_results); } elsif( $count) { print "$count_total matches\n"; } } if( $count) { print "total: $count_total\n"; } print result_end() if( $results); } else { $file_results=0; my $ok= $t->safe_parse( \*STDIN); if( !$ok and ( $@ !~ m{^XMLGREP: NB_RESULT_REACHED})) { if( !$strict) { warn $@; } else { die $@; } } if( $count) { print "$count_total matches\n"; } else { print result_end(); } } sub create_regular_handlers { my( $options, $roots, $paths)= @_; if( @$roots) { my $root_handlers={}; my $root_handler= twig_roots_handler( @$paths); foreach my $root (@$roots) { $root_handlers->{$root}= $root_handler; } $options->{twig_roots}= $root_handlers; } if( @$paths) { my $twig_handlers={}; my $twig_handler= twig_handlers(); foreach my $path (@$paths) { $twig_handlers->{$path}= $twig_handler; } $options->{twig_handlers}= $twig_handlers; } } sub create_twig { my( %options)= @_; my $twig; eval { $twig= XML::Twig->new( %options) }; if( $@) { # see if we are in the case where the only condition uses string() or regexp if( ($@=~ m{^(regexp|string\(\)) condition not supported on twig_roots option}) && $options{twig_roots} && !$options{twig_handlers} && ( keys %{$options{twig_roots}} == 1) ) { # in this case add the proper twig_roots option my $cond= (keys %{$options{twig_roots}})[0]; (my $root= $cond)=~ s{\[[^\]]*\]$}{}; #warn "cond: '$cond' - root: '$root'\n"; delete $options{twig_roots}; delete $options{twig_handlers}; @paths= ($cond); @roots= ($root); create_regular_handlers( \%options, \@roots, \@paths); return create_twig( %options); } elsif( $@=~ m{^wrong condition: unrecognized expression in handler: '(.*?)'}) { die "error in filter condition '$1'\n"; } else { die "error: $@"; } } return $twig; } sub twig_roots_handler { my( @paths)= @_; return sub { my( $t, $root)= @_; if( !@paths or $_->att( '#print')) { print result_start() if( !$results); print file_result_start() if( $group && !$file_results); if( $text_only) { print $root->text, "\n"; } else { $root->print; } if( ! -- $nb_results) { $@= "XMLGREP: NB_RESULT_REACHED"; die; } if( ! -- $nb_results_left_in_current_file) { $t->finish_now(); } } $t->purge; 1; }; } sub twig_handlers { if( $files) { return sub { $@="XMLGREP: FOUND"; die; }; } else { return sub { my( $t, $hit)= @_; foreach my $elt ( $hit->ancestors_or_self) { $elt->set_att( '#print' => 1); } 1; }; } } sub result_start { $results=1; return if( $text_only); my $enc_decl= $encoding ? qq{encoding="$encoding" } : ''; return $wrap ? qq{<?xml version="1.0" $enc_decl?>\n<$wrap $descr>\n} : ''; } sub result_end { my $result; return if( $text_only); if( !$group) { $result= "\n"; } $result .= qq{</$wrap>\n} if( $wrap); return $result; } sub file_result_start { $file_results=1; return if( $text_only); my $result; $result= qq{<$group filename="$current_file">}; if( !$pretty_print) { $result.= "\n"; } return $result; } sub file_result_end { $file_results=0; return '' if( $text_only); return qq{\n</$group>\n}; } __END__ =head1 NAME xml_grep - grep XML files looking for specific elements =head1 SYNOPSYS xml_grep [options] <file list> or xml_grep <xpath expression> <file list> By default you can just give C<xml_grep> an XPath expression and a list of files, and get an XML file with the result. This is equivalent to writing xml_grep --group_by_file file --pretty_print indented --cond <file list> =head1 OPTIONS =over 4 =item B<--help> brief help message =item B<--man> full documentation =item B<--Version> display the tool version =item B<--root> <cond> look for and return xml chunks matching <cond> if neither C<--root> nor C<--file> are used then the element(s) that trigger the C<--cond> option is (are) used. If C<--cond> is not used then all elements matching the <cond> are returned several C<--root> can be provided =item B<--cond> <cond> return the chunks (or file names) only if they contain elements matching <cond> several C<--cond> can be provided (in which case they are OR'ed) =item B<--files> return only file names (do not generate an XML output) usage of this option precludes using any of the options that define the XML output: C<--roots>, C<--encoding>, C<--wrap>, C<--group_by_file> or C<--pretty_print> =item B<--count> return only the number of matches in each file usage of this option precludes using any of the options that define the XML output: C<--roots>, C<--encoding>, C<--wrap>, C<--group_by_file> or C<--pretty_print> =item B<--strict> without this option parsing errors are reported to STDOUT and the file skipped =item B<--date> when on (by default) the wrapping element get a C<date> attribute that gives the date the tool was run. with C<--nodate> this attribute is not added, which can be useful if you need to compare 2 runs. =item B<--encoding> <enc> encoding of the xml output (utf-8 by default) =item B<--nb_results> <nb> output only <nb> results =item B<--by_file> output only <nb> results by file =item B<--wrap> <tag> wrap the xml result in the provided tag (defaults to 'xml_grep') If wrap is set to an empty string (C<--wrap ''>) then the xml result is not wrapped at all. =item B<--nowrap> same as using C<--wrap ''>: the xml result is not wrapped. =item B<--descr> <string> attributes of the wrap tag (defaults to C<< version="<VERSION>" date="<date>" >>) =item B<--group_by_file> <optional_tag> wrap results for each files into a separate element. By default that element is named C<file>. It has an attribute named C<filename> that gives the name of the file. the short version of this option is B<-g> =item B<--exclude> <condition> same as using C<-v> in grep: the elements that match the condition are excluded from the result, the input file(s) is (are) otherwise unchanged the short form of this option is B<-v> =item B<--pretty_print> <optional_style> pretty print the output using XML::Twig styles ('C<indented>', 'C<record>' or 'C<record_c>' are probably what you are looking for) if the option is used but no style is given then 'C<indented>' is used short form for this argument is B<-s> =item B<--text_only> Displays the text of the results, one by line. =item B<--html> Allow HTML input, files are converted using HTML::TreeBuilder =item B<--Tidy> Allow HTML input, files are converted using HTML::Tidy =back =head2 Condition Syntax <cond> is an XPath-like expression as allowed by XML::Twig to trigger handlers. examples: 'para' 'para[@compact="compact"]' '*[@urgent]' '*[@urgent="1"]' 'para[string()="WARNING"]' see XML::Twig for a more complete description of the <cond> syntax options are processed by Getopt::Long so they can start with '-' or '--' and can be abbreviated (C<-r> instead of C<--root> for example) =head1 DESCRIPTION B<xml_grep> does a grep on XML files. Instead of using regular expressions it uses XPath expressions (in fact the subset of XPath supported by XML::Twig) the results can be the names of the files or XML elements containing matching elements. =head1 SEE ALSO XML::Twig Getopt::Long =head1 LICENSE This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 AUTHOR Michel Rodriguez <mirod@xmltwig.com> ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������XML-Twig-3.52/MANIFEST������������������������������������������������������������������������������0000644�0001750�0001750�00000020170�13015347632�014412� 0����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������MANIFEST Makefile.PL README Changes Twig_pm.slow Twig.pm Twig/XPath.pm speedup filter_for_5.005 check_optional_modules tools/xml_pp/xml_pp tools/xml_grep/xml_grep tools/xml_spellcheck/xml_spellcheck tools/xml_split/xml_split tools/xml_merge/xml_merge t/latin1_accented_char.iso-8859-1 t/test1.t t/test2.t t/test2_1.exp t/test2_1.res t/test2_1.xml t/test2_2.dtd t/test2_2.exp t/test2_2.res t/test2_2.xml t/test2_3.res t/test3.t t/test4.t t/test5.t t/is_field.t t/test_nav.t t/test_additional.t t/test_class_methods.t t/test_class_selector.t t/test_with_lwp.t t/test_with_lwp.xml t/test_with_lwp_not_wf.xml t/test_attregexp_cond.t t/test_xpath_cond.t t/test_erase.t t/test_even_more_coverage.t t/test_keep_atts_order.t t/test_mark.t t/test_ignore_elts.t t/test_cdata.t t/test_twig_roots.t t/test_spaces.t t/test_simplify.t t/test_entities.t t/test_pi_handler.t t/test_comment_handler.t t/test_pos.t t/test_variables.t t/test_drop_comments.t t/test_unique_xpath.t t/dummy.dtd t/xmlxpath_01basic.t t/xmlxpath_02descendant.t t/xmlxpath_03star.t t/xmlxpath_04pos.t t/xmlxpath_05attrib.t t/xmlxpath_06attrib_val.t t/xmlxpath_07count.t t/xmlxpath_08name.t t/xmlxpath_09a_string_length.t t/xmlxpath_09string_length.t t/xmlxpath_10pipe.t t/xmlxpath_12axisdescendant.t t/xmlxpath_13axisparent.t t/xmlxpath_14axisancestor.t t/xmlxpath_15axisfol_sib.t t/xmlxpath_16axisprec_sib.t t/xmlxpath_17axisfollowing.t t/xmlxpath_18axispreceding.t t/xmlxpath_19axisd_or_s.t t/xmlxpath_20axisa_or_s.t t/xmlxpath_21allnodes.t t/xmlxpath_22name_select.t t/xmlxpath_23func.t t/xmlxpath_24namespaces.t t/xmlxpath_25scope.t t/xmlxpath_26predicate.t t/xmlxpath_28ancestor2.t t/xmlxpath_29desc_with_predicate.t t/xmlxpath_30lang.t t/xmlxpath_31vars.t t/xmlxpath_test_with_handlers.t t/xmlxpath_xpath_cond.t t/xmlxpath_additional.t t/xmlxpath_test_twig_roots.t t/xmlxpath_nav.t t/xmlxpath_test1.t t/xmlxpath_tools.pm t/test_errors.t t/test_safe_encode.t t/pod.t t/pod_coverage.t t/test_expand_external_entities.t t/test_expand_external_entities.xml t/test_expand_external_entities.dtd t/test_need_io_scalar.t t/test_need_use_bytes.t t/test_need_3_args_open.t t/test_bugs_3_15.t t/test_bugs_3_18.t t/test_bugs_3_19.t t/test_bugs_3_21.t t/test_bugs_3_22.t t/test_error_with_unicode_layer t/test_new_features_3_15.t t/test_new_features_3_16.t t/test_new_features_3_18.t t/test_new_features_3_22.t t/test_new_features_3_22.xml t/test_new_features_3_22.html t/tests_3_23.t t/test_3_24.t t/test_3_26.t t/test_3_27.t t/test_3_30.t t/test_3_32.t t/test_3_35.t t/test_3_36.t t/test_3_38.t t/test_3_39.t t/test_3_40.t t/test_3_41.t t/test_3_42.t t/test_3_44.t t/test_3_45.t t/test_3_47.t t/test_3_48.t t/test_3_50.t t/test_changes.t t/test_memory.t t/test_wrapped.t t/test_xml_split.t t/test_xml_split_g.t t/test_xml_split.xml t/test_xml_split_entities.xml t/test_xml_split_w_decl.xml t/test_xml_split/test_xml_split_expected-1-00.xml t/test_xml_split/test_xml_split_expected-1-01.xml t/test_xml_split/test_xml_split_expected-1-02.xml t/test_xml_split/test_xml_split_expected-1-03.xml t/test_xml_split/test_xml_split_expected-1-04.xml t/test_xml_split/test_xml_split_expected-1-05.xml t/test_xml_split/test_xml_split_expected-2-00.xml t/test_xml_split/test_xml_split_expected-2-01.xml t/test_xml_split/test_xml_split_expected-2-02.xml t/test_xml_split/test_xml_split_expected-2-03.xml t/test_xml_split/test_xml_split_expected-2-04.xml t/test_xml_split/test_xml_split_expected-2-05.xml t/test_xml_split/test_xml_split_expected-3-00.xml t/test_xml_split/test_xml_split_expected-3-01.xml t/test_xml_split/test_xml_split_expected-3-02.xml t/test_xml_split/test_xml_split_expected-3-03.xml t/test_xml_split/test_xml_split_expected-3-04.xml t/test_xml_split/test_xml_split_expected-3-05.xml t/test_xml_split/test_xml_split_expected-3-06.xml t/test_xml_split/test_xml_split_expected-3-07.xml t/test_xml_split/test_xml_split_expected-3-08.xml t/test_xml_split/test_xml_split_expected-3-09.xml t/test_xml_split/test_xml_split_expected-4-00.xml t/test_xml_split/test_xml_split_expected-4-01.xml t/test_xml_split/test_xml_split_expected-4-02.xml t/test_xml_split/test_xml_split_expected-4-03.xml t/test_xml_split/test_xml_split_expected-4-04.xml t/test_xml_split/test_xml_split_expected-4-05.xml t/test_xml_split/test_xml_split_expected-4-06.xml t/test_xml_split/test_xml_split_expected-4-07.xml t/test_xml_split/test_xml_split_expected-4-08.xml t/test_xml_split/test_xml_split_expected-4-09.xml t/test_xml_split/test_xml_split_expected-5-00.xml t/test_xml_split/test_xml_split_expected-5-01.xml t/test_xml_split/test_xml_split_expected-5-02.xml t/test_xml_split/test_xml_split_expected-5-03.xml t/test_xml_split/test_xml_split_expected-6-00.xml t/test_xml_split/test_xml_split_expected-6-01.xml t/test_xml_split/test_xml_split_expected-6-02.xml t/test_xml_split/test_xml_split_expected-6-03.xml t/test_xml_split/test_xml_split_expected-7-00.xml t/test_xml_split/test_xml_split_expected-7-01.xml t/test_xml_split/test_xml_split_expected-7-02.xml t/test_xml_split/test_xml_split_expected-8-00.xml t/test_xml_split/test_xml_split_expected-8-01.xml t/test_xml_split/test_xml_split_expected-8-02.xml t/test_xml_split/test_xml_split_expected-9-00.xml t/test_xml_split/test_xml_split_expected-9-01.xml t/test_xml_split/test_xml_split_expected-9-02.xml t/test_xml_split/test_xml_split_expected-9-03.xml t/test_xml_split/test_xml_split_expected-9-04.xml t/test_xml_split/test_xml_split_expected-9-05.xml t/test_xml_split/test_xml_split_expected-10-00.xml t/test_xml_split/test_xml_split_expected-10-01.xml t/test_xml_split/test_xml_split_expected-10-02.xml t/test_xml_split/test_xml_split_expected-10-03.xml t/test_xml_split/test_xml_split_expected-10-04.xml t/test_xml_split/test_xml_split_expected-10-05.xml t/test_xml_split/test_xml_split_expected-11-00.xml t/test_xml_split/test_xml_split_expected-11-01.xml t/test_xml_split/test_xml_split_expected-12-00.xml t/test_xml_split/test_xml_split_expected-12-01.xml t/test_xml_split/test_xml_split_expected-13-00.xml t/test_xml_split/test_xml_split_expected-13-01.xml t/test_xml_split/test_xml_split_expected-13-02.xml t/test_xml_split/test_xml_split_expected-14-00.xml t/test_xml_split/test_xml_split_expected-14-01.xml t/test_xml_split/test_xml_split_expected-14-02.xml t/test_xml_split/test_xml_split_expected-15-00.xml t/test_xml_split/test_xml_split_expected-15-01.xml t/test_xml_split/test_xml_split_expected-15-02.xml t/test_xml_split/test_xml_split_expected-16-00.xml t/test_xml_split/test_xml_split_expected-16-01.xml t/test_xml_split/test_xml_split_expected-16-02.xml t/test_xml_split/test_xml_split_expected-16-03.xml t/test_xml_split/test_xml_split_expected-16-04.xml t/test_xml_split/test_xml_split_expected-16-05.xml t/test_xml_split/test_xml_split_expected-17-00.xml t/test_xml_split/test_xml_split_expected-17-01.xml t/test_xml_split/test_xml_split_expected-17-02.xml t/test_xml_split/test_xml_split_expected-17-03.xml t/test_xml_split/test_xml_split_expected-17-04.xml t/test_xml_split/test_xml_split_expected-17-05.xml t/test_xml_split/test_xml_split_expected-17-06.xml t/test_xml_split/test_xml_split_expected-17-07.xml t/test_xml_split/test_xml_split_expected-17-08.xml t/test_xml_split/test_xml_split_expected-17-09.xml t/test_xml_split/test_xml_split_expected-18-00.xml t/test_xml_split/test_xml_split_expected-18-01.xml t/test_xml_split/test_xml_split_expected-18-02.xml t/test_xml_split/test_xml_split_expected-18-03.xml t/test_xml_split/test_xml_split_expected-19-00.xml t/test_xml_split/test_xml_split_expected-19-01.xml t/test_xml_split/test_xml_split_expected-19-02.xml t/test_xml_split/test_xml_split_expected-19-03.xml t/test_xml_split/test_xml_split_expected-19-04.xml t/test_xml_split/test_xml_split_expected-19-05.xml t/test_xml_split/test_xml_split_expected-20-00.xml t/test_xml_split/test_xml_split_expected-20-01.xml t/test_xml_split/test_xml_split_expected-21-00.xml t/test_xml_split/test_xml_split_expected-21-01.xml t/test_xml_split/test_xml_split_expected-21-02.xml t/test_xml_split/test_xml_split_expected-21-03.xml t/test_autoencoding_conversion.t t/tools.pm t/zz_dump_config.t t/test_kwalitee.t t/test_meta_json.t META.yml Module YAML meta-data (added by MakeMaker) META.json Module JSON meta-data (added by MakeMaker) ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������XML-Twig-3.52/speedup�������������������������������������������������������������������������������0000644�0001750�0001750�00000011066�13015053270�014645� 0����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/perl my $FIELD = join( '|', qw( parent first_child last_child prev_sibling next_sibling pcdata cdata ent data target cdata pcdata comment flushed)); my $PRIVATE = join( '|', qw( parent first_child last_child prev_sibling next_sibling pcdata cdata comment extra_data_in_pcdata extra_data_before_end_tag ) ); # _$private is inlined my $FORMER = join( '|', qw( parent prev_sibling next_sibling)); # former_$former is inlined my $SET_FIELD = join( '|', qw( first_child next_sibling ent data pctarget comment flushed)); my $SET_NOT_EMPTY= join( '|', qw( pcdata cdata comment)); # set the field # depending on the version of perl use either qr or "" print STDERR "perl version is $]\n"; my $var= '(\$[a-z_]+(?:\[\d\])?|\$t(?:wig)?->root|\$t(?:wig)?->twig_current|\$t(?:wig)?->\{\'?twig_root\'?\}|\$t(?:wig)?->\{\'?twig_current\'?\})'; my $set_to = '(?:undef|\$\w+|\$\w+->\{\w+\}|\$\w+->\w+|\$\w+->\w+\([^)]+\))'; my $elt = '\$(?:elt|new_elt|child|cdata|ent|_?parent|twig_current|next_sibling|first_child|prev_sibling|last_child|ref|elt->_parent)'; my %gi2index=( '', 0, PCDATA => 1, CDATA => 2, PI => 3, COMMENT => 4, ENT => 5); (my $version= $])=~ s{\.}{}g; while( <>) { if( $] <= 5.005) { s{qr/(.*?)/}{"$1"} }; # when finding a comment # perl > 5.8 or # perl < 5.5, process accordingly if( my( $op, $v, $mv)= m{#\s*(>|<|>=|<=)\s*perl\s*5\.(\d+)(?:\.(\d+))?\s*}) { $v= sprintf( "5%03d%03d", $v, $mv || 0); my $comp= "$version $op $v"; if( ! eval $comp) { print "#$_"; next; } else { s{#[^#]*\n}{\n} if m{^=encoding}; } } if( /=/) { s/$var->_children/do { my \$elt= $1; my \@children=(); my \$child= \$elt->_first_child; while( \$child) { push \@children, \$child; \$child= \$child->_next_sibling; } \@children; }/; } s/$var->set_gi\(\s*(PCDATA|CDATA|PI|COMMENT|ENT)\s*\)/$1\->{gi}= $gi2index{$2}/; s/$var->del_(twig_current)/delete $1\->{'$2'}/g; s/$var->set_(twig_current)/$1\->{'$2'}=1/g; s/$var->_del_(flushed)/delete $1\->{'$2'}/g; s/$var->_set_(flushed)/$1\->{'$2'}=1/g; s/$var->_(flushed)/$1\->{'$2'}/g; s/$var->set_($SET_FIELD)\(([^)]*)\)/$1\->\{$2\}= $3/g; s/$var->($FIELD)\b(?!\()/$1\->\{$2\}/g; #s/$var->_($PRIVATE)\b(?!\()/$1\->\{$2\}/g; s/$var->_($PRIVATE)\b(\s*\(\s*\))?(?!\s*\()/$1\->\{$2\}/g; s{($elt)->former_($FORMER)}{($1\->{former} && $1\->{former}\->{$2})}g; s{($elt)->set_(parent|prev_sibling)\(\s*($set_to)\s*\)}{$1\->\{$2\}=$3; if( \$XML::Twig::weakrefs) { weaken( $1\->\{$2\});} }g; s{($elt)->set_(first_child)\(\s*($set_to)\s*\)}{ $1\->set_not_empty; $1\->\{$2\}=$3; }g; s{($elt)->set_(next_sibling)\(\s*($set_to)\s*\)}{ $1\->\{$2\}=$3; }g; s{($elt)->set_(last_child)\(\s*($set_to)\s*\)}{ $1\->set_not_empty; $1\->\{$2\}=$3; if( \$XML::Twig::weakrefs) { weaken( $1\->\{$2\});} }g; s/$var->atts/$1\->{att}/g; s/$var->append_(pcdata|cdata)\(([^)]*)\)/$1\->\{$2\}.= $3/g; s/$var->set_($SET_NOT_EMPTY)\(([^)]*)\)/$1\->\{$2\}= (delete $1->\{empty\} || 1) && $3/g; s/$var->_set_($SET_NOT_EMPTY)\s*\(([^)]*)\)/$1\->{$2}= $3/g; s/(\$[a-z][a-z_]*(?:\[\d\])?)->gi/\$XML::Twig::index2gi\[$1\->{'gi'}\]/g; s/$var->id/$1\->{'att'}->{\$ID}/g; s/$var->att\(\s*([^)]+)\)/$1\->{'att'}->\{$2\}/g; s/(\$[a-z][a-z_]*(?:\[\d\])?)->is_pcdata/(exists $1\->{'pcdata'})/g; s/(\$[a-z][a-z_]*(?:\[\d\])?)->is_cdata/(exists $1\->{'cdata'})/g; s/$var->is_pi/(exists $1\->{'target'})/g; s/$var->is_comment/(exists $1\->{'comment'})/g; s/$var->is_ent/(exists $1\->{'ent'})/g; s/(\$,a-z][a-z_]*(?:\[\d\])?)->is_text/((exists $1\->{'pcdata'}) || (exists $1\->{'cdata'}))/g; s/$var->is_empty/$1\->{'empty'}/g; s/$var->set_empty(?:\(([^)]*)\))?(?!_)/"$1\->{empty}= " . ($2 || 1)/ge; s/$var->set_not_empty/delete $1\->{empty}/g; #s/$var->set_not_empty/delete $1\->{empty}/g; s/$var->_is_private/( (substr( \$XML::Twig::index2gi\[$1\->{'gi'}\], 0, 1) eq '#') && (substr( \$XML::Twig::index2gi\[$1\->{'gi'}\], 0, 9) ne '#default:') )/g; #s/_is_private_name\(\s*$var\s*\)/( (substr( $1, 0, 1) eq '#') && (substr( $1, 0, 9) ne '#default:') )/g; s/_is_private_name\(\s*$var\s*\)/( $1=~ m{^#(?!default:)} )/g; s{_is_fh\(\s*$var\)}{isa( $1, 'GLOB') || isa( $1, 'IO::Scalar')}g; # $var->set_gi( $gi): set the gi, but if it doesn't exist, call the original set_gi s/$var->set_gi\s*\(\s*([^)]*)\s*\)/$1\->{gi}=\$XML::Twig::gi2index{$2} or $1->set_gi( $2)/g; s/$var->xml_string/$1->sprint( 1)/g; print $_ ; } ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������XML-Twig-3.52/META.json�����������������������������������������������������������������������������0000664�0001750�0001750�00000002012�13015347632�014677� 0����������������������������������������������������������������������������������������������������ustar �mrodrigu������������������������mrodrigu���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������{ "abstract" : "XML, The Perl Way", "author" : [ "Michel Rodriguez <mirod@cpan.org>" ], "dynamic_config" : 1, "generated_by" : "ExtUtils::MakeMaker version 7.1001, CPAN::Meta::Converter version 2.150005", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" }, "name" : "XML-Twig", "no_index" : { "directory" : [ "t", "inc" ] }, "prereqs" : { "build" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "configure" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "runtime" : { "requires" : { "XML::Parser" : "2.23" } } }, "release_status" : "stable", "resources" : { "repository" : { "url" : "http://github.com/mirod/xmltwig" } }, "version" : "3.52", "x_serialization_backend" : "JSON::PP version 2.27300" } ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������