parse("ram=bar>");
$ok;
}
->join();
is($ok, 2);
HTML-Parser-3.83/t/linkextor-base.t 0000644 0001750 0001750 00000001474 14652213774 015471 0 ustar olaf olaf use strict;
use warnings;
use HTML::LinkExtor ();
use URI ();
use Test::More tests => 4;
# This test that HTML::LinkExtor really absolutize links correctly
# when a base URL is given to the constructor.
# Try with base URL and the $p->links interface.
my $p = HTML::LinkExtor->new(undef, "http://www.sn.no/foo/foo.html");
$p->parse(<eof;
This is link and an
.
HTML
my @links = $p->links;
# There should be 4 links in the document
is(@links, 4);
my $t;
my %attr;
for (@links) {
($t, %attr) = @$_ if $_->[0] eq 'img';
}
is($t, 'img');
is(delete $attr{src}, "http://www.sn.no/foo/img.jpg");
# there should be no more attributes
ok(!scalar(keys %attr));
HTML-Parser-3.83/t/tokeparser.t 0000644 0001750 0001750 00000006433 14652213774 014721 0 ustar olaf olaf use strict;
use warnings;
use HTML::TokeParser ();
use Test::More tests => 17;
# First we create an HTML document to test
my $file = "ttest$$.htm";
die "$file already exists" if -e $file;
{
open(my $fh, '>', $file) or die "Can't create $file: $!";
print {$fh} <<'EOT';
This is the <title>
This is the title again
And this is a link to the
Institute
process instruction >
EOT
close($fh);
}
END { unlink($file) || warn "Can't unlink $file: $!"; }
my $p;
$p = HTML::TokeParser->new($file) || die "Can't open $file: $!";
ok($p->unbroken_text);
if ($p->get_tag("foo", "title")) {
my $title = $p->get_trimmed_text;
#diag "Title: $title";
is($title, "This is the ");
}
undef($p);
my $scount = 0;
my $ecount = 0;
my $tcount = 0;
my $pcount = 0;
# Test with reference to glob
{
open(my $fh, '<', $file) || die "Can't open $file: $!";
$p = HTML::TokeParser->new($fh);
while (my $token = $p->get_token) {
$scount++ if $token->[0] eq "S";
$ecount++ if $token->[0] eq "E";
$pcount++ if $token->[0] eq "PI";
}
undef($p);
close $fh;
}
# Test with glob
{
open(my $fh, $file) || die "Can't open $file: $!";
$p = HTML::TokeParser->new($fh);
$tcount++ while $p->get_tag;
undef($p);
close $fh;
}
# Test with plain file name
$p = HTML::TokeParser->new($file) || die;
$tcount++ while $p->get_tag;
undef($p);
#diag "Number of tokens found: $tcount/2 = $scount + $ecount";
is($tcount, 34);
is($scount, 10);
is($ecount, 7);
is($pcount, 1);
is($tcount / 2, $scount + $ecount);
ok(!HTML::TokeParser->new("/noT/thEre/$$"));
$p = HTML::TokeParser->new($file) || die;
$p->get_tag("a");
my $atext = $p->get_text;
undef($p);
is($atext, "Perl\240Institute");
# test parsing of embedded document
$p = HTML::TokeParser->new(\<Title
Heading
HTML
ok($p->get_tag("h1"));
is($p->get_trimmed_text, "Heading");
undef($p);
# test parsing of large embedded documents
my $doc = "foo is bar\n\n\n" x 2022;
#use Time::HiRes qw(time);
my $start = time;
$p = HTML::TokeParser->new(\$doc);
#diag "Construction time: ", time - $start;
my $count;
while (my $t = $p->get_token) {
$count++ if $t->[0] eq "S";
}
#diag "Parse time: ", time - $start;
is($count, 2022);
$p = HTML::TokeParser->new(\<<'EOT');
This is a heading
This is some
text.
This is some more text.
This is even some more.
EOT
$p->get_tag("/h1");
my $t = $p->get_trimmed_text("br", "p");
is($t, "This is some text.");
$p->get_tag;
$t = $p->get_trimmed_text("br", "p");
is($t, "This is some more text.");
undef($p);
$p = HTML::TokeParser->new(\<<'EOT');
This is a bold heading
This is some italic text.
This is some more text.
This is even some more.
EOT
$p->get_tag("h1");
$t = $p->get_phrase;
is($t, "This is a bold heading");
$t = $p->get_phrase;
is($t, "");
$p->get_tag;
$t = $p->get_phrase;
is($t, "This is some italic text. This is some more text.");
undef($p);
HTML-Parser-3.83/t/offset.t 0000644 0001750 0001750 00000002421 14652213774 014021 0 ustar olaf olaf use strict;
use warnings;
use HTML::Parser ();
use Test::More tests => 1;
my $HTML = <<'EOT';
heisan
xmp
EOT
my $p = HTML::Parser->new(api_version => 3);
my $sum_len = 0;
my $count = 0;
my $err;
$p->handler(
default => sub {
my ($offset, $length, $offset_end, $line, $col, $text) = @_;
my $copy = $text;
$copy =~ s/\n/\\n/g;
substr($copy, 30) = "..." if length($copy) > 32;
#diag sprintf ">>> %d.%d %s", $line, $col, $copy;
if ($offset != $sum_len) {
diag "offset mismatch $offset vs $sum_len";
$err++;
}
if ($offset_end != $offset + $length) {
diag "offset_end $offset_end wrong";
$err++;
}
if ($length != length($text)) {
diag "length mismatch";
$err++;
}
if (substr($HTML, $offset, $length) ne $text) {
diag "content mismatch";
$err++;
}
$sum_len += $length;
$count++;
},
'offset,length,offset_end,line,column,text'
);
for (split(//, $HTML)) {
$p->parse($_);
}
$p->eof;
ok($count > 5 && !$err);
HTML-Parser-3.83/t/callback.t 0000644 0001750 0001750 00000002066 14652213774 014274 0 ustar olaf olaf use strict;
use warnings;
use HTML::Parser ();
use Test::More tests => 47;
my @expected;
my $p = HTML::Parser->new(
api_version => 3,
unbroken_text => 1,
default_h => [\@expected, '@{event, text}'],
);
my $doc = <<'EOT';
Hi
Ho ho
<--comment->
EOT
$p->parse($doc)->eof;
#use Data::Dump; Data::Dump::dump(@expected);
for my $i (1 .. length($doc)) {
my @t;
$p->handler(default => \@t);
$p->parse(chunk($doc, $i));
# check that we got the same stuff
#diag "X:", join(":", @t);
#diag "Y:", join(":", @expected);
is(join(":", @t), join(":", @expected));
}
sub chunk {
my $str = shift;
my $size = shift || 1;
sub {
my $res = substr($str, 0, $size);
#diag "...$res";
substr($str, 0, $size) = "";
$res;
}
}
# Test croking behaviour
$p->handler(default => []);
my $error;
{
local $@;
#<<< do not let perltidy touch this
$error = $@ || 'Error' unless eval {
$p->parse(sub { die "Hi" });
1
};
#>>>
}
like($error, qr/^Hi/);
HTML-Parser-3.83/t/argspec.t 0000644 0001750 0001750 00000010512 14652213774 014157 0 ustar olaf olaf use strict;
use warnings;
use HTML::Parser ();
use Test::More;
my $decl = '';
my $com1 = '';
my $com2 = '';
my $start = '';
my $end = '';
my $empty = "
";
my $proc = ' something completely different ?>';
my @argspec = qw(
self offset length event tagname tag token0 text
is_cdata dtext tokens tokenpos attr attrseq
);
my @result;
my $p = HTML::Parser->new(
default_h => [\@result, join(',', @argspec)],
strict_comment => 1,
xml_mode => 1
);
my @tests = ( # string, expected results
$decl => [
[
$p, 0, 52, 'declaration', 'ENTITY', '!ENTITY', 'ENTITY',
'', undef,
undef,
['ENTITY', 'nbsp', 'CDATA', '" "', '-- no-break space --'],
[2, 6, 9, 4, 16, 5, 22, 8, 31, 20], undef, undef
]
],
$com1 => [
[
$p, 0, 16, 'comment', ' Comment ', '# Comment ', ' Comment ',
'',
undef, undef, [' Comment '], [4, 9], undef, undef
]
],
$com2 => [
[
$p, 0,
30, 'comment',
' Comment ', '# Comment ',
' Comment ', '',
undef, undef,
[' Comment ', ' Comment '], [4, 9, 18, 9],
undef, undef
]
],
$start => [
[
$p, 0,
14, 'start',
'a', 'a',
'a', '',
undef, undef,
['a', 'href', '"foo"'], [1, 1, 3, 4, 8, 5],
{'href', 'foo'}, ['href']
]
],
$end => [
[
$p, 0, 4, 'end', 'a', '/a', 'a', '',
undef, undef, ['a'], [2, 1], undef, undef
]
],
$empty => [
[
$p, 0,
16, 'start',
'IMG', 'IMG',
'IMG', "
",
undef, undef,
['IMG', 'SRC', "'foo'"], [1, 3, 5, 3, 9, 5],
{'SRC', 'foo'}, ['SRC']
],
[
$p, 16, 0, 'end', 'IMG', '/IMG',
'IMG', '', undef, undef, ['IMG'], undef,
undef, undef
],
],
$proc => [
[
$p,
0,
36,
'process',
' something completely different ',
'? something completely different ',
' something completely different ',
' something completely different ?>',
undef,
undef,
[' something completely different '],
[2, 32],
undef,
undef
]
],
"$end\n$end" => [
[
$p, 0, 4, 'end', 'a', '/a', 'a', '