', 'font face gothic' );
$s->allow(qw[ U ]);
#use Data::Dumper;warn $/,Dumper($s);
is( $s->scrub($html), q[link bold UNDERLINE ],'only U');
$s->allow(qw[ B U ]);
#use Data::Dumper;warn $/,Dumper($s);
is( $s->scrub($html), q[link bold UNDERLINE ],'B and U');
$s->allow(qw[ A B ]);
$s->deny('U');
$s->default(0,{ '*'=> 1});
#use Data::Dumper;warn $/,Dumper($s);
is( $s->scrub($html), q[link bold UNDERLINE ],'A and B');
$s = HTML::Scrubber->new(
default => [ 1, { '*' => 1 } ]
);
is( $s->scrub($html), q[link
bold UNDERLINE ], 'A B U and BR');
#use Data::Dumper;warn $/,Dumper($s);
02_basic.t 100644 000765 000024 13556 12226003500 15441 0 ustar 00nigel staff 000000 000000 HTML-Scrubber-0.11/t # Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl test.pl'
#########################
# change 'tests => 1' to 'tests => last_test_to_print';
use Test;
BEGIN { plan tests => 77 }
use HTML::Scrubber;
ok(1); # If we made it this far, we're ok. # test 1
#########################
# Insert your test code below, the Test module is use()ed here so read
# its man page ( perldoc Test ) for help writing this test script.
my $html = q[
bold <
underlined
LINK
];
my $scrubber = HTML::Scrubber->new();
ok($scrubber); # test 2
ok( !$scrubber->default() ); # test 3
ok( !$scrubber->comment() ); # test 4
ok( !$scrubber->process() ); # test 5
ok( !$scrubber->allow(qw[ p b i u hr br ]) ); # test 6
$scrubber = $scrubber->scrub($html);
ok($scrubber); # test 7
ok( $scrubber !~ /href/i ); # test 8
ok( $scrubber !~ /Align/i ); # test 9
ok( $scrubber !~ /\Q mid1 mid2 end];
isa_ok($s, 'HTML::Scrubber');
is( $s->comment, 0, 'comment off by default');
is( $s->process, 0, 'process off by default');
is( $s->scrub($html), 'start mid1 mid2 end');
$s->comment(1);
is( $s->comment, 1, 'comment on');
is( $s->scrub($html), 'start mid1 mid2 end', 'comment on');
$s->process(1);
is( $s->process, 1, 'process on');
is( $s->scrub($html), 'start mid1 mid2 end', 'process on'); 06_scrub_file.t 100644 000765 000024 3425 12226003500 16453 0 ustar 00nigel staff 000000 000000 HTML-Scrubber-0.11/t # perl Makefile.PL && nmake realclean && cls && perl Makefile.PL && nmake test
use strict;
use File::Temp qw/ tempfile tempdir /;
use Test::More tests => 10;
BEGIN { $^W = 1 }
use_ok('HTML::Scrubber');
my $s = HTML::Scrubber->new;
my $html = q[hi
start mid1 mid2 end];
isa_ok( $s, 'HTML::Scrubber' );
my $tmpdir = tempdir( CLEANUP => 1 );
SKIP: {
skip "no writable temporary directory found", 6
unless length $tmpdir
and -d $tmpdir;
my $template = 'html-scrubber-XXXX';
my ( $tfh, $tmpfile ) = tempfile( $template, DIR => $tmpdir, SUFFIX => '.html' );
my $r = $s->scrub( $html, $tmpfile );
$r = "Error: \$@=$@ \$!=$!" unless $r;
is( $r, 1, "scrub(\$html,\$tmpfile=$tmpfile)" );
local *FILIS;
open FILIS, "+>$tmpfile" or die "can't write to $tmpfile";
$r = $s->scrub( $html, \*FILIS );
$r = "Error: \$@=$@ \$!=$!" unless $r;
is( $r, 1, q[scrub($html,\*FILIS)] );
seek *FILIS, 0, 0;
$r = join '', readline *FILIS;
is( $r, "histart mid1 mid2 end", "FILIS has the right stuff" );
is( close(FILIS), 1, q[close(FILIS)] );
my ( $tfh2, $tmpfile2 ) = tempfile( $template, DIR => $tmpdir, SUFFIX => '.html' );
$r = $s->scrub_file( $tmpfile, "$tmpfile2" );
$r = "Error: \$@=$@ \$!=$!" unless $r;
is( $r, 1, qq[scrub_file(\$tmpfile,"\$tmpfile2"=$tmpfile2)] );
open FILIS, "+>$tmpfile2" or die "can't write to $tmpfile";
$r = $s->scrub_file( $tmpfile, \*FILIS );
$r = "Error: \$@=$@ \$!=$!" unless $r;
is( $r, 1, q[scrub_file($tmpfile,\*FILIS)] );
seek *FILIS, 0, 0;
$r = join '', readline *FILIS;
is( $r, "histart mid1 mid2 end", "FILIS has the right stuff" );
is( close(FILIS), 1, q[close(FILIS)] );
}
rt19063_xhtml.t 100644 000765 000024 565 12226003500 16257 0 ustar 00nigel staff 000000 000000 HTML-Scrubber-0.11/t # Tests related to RT25477 - https://rt.cpan.org/Public/Bug/Display.html?id=25477
use strict;
use warnings;
use File::Spec;
use Test::More;
use_ok('HTML::Scrubber');
use HTML::Scrubber;
my $scrubber = HTML::Scrubber->new;
$scrubber->default(1);
is(
$scrubber->scrub('
'),
'
',
"correct result"
);
done_testing;
author-critic.t 100644 000765 000024 666 12226003500 16572 0 ustar 00nigel staff 000000 000000 HTML-Scrubber-0.11/t #!perl
BEGIN {
unless ($ENV{AUTHOR_TESTING}) {
require Test::More;
Test::More::plan(skip_all => 'these tests are for testing by the author');
}
}
use strict;
use warnings;
use Test::More;
use English qw(-no_match_vars);
eval "use Test::Perl::Critic";
plan skip_all => 'Test::Perl::Critic required to criticise code' if $@;
Test::Perl::Critic->import( -profile => "perlcritic.rc" ) if -e "perlcritic.rc";
all_critic_ok();
04_style_script.t 100644 000765 000024 1357 12226003500 17062 0 ustar 00nigel staff 000000 000000 HTML-Scrubber-0.11/t # perl Makefile.PL && nmake realclean && cls && perl Makefile.PL && nmake test
use strict;
use Test::More tests => 9;
BEGIN { $^W = 1 }
use_ok( 'HTML::Scrubber' );
my $s = HTML::Scrubber->new;
my $html = q[start middle end];
isa_ok($s, 'HTML::Scrubber');
is( $s->script, 0, 'script off by default');
is( $s->style, 0, 'style off by default');
is( $s->scrub($html), 'start middle end', 'default (no style no script)');
$s->script(1);
is( $s->script, 1, 'script on');
is( $s->scrub($html), 'start middle in the script end', 'script off');
$s->style(1);
is( $s->style, 1, 'style on');
is( $s->scrub($html), 'start in the style middle in the script end', 'style off and script off'); release-no-tabs.t 100644 000765 000024 600 12226003500 16762 0 ustar 00nigel staff 000000 000000 HTML-Scrubber-0.11/t
BEGIN {
unless ($ENV{RELEASE_TESTING}) {
require Test::More;
Test::More::plan(skip_all => 'these tests are for release candidate testing');
}
}
use strict;
use warnings;
# this test was generated with Dist::Zilla::Plugin::NoTabsTests 0.04
use Test::More 0.88;
use Test::NoTabs;
my @files = (
'lib/HTML/Scrubber.pm'
);
notabs_ok($_) foreach @files;
done_testing;
HTML 000755 000765 000024 0 12226003500 14547 5 ustar 00nigel staff 000000 000000 HTML-Scrubber-0.11/lib Scrubber.pm 100644 000765 000024 43701 12226003500 17041 0 ustar 00nigel staff 000000 000000 HTML-Scrubber-0.11/lib/HTML package HTML::Scrubber;
# ABSTRACT: Perl extension for scrubbing/sanitizing html
use strict;
use warnings;
use HTML::Parser 3.47 ();
use HTML::Entities;
our( @_scrub, @_scrub_fh );
our $VERSION = '0.11'; # VERSION
our $AUTHORITY = 'cpan:NIGELM'; # AUTHORITY
# my my my my, these here to prevent foolishness like
# http://perlmonks.org/index.pl?node_id=251127#Stealing+Lexicals
(@_scrub )= ( \&_scrub, "self, event, tagname, attr, attrseq, text");
(@_scrub_fh )= ( \&_scrub_fh, "self, event, tagname, attr, attrseq, text");
sub new {
my $package = shift;
my $p = HTML::Parser->new(
api_version => 3,
default_h => \@_scrub,
marked_sections => 0,
strict_comment => 0,
unbroken_text => 1,
case_sensitive => 0,
boolean_attribute_value => undef,
empty_element_tags => 1,
);
my $self = {
_p => $p,
_rules => {
'*' => 0,
},
_comment => 0,
_process => 0,
_r => "",
_optimize => 1,
_script => 0,
_style => 0,
};
$p->{"\0_s"} = bless $self, $package;
return $self unless @_;
my(%args)= @_;
for my $f( qw[ default allow deny rules process comment ] ) {
next unless exists $args{$f};
if( ref $args{$f} ) {
$self->$f( @{ $args{$f} } ) ;
} else {
$self->$f( $args{$f} ) ;
}
}
return $self;
}
sub comment {
return
$_[0]->{_comment}
if @_ == 1;
$_[0]->{_comment} = $_[1];
return;
}
sub process {
return
$_[0]->{_process}
if @_ == 1;
$_[0]->{_process} = $_[1];
return;
}
sub script {
return
$_[0]->{_script}
if @_ == 1;
$_[0]->{_script} = $_[1];
return;
}
sub style {
return
$_[0]->{_style}
if @_ == 1;
$_[0]->{_style} = $_[1];
return;
}
sub allow {
my $self = shift;
for my $k(@_){
$self->{_rules}{lc $k}=1;
}
$self->{_optimize} = 1; # each time a rule changes, reoptimize when parse
return;
}
sub deny {
my $self = shift;
for my $k(@_){
$self->{_rules}{lc $k} = 0;
}
$self->{_optimize} = 1; # each time a rule changes, reoptimize when parse
return;
}
sub rules{
my $self = shift;
my(%rules)= @_;
for my $k(keys %rules) {
$self->{_rules}{lc $k} = $rules{$k};
}
$self->{_optimize} = 1; # each time a rule changes, reoptimize when parse
return;
}
sub default {
return
$_[0]->{_rules}{'*'}
if @_ == 1;
$_[0]->{_rules}{'*'} = $_[1] if defined $_[1];
$_[0]->{_rules}{'_'} = $_[2] if defined $_[2] and ref $_[2];
$_[0]->{_optimize} = 1; # each time a rule changes, reoptimize when parse
return;
}
sub scrub_file {
if(@_ > 2){
return unless defined $_[0]->_out($_[2]);
} else {
$_[0]->{_p}->handler( default => @_scrub );
}
$_[0]->_optimize() ;#if $_[0]->{_optimize};
$_[0]->{_p}->parse_file($_[1]);
return delete $_[0]->{_r} unless exists $_[0]->{_out};
print { $_[0]->{_out} } $_[0]->{_r} if length $_[0]->{_r};
delete $_[0]->{_out};
return 1;
}
sub scrub {
if(@_ > 2){
return unless defined $_[0]->_out($_[2]);
} else {
$_[0]->{_p}->handler( default => @_scrub );
}
$_[0]->_optimize();# if $_[0]->{_optimize};
$_[0]->{_p}->parse($_[1]);
$_[0]->{_p}->eof();
return delete $_[0]->{_r} unless exists $_[0]->{_out};
delete $_[0]->{_out};
return 1;
}
sub _out {
my($self, $o ) = @_;
unless( ref $o and ref \$o ne 'GLOB') {
open my $F, '>', $o or return;
binmode $F;
$self->{_out} = $F;
} else {
$self->{_out} = $o;
}
$self->{_p}->handler( default => @_scrub_fh );
return 1;
}
sub _validate {
my($s, $t, $r, $a, $as) = @_;
return "<$t>" unless %$a;
$r = $s->{_rules}->{$r};
my %f;
for my $k( keys %$a ) {
my $check = exists $r->{$k}? $r->{$k} : exists $r->{'*'}? $r->{'*'} : next;
if( ref $check eq 'CODE' ) {
my @v = $check->( $s, $t, $k, $a->{$k}, $a, \%f );
next unless @v;
$f{$k} = shift @v;
} elsif( ref $check || length($check) > 1 ) {
$f{$k} = $a->{$k} if $a->{$k} =~ m{$check};
} elsif( $check ) {
$f{$k} = $a->{$k};
}
}
if( %f ){
my %seen;
return "<$t $r>"
if $r = join ' ',
map {
defined $f{$_}
? qq[$_="].encode_entities($f{$_}).q["]
: $_; # boolean attribute (TODO?)
} grep {
exists $f{$_} and !$seen{$_}++;
} @$as;
}
return "<$t>";
}
sub _scrub_str {
my ( $p, $e, $t, $a, $as, $text ) = @_;
my $s = $p->{"\0_s"};
my $outstr = '';
if ( $e eq 'start' ) {
if ( exists $s->{_rules}->{$t} ) # is there a specific rule
{
if ( ref $s->{_rules}->{$t} ) # is it complicated?(not simple;)
{
$outstr .= $s->_validate( $t, $t, $a, $as );
}
elsif ( $s->{_rules}->{$t} ) # validate using default attribute rule
{
$outstr .= $s->_validate( $t, '_', $a, $as );
}
}
elsif ( $s->{_rules}->{'*'} ) # default allow tags
{
$outstr .= $s->_validate( $t, '_', $a, $as );
}
}
elsif ( $e eq 'end' ) {
my $place = 0;
if ( exists $s->{_rules}->{$t} ) {
$place = 1 if $s->{_rules}->{$t};
}
elsif ( $s->{_rules}->{'*'} ) {
$place = 1;
}
if ( $place ) {
if ( length $text ) {
$outstr .= "$t>";
} else {
substr $s->{_r}, -1, 0, ' /';
}
}
}
elsif ( $e eq 'comment' ) {
$outstr .= $text if $s->{_comment};
}
elsif ( $e eq 'process' ) {
$outstr .= $text if $s->{_process};
}
elsif ( $e eq 'text' or $e eq 'default' ) {
$text =~ s/</g; #https://rt.cpan.org/Ticket/Attachment/8716/10332/scrubber.patch
$text =~ s/>/>/g;
$outstr .= $text;
}
elsif ( $e eq 'start_document' ) {
$outstr = "";
}
return $outstr;
}
sub _scrub_fh {
my $self = $_[0]->{"\0_s"};
print { $self->{_out} } $self->{'_r'} if length $self->{_r};
$self->{'_r'} = _scrub_str(@_);
}
sub _scrub {
$_[0]->{"\0_s"}->{_r} .= _scrub_str(@_);
}
sub _optimize {
my($self) = @_;
my( @ignore_elements ) = grep { not $self->{"_$_"} } qw(script style);
$self->{_p}->ignore_elements(@ignore_elements); # if @ is empty, we reset ;)
return unless $self->{_optimize};
#sub allow
# return unless $self->{_optimize}; # till I figure it out (huh)
if( $self->{_rules}{'*'} ){ # default allow
$self->{_p}->report_tags(); # so clear it
} else {
my(@reports) =
grep { # report only tags we want
$self->{_rules}{$_}
} keys %{
$self->{_rules}
};
$self->{_p}->report_tags( # default deny, so optimize
@reports
) if @reports;
}
# sub deny
# return unless $self->{_optimize}; # till I figure it out (huh)
my(@ignores)=
grep {
not $self->{_rules}{$_}
} grep {
$_ ne '*'
} keys %{
$self->{_rules}
};
$self->{_p}->ignore_tags( # always ignore stuff we don't want
@ignores
) if @ignores;
$self->{_optimize}=0;
return;
}
sub DESTROY {
delete $_[0]->{_p}->{"\0_s"}; # break circular reference
}
1;
#print sprintf q[ '%-12s => %s,], "$_'", $h{$_} for sort keys %h;# perl!
#perl -ne"chomp;print $_;print qq'\t\t# test ', ++$a if /ok\(/;print $/" test.pl >test2.pl
#perl -ne"chomp;print $_;if( /ok\(/ ){s/\#test \d+$//;print qq'\t\t# test ', ++$a }print $/" test.pl >test2.pl
#perl -ne"chomp;if(/ok\(/){s/# test .*$//;print$_,qq'\t\t# test ',++$a}else{print$_}print$/" test.pl >test2.pl
__END__
=pod
=for stopwords html cpan callback homepage
=head1 NAME
HTML::Scrubber - Perl extension for scrubbing/sanitizing html
=head1 VERSION
version 0.11
=head1 SYNOPSIS
use HTML::Scrubber;
my $scrubber = HTML::Scrubber->new( allow => [ qw[ p b i u hr br ] ] );
print $scrubber->scrub('bold missing
');
# output is: bold
# more complex input
my $html = q[
a => link
br =>
b => bold
u => UNDERLINE
];
print $scrubber->scrub($html);
$scrubber->deny( qw[ p b i u hr br ] );
print $scrubber->scrub($html);
=head1 DESCRIPTION
If you want to "scrub" or "sanitize" html input in a reliable and
flexible fashion, then this module is for you.
I wasn't satisfied with HTML::Sanitizer because it is based on
HTML::TreeBuilder, so I thought I'd write something similar that
works directly with HTML::Parser.
=head1 METHODS
First a note on documentation: just study the L below.
It's all the documentation you could need
Also, be sure to read all the comments as well as
L.
If you're new to perl, good luck to you.
=head2 comment
warn "comments are ", $p->comment ? 'allowed' : 'not allowed';
$p->comment(0); # off by default
=head2 process
warn "process instructions are ", $p->process ? 'allowed' : 'not allowed';
$p->process(0); # off by default
=head2 script
warn "script tags (and everything in between) are supressed"
if $p->script; # off by default
$p->script( 0 || 1 );
B<**> Please note that this is implemented
using HTML::Parser's ignore_elements function,
so if C
two
END
is($scrubbed, <<'END', "correct result");
one
two
END
done_testing;
release-dist-manifest.t 100644 000765 000024 466 12226003500 20200 0 ustar 00nigel staff 000000 000000 HTML-Scrubber-0.11/t #!perl
BEGIN {
unless ($ENV{RELEASE_TESTING}) {
require Test::More;
Test::More::plan(skip_all => 'these tests are for release candidate testing');
}
}
use Test::More;
eval "use Test::DistManifest";
plan skip_all => "Test::DistManifest required for testing the manifest"
if $@;
manifest_ok();
release-minimum-version.t 100644 000765 000024 526 12226003500 20564 0 ustar 00nigel staff 000000 000000 HTML-Scrubber-0.11/t #!perl
BEGIN {
unless ($ENV{RELEASE_TESTING}) {
require Test::More;
Test::More::plan(skip_all => 'these tests are for release candidate testing');
}
}
use Test::More;
eval "use Test::MinimumVersion";
plan skip_all => "Test::MinimumVersion required for testing minimum versions"
if $@;
all_minimum_version_from_metayml_ok();