Email-Find-0.10/0000755000175000017500000000000010555166700012302 5ustar brandbrandEmail-Find-0.10/t/0000755000175000017500000000000010555166556012556 5ustar brandbrandEmail-Find-0.10/t/new-api.t0000644000175000017500000000146210553032776014300 0ustar brandbranduse strict; use Test::More 'no_plan'; # XXX BEGIN { use_ok('Email::Find') } my %Tests; BEGIN { %Tests = ( # 'Hahah! Use "@".+*@[132.205.7.51] and watch them cringe!' # => '"@".+*@[132.205.7.51]', 'What about "@"@foo.com?' => '"@"@foo.com', 'Eli the Beared <*@qz.to>' => '*@qz.to', # '"@"+*@[132.205.7.51]' => '+*@[132.205.7.51]', 'somelongusername@aol.com' => 'somelongusername@aol.com', '%2Fjoe@123.com' => '%2Fjoe@123.com', 'joe@123.com?subject=hello.' => 'joe@123.com', ); } while (my($text, $expect) = each %Tests) { my($orig_text) = $text; my $cb = sub { is $_[0]->address, $expect, "Found $_[1]"; return $_[1]; }; my $finder = Email::Find->new($cb); my $found = $finder->find(\$text); is $found, 1, " just one"; is $text, $orig_text, " and replaced"; } Email-Find-0.10/t/Find.t0000644000175000017500000000237010553032765013615 0ustar brandbranduse strict; use Test::More tests => 17; BEGIN { use_ok('Email::Find') } my %Tests; BEGIN { %Tests = ( # 'Hahah! Use "@".+*@[132.205.7.51] and watch them cringe!' # => '"@".+*@[132.205.7.51]', 'What about "@"@foo.com?' => '"@"@foo.com', 'Eli the Beared <*@qz.to>' => '*@qz.to', # '"@"+*@[132.205.7.51]' => '+*@[132.205.7.51]', 'somelongusername@aol.com' => 'somelongusername@aol.com', '%2Fjoe@123.com' => '%2Fjoe@123.com', 'joe@123.com?subject=hello.' => 'joe@123.com', ); } while (my($text, $expect) = each %Tests) { my($orig_text) = $text; my $found = find_emails($text, sub { is $_[0]->address, $expect, "Found $_[1]"; return $_[1] }); is $found, 1, " just one"; is $text, $orig_text, " and replaced"; } # Do all the tests again as one big block of text. my $mess_text = join "\n", keys %Tests; is find_emails($mess_text, sub { return $_[1] }), scalar keys %Tests, 'One big block'; # Tests for false positives. my @FalseTests; BEGIN { # No tests at the moment. @FalseTests = ( ); } foreach my $f_text (@FalseTests) { my $orig_text = $f_text; ok( find_emails($f_text, sub {1}) == 0, "False positive: $f_text" ); ok( $orig_text eq $f_text, " replaced" ); } Email-Find-0.10/t/addr-spec.t0000644000175000017500000000031610406771573014601 0ustar brandbrand# $Id: /mirror/monster/Email-Find/trunk/t/addr-spec.t 702 2002-01-13T12:52:05.000000Z miyagawa $ use strict; use Test::More tests => 2; BEGIN { use_ok 'Email::Find::addrspec' } ok defined $Addr_spec_re; Email-Find-0.10/lib/0000755000175000017500000000000010553225366013052 5ustar brandbrandEmail-Find-0.10/lib/Email/0000755000175000017500000000000010553225366014101 5ustar brandbrandEmail-Find-0.10/lib/Email/Find/0000755000175000017500000000000010553225366014761 5ustar brandbrandEmail-Find-0.10/lib/Email/Find/addrspec.pm0000644000175000017500000000332110406771573017106 0ustar brandbrandpackage Email::Find::addrspec; use strict; use vars qw($VERSION @EXPORT $Addr_spec_re); $VERSION = 0.09; use base qw(Exporter); @EXPORT = qw($Addr_spec_re); # This is the BNF from RFC 822 my $esc = '\\\\'; my $period = '\.'; my $space = '\040'; my $open_br = '\['; my $close_br = '\]'; my $nonASCII = '\x80-\xff'; my $ctrl = '\000-\037'; my $cr_list = '\n\015'; my $qtext = qq/[^$esc$nonASCII$cr_list\"]/; #" my $dtext = qq/[^$esc$nonASCII$cr_list$open_br$close_br]/; my $quoted_pair = qq<$esc>.qq<[^$nonASCII]>; my $atom_char = qq/[^($space)<>\@,;:\".$esc$open_br$close_br$ctrl$nonASCII]/; #" my $atom = qq<$atom_char+(?!$atom_char)>; my $quoted_str = qq<\"$qtext*(?:$quoted_pair$qtext*)*\">; #" my $word = qq<(?:$atom|$quoted_str)>; my $local_part = qq<$word(?:$period$word)*>; # This is a combination of the domain name BNF from RFC 1035 plus the # domain literal definition from RFC 822, but allowing domains starting # with numbers. my $label = q/[A-Za-z\d](?:[A-Za-z\d-]*[A-Za-z\d])?/; my $domain_ref = qq<$label(?:$period$label)*>; my $domain_lit = qq<$open_br(?:$dtext|$quoted_pair)*$close_br>; my $domain = qq<(?:$domain_ref|$domain_lit)>; # Finally, the address-spec regex (more or less) $Addr_spec_re = qr<$local_part\s*\@\s*$domain>; 1; __END__ =head1 NAME Email::Find::addrspec - exports $Addr_spec_re to Email::Find =head1 SYNOPSIS B =head1 DESCRIPTION See L for details. =head1 AUTHOR Tatsuhiko Miyagawa Emiyagawa@bulknews.netE This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 SEE ALSO L =cut Email-Find-0.10/lib/Email/Find.pm0000644000175000017500000001637310553225357015331 0ustar brandbrandpackage Email::Find; use strict; use vars qw($VERSION @EXPORT); $VERSION = "0.10"; # Need qr//. require 5.005; use base qw(Exporter); @EXPORT = qw(find_emails); use Email::Valid; use Email::Find::addrspec; use Mail::Address; sub addr_regex { $Addr_spec_re } { my $validator = Email::Valid->new( '-fudge' => 0, '-fqdn' => 1, '-local_rules' => 0, '-mxcheck' => 0, ); sub do_validate { my($self, $addr) = @_; $validator->address($addr); } } sub new { my($proto, $callback) = @_; my $class = ref $proto || $proto; bless { callback => $callback }, $class; } sub find { my($self, $r_text) = @_; my $emails_found = 0; my $re = $self->addr_regex; $$r_text =~ s{($re)}{ my($replace, $found) = $self->validate($1); $emails_found += $found; $replace; }eg; return $emails_found; } sub validate { my($self, $orig_match) = @_; my $replace; my $found = 0; # XXX Add cruft handling. my($start_cruft) = ''; my($end_cruft) = ''; if( $orig_match =~ s|([),.'";?!]+)$|| ) { #"')){ $end_cruft = $1; } if( my $email = $self->do_validate($orig_match) ) { $email = Mail::Address->new('', $email); $found++; $replace = $start_cruft . $self->{callback}->($email, $orig_match) . $end_cruft; } else { # XXX Again with the cruft! $replace = $start_cruft . $orig_match . $end_cruft; } return $replace, $found; } # backward comaptibility sub find_emails(\$&) { my($r_text, $callback) = @_; my $finder = __PACKAGE__->new($callback); $finder->find($r_text); } 1; __END__ =pod =head1 NAME Email::Find - Find RFC 822 email addresses in plain text =head1 SYNOPSIS use Email::Find; # new object oriented interface my $finder = Email::Find->new(\&callback); my $num_found - $finder->find(\$text); # good old functional style $num_found = find_emails($text, \&callback); =head1 DESCRIPTION Email::Find is a module for finding a I of RFC 822 email addresses in arbitrary text (see L). The addresses it finds are not guaranteed to exist or even actually be email addresses at all (see L), but they will be valid RFC 822 syntax. Email::Find will perform some heuristics to avoid some of the more obvious red herrings and false addresses, but there's only so much which can be done without a human. =head1 METHODS =over 4 =item new $finder = Email::Find->new(\&callback); Constructs new Email::Find object. Specified callback will be called with each email as they're found. =item find $num_emails_found = $finder->find(\$text); Finds email addresses in the text and executes callback registered. The callback is given two arguments. The first is a Mail::Address object representing the address found. The second is the actual original email as found in the text. Whatever the callback returns will replace the original text. =head1 FUNCTIONS For backward compatibility, Email::Find exports one function, find_emails(). It works very similar to URI::Find's find_uris(). =head1 EXAMPLES use Email::Find; # Simply print out all the addresses found leaving the text undisturbed. my $finder = Email::Find->new(sub { my($email, $orig_email) = @_; print "Found ".$email->format."\n"; return $orig_email; }); $finder->find(\$text); # For each email found, ping its host to see if its alive. require Net::Ping; $ping = Net::Ping->new; my %Pinged = (); my $finder = Email::Find->new(sub { my($email, $orig_email) = @_; my $host = $email->host; next if exists $Pinged{$host}; $Pinged{$host} = $ping->ping($host); }); $finder->find(\$text); while( my($host, $up) = each %Pinged ) { print "$host is ". $up ? 'up' : 'down' ."\n"; } # Count how many addresses are found. my $finder = Email::Find->new(sub { $_[1] }); print "Found ", $finder->find(\$text), " addresses\n"; # Wrap each address in an HTML mailto link. my $finder = Email::Find->new( sub { my($email, $orig_email) = @_; my($address) = $email->format; return qq|$orig_email|; }, ); $finder->find(\$text); =head1 SUBCLASSING If you want to change the way this module works in finding email address, you can do it by making your subclass of Email::Find, which overrides C and C method. For example, the following class can additionally find email addresses with dot before at mark. This is illegal in RFC822, see L for details. package Email::Find::Loose; use base qw(Email::Find); use Email::Valid::Loose; # should return regex, which Email::Find will use in finding # strings which are "thought to be" email addresses sub addr_regex { return $Email::Valid::Loose::Addr_spec_re; } # should validate $addr is a valid email or not. # if so, return the address as a string. # else, return undef sub do_validate { my($self, $addr) = @_; return Email::Valid::Loose->address($addr); } Let's see another example, which validates if the address is an existent one or not, with Mail::CheckUser module. package Email::Find::Existent; use base qw(Email::Find); use Mail::CheckUser qw(check_email); sub do_validate { my($self, $addr) = @_; return check_email($addr) ? $addr : undef; } =head1 CAVEATS =over 4 =item Why a subset of RFC 822? I say that this module finds a I of RFC 822 because if I attempted to look for I possible valid RFC 822 addresses I'd wind up practically matching the entire block of text! The complete specification is so wide open that its difficult to construct soemthing that's I an RFC 822 address. To keep myself sane, I look for the 'address spec' or 'global address' part of an RFC 822 address. This is the part which most people consider to be an email address (the 'foo@bar.com' part) and it is also the part which contains the information necessary for delivery. =item Why are some of the matches not email addresses? Alas, many things which aren't email addresses I like email addresses and parse just fine as them. The biggest headache is email and usenet and email message IDs. I do my best to avoid them, but there's only so much cleverness you can pack into one library. =back =head1 AUTHORS Copyright 2000, 2001 Michael G Schwern Eschwern@pobox.comE. All rights reserved. Current maintainer is Tatsuhiko Miyagawa Emiyagawa@bulknews.netE. =head1 THANKS Schwern thanks to Jeremy Howard for his patch to make it work under 5.005. =head1 LICENSE This module is free software; you may redistribute it and/or modify it under the same terms as Perl itself. =for _private After talking with a few legal people, it was found I can't restrict how code is used, only how it is distributed. Not without making installation of the module annoying. Please don't make me add the annoying installation steps. The author B that this module not be used for the purposes of sending unsolicited email (ie. spamming) in any way, shape or form or for the purposes of generating lists for commercial sale. If you use this module for spamming I reserve the right to make fun of you. =head1 SEE ALSO L, RFC 822, L, L, L =cut Email-Find-0.10/README0000644000175000017500000001503510406771573013173 0ustar brandbrandNAME Email::Find - Find RFC 822 email addresses in plain text SYNOPSIS use Email::Find; # new object oriented interface my $finder = Email::Find->new(\&callback); my $num_found - $finder->find(\$text); # good old functional style $num_found = find_emails($text, \&callback); DESCRIPTION Email::Find is a module for finding a *subset* of RFC 822 email addresses in arbitrary text (see the section on "CAVEATS"). The addresses it finds are not guaranteed to exist or even actually be email addresses at all (see the section on "CAVEATS"), but they will be valid RFC 822 syntax. Email::Find will perform some heuristics to avoid some of the more obvious red herrings and false addresses, but there's only so much which can be done without a human. METHODS new $finder = Email::Find->new(\&callback); Constructs new Email::Find object. Specified callback will be called with each email as they're found. find $num_emails_found = $finder->find(\$text); Finds email addresses in the text and executes callback registered. The callback is given two arguments. The first is a Mail::Address object representing the address found. The second is the actual original email as found in the text. Whatever the callback returns will replace the original text. FUNCTIONS For backward compatibility, Email::Find exports one function, find_emails(). It works very similar to URI::Find's find_uris(). EXAMPLES use Email::Find; # Simply print out all the addresses found leaving the text undisturbed. my $finder = Email::Find->new(sub { my($email, $orig_email) = @_; print "Found ".$email->format."\n"; return $orig_email; }); $finder->find(\$text); # For each email found, ping its host to see if its alive. require Net::Ping; $ping = Net::Ping->new; my %Pinged = (); my $finder = Email::Find->new(sub { my($email, $orig_email) = @_; my $host = $email->host; next if exists $Pinged{$host}; $Pinged{$host} = $ping->ping($host); }); $finder->find(\$text); while( my($host, $up) = each %Pinged ) { print "$host is ". $up ? 'up' : 'down' ."\n"; } # Count how many addresses are found. my $finder = Email::Find->new(sub { $_[1] }); print "Found ", $finder->find(\$text), " addresses\n"; # Wrap each address in an HTML mailto link. my $finder = Email::Find->new( sub { my($email, $orig_email) = @_; my($address) = $email->format; return qq|$orig_email|; }, ); $finder->find(\$text); SUBCLASSING If you want to change the way this module works in finding email address, you can do it by making your subclass of Email::Find, which overrides "addr_regex" and "do_validate" method. For example, the following class can additionally find email addresses with dot before at mark. This is illegal in RFC822, see the Email::Valid::Loose manpage for details. package Email::Find::Loose; use base qw(Email::Find); use Email::Valid::Loose; # should return regex, which Email::Find will use in finding # strings which are "thought to be" email addresses sub addr_regex { return $Email::Valid::Loose::Addr_spec_re; } # should validate $addr is a valid email or not. # if so, return the address as a string. # else, return undef sub do_validate { my($self, $addr) = @_; return Email::Valid::Loose->address($addr); } Let's see another example, which validates if the address is an existent one or not, with Mail::CheckUser module. package Email::Find::Existent; use base qw(Email::Find); use Mail::CheckUser qw(check_email); sub do_validate { my($self, $addr) = @_; return check_email($addr) ? $addr : undef; } CAVEATS Why a subset of RFC 822? I say that this module finds a *subset* of RFC 822 because if I attempted to look for *all* possible valid RFC 822 addresses I'd wind up practically matching the entire block of text! The complete specification is so wide open that its difficult to construct soemthing that's *not* an RFC 822 address. To keep myself sane, I look for the 'address spec' or 'global address' part of an RFC 822 address. This is the part which most people consider to be an email address (the 'foo@bar.com' part) and it is also the part which contains the information necessary for delivery. Why are some of the matches not email addresses? Alas, many things which aren't email addresses *look* like email addresses and parse just fine as them. The biggest headache is email and usenet and email message IDs. I do my best to avoid them, but there's only so much cleverness you can pack into one library. AUTHORS Copyright 2000, 2001 Michael G Schwern . All rights reserved. Current maintainer is Tatsuhiko Miyagawa . THANKS Schwern thanks to Jeremy Howard for his patch to make it work under 5.005. LICENSE This module is free software; you may redistribute it and/or modify it under the same terms as Perl itself. The author STRONGLY SUGGESTS that this module not be used for the purposes of sending unsolicited email (ie. spamming) in any way, shape or form or for the purposes of generating lists for commercial sale. If you use this module for spamming I reserve the right to make fun of you. SEE ALSO the Email::Valid manpage, RFC 822, the URI::Find manpage, the Apache::AntiSpam manpage, the Email::Valid::Loose manpage Email-Find-0.10/Changes0000644000175000017500000000253110553225327013575 0ustar brandbrandRevision history for Perl extension Email::Find. 0.10 Tue Jan 16 11:42:51 PST 2007 * Bumped up Email::Valid requirement to 0.179, and update the tests to skip some invalid email addresses. 0.09 Sun Jan 13 21:52:11 JST 2002 * Complete rewrite of the module: added new OO interface 0.08 Fri Oct 5 08:35:25 JST 2001 - Test::Simple and Test::More goes out of the distribution * Tighten up the definition of domain to match what's in RFC 1035 Thanks to Michael G Schwern and Gil Vidals 0.07 Tue Jul 31 13:58:30 JST 2001 * Turned off -local_rules and -fudge for Email::Valid constructor. Thanks to Joseph Crotty. 0.06 Tue Jun 26 19:48:25 JST 2001 - Nothing new. Maintainer has been switched from Schwern to Miyagawa. 0.05 Sun Jun 24 22:33:18 EDT 2001 * Now Freely usable. The anti-spam license is gone (illegal). 0.04 Thu May 10 00:38:08 BST 2001 * Using Tatsuhiko Miyagawa's much faster regex - Now eating my own dog food (using Test::More for testing) - Added test against RFC822 0.03 Thu Dec 7 15:17:53 EST 2000 - Removed CAVEAT about only working under 5.005 - Added THANKS doc section 0.02 Tue May 16 21:33:10 EDT 2000 * Now works under 5.005_03! (Kudos to Jeremy Howard) 0.01 Sat Feb 12 01:38:16 EST 2000 * Released to CPAN - original version; created by h2xs 1.19 Email-Find-0.10/Makefile.PL0000644000175000017500000000254410553033017014251 0ustar brandbrand# A template for Makefile.PL used by Arena Networks. # - Set the $PACKAGE variable to the name of your module. # - Set $LAST_API_CHANGE to reflect the last version you changed the API # of your module. # - Fill in your dependencies in PREREQ_PM # Alternatively, you can say the hell with this and use h2xs. use ExtUtils::MakeMaker; # See lib/ExtUtils/MakeMaker.pm for details of how to influence # the contents of the Makefile that is written. # Need qr//. require 5.005; $PACKAGE = 'Email::Find'; my($PACKAGE_FILE) = $PACKAGE =~ /(?:\::)?([^:]+)$/; $LAST_API_CHANGE = 0.09; eval "require $PACKAGE"; unless ($@) { # Make sure we did find the module. print <<"CHANGE_WARN" if ${$PACKAGE.'::VERSION'} < $LAST_API_CHANGE; NOTE: There have been API changes between this version and any older than version $LAST_API_CHANGE! Please read the Changes file if you are upgrading from a version older than $LAST_API_CHANGE. CHANGE_WARN } WriteMakefile( NAME => $PACKAGE, VERSION_FROM => "lib/Email/Find.pm", # finds $VERSION PREREQ_PM => { Email::Valid => 0.179, Mail::Address => 0, Test::More => 0, }, 'dist' => { COMPRESS => 'gzip -9', SUFFIX => '.gz', DIST_DEFAULT => 'all tardist', }, ); Email-Find-0.10/META.yml0000644000175000017500000000064310553225366013560 0ustar brandbrand# http://module-build.sourceforge.net/META-spec.html #XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX# name: Email-Find version: 0.10 version_from: lib/Email/Find.pm installdirs: site requires: Email::Valid: 0.179 Mail::Address: 0 Test::More: 0 distribution_type: module generated_by: ExtUtils::MakeMaker version 6.30 Email-Find-0.10/MANIFEST0000644000175000017500000000030310555166700013427 0ustar brandbrandChanges MANIFEST Makefile.PL README lib/Email/Find.pm lib/Email/Find/addrspec.pm t/Find.t t/addr-spec.t t/new-api.t META.yml Module meta-data (added by MakeMaker)