duck-trusty/0000775000000000000000000000000012310326276010246 5ustar duck-trusty/duck.10000664000000000000000000000335012310326276011257 0ustar .TH "DUCK" 1 "2014-03-10" .SH NAME duck - the Debian Url ChecKer .SH SYNOPSIS \fB duck \fR [ \fIOPTION\fR ]... [\fI-f file\fR] [\fI-u file\fR] [\fI-c file\fR] .SH DESCRIPTION \fBduck\fR extracts links, email address domains and VCS-* entries from \fIdebian/control\fR, \fIdebian/upstream\fr, \fIdebian/copyright\fr, \fIdebian/upstream-metadata.yaml\fR and \fIdebian/upstream/metadata\fR. It tries to access those VCS-* entries and URLs using the approriate tool to find out whether the given URLs or entries are broken or working. If errors are detected, the filename, fieldname and URL of the broken link/URL are displayed. Email address domains are checked for existing MX records, A records, or AAAA records, in this order. If none of these 3 are defined for a given domain, it is considered broken. .TP \fB\-v\fR verbose mode. This shows all URLs found and the checks run. .TP \fB\-q\fR quiet mode. Suppress all output. .TP \fB\-n\fR dry run. Don't run any checks, just show entries to be checked. .TP \fB\-f\fR specify path to control file. The default is \fIdebian/control\fR. .TP \fB\-F\fR skip processing of the control file. .TP \fB\-u\fR specify path to upstream metadata file. .TP \fB\-U\fR skip processing of the upstream metadata file. .TP \fB\-c\fR specify path to copyright file. The default is \fIdebian/copyright\fR. .TP \fB\-C\fR skip processing of copyright file. .SH "EXAMPLE" To run duck, change your working directory to an extracted debian source package and run: . \fB duck .SH "EXIT STATUS" .PP .IP "\fB0\fP" Success, no errors .IP "\fB1\fP" Error(s) detected .SH "SEE ALSO" Please see \fIhttp://duck.debian.net/\fR for additional information as well as an overview of duck checks run on all source packages in \fIDebian/unstable\fR. duck-trusty/lib/0000775000000000000000000000000012310324051011001 5ustar duck-trusty/lib/DUCK.pm0000664000000000000000000001750212310324051012072 0ustar # Copyright (C) 2014 Simon Kainz # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # he Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # On Debian GNU/Linux systems, the complete text of the GNU General # Public License can be found in `/usr/share/common-licenses/GPL-2'. # # You should have received a copy of the GNU General Public License # along with this program. If not, you can find it on the World Wide # Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, # MA 02110-1301, USA. use strict; use warnings; use lib '.'; package DUCK; our $VERSION ='0.5'; use File::Which; use WWW::Curl::Easy; use strict; use IPC::Open3; use IO::Select; use Net::DNS; use Regexp::Common qw /Email::Address/; use Email::Address; my $callbacks; my $self; my $helpers={ svn =>0, bzr =>0, git =>0, darcs =>1, # Uses WWW::Curl::Easy hg => 0, browser =>1 # This works as we use WWW::Curl::Easy; }; my $tools= { git => { cmd => 'git', args => ['ls-remote','%URL%'] }, hg =>{ cmd => 'hg', args => ['id','%URL%'] }, bzr => { cmd => 'bzr', args => ['-Ossl.cert_reqs=none','log','%URL%'] }, svn => { cmd => 'svn', args => ['--non-interactive','--trust-server-cert','info','%URL%'] } }; sub new { my $class = shift; $self = {}; bless $self, $class; $self->__find_helpers(); return $self; } sub cb() { $callbacks= { "Vcs-Browser" =>\&browser, "Vcs-Darcs" =>\&darcs, "Vcs-Git" =>\&git, "Vcs-Hg" =>\&hg, "Vcs-Svn" =>\&svn, "Vcs-Bzr" =>\&bzr, "Homepage" => \&browser, "URL" => \&browser, "Email" => \&email, "Maintainer" => \&maintainer, "Uploaders" => \&uploaders }; return $callbacks; } sub __find_helpers() { $helpers->{git}=1 unless !defined (which('git')); $helpers->{svn}=1 unless !defined (which('svn')); $helpers->{hg}=1 unless !defined (which('hg')); $helpers->{bzr}=1 unless !defined (which('bzr')); } sub git() { my ($url)=@_; my @urlparts=split(/\s+/,$url); if ($urlparts[1]) { if ($urlparts[1] eq "-b" && $urlparts[2]) { push(@{$tools->{'git'}->{'args'}},'-b '.$urlparts[2]); } } return __run_helper('git',$urlparts[0]); } sub bzr() { my ($url)=@_; return __run_helper('bzr',$url); } sub hg() { my ($url)=@_; return __run_helper('hg',$url); } sub svn() { my ($url)=@_; return __run_helper('svn',$url); } sub browser() { my ($url)=@_; $url =~ s/\.*$//g; return __run_browser($url); } sub darcs() { my ($url)=@_; my $darcsurltemp=$url; $darcsurltemp =~ s/\/$//; $darcsurltemp.='/_darcs/hashed_inventory'; return __run_browser($darcsurltemp); } sub uploaders() { my ($line_uploaders)=@_; $line_uploaders =~ s/\n/ /g; my @emails; if ($line_uploaders =~ /@/) { @emails = ($line_uploaders =~ /($RE{Email}{Address})/g ); } my $res; foreach my $email(@emails) { my $r=check_domain($email); if ($r->{retval}>0) { if (!$res->{retval}) { $res=$r; } else { $res->{retval}=$r->{retval}; $res->{response}.="\n".$r->{response}; $res->{url}="foo"; } } } if (!$res->{retval}) { $res->{'retval'}=0; $res->{'response'}=""; $res->{'url'}=$line_uploaders; } return $res; } sub maintainer() { my ($email)=@_; return check_domain($email); } sub email() { my ($email) =@_; return check_domain($email); } sub __run_browser { # my $job = shift; # my $url = $job->arg; my ($url)=@_; #check if URL is mailto: link if ($url =~/mailto:\s*.+@.+/) { return check_domain($url); } my $curl = WWW::Curl::Easy->new; my @website_moved_regexs=('new homepage','update your links','we have moved'); my @website_moved_whitelist=('anonscm.debian.org.*duck.git'); $curl->setopt(CURLOPT_HEADER,0); $curl->setopt(CURLOPT_SSL_VERIFYPEER,0); $curl->setopt(CURLOPT_FOLLOWLOCATION,1); $curl->setopt(CURLOPT_SSLVERSION,3); $curl->setopt(CURLOPT_MAXREDIRS,10); $curl->setopt(CURLOPT_TIMEOUT,60); $curl->setopt(CURLOPT_USERAGENT,'Mozilla/5.0 (X11; Linux x86_64; rv:10.0.4) Gecko/20100101 Firefox/10.0.4 Iceweasel/10.0.4'); $curl->setopt(CURLOPT_URL, $url); my $response_body; $curl->setopt(CURLOPT_WRITEDATA,\$response_body); # Starts the actual request my $retcode = $curl->perform; # Looking at the results... my $status=0; my $disp=0; my $response_code = $curl->getinfo(CURLINFO_HTTP_CODE); my $response=$curl->strerror($retcode)." ".$curl->errbuf."\n"; if ($retcode == 0) # no curl error, but maybe a http error { #default to error $status=1; $disp=1; #handle ok cases, 200 is ok for sure if ($response_code ==200 ) { $status=0; $disp=0; } if ($response_code ==226 ) { $status=0; $disp=0; } if ($response_code ==227 ) { $status=0; $disp=0; } if ($response_code ==302 ) #temporary redirect is ok { $status=0; $disp=0; } if ($response_code ==403) { ## special case for sourceforge.net sites ## sourceforge seems to always return correct pages wit http code 40. if ( $url =~ m/(sourceforge|sf).net/i) { # print "Sourceforge site, so hande special!!"; $status=0; $disp=0; } } my $whitelisted=0; foreach my $whitelist_url (@website_moved_whitelist) { if ( $url =~ m/$whitelist_url/i) {$whitelisted=1;} } if ($whitelisted == 0) { foreach my $regex (@website_moved_regexs) { # print "$regex\n"; if ($response_body =~ m/$regex/i ) { $disp=1; $response.="Website seems to be outdated. Please update your links!"; last; } } } } else { # we have a curl error, so we show this entry for sure $status=1; $disp=1; } my $ret; $ret->{'retval'}=$disp; $ret->{'response'}="Curl:$retcode HTTP:$response_code $response"; $ret->{'url'}=$url; return $ret; } sub __run_helper { my ($tool,$url)=@_; return undef unless $helpers->{$tool} == 1; return undef unless defined $tools->{$tool}; my @args=@{$tools->{$tool}->{'args'}}; for(@args){s/\%URL\%/$url/g} my $pid=open3(\*WRITE,\*READ,0,$tools->{$tool}->{'cmd'},@args); my @results = ; waitpid ($pid,0); close READ; my $retval=$?; my $ret; $ret->{'retval'}=$retval; $ret->{'response'}=join("\n",@results); $ret->{'url'}=$url; return $ret; } sub check_domain($) { my $res = Net::DNS::Resolver->new; my ($email) = @_; my @emails=Email::Address->parse($email); $email=$emails[0]; my @domain = ( $email =~ m/^[^@]*@([^?^&^>]*).*/); my @queries=('MX','A','AAAA'); my @results; my $iserror=1; foreach my $query (@queries) { my $q=$res->query($domain[0],$query); if ($q) { my @answers=$q->answer; my $mxcount=scalar @answers; push (@results,$mxcount." ".$query." entries found."); $iserror=0; last; } else { push (@results,"$email: No ".$query." entry found."); } } my $ret; $ret->{'retval'}=$iserror; $ret->{'response'}=join("\n",@results); $ret->{'url'}=$email; return $ret; } 1; duck-trusty/debian/0000775000000000000000000000000012364523440011471 5ustar duck-trusty/debian/duck.install0000664000000000000000000000003712277375152014017 0ustar duck usr/bin lib usr/share/duckduck-trusty/debian/changelog0000664000000000000000000000413012364523440013341 0ustar duck (0.5~ubuntu14.04.1) trusty-backports; urgency=medium * No-change backport to trusty (LP: #1348753) -- Felix Geyer Fri, 25 Jul 2014 20:51:12 +0200 duck (0.5) unstable; urgency=medium * Add support for Emails and URLs in debian/control. * Fix leftover dependencies on libparse-debian-packages-perl. * Add new command line option: -n dry run, don't run any checks, just show what would be checked * Add info for gbp buildpackage hook in README.Debian * Use libregexp-common-perl and libregexp-common-email-address-perl to improve detection of URLs and email addresses. -- Simon Kainz Thu, 13 Mar 2014 13:53:05 +0100 duck (0.4) unstable; urgency=low * Add support for mailto: URLS (Closes: #740862) * Fix Architecture: entry in debian/control (Closes: #740997) * Add check for email domains in Maintainer: field * Add check for email domains in Uploaders: field * Change parsing backend to libparse-debcontrol-perl (Closes: #740899) * Add minimal URL scheme detection for Repository: field in upstream metadata (Closes: #740859) * Add new command line options: -F skip processing of the control file. -u specify path to upstream metadata file. -U skip processing of the upstream metadata file. * Fixed URL handling for VCS-Git Urls: -b option now parsed correctly. -- Simon Kainz Tue, 11 Mar 2014 09:01:11 +0100 duck (0.3) unstable; urgency=low * Add Missing dependency libyaml-libyaml-perl (Closes: #740923) -- Simon Kainz Thu, 06 Mar 2014 10:23:09 +0100 duck (0.2) unstable; urgency=low * Remove convenience copy of libparse-debian-packages-perl. * Change dh version to 9. * Minor code cleanup. * Add new check for Vcs-Bzr: entries -- Simon Kainz Tue, 04 Mar 2014 08:40:40 +0100 duck (0.1) unstable; urgency=low * Initial release. (Closes: #739483) -- Simon Kainz Wed, 19 Feb 2014 09:50:04 +0000 duck-trusty/debian/duck.manpages0000664000000000000000000000000612300652560014124 0ustar duck.1duck-trusty/debian/gbp.conf0000664000000000000000000000010612310303534013074 0ustar [buildpackage] postbuild=cd $GBP_BUILD_DIR; perl -Mlib=lib ./duck -v duck-trusty/debian/README.Debian0000664000000000000000000000041512310321775013530 0ustar Post-build hook for gbp ----------------------- To automagically run duck after using gbp buildpackage, add the following entry to your $HOME/.gbp.conf file: [buildpackage] postbuild= which duck >/dev/null && { cd "$GBP_BUILD_DIR" && duck -v; exit $?; } || true duck-trusty/debian/compat0000664000000000000000000000000212305301257012662 0ustar 9 duck-trusty/debian/rules0000775000000000000000000000017312300351075012543 0ustar #!/usr/bin/make -f PERL ?= /usr/bin/perl LIBDIR = lib %: dh $@ override_dh_auto_test: $(PERL) -Mlib=$(LIBDIR) -wc duckduck-trusty/debian/copyright0000664000000000000000000000034412310060431013410 0ustar Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Files: * Copyright: 2014 Simon Kainz License: GPL-2+ On Debian systems, the complete text of the GPL can be found in /usr/share/common-licenses/GPL-2. duck-trusty/debian/control0000664000000000000000000000170712310322207013066 0ustar Source: duck Section: devel Priority: optional Maintainer: Simon Kainz Build-Depends: debhelper (>= 9), libregexp-common-email-address-perl, libregexp-common-perl, libparse-debcontrol-perl, libnet-dns-perl, libyaml-libyaml-perl, libwww-curl-perl, libfile-which-perl Standards-Version: 3.9.5 Homepage: http://duck.debian.net Vcs-Git: git://anonscm.debian.org/collab-maint/duck.git Vcs-Browser: http://anonscm.debian.org/gitweb/?p=collab-maint/duck.git Package: duck Architecture: all Depends: ${misc:Depends}, ${perl:Depends}, libregexp-common-email-address-perl, libregexp-common-perl, libparse-debcontrol-perl, libnet-dns-perl, libwww-curl-perl, libfile-which-perl, libyaml-libyaml-perl, git, mercurial, subversion, bzr Description: checks URLs in debian/control and debian/upstream files duck, the Debian Url ChecKer, processes several fields in the debian/control and debian/upstream file and checks if links found therin are valid. duck-trusty/debian/source/0000775000000000000000000000000012277376171013003 5ustar duck-trusty/debian/source/format0000664000000000000000000000001512277376171014212 0ustar 3.0 (native) duck-trusty/duck0000775000000000000000000001633112310324404011115 0ustar #!/usr/bin/perl -w # duck - the Debian Url Checker # Copyright (C) 2014 Simon Kainz # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # he Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # On Debian GNU/Linux systems, the complete text of the GNU General # Public License can be found in `/usr/share/common-licenses/GPL-2'. # # You should have received a copy of the GNU General Public License # along with this program. If not, you can find it on the World Wide # Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, # MA 02110-1301, USA. use strict; use lib '/usr/share/duck/lib'; use DUCK; use Getopt::Std; use YAML::XS qw(Load); use Parse::DebControl qw(parse_file); use Regexp::Common qw /URI Email::Address/; use Email::Address; sub proc($;$;$;$;$); sub guess_type($); our $VERSION="0.5"; our $copyright_year="2014"; $Getopt::Std::STANDARD_HELP_VERSION=1; my $cf_parser = new Parse::DebControl; #my $debug=0; my $exitcode=0; my @yaml_urls; my @extract=("Homepage","Repository","Repository-Browse","Screenshots","Bug-Submit","Bug-Database","Changelog","Donation","FAQ","Gallery","Other-References","Webservice","Reference","URL","Eprint"); my @extract_copyright=("Format","Source"); my @upstream_filenames=("debian/upstream","debian/upstream-metadata.yaml","debian/upstream/metadata"); my $extract_hash; my $extract_copyright_hash; my $upstream_filename; my %opt; getopts('qvf:u:c:FUCn', \%opt); if ( $opt{v} && $opt{q} ) { print STDERR " Please specify either -q or -v\n"; exit(1); } foreach my $a (@extract) { $extract_hash->{$a}=1; } foreach my $a (@extract_copyright) { $extract_copyright_hash->{$a}=1; } my $DUCK= DUCK->new(); my $funcref= $DUCK->cb(); my @entries; if (!$opt{C}) { #processing copyright file open my $fh,"<",($opt{c} or "debian/copyright"); my @copy_raw=<$fh>; close($fh); chomp @copy_raw; my $linenum=0; foreach my $copyright_line (@copy_raw) { $linenum++; $copyright_line =~ s/^[*\s#\-|\/\.]*//; $copyright_line =~ s/[\s#\-|\)*]*$//; next unless length($copyright_line); if ($copyright_line =~ /($RE{URI}{HTTP}{-keep})/) { push (@entries, ["debian/copyright:".$linenum,"URL",$1,$copyright_line ]); } if ($copyright_line =~ /@/) { my $copyright_line_mangled =$copyright_line; $copyright_line_mangled =~ s/[\*\#|<>\(\)\/]/ /g; $copyright_line_mangled =~ s/\s\s*/ /g; next unless length($copyright_line_mangled); my @emails = ($copyright_line_mangled =~ /$RE{Email}{Address}{-keep}/go ); if (@emails) { my @parsed = map $_->address,Email::Address->parse(@emails); foreach (@parsed) { push (@entries, ["debian/copyright:".$linenum,"Email",$_,$copyright_line_mangled,$copyright_line ]); } } } } } #Processing debian/control file if (!$opt{F}) { my $opts= {stripComments => 'true'}; my $cf=($opt{f} or "debian/control"); my @data_file = $cf_parser->parse_file($cf, $opts); my @cfdata=$data_file[0][0]; # create list of urls from debian/control foreach my $cfline1 (@data_file) { foreach my $cfline2 (@$cfline1) { foreach my $k (keys %$cfline2) { push (@entries, ["debian/control",$k,$cfline2->{$k} ]); } } } } #Processing upstream metadata file if (!$opt{U}) { # extend list of urls by urls from upstream metadata foreach (@upstream_filenames) { @yaml_urls=(); if ( -f $_) { $upstream_filename=$_; open my $fh,"<",$_; my @raw=<$fh>; my $raw_string=join("",@raw); close($fh); my $hashref; eval { Load($raw_string);}; if (!$@) { $hashref=Load($raw_string); foreach my $k (keys $hashref) { if ($extract_hash->{$k}) { proc("",\@yaml_urls,$k,$hashref->{$k}); } } } } foreach my $yaml_url(@yaml_urls) { # try to be smart: git:// and svn:// based urls must not be handled # by curl. my $keyname=guess_type(@$yaml_url[1]); if (!$keyname) {$keyname="URL";} @$yaml_url[1] =~ s/^\s*//; push (@entries, [$upstream_filename.": ".@$yaml_url[2],$keyname,@$yaml_url[1] ]); } } } # iterate over all urls, run checks. foreach my $entry (@entries) { my $type=@$entry[0]; my $k=@$entry[1]; my $url=@$entry[2]; my $origline=@$entry[3]; chomp $origline unless !$origline; if ($funcref->{$k}) { if ($opt{n}) { print STDOUT $type.": ".$k.": ".$url.": "; print STDOUT " DRY RUN\n"; next; } my $res=&{$funcref->{$k}}($url); if (!defined $res) { if (!$opt{q}) { print STDERR " Skipping field ".$k." (Reason: Missing helper!)\n"; } } else { if ($res->{retval}>0) { if (!$opt{q}) { print STDERR $type.": ".$k.": ".$url.": "; if ($origline) { print STDERR $origline.": "; } print STDERR " ERROR\n"; print STDERR $res->{response}; print STDERR "\n\n"; } $exitcode=1; } else { if ($opt{v}) { print STDOUT $type.": ".$k.": ".$url.": "; if ($origline) { print STDOUT $origline.": "; } print STDOUT " OK\n\n"; } } } } } exit($exitcode); ############################################################################## # Helper functions sub guess_type($) { my ($url)=@_; return "Vcs-Git" if ($url =~/^\s*git:\/\//); return "Vcs-Svn" if ($url =~/^\s*svn:\/\//); return "URL" if ($url =~/$RE{URI}{HTTP}/); return "URL" if ($url =~/$RE{URI}{FTP}/); return undef; } sub HELP_MESSAGE() { print STDOUT < all rights reserved. This program comes with ABSOLUTELY NO WARRANTY. You are free to redistribute this code under the terms of the GNU General Public License, version 2 or later. EOF } sub proc($;$;$;$;$) { my ($sp,$ref,$key,$r,$p)=@_; my $t=ref($r); if ($t eq "HASH") { my %a=%{$r}; foreach my $e (keys %a) { return proc($sp,$ref,$e,$a{$e},$key); } } if ($t eq "ARRAY") { my @a=@{$r}; foreach my $e (@a) { return proc($sp,$ref,$key,$e,$key); } } if ($t eq "") { if ($extract_hash->{$key}) { my @data=($sp,$r,$key); push(@{$ref},\@data); } } }