ip2host-1.13/0000775000175000017500000000000012041105625011130 5ustar mlamlaip2host-1.13/ip2host0000775000175000017500000002404312025405024012447 0ustar mlamla#!/usr/bin/perl -w # ip2host - Resolve IPs to hostnames in web server logs # Maurice Aubrey # # Usage: ip2host [OPTIONS] [cache_file] < infile > outfile # # For a usage summary, run: ip2host --usage # For documentation, run: perldoc ip2host our $CHILDREN = 40; # Number of processes to fork our $TIMEOUT = 20; # DNS timeout our $BUFFER = 50000; # Maximum number of log lines to keep in memory our $FLUSH = 500; # Flush output buffer every $FLUSH lines our $CACHE = ''; # Optional disk cache file to use our $TTL = 86400 * 7; # Seconds until disk cached ips are expired our $DEBUG = 0; # Regex for matching IPv6 address. Source: # people.spodhuis.org/phil.pennock/software/emit_ipv6_regexp-0.304 our $IPV6 = '(?:(?:(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):){6})(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:::(?:(?:(?:[0-9a-fA-F]{1,4})):){5})(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})))?::(?:(?:(?:[0-9a-fA-F]{1,4})):){4})(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):){0,1}(?:(?:[0-9a-fA-F]{1,4})))?::(?:(?:(?:[0-9a-fA-F]{1,4})):){3})(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):){0,2}(?:(?:[0-9a-fA-F]{1,4})))?::(?:(?:(?:[0-9a-fA-F]{1,4})):){2})(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):){0,3}(?:(?:[0-9a-fA-F]{1,4})))?::(?:(?:[0-9a-fA-F]{1,4})):)(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):){0,4}(?:(?:[0-9a-fA-F]{1,4})))?::)(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):){0,5}(?:(?:[0-9a-fA-F]{1,4})))?::)(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):){0,6}(?:(?:[0-9a-fA-F]{1,4})))?::))))'; our $IPV4 = '\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'; use strict; use vars qw( %Opt %Buffer %Pending %Cache $Output_Line $Input_Line ); use Socket; use IO::Handle; use IO::Select; use Getopt::Long; our $VERSION = '1.13'; BEGIN { package IP_Cache; use strict; our $TTL; our @ISA = qw/ DB_File /; sub TIEHASH { my $class = shift; $TTL = shift; require DB_File; $class->SUPER::TIEHASH(@_); } # In order to implement EXISTS, we'd need to parse # the value to see if the IP has expired, which is just # as expensive as FETCH. So we'll just make sure we # never use it. sub EXISTS { die 'exists not implemented!' } sub _DB_FETCH { my $self = shift; my $ip = shift; my $val = $self->SUPER::FETCH($ip) or return; my($utc, $host) = split /:/, $val, 2; time - $utc < $TTL or return; return $host; } { my %cache; sub FETCH { my $self = shift; my $ip = shift; return $cache{ $ip } if exists $cache{ $ip }; return $cache{ $ip } = $self->_DB_FETCH($ip); } sub STORE { my $self = shift; my($ip, $host) = @_; $cache{ $ip } = $host; $self->SUPER::STORE( $ip => (time . ':' . $host) ); } } } sub usage { my $exit = shift || 0; print STDERR < output_log See the POD for more details: perldoc $0 Copyright 1999-2012, Maurice Aubrey This module is free software; you may redistribute it and/or modify it under the same terms as Perl itself. EOF exit $exit; } sub resolve_ips($) { my $parent_fh = shift; $SIG{'ALRM'} = sub { die 'alarmed' }; while(my $ip = <$parent_fh>) { # Get IP to resolve chomp($ip); my $host = undef; eval { # Try to resolve, but give up after $TIMEOUT seconds alarm($Opt{timeout}); my $ip_struct; if ($ip_struct = inet_aton($ip)) { $host = gethostbyaddr $ip_struct, AF_INET; } elsif ($ip_struct = Socket::inet_pton(AF_INET6, $ip)) { $host = gethostbyaddr $ip_struct, AF_INET6; } alarm(0); }; # XXX Debug if ($Opt{debug} and $@ =~ /alarmed/) { $host ||= 'TIMEOUT'; # warn "Alarming ($ip)...\n"; } $host ||= $ip; print $parent_fh "$ip $host\n"; } } sub fork_children($) { my $num_children = shift; my $read_set = IO::Select->new; my $write_set = IO::Select->new; for(my $child = 1; $child <= $num_children; $child++) { my($child_fh, $parent_fh) = (IO::Handle->new, IO::Handle->new); socketpair($child_fh, $parent_fh, AF_UNIX, SOCK_STREAM, PF_UNSPEC) or die "socketpair failed: $!"; $child_fh->autoflush; $parent_fh->autoflush; if (my $pid = fork) { # parent close $parent_fh; $write_set->add($child_fh); # Start out writing to children } else { # child defined $pid or die "fork failed: $!"; close $child_fh; close STDIN; close STDOUT; resolve_ips($parent_fh); exit 0; } } return ($read_set, $write_set); } # Write as many lines as we can until we come across one # that's missing (that means it's still pending DNS). sub flush_output { for (; exists $Buffer{ $Output_Line }; $Output_Line++) { print delete $Buffer{ $Output_Line }; } } %Opt = ( children => $CHILDREN, timeout => $TIMEOUT, buffer => $BUFFER, flush => $FLUSH, cache => $CACHE, ttl => $TTL, debug => $DEBUG, regex => '', ); GetOptions(\%Opt, 'children|kids=i', 'timeout=i', 'buffer=i', 'flush=i', 'ttl=i', 'cache=s', 'usage|help|version', 'debug', 'regex=s', 'ipv6', ); usage(0) if $Opt{usage}; usage(1) if @ARGV > 1; $Opt{cache} = shift if @ARGV; unless ($Opt{regex}) { my $regex = $Opt{ipv6} ? "$IPV4|$IPV6" : $IPV4; $Opt{regex} = qr/\b ($regex) \b/sx; } my($read_set, $write_set) = fork_children($Opt{children}); if ($Opt{cache}) { # Cache results to disk if asked tie %Cache, 'IP_Cache', $Opt{ttl}, $Opt{cache} or die "unable to tie '$Opt{cache}': $!"; } $Output_Line = 1; $Input_Line = 0; while (1) { my $buffer_full = $Input_Line - $Output_Line >= $Opt{buffer}; my($readable, $writable) = IO::Select->select( $read_set, $buffer_full ? undef : $write_set, # Throttle if buffer is full undef ); while (@$writable) { # One or more children ready for IP my $line = ; $Input_Line++; unless (defined $line) { undef $write_set; last; } my($ip) = ($line =~ /$Opt{regex}/); my($before, $after) = ($`, $'); if (not defined $ip) { # No IP seen, pass it through unmolested $Buffer{ $Input_Line } = $line; } elsif (my $host = $Cache{ $ip }) { # We found this answer already $Buffer{ $Input_Line } = "$before$host$after"; } elsif (exists $Pending{ $ip }) { # We're still looking push @{ $Pending{ $ip } }, [ $Input_Line, $before, $after ]; } else { # Send IP to child my $fh = shift @$writable; print $fh "$ip\n"; $Pending{ $ip } = [ [ $Input_Line, $before, $after ] ]; $write_set->remove($fh); $read_set->add($fh); # Move to read set to wait for answer } flush_output if $Input_Line % $Opt{flush} == 0; } while (@$readable) { # One or more children have an answer my $fh = shift @$readable; chomp(my $str = <$fh>); my($ip, $host) = split / /, $str, 2; $Cache{ $ip } = $host; # Take all the lines that were pending for this IP and # toss them into the output buffer foreach my $pending (@{ $Pending{ $ip } }) { $Buffer{ $pending->[0] } = "$pending->[1]$host$pending->[2]"; } delete $Pending{ $ip }; $read_set->remove($fh); $write_set->add($fh) if defined $write_set; # Ready for new question } last if not defined $write_set and not keys %Pending; flush_output if $buffer_full; } flush_output; exit 0; =pod =head1 NAME ip2host - Resolves IPs to hostnames in web server logs =head1 SYNOPSIS ip2host [OPTIONS] [cache_file] < infile > outfile infile - Web server log file. outfile - Same as input file, but with IPs resolved to hostnames. Options: --children=... Number of child processes to spawn (default: 40) --timeout=... Seconds to wait on DNS response (default: 20) --buffer=... Maximum number of log lines to keep in memory (default: 50000) --flush=... Number of lines to process before flushing output buffer (default: 500) --cache=... Filename to use as disk cache (default: none) --ttl=... Number of seconds before IPs cached on disk are expired (default: 604800 - One week) --ipv6 Match IPv6 addresses too --regex=... Perl regular expression to match IP =head1 DESCRIPTION This is a faster, drop-in replacement for the logresolve utility distributed with the Apache web server. It's been reported to work under Linux, FreeBSD, Solaris, Tru64, and IRIX. =head1 AUTHOR Maurice Aubrey Emaurice.aubrey+ip2host@gmail.comE Based on the logresolve.pl script by Rob Hartill. =head1 COPYRIGHT Copyright 1999-2012, Maurice Aubrey Emaurice.aubrey+ip2host@gmail.comE. This module is free software; you may redistribute it and/or modify it under the same terms as Perl itself. =head1 README Resolves IPs to hostnames in web server logs. This is a faster, drop-in replacement for the logresolve utility distributed with the Apache web server. =head1 SCRIPT CATEGORIES Web =cut ip2host-1.13/README0000664000175000017500000000302412025405024012005 0ustar mlamlaNAME ip2host - Resolves IPs to hostnames in web server logs SYNOPSIS ip2host [OPTIONS] [cache_file] < infile > outfile infile - Web server log file. outfile - Same as input file, but with IPs resolved to hostnames. Options: --children=... Number of child processes to spawn (default: 40) --timeout=... Seconds to wait on DNS response (default: 20) --buffer=... Maximum number of log lines to keep in memory (default: 50000) --flush=... Number of lines to process before flushing output buffer (default: 500) --cache=... Filename to use as disk cache (default: none) --ttl=... Number of seconds before IPs cached on disk are expired (default: 604800 - One week) DESCRIPTION This is a faster, drop-in replacement for the logresolve utility distributed with the Apache web server. It's been reported to work under Linux, FreeBSD, Solaris, Tru64, and IRIX. AUTHOR Maurice Aubrey Based on the logresolve.pl script by Rob Hartill. COPYRIGHT Copyright 1999-2010, Maurice Aubrey . This module is free software; you may redistribute it and/or modify it under the same terms as Perl itself. README Resolves IPs to hostnames in web server logs. This is a faster, drop-in replacement for the logresolve utility distributed with the Apache web server. SCRIPT CATEGORIES Web ip2host-1.13/CHANGELOG0000664000175000017500000000047012025405024012341 0ustar mlamlaip2host 1.12 * IPv6 support thanks to Gunnar Wolf https://github.com/gwolf/ip2host ip2host 1.11 * Silence warnings thanks to Andrew McNaughton and Gunnar Wolf http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=563129 ip2host 1.10 * Moved repository to Github http://github.com/mla/ip2host