htdig-3.2.0b6/ 0040755 0063146 0012731 00000000000 10063260371 012364 5 ustar angusgb htdig htdig-3.2.0b6/contrib/ 0040755 0063146 0012731 00000000000 10063260367 014031 5 ustar angusgb htdig htdig-3.2.0b6/contrib/acroconv.pl 0100755 0063146 0012731 00000004235 07427026534 016212 0 ustar angusgb htdig #!/usr/local/bin/perl # # Sample external converter for htdig 3.1.4 or later, to convert PDFs # using Adobe Acrobat 3's acroread -toPostScript option on UNIX systems. # (Use it in place of conv_doc.pl if you have acroread but not pdftotext.) # Written by Gilles Detillieux. # # Usage: (in htdig.conf) # # external_parsers: application/pdf->text/html /usr/local/bin/acroconv.pl # # This is a pretty quick and dirty implementation, but it does seem to # give functionality equivalent to the now defunct htdig/PDF.cc parser. # I'm not a Perl expert by any stretch of the imagination, so the code # could probably use a lot of optimization to make it work better. # $watch = 0; $bigspace = 0; $putspace = 0; $putbody = 1; system("ln $ARGV[0] $ARGV[0].pdf; acroread -toPostScript $ARGV[0].pdf"); open(INP, "< $ARGV[0].ps") || die "Can't open $ARGV[0].ps\n"; print "\n
\n"; while (
Run Date | Run Time | # Servers | # URL's | # Words | Index (MB) | ||
---|---|---|---|---|---|---|---|
$RunTime | $NumServers | $NumURLS | $NumWords | $IndexSize | |||
Top 10 Servers | # URL's |
---|
$NumURLS |
EOT ; } else { &PrintFooterHTML; } } } htdig-3.2.0b6/contrib/changehost/ 0040755 0063146 0012731 00000000000 10063260367 016154 5 ustar angusgb htdig htdig-3.2.0b6/contrib/changehost/changehost.pl 0100755 0063146 0012731 00000014166 06275416055 020653 0 ustar angusgb htdig
#!/usr/local/bin/perl
##
## changehost.pl  (C) 1995 Andrew Scherpbier
##
## This program will change hostnames of URLs in the document database and index.
##
## usage:
##      changehost.pl database_base from to
##
## example:
##      changehost.pl /opt/www/htdig/sdsu www.sdsu.edu www.northpole.net
##
## Two new databases will be created with a base of '/tmp/new'.
## These databases can then be used by htsearch.
##

use strict;
use warnings;
use GDBM_File;

die "usage: $0 database_base from to\n" unless @ARGV >= 3;
my ($base, $from, $to) = @ARGV;

# The host name is matched literally: without \Q..\E the dots in a name such
# as "www.sdsu.edu" would match any character.
my $host_re = qr{http://\Q$from\E}i;

# Width in bytes of the native int produced by pack("i").
my $INT_SIZE = length pack('i', 0);

# In-memory separator between the entries of a list-valued field
# (DESCRIPTIONS, ANCHORS).  It is only used between parse_ref_record and
# create_ref_record and is never written to the database.
# NOTE(review): the separator literal was empty in the copy this was derived
# from (presumably a stripped control character); \001 is assumed not to
# occur inside descriptions or anchors -- confirm.
my $LIST_SEP = "\001";

# Record layout: the position in this table is the one-byte tag stored in
# front of each field; the second element says how the payload is encoded.
#   int  - one native int
#   str  - native int length followed by that many bytes
#   list - native int count followed by that many length-prefixed strings
my @FIELDS = (
    [ ID           => 'int'  ],    #  0
    [ TIME         => 'int'  ],    #  1
    [ ACCESSED     => 'int'  ],    #  2
    [ STATE        => 'int'  ],    #  3
    [ SIZE         => 'int'  ],    #  4
    [ LINKS        => 'int'  ],    #  5
    [ IMAGESIZE    => 'int'  ],    #  6
    [ HOPCOUNT     => 'int'  ],    #  7
    [ URL          => 'str'  ],    #  8
    [ HEAD         => 'str'  ],    #  9
    [ TITLE        => 'str'  ],    # 10
    [ DESCRIPTIONS => 'list' ],    # 11
    [ ANCHORS      => 'list' ],    # 12
    [ EMAIL        => 'str'  ],    # 13
    [ NOTIFICATION => 'str'  ],    # 14
    [ SUBJECT      => 'str'  ],    # 15
);

##
## Convert the document database first.
##
convert_database("$base.docdb", "/tmp/new.docdb", \&rewrite_document);

##
## Now create the document index
##
convert_database("$base.docs.index", "/tmp/new.docs.index", \&rewrite_index_entry);

######################################################################

# Copy every key/value pair of the GDBM file $dbfile into a freshly created
# GDBM file $newfile, passing each pair through $convert, which returns the
# (key, value) pair to store.  Dies if either database cannot be opened.
sub convert_database {
    my ($dbfile, $newfile, $convert) = @_;
    my (%newdb, %docdb);

    tie(%newdb, 'GDBM_File', $newfile, GDBM_NEWDB, 0644) or die "$newfile: '$!'";
    tie(%docdb, 'GDBM_File', $dbfile, GDBM_READER, 0) or die "$dbfile: $!";

    while (my ($key, $value) = each %docdb) {
        ($key, $value) = $convert->($key, $value);
        $newdb{$key} = $value;
    }

    untie %newdb;
    untie %docdb;
    return;
}

# Document database entry: when the key mentions the old host, rewrite the
# key and the URL stored inside the record, and print the new key.
sub rewrite_document {
    my ($key, $value) = @_;
    return ($key, $value) unless $key =~ $host_re;

    my %record = parse_ref_record($value);
    $key =~ s{$host_re}{http://$to};
    print "$key\n";

    # Only touch the URL when the record has one, so no empty URL field is
    # invented for records that never carried it.
    $record{"URL"} =~ s{$host_re}{http://$to} if exists $record{"URL"};

    return ($key, create_ref_record(%record));
}

# Document index entry: the value holds the URL; the key is left unchanged.
sub rewrite_index_entry {
    my ($key, $value) = @_;
    $value =~ s{$host_re}{http://$to};
    return ($key, $value);
}

# Serialize a record hash (as produced by parse_ref_record) back into the
# binary form: for each field present, in tag order, a one-byte tag followed
# by the payload described in @FIELDS.  Returns the packed string.
sub create_ref_record {
    my (%rec) = @_;
    my $s = '';

    for my $tag (0 .. $#FIELDS) {
        my ($name, $kind) = @{ $FIELDS[$tag] };
        next unless exists $rec{$name};
        my $data = $rec{$name};

        if ($kind eq 'int') {
            $s .= pack("Ci", $tag, $data);
        }
        elsif ($kind eq 'str') {
            $s .= pack("Ci", $tag, length($data)) . $data;
        }
        else {
            # Limit -1 keeps trailing empty entries.  The count written is
            # the real number of entries, because parse_ref_record reads
            # back exactly that many.
            my @items = split(/$LIST_SEP/, $data, -1);
            $s .= pack("Ci", $tag, scalar(@items));
            foreach my $item (@items) {
                $s .= pack("i", length($item)) . $item;
            }
        }
    }
    return $s;
}

# Decode a binary record into a hash keyed by field name.  List-valued
# fields are returned as one string with the entries joined by $LIST_SEP.
# A tag byte that is not in @FIELDS is skipped.
sub parse_ref_record {
    my ($value) = @_;
    my %rec;

    while (length($value) > 0) {
        my $tag = unpack("C", substr($value, 0, 1, ''));
        my $field = $FIELDS[$tag];
        next unless $field;
        my ($name, $kind) = @$field;

        if ($kind eq 'int') {
            $rec{$name} = _take_int(\$value);
        }
        elsif ($kind eq 'str') {
            $rec{$name} = _take_string(\$value);
        }
        else {
            my $count = _take_int(\$value);
            $count = 0 unless defined $count && $count > 0;
            my @items;
            foreach (1 .. $count) {
                last unless length($value) > 0;    # truncated record
                push @items, _take_string(\$value);
            }
            $rec{$name} = join($LIST_SEP, @items);
        }
    }
    return %rec;
}

# Remove one native int from the front of the buffer referenced by $buf and
# return it (undef when the buffer is too short to hold one).
sub _take_int {
    my ($buf) = @_;
    my $raw = substr($$buf, 0, $INT_SIZE, '');
    return unpack("i", $raw);
}

# Remove one length-prefixed string from the front of the buffer referenced
# by $buf and return its bytes unchanged (trailing spaces and NULs are kept).
sub _take_string {
    my ($buf) = @_;
    my $length = _take_int($buf);
    $length = 0 unless defined $length && $length > 0;
    return substr($$buf, 0, $length, '');
}
htdig-3.2.0b6/contrib/doc2html/ 0040755 0063146 0012731 00000000000 10063260367 015545 5 ustar angusgb htdig htdig-3.2.0b6/contrib/doc2html/doc2html.cfg 0100644 0063146 0012731 00000022452 07427026534 017752 0 ustar angusgb htdig # Configuration file for use with doc2html.pl, which is used # to index Word, WordPerfect , etc. files using Ht://dig. # # Based on wp2html.cfg file supplied with wp2html version 3.0 # The special token "typeout" simply outputs the given text # and can be used to inform users of versions, configuration changes etc. typeout="" #------------------- Single character translations --------------- # Protect HTML magic symbols. '<'="<" '>'=">" '&'="&" '"'=""" #------------------- WP code translations --------------- # File header. BEGIN is called before any text is output # BEGIN is passed three strings being the # Input Directory, Input file name and Input file type. # Do what you like with them! BEGIN="
%xH " # Beginning of a subpart. This is called for each file. begin="
%xf
%xF
%X