namazu-2.0.21/ 0000777 0000000 0000000 00000000000 11611035343 006577 5 namazu-2.0.21/pl/ 0000777 0000000 0000000 00000000000 11611035326 007213 5 namazu-2.0.21/pl/Makefile.am 0000644 0000000 0000000 00000002131 11036427275 011171 ## Process this file with automake to produce Makefile.in
# Automake settings: require automake 1.4 or later, no dependency tracking.
AUTOMAKE_OPTIONS = 1.4 no-dependencies

# Where message catalogs and the Perl library files are installed.
localedir = $(prefix)/$(DATADIRNAME)/locale
perllibdir = $(pkgdatadir)/pl

# Perl library files installed into $(perllibdir).
perllib_DATA = \
	codeconv.pl conf.pl gettext.pl gfilter.pl htmlsplit.pl nmzidx.pl \
	seed.pl usage.pl util.pl var.pl wakati.pl time.pl document.pl \
	filter.pl ext.pl extzip.pl extutf8.pl
# Files shipped in the distribution tarball.  Slightly different from
# perllib_DATA: gettext.pl and filter.pl are generated at build time, so
# their *.in templates are listed here instead.
# (The list used to start with a stray "$" before the line continuation,
# which only worked because make expands the undefined variable "$ " to
# nothing.)
EXTRA_DIST = \
	codeconv.pl \
	conf.pl \
	gettext.pl.in \
	gfilter.pl \
	htmlsplit.pl \
	nmzidx.pl \
	seed.pl \
	usage.pl \
	util.pl \
	var.pl \
	wakati.pl \
	time.pl \
	document.pl \
	filter.pl.in \
	ext.pl \
	extzip.pl \
	extutf8.pl

# Generated files removed by "make clean".
CLEANFILES = gettext.pl filter.pl
# Generate gettext.pl from its template by substituting the locale directory.
# The output goes to a temporary file that is renamed into place, so a failed
# or interrupted sed never leaves a truncated gettext.pl that looks up to
# date (the same scheme the filter.pl rule uses).  Depending on Makefile
# makes the file get rebuilt after a reconfigure.
gettext.pl: gettext.pl.in Makefile
	rm -f gettext.pl gettext.pl.tmp
	sed -e 's!@LOCALEDIR@!$(localedir)!g' \
		$(srcdir)/gettext.pl.in > gettext.pl.tmp
	mv gettext.pl.tmp gettext.pl
# Generate filter.pl from its template by substituting the package data
# directory.  The result is written to a temporary file first and then
# renamed onto the target.
filter.pl: filter.pl.in Makefile
	rm -f $@
	sed -e 's!@pkgdatadir@!$(pkgdatadir)!g' $(srcdir)/$@.in > $@.tmp
	mv $@.tmp $@
# Extra arguments for etags.  A language option only applies to the file
# names that follow it on the command line, so it has to precede the list.
ETAGS_ARGS = --lang=perl $(perllib_DATA)
# The generated library files must exist before the tags table is built.
TAGS_DEPENDENCIES = $(perllib_DATA)
namazu-2.0.21/pl/Makefile.in 0000644 0000000 0000000 00000023517 11611035110 011173 # Makefile.in generated by automake 1.6.3 from Makefile.am.
# @configure_input@
# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
# Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.
# Build environment and installation directories.  The at-sign placeholders
# in this section are filled in when config.status turns this template into
# a Makefile.
@SET_MAKE@
SHELL = @SHELL@
srcdir = @srcdir@
top_srcdir = @top_srcdir@
VPATH = @srcdir@
prefix = @prefix@
exec_prefix = @exec_prefix@
bindir = @bindir@
sbindir = @sbindir@
libexecdir = @libexecdir@
datadir = @datadir@
sysconfdir = @sysconfdir@
sharedstatedir = @sharedstatedir@
localstatedir = @localstatedir@
libdir = @libdir@
infodir = @infodir@
mandir = @mandir@
includedir = @includedir@
oldincludedir = /usr/include
pkgdatadir = $(datadir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
top_builddir = ..
# Autotools programs used by the regeneration rules.
ACLOCAL = @ACLOCAL@
AUTOCONF = @AUTOCONF@
AUTOMAKE = @AUTOMAKE@
AUTOHEADER = @AUTOHEADER@
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
# Installation commands.
INSTALL = @INSTALL@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_DATA = @INSTALL_DATA@
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_HEADER = $(INSTALL_DATA)
transform = @program_transform_name@
# Hooks run at the start of install/uninstall recipes; all are no-ops (":").
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
# Remaining values substituted by configure.
host_alias = @host_alias@
host_triplet = @host@
EXEEXT = @EXEEXT@
OBJEXT = @OBJEXT@
PATH_SEPARATOR = @PATH_SEPARATOR@
ADDITIONAL_INC = @ADDITIONAL_INC@
AMTAR = @AMTAR@
AS = @AS@
AWK = @AWK@
BUILD_INCLUDED_LIBINTL = @BUILD_INCLUDED_LIBINTL@
CATOBJEXT = @CATOBJEXT@
CC = @CC@
CHASEN = @CHASEN@
COPYRIGHT = @COPYRIGHT@
DATADIRNAME = @DATADIRNAME@
DEPDIR = @DEPDIR@
DLLTOOL = @DLLTOOL@
DO_CHASEN = @DO_CHASEN@
DO_CHASEN_NOUN = @DO_CHASEN_NOUN@
DO_KAKASI = @DO_KAKASI@
DO_MECAB = @DO_MECAB@
ECHO = @ECHO@
EMACS = @EMACS@
GENCAT = @GENCAT@
GLIBC21 = @GLIBC21@
GMSGFMT = @GMSGFMT@
HAVE_ASPRINTF = @HAVE_ASPRINTF@
HAVE_LIB = @HAVE_LIB@
HAVE_POSIX_PRINTF = @HAVE_POSIX_PRINTF@
HAVE_SNPRINTF = @HAVE_SNPRINTF@
HAVE_WPRINTF = @HAVE_WPRINTF@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INSTOBJEXT = @INSTOBJEXT@
INTLBISON = @INTLBISON@
INTLLIBS = @INTLLIBS@
INTLOBJS = @INTLOBJS@
INTL_LIBTOOL_SUFFIX_PREFIX = @INTL_LIBTOOL_SUFFIX_PREFIX@
KAKASI = @KAKASI@
LIB = @LIB@
LIBICONV = @LIBICONV@
LIBINTL = @LIBINTL@
LIBTOOL = @LIBTOOL@
LN_S = @LN_S@
LTALLOCA = @LTALLOCA@
LTLIB = @LTLIB@
LTLIBICONV = @LTLIBICONV@
LTLIBINTL = @LTLIBINTL@
LTVERSION = @LTVERSION@
LYNX = @LYNX@
MAILING_ADDRESS = @MAILING_ADDRESS@
MAINT = @MAINT@
MECAB = @MECAB@
MKINSTALLDIRS = @MKINSTALLDIRS@
NKF = @NKF@
NMZ_LIBOBJS = @NMZ_LIBOBJS@
OBJDUMP = @OBJDUMP@
OPT_ADMIN_EMAIL = @OPT_ADMIN_EMAIL@
OPT_NMZ_URI = @OPT_NMZ_URI@
OPT_WAKATI_DEFAULT = @OPT_WAKATI_DEFAULT@
PACKAGE = @PACKAGE@
PERL = @PERL@
POSUB = @POSUB@
RANLIB = @RANLIB@
STRIP = @STRIP@
TRAC_URI = @TRAC_URI@
USE_INCLUDED_LIBINTL = @USE_INCLUDED_LIBINTL@
USE_NLS = @USE_NLS@
VERSION = @VERSION@
WISH = @WISH@
am__include = @am__include@
am__quote = @am__quote@
install_sh = @install_sh@
lispdir = @lispdir@
# Automake settings: require automake 1.4 or later, no dependency tracking.
AUTOMAKE_OPTIONS = 1.4 no-dependencies

# Where message catalogs and the Perl library files are installed.
localedir = $(prefix)/$(DATADIRNAME)/locale
perllibdir = $(pkgdatadir)/pl

# Perl library files installed into $(perllibdir).
perllib_DATA = \
	codeconv.pl conf.pl gettext.pl gfilter.pl htmlsplit.pl nmzidx.pl \
	seed.pl usage.pl util.pl var.pl wakati.pl time.pl document.pl \
	filter.pl ext.pl extzip.pl extutf8.pl
# Files shipped in the distribution tarball.  Slightly different from
# perllib_DATA: gettext.pl and filter.pl are generated at build time, so
# their *.in templates are listed here instead.
# (The list used to start with a stray "$" before the line continuation,
# which only worked because make expands the undefined variable "$ " to
# nothing.)
EXTRA_DIST = \
	codeconv.pl \
	conf.pl \
	gettext.pl.in \
	gfilter.pl \
	htmlsplit.pl \
	nmzidx.pl \
	seed.pl \
	usage.pl \
	util.pl \
	var.pl \
	wakati.pl \
	time.pl \
	document.pl \
	filter.pl.in \
	ext.pl \
	extzip.pl \
	extutf8.pl

# Generated files removed by "make clean".
CLEANFILES = gettext.pl filter.pl
# Extra arguments for etags.  A language option only applies to the file
# names that follow it on the command line, so it has to precede the list.
ETAGS_ARGS = --lang=perl $(perllib_DATA)
# The generated library files must exist before the tags table is built.
TAGS_DEPENDENCIES = $(perllib_DATA)
# Bookkeeping variables used by the generated rules below.
# subdir is this directory's path relative to the top of the build tree.
subdir = pl
mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
CONFIG_HEADER = $(top_builddir)/config.h
# Files produced by config.status in this directory; removed by distclean.
CONFIG_CLEAN_FILES = var.pl conf.pl
# Dependency tracking is disabled (no-dependencies), so these stay empty.
depcomp =
am__depfiles_maybe =
DIST_SOURCES =
DATA = $(perllib_DATA)
DIST_COMMON = Makefile.am Makefile.in conf.pl.in var.pl.in
# Default target.
all: all-am
# Empty suffix list: no suffix rules are used in this directory.
.SUFFIXES:
# Regenerate Makefile.in by running automake from the top source directory.
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4)
cd $(top_srcdir) && \
$(AUTOMAKE) --gnu pl/Makefile
# Recreate the Makefile from Makefile.in through config.status.
Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in $(top_builddir)/config.status
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)
# var.pl and conf.pl are produced from their *.in templates by config.status.
var.pl: $(top_builddir)/config.status var.pl.in
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
conf.pl: $(top_builddir)/config.status conf.pl.in
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
# Libtool cleanup targets; the leading "-" makes make ignore rm failures.
mostlyclean-libtool:
-rm -f *.lo
clean-libtool:
-rm -rf .libs _libs
distclean-libtool:
-rm -f libtool
# This rule has no recipe, so it does nothing.
uninstall-info-am:
# Install every file of perllib_DATA into $(DESTDIR)$(perllibdir).
# Each file is taken from the build directory when it exists there and from
# $(srcdir) otherwise; any directory part of the name is stripped for the
# installed file name.
perllibDATA_INSTALL = $(INSTALL_DATA)
install-perllibDATA: $(perllib_DATA)
@$(NORMAL_INSTALL)
$(mkinstalldirs) $(DESTDIR)$(perllibdir)
@list='$(perllib_DATA)'; for p in $$list; do \
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
f="`echo $$p | sed -e 's|^.*/||'`"; \
echo " $(perllibDATA_INSTALL) $$d$$p $(DESTDIR)$(perllibdir)/$$f"; \
$(perllibDATA_INSTALL) $$d$$p $(DESTDIR)$(perllibdir)/$$f; \
done
# Remove every file of perllib_DATA from $(DESTDIR)$(perllibdir), echoing
# each rm command before running it.
uninstall-perllibDATA:
@$(NORMAL_UNINSTALL)
@list='$(perllib_DATA)'; for p in $$list; do \
f="`echo $$p | sed -e 's|^.*/||'`"; \
echo " rm -f $(DESTDIR)$(perllibdir)/$$f"; \
rm -f $(DESTDIR)$(perllibdir)/$$f; \
done
# Tag-table targets.
ETAGS = etags
ETAGSFLAGS =
tags: TAGS
# ID: feed the de-duplicated list of source files to mkid.  Files missing
# from the build directory are looked up in $(srcdir).
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) ' { files[$$0] = 1; } \
END { for (i in files) print i; }'`; \
mkid -fID $$unique
# TAGS: run etags on $(ETAGS_ARGS) plus the de-duplicated source list; etags
# is skipped when there is nothing at all to index.
TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
$(TAGS_FILES) $(LISP)
tags=; \
here=`pwd`; \
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
unique=`for i in $$list; do \
if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
done | \
$(AWK) ' { files[$$0] = 1; } \
END { for (i in files) print i; }'`; \
test -z "$(ETAGS_ARGS)$$tags$$unique" \
|| $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
$$tags $$unique
# GTAGS: run gtags from the top source directory, passing it the absolute
# path of the top build directory.
GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& cd $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) $$here
distclean-tags:
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH
# Distribution support.
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
top_distdir = ..
distdir = $(top_distdir)/$(PACKAGE)-$(VERSION)
# Copy every file of DISTFILES into $(distdir).  A file is taken from the
# current directory when it exists there and from $(srcdir) otherwise.
# Directories are copied recursively; plain files are copied only when they
# are not already present in $(distdir).  Any failed copy aborts the recipe.
distdir: $(DISTFILES)
@list='$(DISTFILES)'; for file in $$list; do \
if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
if test "$$dir" != "$$file" && test "$$dir" != "."; then \
dir="/$$dir"; \
$(mkinstalldirs) "$(distdir)$$dir"; \
else \
dir=''; \
fi; \
if test -d $$d/$$file; then \
if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
fi; \
cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
else \
test -f $(distdir)/$$file \
|| cp -p $$d/$$file $(distdir)/$$file \
|| exit 1; \
fi; \
done
# Standard targets.  Most of them only forward to a "-am" variant; targets
# without a recipe do nothing in this directory.
check-am: all-am
check: check-am
all-am: Makefile $(DATA)
installdirs:
$(mkinstalldirs) $(DESTDIR)$(perllibdir)
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
installcheck: installcheck-am
# install-strip re-runs "install" with the stripping install program.
install-strip:
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
INSTALL_STRIP_FLAG=-s \
`test -z '$(STRIP)' || \
echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
mostlyclean-generic:
# clean-generic removes $(CLEANFILES) when that list is not empty.
clean-generic:
-test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
# distclean-generic removes the Makefile and the config.status outputs.
distclean-generic:
-rm -f Makefile $(CONFIG_CLEAN_FILES)
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
clean: clean-am
clean-am: clean-generic clean-libtool mostlyclean-am
distclean: distclean-am
distclean-am: clean-am distclean-generic distclean-libtool \
distclean-tags
dvi: dvi-am
dvi-am:
info: info-am
info-am:
install-data-am: install-perllibDATA
install-exec-am:
install-info: install-info-am
install-man:
installcheck-am:
maintainer-clean: maintainer-clean-am
maintainer-clean-am: distclean-am maintainer-clean-generic
mostlyclean: mostlyclean-am
mostlyclean-am: mostlyclean-generic mostlyclean-libtool
uninstall-am: uninstall-info-am uninstall-perllibDATA
# Targets that are commands rather than files to be built.
.PHONY: GTAGS all all-am check check-am clean clean-generic \
clean-libtool distclean distclean-generic distclean-libtool \
distclean-tags distdir dvi dvi-am info info-am install \
install-am install-data install-data-am install-exec \
install-exec-am install-info install-info-am install-man \
install-perllibDATA install-strip installcheck installcheck-am \
installdirs maintainer-clean maintainer-clean-generic \
mostlyclean mostlyclean-generic mostlyclean-libtool tags \
uninstall uninstall-am uninstall-info-am uninstall-perllibDATA
# Generate gettext.pl from its template by substituting the locale directory.
# The output goes to a temporary file that is renamed into place, so a failed
# or interrupted sed never leaves a truncated gettext.pl that looks up to
# date (the same scheme the filter.pl rule uses).  Depending on Makefile
# makes the file get rebuilt after a reconfigure.
gettext.pl: gettext.pl.in Makefile
	rm -f gettext.pl gettext.pl.tmp
	sed -e 's!@LOCALEDIR@!$(localedir)!g' \
		$(srcdir)/gettext.pl.in > gettext.pl.tmp
	mv gettext.pl.tmp gettext.pl
# Generate filter.pl from its template by substituting the package data
# directory.  The result is written to a temporary file first and then
# renamed onto the target.
filter.pl: filter.pl.in Makefile
	rm -f $@
	sed -e 's!@pkgdatadir@!$(pkgdatadir)!g' $(srcdir)/$@.in > $@.tmp
	mv $@.tmp $@
# Tell versions [3.59,3.63) of GNU make not to export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
namazu-2.0.21/pl/conf.pl.in 0000644 0000000 0000000 00000014143 11140013717 011016 #
# This is a Namazu configuration file for mknmz.
#
package conf; # Don't remove this line!
#===================================================================
#
# Administrator's email address
#
$ADDRESS = '@OPT_ADMIN_EMAIL@';
#===================================================================
#
# Regular Expression Patterns
#
#
# This pattern specifies HTML suffixes.
#
$HTML_SUFFIX = "html?|[ps]html|html\\.[a-z]{2}";
#
# This pattern specifies file names which will be targeted.
# NOTE: It can be specified by --allow=regex option.
# Do NOT use `$' or `^' anchors.
# Case-insensitive.
#
$ALLOW_FILE = ".*\\.(?:$HTML_SUFFIX)|.*\\.txt" . # HTML, plain text
"|.*\\.gz|.*\\.Z|.*\\.bz2" . # Compressed files
"|.*\\.pdf|.*\\.ps" . # PDF, PostScript
"|.*\\.tex|.*\\.dvi" . # TeX, DVI
"|.*\\.rpm|.*\\.deb" . # RPM, DEB
"|.*\\.doc|.*\\.xls|.*\\.pp[st]" . # Word, Excel, PowerPoint
"|.*\\.docx|.*\\.xlsx|.*\\.pp[st]x" . # MS-OfficeOpenXML Word, Excel, PowerPoint
"|.*\\.vs[dst]|.*\\.v[dst]x" . # Visio
"|.*\\.j[sabf]w|.*\\.jtd" . # Ichitaro 4, 5, 6, 7, 8
"|.*\\.sx[widc]" . # OpenOffice Writer,Calc,Impress,Draw
"|.*\\.od[tspg]" . # OpenOffice2.0
"|.*\\.rtf" . # Rich Text Format
"|.*\\.hdml|.*\\.mht" . # HDML MHTML
"|.*\\.mp3" . # MP3
"|.*\\.gnumeric" . # Gnumeric
"|.*\\.kwd|.*\\.ksp" . # KWord, KSpread
"|.*\\.kpr|.*\\.flw" . # KPresenter, Kivio
"|.*\\.eml|\\d+|[-\\w]+\\.[1-9n]"; # Mail/News, man
#
# This pattern specifies file names which will NOT be targeted.
# NOTE: It can be specified by --deny=regex option.
# Do NOT use `$' or `^' anchors.
# Case-insensitive.
#
$DENY_FILE = ".*\\.(gif|png|jpg|jpeg)|.*\\.tar\\.gz|core|.*\\.bak|.*~|\\..*|\x23.*";
#
# This pattern specifies DDN(DOS Device Name) which will NOT be targeted.
# NOTE: Only for Windows.
# Do NOT use `$' or `^' anchors.
# Case-insensitive.
#
$DENY_DDN = "con|aux|nul|prn|lpt[1-9]|com[1-9][0-9]?|clock\$|xmsxxxx0";
#
# This pattern specifies PATHNAMEs which will NOT be targeted.
# NOTE: Usually specified by --exclude=regex option.
#
$EXCLUDE_PATH = undef;
#
# This pattern specifies file names which can be omitted
# in URI. e.g., 'index.html|index.htm|Default.html'
#
# NOTE: This is similar to Apache's "DirectoryIndex" directive.
#
$DIRECTORY_INDEX = "";
#
# This pattern specifies the Mail/News header fields which
# should be searchable. NOTE: case-insensitive
#
$REMAIN_HEADER = "From|Date|Message-ID";
#
# This pattern specifies fields which are used for field-specified
# searching. NOTE: case-insensitive
#
$SEARCH_FIELD = "message-id|subject|from|date|uri|newsgroups|to|summary|size";
#
# This pattern specifies meta tags which are used for field-specified
# searching. NOTE: case-insensitive
#
$META_TAGS = "keywords|description";
#
# This hash specifies aliases for NMZ.field.* files.
# NOTE: Editing NOT recommended.
#
%FIELD_ALIASES = ('title' => 'subject', 'author' => 'from');
#
# This pattern specifies HTML elements which should be replaced with
# null string when removing them. Normally, the elements are replaced
# with a single space character.
#
$NON_SEPARATION_ELEMENTS = 'A|TT|CODE|SAMP|KBD|VAR|B|STRONG|I|EM|CITE|FONT|U|'.
'STRIKE|BIG|SMALL|DFN|ABBR|ACRONYM|Q|SUB|SUP|SPAN|BDO';
#
# This pattern specifies the attributes of an HTML tag which should be
# searchable.
#
$HTML_ATTRIBUTES = 'ALT|SUMMARY|TITLE';
#===================================================================
#
# Critical Numbers
#
#
# The max size of files which can be loaded in memory at once.
# If you have much memory, you can increase the value.
# If you have less memory, you can decrease the value.
#
$ON_MEMORY_MAX = 5000000;
#
# The max file size for indexing. Files larger than this
# will be ignored.
# NOTE: This value is usually larger than TEXT_SIZE_MAX because
# binary-formatted files such as PDF, Word are larger.
#
$FILE_SIZE_MAX = 2000000;
#
# The max text size for indexing. Files larger than this
# will be ignored.
#
$TEXT_SIZE_MAX = 600000;
#
# The max length of a word. Words longer than this will be ignored.
#
$WORD_LENG_MAX = 128;
#
# Weights for HTML elements which are used for term weighting.
#
%Weight =
(
'html' => {
'title' => 16,
'h1' => 8,
'h2' => 7,
'h3' => 6,
'h4' => 5,
'h5' => 4,
'h6' => 3,
'a' => 4,
'strong' => 2,
'em' => 2,
'kbd' => 2,
'samp' => 2,
'var' => 2,
'code' => 2,
'cite' => 2,
'abbr' => 2,
'acronym'=> 2,
'dfn' => 2,
},
'metakey' => 32, # for meta keywords (presumably the "keywords" meta tag; TODO confirm)
'headers' => 8, # for Mail/News' headers
);
#
# The max length of a HTML-tagged string which can be processed for
# term weighting.
# NOTE: Quite a few people misuse markup (presumably heading elements;
# the tag name is missing from this comment) just to change the font size.
#
$INVALID_LENG = 128;
#
# The max length of a field.
# This MUST be smaller than libnamazu.h's BUFSIZE (usually 1024).
#
$MAX_FIELD_LENGTH = 200;
#===================================================================
#
# Software for handling Japanese text
#
#
# Network Kanji Filter nkf v1.71 or later
#
$NKF = "@NKF@";
#
# KAKASI 2.x or later
# Text::Kakasi 1.05 or later
#
$KAKASI = "@DO_KAKASI@";
#
# ChaSen 2.02 or later (simple wakatigaki)
# Text::ChaSen 1.03
#
$CHASEN = "@DO_CHASEN@";
#
# ChaSen 2.02 or later (with noun words extraction)
#
$CHASEN_NOUN = "@DO_CHASEN_NOUN@";
#
# MeCab
#
$MECAB = "@DO_MECAB@";
#
# Default Japanese processor: KAKASI or ChaSen or MeCab.
#
$WAKATI = $@OPT_WAKATI_DEFAULT@;
#===================================================================
#
# Directories
#
# $LIBDIR = "@PERLLIBDIR@";
# $FILTERDIR = "@FILTERDIR@";
# $TEMPLATEDIR = "@TEMPLATEDIR@";
#
#[WIN] $LIBDIR = 'C:/namazu/share/namazu/pl';
#[WIN] $FILTERDIR = 'C:/namazu/share/namazu/filter';
#[WIN] $TEMPLATEDIR = 'C:/namazu/share/namazu/template';
1;
namazu-2.0.21/pl/var.pl.in 0000644 0000000 0000000 00000007051 11140212154 010655 # -*- Perl -*-
# $Id: var.pl.in,v 1.12.8.4 2009-01-29 02:29:00 opengl2772 Exp $
# Copyright (C) 1997-1999 Satoru Takabayashi All rights reserved.
# Copyright (C) 2000-2009 Namazu Project All rights reserved.
# This is free software with ABSOLUTELY NO WARRANTY.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either versions 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA
#
# This file must be encoded in EUC-JP encoding
#
package var;
#-----------------------------------------------------------
#
# Software information.
# The values below are placeholders that are filled in when this template
# is turned into var.pl at configure time.
#
$VERSION = "@VERSION@";
$COPYRIGHT = "@COPYRIGHT@"; # should be doublequote
$MAILING_ADDRESS = '@MAILING_ADDRESS@';
$TRAC_URI = '@TRAC_URI@';
#-----------------------------------------------------------
#
# File names of index files.
#
my $base = "NMZ";
# Regular index files: each key doubles as the file-name suffix.
%NMZ = map { ($_ => "$base.$_") } qw(
    i ii r head foot log slog lock lock2 msg body err
    w wi p pi field result t status tips version
);
# Underscore-prefixed keys start out with the same names as the plain ones.
$NMZ{"_$_"} = $NMZ{$_} for qw(t i p pi r ii w wi);
$NMZ{'_flist'} = "$base.flist";
$NMZ{'_checkpoint'} = "$base.checkpoint";
# Double-underscore keys name the "tmp_" variants.
$NMZ{"__$_"} = "$base.tmp_$_" for qw(i w p pi);
#-----------------------------------------------------------
#
# Options
#
# Every option starts out switched off (0).  Keys are assigned one by one so
# that any other entry already present in %Opt is left alone.
$Opt{$_} = 0 for qw(
    debug quiet verbose robotexclude htaccessexclude htmlsplit uuencode
    noheadabst hiragana okurigana noedgesymbol nosymbol noencodeuri
    nodelete noupdate checkfilesize decodebase64
);
#-----------------------------------------------------------
#
# Size of `int'
#
# Number of bytes pack() emits for one value in the "i" format.
$INTSIZE = length(pack("i", 0));
#-----------------------------------------------------------
#
# Misc
#
undef $OUTPUT_DIR;
$NO_TITLE = N_("No title in original");
$USE_NKF_MODULE = 0;

# Tables keyed by an action or media type; only the 'text/plain' entries
# are preset here.
%REQUIRE_ACTIONS = ();
%RECURSIVE_ACTIONS = ();
%REQUIRE_PRE_CODECONV = ('text/plain' => 1);
%REQUIRE_POST_CODECONV = ('text/plain' => 0);
%Supported = ('text/plain' => "yes");
# Dummy function for gettextization.
# It marks a string literal as translatable and hands the string back
# unchanged.  (With an empty body the call returned nothing, which left
# $NO_TITLE undefined.)
#   $_[0] - the string to mark.
# Returns the string that was passed in.
sub N_ {
    return $_[0];
}
1;
namazu-2.0.21/pl/codeconv.pl 0000644 0000000 0000000 00000016102 11140260405 011257 #
# -*- Perl -*-
# $Id: codeconv.pl,v 1.11.8.17 2009-01-29 07:55:49 opengl2772 Exp $
# Copyright (C) 1997-1999 Satoru Takabayashi All rights reserved.
# Copyright (C) 2000-2009 Namazu Project All rights reserved.
# This is free software with ABSOLUTELY NO WARRANTY.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either versions 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA
#
# This file must be encoded in EUC-JP encoding
#
# package for code conversion
#
# imported from Rei FURUKAWA san's pnamazu.
# [1998-09-24]
package codeconv;
use strict;
# Lookup table for ktoe(): the second EUC-JP byte of the full-width
# character for each half-width JIS X 0201 code.  ktoe() indexes it with
# (code & 0x7f) - 0x21.
my @ktoe = (0xA3, 0xD6, 0xD7, 0xA2, 0xA6, 0xF2, 0xA1, 0xA3,
0xA5, 0xA7, 0xA9, 0xE3, 0xE5, 0xE7, 0xC3, 0xBC,
0xA2, 0xA4, 0xA6, 0xA8, 0xAA, 0xAB, 0xAD, 0xAF,
0xB1, 0xB3, 0xB5, 0xB7, 0xB9, 0xBB, 0xBD, 0xBF,
0xC1, 0xC4, 0xC6, 0xC8, 0xCA, 0xCB, 0xCC, 0xCD,
0xCE, 0xCF, 0xD2, 0xD5, 0xD8, 0xDB, 0xDE, 0xDF,
0xE0, 0xE1, 0xE2, 0xE4, 0xE6, 0xE8, 0xE9, 0xEA,
0xEB, 0xEC, 0xED, 0xEF, 0xF3, 0xAB, 0xAC, );
# Convert one JIS X 0201 half-width KANA character to its JIS X 0208
# (full-width) equivalent, both in EUC-JP.
#
#   $c1 - the half-width kana byte (the byte that follows the 0x8e prefix).
#   $c2 - the voiced (0xde) or semi-voiced (0xdf) sound mark byte that
#         followed $c1, or a false value when there was none.
#
# Returns the two-byte EUC-JP string of the full-width character.
sub ktoe ($$) {
    my ($c1, $c2) = @_;
    $c1 = ord($c1) & 0x7f;
    # Punctuation, the prolonged sound mark (0x30) and the two sound marks
    # map into row 0xa1; everything else is katakana in row 0xa5.
    my($hi) = ($c1 <= 0x25 || $c1 == 0x30 || 0x5e <= $c1)? "\xa1": "\xa5";
    $c1 -= 0x21;
    my($lo) = $ktoe[$c1];
    if ($c2) {
        if ($c1 == 5) {
            # NOTE(review): half-width WO followed by a sound mark is mapped
            # to 0xdd here; kept as is -- confirm the intended character.
            $lo = 0xdd;
        } elsif ($c1 == 0x12 && ord($c2) == 0xde) {
            # U + voiced mark is VU (0xa5f4).  VU does not directly follow U
            # in JIS X 0208, so the generic "+1" below would yield small E
            # (0xa5a7) instead.
            $lo = 0xf4;
        } else {
            # The voiced form directly follows the plain one and the
            # semi-voiced form follows the voiced one.
            $lo++;
            $lo++ if (ord($c2) == 0xdf);
        }
    }
    return $hi . chr($lo);
}
# Replace, in the EUC-JP string referenced by the argument, every half-width
# kana (0x8e prefix) -- together with a directly following sound mark, if
# any -- by the string ktoe() returns for it.  The string is modified in
# place and only when util::islang("ja") is true.
sub eucjp_han2zen_kana ($) {
    my $textref = shift;
    $$textref =~ s/\x8e([\xa1-\xdf])(\x8e([\xde\xdf]))?/&ktoe($1,$3)/geo
        if util::islang("ja");
}
# Convert a single Shift_JIS character to EUC-JP.
#   argument - the raw bytes of one character (one or two bytes).
# Returns the two-byte EUC-JP string.  When either resulting byte falls
# outside 0x80-0xff, the fixed pair 0xa2 0xae is returned instead.
sub stoe ($) {
    my ($lead, $trail) = unpack('CC', shift);
    if ($lead >= 0xa1 && $lead <= 0xdf) {
        # Single-byte half-width kana: prefix it with 0x8e.
        ($lead, $trail) = (0x8e, $lead);
    } elsif ($trail >= 0x9f) {
        $lead = 2 * $lead - ($lead >= 0xe0 ? 0xe0 : 0x60);
        $trail += 2;
    } else {
        $lead = 2 * $lead - ($lead >= 0xe0 ? 0xe1 : 0x61);
        $trail += 0x60 + ($trail < 0x7f);
    }
    my $in_range = ($lead >= 0x80 && $lead < 0x100
                    && $trail >= 0x80 && $trail < 0x100);
    # Outside of the range of an EUC-JP code.
    return chr(0xa2) . chr(0xae) unless $in_range;
    return chr($lead) . chr($trail);
}
# Return a copy of the given Shift_JIS string with every double-byte
# character and every half-width kana byte passed through stoe().
# The string is returned unchanged unless util::islang("ja") is true.
sub shiftjis_to_eucjp ($) {
    my $text = shift;
    $text =~ s/([\x81-\x9f\xe0-\xfc][\x40-\x7e\x80-\xfc]|[\xa1-\xdf])/&stoe($1)/geo
        if util::islang("ja");
    return $text;
}
# Convert a single EUC-JP character to Shift_JIS.
#   argument - the raw bytes of one EUC-JP character.
# Returns the Shift_JIS bytes: one byte for half-width kana (0x8e prefix),
# the fixed pair 0x81 0xac for JIS X 0212 characters (0x8f prefix), and a
# computed byte pair otherwise.
sub etos ($) {
    my ($hi, $lo) = unpack('CC', shift);
    return chr($lo)   if $hi == 0x8e;    # JIS X 0201 KATAKANA
    return "\x81\xac" if $hi == 0x8f;    # JIS X 0212 HOJO KANJI
    my $bias = $hi < 0xdf ? 0x30 : 0x70;
    if ($hi % 2) {
        $bias++;
        $lo -= 0x60 + ($lo < 0xe0);
    } else {
        $lo -= 2;
    }
    return chr(($hi >> 1) + $bias) . chr($lo);
}
# Return a copy of the given EUC-JP string with every multibyte character
# (two-byte, 0x8e-prefixed or 0x8f-prefixed) passed through etos().
# The string is returned unchanged unless util::islang("ja") is true.
sub eucjp_to_shiftjis ($) {
    my $text = shift;
    $text =~ s/([\xa1-\xfe][\xa1-\xfe]|\x8e[\xa1-\xdf]|\x8f[\xa1-\xfe][\xa1-\xfe])/&etos($1)/ge
        if util::islang("ja");
    return $text;
}
# Remove a dangling first byte of an EUC-JP character from the end of a
# string.  When the string ends in 0x8f, or holds an odd number of bytes in
# the set 0x8e, 0xa1-0xfe, the last byte is dropped -- and one more byte if
# the string then ends in 0x8f.  Returns the (possibly shortened) copy;
# nothing is removed unless util::islang("ja") is true.
sub chomp_eucjp ($) {
    my $text = shift;
    return $text unless util::islang("ja");
    my $dangling = ($text =~ /\x8f$/)
        || (($text =~ tr/\x8e\xa1-\xfe//) % 2);
    if ($dangling) {
        chop($text);
        chop($text) if ($text =~ /\x8f$/);
    }
    return $text;
}
# Convert the text referenced by the argument to EUC-JP with NKF, in place.
# When $var::USE_NKF_MODULE is set the NKF module is called directly;
# otherwise the text is piped through the external $conf::NKF command into a
# temporary file, which is read back and then deleted.
# Nothing is done unless util::islang("ja") is true.
sub toeuc ($) {
    my ($contref) = @_;
    if (util::islang("ja")) {
        my $nkf_opt = "-emXZ1";
        if ($var::USE_NKF_MODULE) {
            # namazu-devel-ja #3152 -> backed out, #3181
            $$contref = NKF::nkf($nkf_opt, $$contref);
        } else {
            my $nkftmp = util::tmpnam("NMZ.nkf");
            {
                # Feed the text to the external command.
                my $to_nkf = util::efopen("|$conf::NKF $nkf_opt > $nkftmp");
                print $to_nkf $$contref;
                util::fclose($to_nkf);
            }
            {
                # Read the converted text back.
                my $from_tmp = util::efopen("< $nkftmp");
                $$contref = util::readfile($from_tmp);
                util::fclose($from_tmp);
            }
            unlink($nkftmp);
        }
    }
}
# Convert full-width ASCII characters in the EUC-JP string referenced by the
# argument to their one-byte ASCII form, in place.
# Every EUC-JP multibyte character is examined:
#   - 0xa3 followed by a byte in 0xb0-0xb9, 0xc1-0xda or 0xe1-0xfa is
#     replaced by that second byte ANDed with 0x7f;
#   - a character of row 0xa1 is replaced by its entry in the table below,
#     unless that entry is "\x00" (no ASCII equivalent);
#   - anything else is left as it is.
# Nothing is done unless util::islang("ja") is true.
sub eucjp_zen2han_ascii ($) {
my ($strref) = @_;
if (util::islang("ja")) {
$$strref =~ s/([\xa1-\xfe][\xa1-\xfe]|\x8e[\xa1-\xdf]|\x8f[\xa1-\xfe][\xa1-\xfe])/
my $tmp = $1;
if ($tmp =~ m!\xa3([\xb0-\xb9\xc1-\xda\xe1-\xfa])!) {
$tmp = $1 & "\x7F";
} elsif ($tmp =~ m!\xa1([\xa0-\xfe])!) {
my $kigou = (
# X0208 kigou conversion table
# 0xa1a0 - 0xa1fe
"\x00","\x20","\x00","\x00","\x2C","\x2E","\x00","\x3A",
"\x3B","\x3F","\x21","\x00","\x00","\x27","\x60","\x00",
"\x5E","\x00","\x5F","\x00","\x00","\x00","\x00","\x00",
"\x00","\x00","\x00","\x00","\x00","\x2D","\x00","\x2F",
"\x5C","\x00","\x00","\x7C","\x00","\x00","\x60","\x27",
"\x22","\x22","\x28","\x29","\x00","\x00","\x5B","\x5D",
"\x7B","\x7D","\x3C","\x3E","\x00","\x00","\x00","\x00",
"\x00","\x00","\x00","\x00","\x2B","\x2D","\x00","\x00",
"\x00","\x3D","\x00","\x3C","\x3E","\x00","\x00","\x00",
"\x00","\x00","\x00","\x00","\x00","\x00","\x00","\x00",
"\x24","\x00","\x00","\x25","\x23","\x26","\x2A","\x40",
"\x00","\x00","\x00","\x00","\x00","\x00","\x00","\x00"
)[unpack("C", $1) - unpack("C", "\xa0")];
$tmp = $kigou unless ($kigou eq "\x00");
}
$tmp;
/gse;
}
}
# Normalize the EUC-JP text referenced by the argument in place: first
# eucjp_han2zen_kana(), then eucjp_zen2han_ascii().  Both steps are skipped
# unless util::islang("ja") is true.  Returns the reference it was given.
sub normalize_eucjp ($) {
    my $contref = shift;
    if (util::islang("ja")) {
        for my $step (\&codeconv::eucjp_han2zen_kana,
                      \&codeconv::eucjp_zen2han_ascii) {
            $step->($contref);
        }
    }
    return $contref;
}
# Normalize line endings of the text referenced by the argument, in place:
# CR LF pairs and lone CRs become LF, and every LF is then written as "\n".
# Returns the normalized text.
sub normalize_nl ($) {
    my $textref = shift;
    for ($$textref) {
        s/\x0d\x0a/\x0a/g;    # Windows
        s/\x0d/\x0a/g;        # Mac
        s/\x0a/\n/g;
    }
    return $$textref;
}
# Replace control characters in the text referenced by the argument with a
# space, in place.  Affected bytes: 0x01-0x08, 0x0b-0x0c, 0x0e-0x1f and 0x7f
# (tab, LF and CR are kept).  Returns the number of bytes replaced.
sub remove_control_char ($) {
    my $ref = shift;
    my $replaced = ($$ref =~ tr/\x01-\x08\x0b-\x0c\x0e-\x1f\x7f/ /);
    return $replaced;
}
# Normalize the text referenced by the argument in place: line endings
# first (normalize_nl), then control characters (remove_control_char).
# Returns whatever remove_control_char() returns.
sub normalize_document ($) {
    my $docref = shift;
    codeconv::normalize_nl($docref);
    return codeconv::remove_control_char($docref);
}
# Convert the text referenced by the argument to EUC-JP (toeuc) and then
# normalize it (normalize_document), both in place.
# Returns whatever normalize_document() returns.
sub codeconv_document ($) {
    my $docref = shift;
    #codeconv::to_inner_encoding($docref, 'unknown');
    codeconv::toeuc($docref);
    return codeconv::normalize_document($docref);
}
# Normalize text that is already EUC-JP, in place: normalize_eucjp() first,
# then normalize_document().
# Returns whatever normalize_document() returns.
sub normalize_eucjp_document ($) {
    my $docref = shift;
    codeconv::normalize_eucjp($docref);
    return codeconv::normalize_document($docref);
}
# Replace every byte in the range 0x80-0xff of the text referenced by the
# argument with "#", in place.  Returns the resulting text.
sub tousascii ($) {
    my $ref = shift;
    $$ref =~ tr/\x80-\xFF/#/;
    return $$ref;
}
1;
namazu-2.0.21/pl/conf.pl 0000644 0000000 0000000 00000014265 11611035166 010423 #
# This is a Namazu configuration file for mknmz.
#
package conf; # Don't remove this line!
#===================================================================
#
# Administrator's email address
#
$ADDRESS = 'webmaster@vectra.akaneiro.jp';
#===================================================================
#
# Regular Expression Patterns
#
#
# This pattern specifies HTML suffixes.
#
$HTML_SUFFIX = "html?|[ps]html|html\\.[a-z]{2}";
#
# This pattern specifies file names which will be targeted.
# NOTE: It can be specified by --allow=regex option.
# Do NOT use `$' or `^' anchors.
# Case-insensitive.
#
$ALLOW_FILE = ".*\\.(?:$HTML_SUFFIX)|.*\\.txt" . # HTML, plain text
"|.*\\.gz|.*\\.Z|.*\\.bz2" . # Compressed files
"|.*\\.pdf|.*\\.ps" . # PDF, PostScript
"|.*\\.tex|.*\\.dvi" . # TeX, DVI
"|.*\\.rpm|.*\\.deb" . # RPM, DEB
"|.*\\.doc|.*\\.xls|.*\\.pp[st]" . # Word, Excel, PowerPoint
"|.*\\.docx|.*\\.xlsx|.*\\.pp[st]x" . # MS-OfficeOpenXML Word, Excel, PowerPoint
"|.*\\.vs[dst]|.*\\.v[dst]x" . # Visio
"|.*\\.j[sabf]w|.*\\.jtd" . # Ichitaro 4, 5, 6, 7, 8
"|.*\\.sx[widc]" . # OpenOffice Writer,Calc,Impress,Draw
"|.*\\.od[tspg]" . # OpenOffice2.0
"|.*\\.rtf" . # Rich Text Format
"|.*\\.hdml|.*\\.mht" . # HDML MHTML
"|.*\\.mp3" . # MP3
"|.*\\.gnumeric" . # Gnumeric
"|.*\\.kwd|.*\\.ksp" . # KWord, KSpread
"|.*\\.kpr|.*\\.flw" . # KPresenter, Kivio
"|.*\\.eml|\\d+|[-\\w]+\\.[1-9n]"; # Mail/News, man
#
# This pattern specifies file names which will NOT be targeted.
# NOTE: It can be specified by --deny=regex option.
# Do NOT use `$' or `^' anchors.
# Case-insensitive.
#
$DENY_FILE = ".*\\.(gif|png|jpg|jpeg)|.*\\.tar\\.gz|core|.*\\.bak|.*~|\\..*|\x23.*";
#
# This pattern specifies DDN(DOS Device Name) which will NOT be targeted.
# NOTE: Only for Windows.
# Do NOT use `$' or `^' anchors.
# Case-insensitive.
#
$DENY_DDN = "con|aux|nul|prn|lpt[1-9]|com[1-9][0-9]?|clock\$|xmsxxxx0";
#
# This pattern specifies PATHNAMEs which will NOT be targeted.
# NOTE: Usually specified by --exclude=regex option.
#
$EXCLUDE_PATH = undef;
#
# This pattern specifies file names which can be omitted
# in URI. e.g., 'index.html|index.htm|Default.html'
#
# NOTE: This is similar to Apache's "DirectoryIndex" directive.
#
$DIRECTORY_INDEX = "";
#
# This pattern specifies the Mail/News header fields which
# should be searchable. NOTE: case-insensitive
#
$REMAIN_HEADER = "From|Date|Message-ID";
#
# This pattern specifies fields which are used for field-specified
# searching. NOTE: case-insensitive
#
$SEARCH_FIELD = "message-id|subject|from|date|uri|newsgroups|to|summary|size";
#
# This pattern specifies meta tags which are used for field-specified
# searching. NOTE: case-insensitive
#
$META_TAGS = "keywords|description";
#
# This hash specifies aliases for NMZ.field.* files.
# NOTE: Editing NOT recommended.
#
%FIELD_ALIASES = ('title' => 'subject', 'author' => 'from');
#
# This pattern specifies HTML elements which should be replaced with
# null string when removing them. Normally, the elements are replaced
# with a single space character.
#
$NON_SEPARATION_ELEMENTS = 'A|TT|CODE|SAMP|KBD|VAR|B|STRONG|I|EM|CITE|FONT|U|'.
'STRIKE|BIG|SMALL|DFN|ABBR|ACRONYM|Q|SUB|SUP|SPAN|BDO';
#
# This pattern specifies the attributes of an HTML tag which should be
# searchable.
#
$HTML_ATTRIBUTES = 'ALT|SUMMARY|TITLE';
#===================================================================
#
# Critical Numbers
#
#
# The max size of files which can be loaded in memory at once.
# If you have much memory, you can increase the value.
# If you have less memory, you can decrease the value.
#
$ON_MEMORY_MAX = 5000000;
#
# The max file size for indexing. Files larger than this
# will be ignored.
# NOTE: This value is usually larger than TEXT_SIZE_MAX because
# binary-formatted files such as PDF, Word are larger.
#
$FILE_SIZE_MAX = 2000000;
#
# The max text size for indexing. Files larger than this
# will be ignored.
#
$TEXT_SIZE_MAX = 600000;
#
# The max length of a word. Words longer than this will be ignored.
#
$WORD_LENG_MAX = 128;
#
# Weights for HTML elements which are used for term weighting.
#
%Weight =
(
'html' => {
'title' => 16,
'h1' => 8,
'h2' => 7,
'h3' => 6,
'h4' => 5,
'h5' => 4,
'h6' => 3,
'a' => 4,
'strong' => 2,
'em' => 2,
'kbd' => 2,
'samp' => 2,
'var' => 2,
'code' => 2,
'cite' => 2,
'abbr' => 2,
'acronym'=> 2,
'dfn' => 2,
},
'metakey' => 32, # for meta keywords (presumably the "keywords" meta tag; TODO confirm)
'headers' => 8, # for Mail/News' headers
);
#
# The max length of a HTML-tagged string which can be processed for
# term weighting.
# NOTE: Quite a few people misuse markup (presumably heading elements;
# the tag name is missing from this comment) just to change the font size.
#
$INVALID_LENG = 128;
#
# The max length of a field.
# This MUST be smaller than libnamazu.h's BUFSIZE (usually 1024).
#
$MAX_FIELD_LENGTH = 200;
#===================================================================
#
# Software for handling Japanese text
#
#
# Network Kanji Filter nkf v1.71 or later
#
$NKF = "module_nkf";
#
# KAKASI 2.x or later
# Text::Kakasi 1.05 or later
#
$KAKASI = "module_kakasi -ieuc -oeuc -w";
#
# ChaSen 2.02 or later (simple wakatigaki)
# Text::ChaSen 1.03
#
$CHASEN = "module_chasen -i e -j -F \"\%m \"";
#
# ChaSen 2.02 or later (with noun words extraction)
#
$CHASEN_NOUN = "module_chasen -i e -j -F \"\%m %H\\n\"";
#
# MeCab
#
$MECAB = "module_mecab -Owakati -b 8192";
#
# Default Japanese processor: KAKASI or ChaSen or MeCab.
#
$WAKATI = $KAKASI;
#===================================================================
#
# Directories
#
# $LIBDIR = "@PERLLIBDIR@";
# $FILTERDIR = "@FILTERDIR@";
# $TEMPLATEDIR = "@TEMPLATEDIR@";
#
#[WIN] $LIBDIR = 'C:/namazu/share/namazu/pl';
#[WIN] $FILTERDIR = 'C:/namazu/share/namazu/filter';
#[WIN] $TEMPLATEDIR = 'C:/namazu/share/namazu/template';
1;
namazu-2.0.21/pl/gettext.pl.in 0000644 0000000 0000000 00000007744 11140212154 011562 # Toying at an interface between Perl and GNU gettext .mo format.
# Copyright (C) 1995 Free Software Foundation, Inc.
# Francois Pinard, 1995.
#
# Modified by NOKUBI Takatsugu.
# Copyright (C) 1999, 2000 NOKUBI Takatsugu
## --------------------------------------------------------------- ##
## The `&textdomain (DOMAIN_NAME, LANG)' routine reads the given ##
## domain into an associative array %_, able to later translate ##
## strings. ##
## --------------------------------------------------------------- ##
sub textdomain
{
    # Load the message catalog for (DOMAIN, LANGUAGE) into the global
    # hash %_ so that &_() can translate strings later.
    #
    #   $_[0]  domain name (base name of the .mo file)
    #   $_[1]  language tag; nothing is loaded when it is false
    #
    # %_ is always emptied first.  Returns without loading anything
    # when no language is given, no catalog is found, or the catalog
    # cannot be opened.  Dies with "Not a catalog file\n" when the
    # magic number is not recognised.
    my ($domain, $language) = @_;

    %_ = ();
    return if ! $language;

    my $catalog = choose_catalog($language, $domain);
    return if ! $catalog;

    # Slurp the whole catalog in binary mode.
    my $buffer;
    open (CATALOG, $catalog) || return;
    binmode (CATALOG);
    sysread (CATALOG, $buffer, (stat CATALOG)[7]);
    close CATALOG;

    # The magic number tells whether the 4-byte fields must be
    # byte-reversed before being decoded.
    my $reverse;
    my $magic_hex = sprintf ("%x", unpack ("I", $buffer));
    if ($magic_hex eq "de120495") {
        $reverse = 1;
    } elsif ($magic_hex ne "950412de") {
        die "Not a catalog file\n";
    }

    # Header fields at fixed offsets 4, 8, 12 and 16.
    my $revision     = &mo_format_value($reverse, $buffer, 4);
    my $remaining    = &mo_format_value($reverse, $buffer, 8);
    my $orig_entry   = &mo_format_value($reverse, $buffer, 12);
    my $trans_entry  = &mo_format_value($reverse, $buffer, 16);

    # Each table entry is a (length, offset) pair of 4-byte values.
    for (; $remaining > 0; $remaining--) {
        my $orig_length   = &mo_format_value($reverse, $buffer, $orig_entry);
        my $orig_pointer  = &mo_format_value($reverse, $buffer, $orig_entry + 4);
        $orig_entry += 8;

        my $trans_length  = &mo_format_value($reverse, $buffer, $trans_entry);
        my $trans_pointer = &mo_format_value($reverse, $buffer, $trans_entry + 4);
        $trans_entry += 8;

        $_{substr ($buffer, $orig_pointer, $orig_length)}
            = substr ($buffer, $trans_pointer, $trans_length);
    }
}
sub choose_catalog
{
    # Find the .mo file for DOMAIN that best matches LANGUAGE.
    #
    # Looks for "<base>/<language>/LC_MESSAGES/<domain>.mo", where
    # <base> is the configured locale directory unless the environment
    # variable NAMAZULOCALEDIR is defined (this lets a binary package,
    # e.g. for Windows, be relocated after installation).
    #
    # When the file does not exist the language tag is shortened by
    # removing its last "." or "_" component and the lookup is retried:
    #     ja_JP.eucJP -> ja_JP -> ja
    #
    # Returns the catalog path, or undef when nothing matches.
    my ($language, $domain) = @_;

    my $base = defined $ENV{NAMAZULOCALEDIR}
        ? $ENV{NAMAZULOCALEDIR}
        : "@LOCALEDIR@";

    for (;;) {
        my $catalog = "$base/$language/LC_MESSAGES/$domain.mo";
        return $catalog if -f $catalog;     # the catalog file exists.

        # Nothing left to strip: give up.
        return undef unless $language =~ s/[\._][^\._]+$//;
    }
}
## ----------------------------------------------------------------- ##
## The `&mo_format_value (REVERSE, BUFFER, ADDRESS)' routine returns ##
## the value at a given address in the .mo format catalog held in   ##
## BUFFER, byte-reversing it first when REVERSE is true.  This is a ##
## service routine of `&textdomain'.                                ##
## ----------------------------------------------------------------- ##
sub mo_format_value
{
    # Decode the 4-byte integer stored at ADDRESS in BUFFER.
    #
    #   $reverse  true when the four bytes must be reversed first
    #   $buffer   the catalog contents
    #   $address  byte offset of the value inside $buffer
    #
    # Returns the value unpacked with the "i" template.
    my ($reverse, $buffer, $address) = @_;

    my $field = substr ($buffer, $address, 4);
    $field = pack ("c4", reverse unpack ("c4", $field)) if $reverse;
    return unpack ("i", $field);
}
## ------------------------------------------------------------ ##
## The `&_(STRING)' routine translates STRING if there is some ##
## translation offered for it in the `%_' associative array, or ##
## return STRING itself, otherwise. ##
## ------------------------------------------------------------ ##
sub _
{
    # Translate a message through the catalog hash %_.
    # The lookup key is the message with every "$" written as "\$";
    # the untouched message is returned when no translation exists.
    my ($text) = @_;

    (my $key = $text) =~ s/\$/\\\$/g;
    return $_{$key} if defined $_{$key};
    return $text;
}
## ------------------------------------------------------------ ##
## Dummy function. ##
## ------------------------------------------------------------ ##
sub N_
{
    # Identity function: hands back its first argument unchanged.
    my ($msg) = @_;
    return $msg;
}
1;
namazu-2.0.21/pl/gfilter.pl 0000644 0000000 0000000 00000006402 10420436461 011124 #
# -*- Perl -*-
# $Id: gfilter.pl,v 1.1.2.6 2006-04-16 12:48:49 opengl2772 Exp $
# Copyright (C) 1999 Satoru Takabayashi ,
# 2000-2006 Namazu Project All rights reserved.
# This is free software with ABSOLUTELY NO WARRANTY.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either versions 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA
#
# This file must be encoded in EUC-JP encoding
#
package gfilter;
use English;
# Show debug information for filters
sub show_filter_debug_info ($$$$) {
    # Dump the results of a filter through util::dprint().
    #   $content_ref   reference to the document text
    #   $weighted_ref  reference to the weighted-string buffer
    #   $field_map     hash reference; only its 'title' entry is shown
    #   $headings_ref  reference to the headings text
    my ($content_ref, $weighted_ref, $field_map, $headings_ref) = @_;

    # The title section is printed only when a title exists.
    if (defined $field_map->{'title'}) {
        util::dprint("-- title --\n$field_map->{'title'}\n");
    }
    util::dprint("-- content --\n$$content_ref\n");
    util::dprint("-- weighted_str: --\n$$weighted_ref\n");
    util::dprint("-- headings --\n$$headings_ref\n");
}
# Adjust white spaces
sub white_space_adjust_filter ($) {
    # Normalise white space in the text referenced by the argument,
    # in place.  Returns undef when the referenced text is undefined;
    # otherwise returns the number of control characters replaced.
    my ($textref) = @_;
    return undef unless defined($$textref);

    my $replaced;
    for ($$textref) {
        s/[ \t]+/ /g;       # runs of blanks/tabs -> one blank
        s/\r\n/\n/g;        # CRLF -> LF
        s/\r/\n/g;          # lone CR -> LF
        s/\n+/\n/g;         # squeeze empty lines
        s/^ +//gm;          # leading blanks of each line
        s/ +$//gm;          # trailing blanks of each line
        s/ +/ /g;
        # Control characters be into space
        $replaced = tr/\x00-\x09\x0b-\x1f/ /;
    }
    return $replaced;
}
# get a title from a file name.
sub filename_to_title ($$) {
    # Derive a title from a file name.
    #   $path          the file name (possibly with directories)
    #   $weighted_ref  reference to the weighted-string buffer; keywords
    #                  taken from the file name are appended to it with
    #                  the weight configured for the HTML title
    # Returns the last path component of the (converted) file name.
    my ($path, $weighted_ref) = @_;

    # for MSWin32's filename using Shift_JIS [1998-09-24]
    my $os = $English::OSNAME;
    if (($os eq "MSWin32") || ($os eq "os2")) {
        $path = codeconv::shiftjis_to_eucjp($path);
        codeconv::eucjp_han2zen_kana(\$path);
    }
    codeconv::normalize_eucjp(\$path);

    # Keep only what follows the last "/", if there is one.
    my $filename = ($path =~ m!^.*/([^/]*)$!) ? $1 : $path;

    # get keywords from a file name: separators become blanks.
    # modified [1998-09-18]
    (my $keywords = $filename) =~ tr|/\\_\.-| |;

    my $weight = $conf::Weight{'html'}->{'title'};
    $$weighted_ref .= "\x7f$weight\x7f$keywords\x7f/$weight\x7f\n";

    return $filename;
}
# Remove SPACE/TAB at the beginning or end of each line.
# Also remove '>|#:' at the beginning of each line.
# Join hyphenated words in English text.
# Remove the LF if the line ends with a Japanese character and
# the line is 40 or more bytes long.
#
# Original of this code was contributed by .
# [1997-09-15]
#
# Clean up the text referenced by the argument line by line, in place.
# Returns undef when the referenced text is undefined.
sub line_adjust_filter ($) {
my ($text) = @_;
return undef unless defined($$text);
# Work on individual lines; split() drops the newlines, so each line
# gets one back before the substitutions run.
my @tmp = split(/\n/, $$text);
for my $line (@tmp) {
$line .= "\n";
# Strip leading blanks and quote/marker characters ('>', '|', '#', ':').
$line =~ s/^[ \>\|\#\:]+//;
$line =~ s/ +$//;
# Drop the newline when the line ends in a byte in the range
# \xa1-\xfe and the line is at least 40 bytes long.
$line =~ s/\n// if (($line =~ /[\xa1-\xfe]\n*$/) &&
(length($line) >=40));
# Put a newline back after a trailing Japanese full stop or comma.
$line =~ s/(。|、)$/$1\n/;
$line =~ s/([a-z])-\n/$1/; # for hyphenation.
}
# @tmp elements were modified through the loop alias.
$$text = join('', @tmp);
}
# not implemented yet.
sub analize_rcs_stamp()
{
    # Placeholder: takes no arguments and returns nothing.
    return;
}
1;
namazu-2.0.21/pl/htmlsplit.pl 0000644 0000000 0000000 00000012752 11005141413 011503 #
# -*- Perl -*-
# $Id: htmlsplit.pl,v 1.9.4.6 2008-04-27 18:16:43 opengl2772 Exp $
#
# Copyright (C) 2000-2008 Namazu Project All rights reserved.
# This is free software with ABSOLUTELY NO WARRANTY.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either versions 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA
#
# This file must be encoded in EUC-JP encoding
package htmlsplit;
require "util.pl";
#require "html.pl"; # not needed because it should already be loaded by load_filtermodules()
use strict;
use File::Copy;
my $Header = << 'EOS';
${subject}
EOS
my $Footer = << 'EOS';
EOS
# Split the HTML file $fname into partial files, one per piece found by
# the substitution below, plus one for whatever text remains.
#   $fname  path of the HTML file to split
#   $base   base name passed on to write_partial_file() via %info
# Returns the list accumulated in $info{'names'} by write_partial_file().
sub split ($$) {
my ($fname, $base) = @_;
# Modification time of the source; handed to write_partial_file().
my $mtime = (stat($fname))[9];
my $cont = '';
# for handling a filename which contains Shift_JIS code for Windows.
# for handling a filename which contains white space.
# In both cases the file is copied to a temporary name and read from there.
if (($fname =~ /\s/) ||
($English::OSNAME eq "MSWin32"
&& $fname =~ /[\x81-\x9f\xe0-\xef][\x40-\x7e\x80-\xfc]|[\x20\xa1-\xdf]/))
{
my $shelter_fname = $fname;
$fname = util::tmpnam("NMZ.win32");
unlink $fname if (-e $fname);
copy($shelter_fname, $fname);
$cont = util::readfile($fname);
unlink $fname;
$fname = $shelter_fname;
} else {
$cont = util::readfile($fname);
}
codeconv::codeconv_document(\$cont);
# State shared with write_partial_file(): title/author of the whole
# document, the anchor name/text of the previous piece, and the list
# of names written so far.
my %info = (
'title' => get_title(\$cont),
'author' => get_author(\$cont),
'anchored' => "",
'name' => "",
'base' => $base,
'names' => [],
);
#
#
# In certain cases, authors may specify the value of an attribute
# without any quotation marks. The attribute value may only contain
# letters (a-z and A-Z), digits (0-9), hyphens (ASCII decimal 45), and
# periods (ASCII decimal 46). We recommend using quotation marks even
# when it is possible to eliminate them.
# NOTE(review): the regex literals below look as if their HTML tag
# text (presumably "<a" and friends) was lost in extraction -- confirm
# against a pristine copy before editing them.
my $id = 0;
# $cont =~ s/(]*href=(["']))#(.+?)(\2[^>]*>)/$1$3.html$4/gi; #'
$cont =~ s#(]*\s+)name=(["'])\2([^>]*>(.*?))#$1$4#sgi; #'
# Each match is replaced by the return value of write_partial_file(),
# which writes one partial file per call; $id numbers the pieces.
$cont =~ s {
\G(.+?) # 1
()?\s* # 2, 3
]*\s+name=([a-zA-Z0-9-\.]+| # 4,
(["']).+?\5)[^>]*>(.*?) # 5,6
\s*()? # 7
} {
write_partial_file($1, $4, $6, $id++, $mtime, \%info)
}sgexi;
# Whatever is left after the substitution becomes the final piece.
write_partial_file($cont, "", "", $id, $mtime, \%info);
return @{$info{'names'}};
}
# Extract a title from the document referenced by $contref.
# The matched text is removed from the document (the substitution
# replaces it with nothing).  Returns the captured text with white
# space collapsed and trimmed, or "no title" when nothing matches.
# NOTE(review): the pattern appears to have lost its tag text
# (presumably "<title") in extraction -- confirm against upstream.
sub get_title ($) {
my ($contref) = @_;
my $title = undef;
if ($$contref =~ s!]*>([^<]+)!!i) {
$title = $1;
# Collapse internal white space, then trim both ends.
$title =~ s/\s+/ /g;
$title =~ s/^\s+//;
$title =~ s/\s+$//;
} else {
$title = "no title";
}
return $title;
}
# Guess the author of the document referenced by $contref.
# Returns the address taken from a HREF="mailto:..." attribute when the
# first pattern matches; otherwise, when the second pattern matches,
# the first e-mail-like string inside its captured text; otherwise
# "unknown".  The document itself is not modified.
# NOTE(review): both patterns appear to have lost their tag text in
# extraction -- confirm against upstream before editing them.
sub get_author ($) {
my ($contref) = @_;
my $author = "unknown";
#
if ($$contref =~ m!]*?HREF=([\"\'])mailto:(.*?)\1\s*>!i) { #"
$author = $2;
} elsif ($$contref =~ m!.*]*>([^<]*?)!i) {
my $tmp = $1;
# Accept only something shaped like user@host.domain.
if ($tmp =~ /\b([\w\.\-]+\@[\w\.\-]+(?:\.[\w\.\-]+)+)\b/) {
$author = $1;
}
}
return $author;
}
sub write_partial_file($$$$$$) {
    # Write one piece of a split HTML document to a temporary file.
    #   $cont      text of this piece
    #   $name      anchor name that starts the NEXT piece
    #   $anchored  anchor text that starts the NEXT piece
    #   $id        sequence number of this piece
    #   $mtime     timestamp applied to the written file
    #   $info_ref  shared state (title, author, base, name, anchored,
    #              names); 'name' and 'anchored' describe THIS piece
    #              on entry and are replaced for the next call
    # Always returns the empty string.
    my ($cont, $name, $anchored, $id, $mtime, $info_ref) = @_;
    $name =~ s/^([\"\'])(.*)\1$/$2/; # Remove quotation marks.

    my ($author, $base, $orig_title, $prev_name, $prev_anchored)
        = @{$info_ref}{qw(author base title name anchored)};

    $prev_name =~ s#\n\r##sg;
    $prev_name =~ s#\n##sg;
    html::remove_html_elements(\$prev_anchored);
    $prev_anchored =~ s/^\s+//;
    $prev_anchored =~ s/\s+$//;

    # The piece title is the document title, qualified by the anchor
    # text when there is one, else by the anchor name when there is one.
    # FIXME (kept from the original): this processing reportedly caused
    # "Use of uninitialized value" warnings when the values were used
    # directly; the cause is unknown.
    my $title = $orig_title;
    my $qualifier = ($prev_anchored ne "") ? $prev_anchored : $prev_name;
    $title .= ": $qualifier" if $qualifier ne "";

    my $fname = util::tmpnam("$base.$id");
    my $fh = util::efopen(">$fname");

    (my $header = $Header) =~ s/\$\{subject\}/$title/g;
    $header =~ s/\$\{author\}/$author/g;
    print $fh $header;
    print $fh $cont;
    my $footer = $Footer;
    print $fh $footer;

    # Record this piece and remember the anchor for the next one.
    push @{$info_ref->{'names'}}, $prev_name;
    $info_ref->{'anchored'} = $anchored;
    $info_ref->{'name'} = $name;

    # FIXME: Actually we don't need this.
    # But some perl versions need this.
    util::fclose($fh);
    utime($mtime, $mtime, $fname);
    return "";
}
1;
namazu-2.0.21/pl/nmzidx.pl 0000644 0000000 0000000 00000041137 10437044754 011015 #
# -*- Perl -*-
# nmzidx.pl - subroutines for accessing Namazu index files (NMZ.*)
# by furukawa@tcp-ip.or.jp
#
# $Id: nmzidx.pl,v 1.13.4.7 2006-05-30 13:34:36 opengl2772 Exp $
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either versions 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA
#
use strict;
use English;
use IO::File;
use DirHandle;
package nmzlib;
sub open_db{
    # Open the index file "NMZ.<ext>" in the directory $par->{'dir'}.
    # When $par->{'mode'} contains "w" (any case) a temporary file
    # "NMZ.<ext>.<pid>.tmp" is opened for writing instead; otherwise the
    # file itself is opened for reading.  A successfully opened handle
    # is switched to binary mode and recorded in $par->{'dblist'} under
    # the (non-temporary) path.  Returns the handle, or undef on failure.
    my ($par, $ext) = @_;
    my $path = "$par->{'dir'}/NMZ.$ext";

    my $fh = ($par->{'mode'} =~ /w/i)
        ? IO::File->new("$path.$$.tmp", "w")
        : IO::File->new($path, "r");

    if (defined $fh){
        $par->{'dblist'}->{$path} = $fh;
        binmode $fh;
    }
    return $fh;
}
sub readw{
    # Read one integer stored in pack()'s "w" format from the handle:
    # bytes are consumed up to and including the first one whose high
    # bit (0x80) is clear, then decoded with unpack('w').
    my $fh = shift;
    my $packed = '';
    my $byte;

    for (;;){
        last unless read($fh, $byte, 1);
        $packed .= $byte;
        last if (0x80 & ord $byte) == 0;
    }
    return unpack('w', $packed);
}
package nmzfile;
sub new{
    # Construct an nmzfile: a body file "NMZ.<ext>" paired with its
    # index file ("NMZ.<ext>i", or "NMZ.<ext>.i" for field files).
    #   $par  parent object/hash providing 'dir' and 'mode'
    #   $ext  extension of the body file
    # 'body' and 'index' are only set when the corresponding open
    # succeeded; 'size' (number of 4-byte index entries) is only set
    # when the index was opened.
    my ($class, $par, $ext) = @_;
    my $self = bless {}, $class;

    my $body = &nmzlib::open_db($par, $ext);
    $self->{'dir'} = $par->{'dir'};
    $self->{'mode'} = $par->{'mode'};
    $self->{'body'} = $body if defined $body;
    $self->{'ext'} = $ext;

    my $index_ext = $ext . (($ext =~ /^field/) ? '.i' : 'i');
    my $index = &nmzlib::open_db($par, $index_ext);
    $self->{'index'} = $index if defined $index;

    $self->{'offset'} = 0;
    if (defined($self->{'index'})){
        my $entry_len = length(pack('N', 0));
        $self->{'size'} = (-s $self->{'index'}) / $entry_len;
    }
    return $self;
}
sub close{
    # Close the body and index handles of this file pair.
    # Either handle may be missing (the constructor only stores handles
    # that were opened successfully), so both are closed conditionally;
    # closing an object whose body failed to open must not die.
    my $self = shift;
    $self->{'body'}->close if defined $self->{'body'};
    $self->{'index'}->close if defined $self->{'index'};
}
sub seek{
    # Move to record number OFFSET (counted in index entries).
    #   $offset  record number, default 0
    #   $whence  0 = absolute, 1 = relative to the current record,
    #            2 = relative to the number of records; default 0
    # Returns the resulting record number, or -1 when it would fall
    # outside 0..size.  Nothing is done when the target equals the
    # current record.
    my $self = shift;
    my $offset = @_? shift: 0;
    my $whence = @_? shift: 0;
    my $entry_len = length(pack('N', 0));

    if ($whence == 1){
        $offset += $self->{'offset'};
    }elsif ($whence == 2){
        $offset += $self->{'size'};
    }

    return $offset if $offset == $self->{'offset'};
    return -1 if ($offset < 0 || $offset > $self->{'size'});

    $self->{'offset'} = $offset;
    $self->{'index'}->seek($offset * $entry_len, 0);

    # For the 'p' file only the index is positioned here.
    return $offset if $self->{'ext'} eq 'p';

    if ($offset == $self->{'size'}){
        # One past the last record: go to the end of the body.
        $self->{'body'}->seek(0, 2);
    }else{
        # The index entry holds the body position of the record.
        my $buf;
        $self->{'index'}->read($buf, $entry_len);
        $self->{'body'}->seek(unpack('N', $buf), 0);
    }
    return $offset;
}
sub getline{
    # Read the next line from the body file and advance the record
    # counter.  Returns undef when there is no body handle.
    my ($self) = @_;
    my $body = $self->{'body'};

    if (defined $body){
        ++$self->{'offset'};
        return $body->getline;
    }
    return undef;
}
sub getlist{
    # Read the next record of the body file as a list of integers
    # (stored as a "w"-packed length followed by "w"-packed values).
    #
    # Returns the decoded list.  Returns the empty list (undef in
    # scalar context) when there is no body handle, when all records
    # have been read, or -- for the 'p' file -- when the index entry is
    # the "no data" marker pack('N', -1).
    #
    # The end-of-data cases previously used "return undef", which in
    # list context yields the one-element list (undef); callers that
    # assign the result to an array then processed a bogus entry.  The
    # former "offset == size" branch could never be reached because the
    # ">=" test before it already returned.
    my $self = shift;
    return unless defined $self->{'body'};
    return if $self->{'offset'} >= $self->{'size'};

    ++$self->{'offset'};
    if ($self->{'ext'} eq 'p'){
        # The 'p' body is addressed through its index for every record.
        my $buf;
        $self->{'index'}->read($buf, length pack('N', 0));
        return () if $buf eq pack('N', -1);
        $self->{'body'}->seek(unpack('N', $buf), 0);
    }
    # Record layout: byte length, then that many bytes of packed values.
    $self->{'body'}->read(my $buf, &nmzlib::readw($self->{'body'}));
    return unpack('w*', $buf);
}
sub putline{
    # Append one line record.  Only the text before the first newline
    # of the argument is kept; a single "\n" is then added.  The body
    # position of the record is written to the index, and both 'size'
    # and 'offset' are incremented.  Does nothing without an argument.
    my $self = shift;
    if (@_){
        (my $record = shift) =~ s/\n.*$//s;
        $self->{'index'}->print(pack('N', $self->{'body'}->tell));
        $self->{'body'}->print($record . "\n");
        ++$self->{'size'};
        ++$self->{'offset'};
    }
}
sub putlist{
    # Append one list record.
    # With arguments: the body position is written to the index and
    # the values are written to the body as a "w"-packed byte length
    # followed by the "w"-packed values.
    # Without arguments, for the 'p' file only: the marker
    # pack('N', -1) is written to the index and 'size'/'offset' are
    # incremented.
    # NOTE(review): the counters are not incremented in the
    # with-arguments branch, unlike putline() -- confirm whether that
    # is intended before relying on 'size' here.
    my $self = shift;
    if (@_){
        my $packed = pack('w*', @_);
        $self->{'index'}->print(pack('N', $self->{'body'}->tell));
        $self->{'body'}->print(pack('w', length $packed) . $packed);
    }elsif ($self->{'ext'} eq 'p'){
        $self->{'index'}->print(pack('N', -1));
        ++$self->{'size'};
        ++$self->{'offset'};
    }
}
package nmzfield;
sub new{
    # Construct an empty field collection; when extra arguments are
    # given they are forwarded to open().
    my ($class, @args) = @_;
    my $self = bless {}, $class;
    $self->open(@args) if @args;
    return $self;
}
sub open{
    # Open the field file "field.<ext>" and store the resulting
    # nmzfile object under the key <ext>.
    my ($self, $par, $ext) = @_;
    $self->{$ext} = nmzfile->new($par, "field.$ext");
}
sub open_all{
    # Open every field file found in $par->{'dir'}: each directory
    # entry named "NMZ.field.<name>" (where <name> contains no dot)
    # is opened and stored under the key <name>.
    my ($self, $par) = @_;
    my $dh = DirHandle->new($par->{'dir'});

    while (defined(my $entry = $dh->read)){
        next unless $entry =~ /^NMZ\.field\.([^\.]+)$/;
        my $name = $1;
        $self->{$name} = nmzfile->new($par, "field.$name");
    }
    $dh->close;
}
sub close{
    # Close every field file held by this collection.
    my ($self) = @_;
    foreach my $file (values %$self){
        $file->close;
    }
}
sub seek{
    # Position every field file of this collection; the arguments
    # (offset, whence) are passed unchanged to each file's seek().
    #
    # Returns the record number reported by the files, -1 when any
    # file reports -1 or the files disagree, and undef when the
    # collection holds no files.
    #
    # The previous body called $self->seek(@_), i.e. itself, and so
    # recursed without end.
    my $self = shift;
    my $result;
    for my $key (keys %$self){
        my $pos = $self->{$key}->seek(@_);
        if (!defined $result){
            $result = $pos;
        }elsif ($pos != $result){
            $result = -1;
        }
    }
    return $result;
}
package nmzflist;
sub new{
    # Construct the file-list accessor for the index described by $par.
    # Opens NMZ.t, NMZ.r (skipped when the mode contains "s", any case)
    # and every field file.  'size' and 'valid' are initialised from
    # the length of NMZ.t (one 4-byte entry per document) and are only
    # set when NMZ.t could be opened.
    my ($class, $par) = @_;
    my $self = bless {}, $class;

    $self->{'dir'} = $par->{'dir'};
    $self->{'mode'} = $par->{'mode'};

    $self->{'t'} = &nmzlib::open_db($par, 't');
    unless ($par->{'mode'} =~ /s/i){
        $self->{'r'} = &nmzlib::open_db($par, 'r');
    }

    my $fields = nmzfield->new;
    $self->{'field'} = $fields;
    $fields->open_all($par);

    $self->{'offset'} = 0;
    if (defined $self->{'t'}) {
        my $entries = (-s $self->{'t'}) / length(pack('N', 0));
        $self->{'size'} = $entries;
        $self->{'valid'} = $entries;
    }
    return $self;
}
sub close{
    # Close NMZ.t and NMZ.r when they are open, then the field files.
    my ($self) = @_;
    for my $name ('t', 'r'){
        my $fh = $self->{$name};
        $fh->close if defined $fh;
    }
    $self->{'field'}->close;
}
sub read{
    # Read the next document entry into the hash referenced by $list
    # (emptied first):
    #   't'      the 4-byte value from NMZ.t, or -1 for the marker
    #            pack('N', -1)
    #   'r'      the next line of NMZ.r that does not start with "#",
    #            CR or LF, chomped (only when NMZ.r is open)
    #   'field'  hash of field name => chomped line ('' at end of file)
    # 'valid' is decremented for a -1 entry and 'offset' is advanced.
    # Returns the 't' value.
    my ($self, $list) = @_;
    %$list = ();

    my $entry_len = length pack('N', 0);
    $self->{'t'}->read(my $pindex, $entry_len);
    $list->{'t'} = ($pindex eq pack('N', -1))? -1: unpack('N', $pindex);

    if (defined(my $rfh = $self->{'r'})){
        my $line = $rfh->getline;
        # Skip comment lines and empty lines.
        $line = $rfh->getline while (defined($line) && $line =~ /^[\#\r\n]/);
        chomp $line if defined $line;
        $list->{'r'} = $line;
    }

    my $field = $self->{'field'};
    for my $key (keys %$field){
        my $value = $field->{$key}->getline;
        $value = '' unless defined $value;
        chomp $value;
        $list->{'field'}->{$key} = $value;
    }

    --$self->{'valid'} if defined($list->{'t'}) && $list->{'t'} == -1;
    ++$self->{'offset'};
    return $list->{'t'}
}
sub write{
    # Append one document entry taken from the hash referenced by
    # $list: 't' goes to NMZ.t as a 4-byte value, 'r' (plus newline)
    # to NMZ.r, and each open field file gets $list->{'field'}->{name}.
    # 'valid' is incremented unless 't' is -1; 'size' and 'offset' are
    # always incremented.
    #
    # NMZ.r is written only when it is open: the constructor does not
    # open it when the mode contains "s", and read()/close() already
    # treat it as optional, so write() no longer dies in that case.
    my $self = shift;
    my $list = shift;

    my $fh = $self->{'t'};
    $fh->print(pack('N', $list->{'t'}));

    if (defined(my $rfh = $self->{'r'})){
        $rfh->print($list->{'r'} . "\n");
    }

    my $field = $self->{'field'};
    for my $key (keys %$field){
        $field->{$key}->putline($list->{'field'}->{$key} . "\n")
    }
    ++$self->{'valid'} if $list->{'t'} != -1;
    ++$self->{'size'};
    ++$self->{'offset'};
}
sub seek{
    # Position NMZ.t and every field file at document number OFFSET.
    #   $offset  document number, default 0
    #   $whence  0 = absolute, 1 = relative to the current document,
    #            2 = relative to 'size'; default 0
    # NMZ.t is positioned in bytes (4 per entry); the field files are
    # positioned by record through their own seek().  Returns the
    # updated 'offset' (left unchanged for any other $whence).
    my $self = shift;
    my $offset = @_? shift: 0;
    my $whence = @_? shift: 0;

    my $entry_len = length pack('N', 0);
    $self->{'t'}->seek($offset * $entry_len, $whence);

    my $field = $self->{'field'};
    foreach my $file (values %$field){
        $file->seek($offset, $whence);
    }

    if ($whence == 0){
        $self->{'offset'} = $offset;
    }elsif ($whence == 1){
        $self->{'offset'} += $offset;
    }elsif ($whence == 2){
        $self->{'offset'} = $offset + $self->{'size'};
    }
    return $self->{'offset'};
}
package nmzword;
sub new{
    # Construct the word-index accessor: the NMZ.i and NMZ.w file
    # pairs of the index described by $par.  'size' is taken from the
    # NMZ.i pair.
    my ($class, $par) = @_;
    my $self = bless {}, $class;

    @{$self}{'dir', 'mode'} = @{$par}{'dir', 'mode'};
    $self->{'i'} = nmzfile->new($par, 'i');
    $self->{'w'} = nmzfile->new($par, 'w');
    $self->{'offset'} = 0;
    $self->{'size'} = $self->{'i'}->{'size'};
    return $self;
}
sub close{
    # Close both file pairs (NMZ.i first, then NMZ.w).
    my ($self) = @_;
    my ($index, $words) = @{$self}{'i', 'w'};
    $index->close;
    $words->close;
}
sub read{
    # Read the next word and its posting list.
    #   $word  scalar reference that receives the word (chomped)
    #   $list  hash reference that receives key => value pairs; it is
    #          emptied first
    # The stored list is a flat sequence of (key delta, value) pairs;
    # the deltas are accumulated to rebuild the keys.
    # Returns the word, or nothing at the end of the word file.
    my ($self, $word, $list) = @_;
    %$list = ();

    return unless defined($$word = $self->{'w'}->getline);
    chomp $$word;

    my @pairs = $self->{'i'}->getlist;
    my $key = 0;
    while (@pairs){
        $key += shift @pairs;
        $list->{$key} = shift @pairs;
    }

    ++$self->{'offset'};
    return $$word;
}
sub write{
    # Append a word and its posting list.
    #   $word  the word
    #   $list  hash reference of key => value
    # Nothing is written when the word is empty or the list has no
    # keys.  Keys are written in ascending numeric order as deltas
    # from the previous key, each followed by its value.
    my ($self, $word, $list) = @_;

    if (length $word and scalar keys %$list){
        $self->{'w'}->putline($word . "\n");

        my $previous = 0;
        my @pairs = map {
            my $delta = $_ - $previous;
            $previous = $_;
            ($delta, $list->{$_});
        } sort {$a <=> $b} keys %$list;

        $self->{'i'}->putlist(@pairs);
        ++$self->{'size'};
        ++$self->{'offset'};
    }
}
sub seek{
    # Position both file pairs with the same arguments.  When they
    # agree the common record number is stored in 'offset' and
    # returned; otherwise -1 is returned and 'offset' is left alone.
    my ($self, @where) = @_;
    my $pos_i = $self->{'i'}->seek(@where);
    my $pos_w = $self->{'w'}->seek(@where);

    return -1 unless $pos_i == $pos_w;
    return $self->{'offset'} = $pos_i;
}
sub getword{
    # Return word number $number (chomped) from the word file.
    my ($self, $number) = @_;
    $self->seek($number, 0);
    chomp(my $word = $self->{'w'}->getline);
    return $word;
}
sub _search_{
    # Binary search for $keyword in the (sorted) word file.
    #   $keyword  word to look for
    #   optional scalar reference: when it holds a value >= 0 that
    #   value is used as the upper bound of the search; on return it
    #   receives the position found, or the position where the search
    #   ended when the word is absent
    # Returns the word number, or -1 when the word is absent.  Results
    # are remembered in $self->{'cache'}->{'search'}.
    #
    # Fixes over the previous version:
    #  * the loop stops when $l == $r without examining that slot, so a
    #    word sitting there (always the case for the last word of the
    #    index) was reported as missing; the slot is now checked;
    #  * with an empty range ($r < $l) the midpoint "(0 + -1) >> 1"
    #    is a huge unsigned number; it is now clamped to $l;
    #  * a hit is cached even when no reference is passed.
    my $self = shift;
    my $keyword = shift;
    my $l = 0;
    my $r = $self->{'size'} - 1;
    my $ptr = (@_ && ref($_[0]) eq 'SCALAR')? shift: undef;
    $r = $$ptr if defined($ptr) && $$ptr >= 0;

    if (defined $self->{'cache'}->{'search'}->{$keyword}){
        $$ptr = $self->{'cache'}->{'search'}->{$keyword} if defined $ptr;
        return $self->{'cache'}->{'search'}->{$keyword};
    }

    my $x;
    while ($x = ($l + $r) >> 1, $l < $r){
        my $buf = $self->getword($x);
        if ($buf eq $keyword){
            $self->{'cache'}->{'search'}->{$keyword} = $x;
            $$ptr = $x if defined $ptr;
            return $x;
        }
        if ($buf ge $keyword){
            $r = $x;
        }else{
            $l = $x + 1;
        }
    }

    if ($l == $r){
        # The range shrank to one slot that the loop did not test.
        my $buf = $self->getword($x);
        if (defined($buf) && $buf eq $keyword){
            $self->{'cache'}->{'search'}->{$keyword} = $x;
            $$ptr = $x if defined $ptr;
            return $x;
        }
    }else{
        # Empty range: avoid the bogus midpoint.
        $x = $l;
    }

    $$ptr = $x if defined $ptr;
    return $self->{'cache'}->{'search'}->{$keyword} = -1;
}
sub wakati{
    # Find the longest leading part of $keyword that is an indexed
    # word by repeatedly cutting a chunk off its end.
    #   $keyword  text to analyse
    #   $opt      optional flags; with "b" one character is cut per
    #             step, otherwise two
    # Returns (leading part, removed trailing part).  The leading part
    # is whatever is left when it is found or cannot be shortened
    # any further.
    my $self = shift;
    my $keyword = shift;
    my $opt = @_? shift: '';

    my $chunk = ($opt =~ /b/)? '.': '..';
    my $tail = '';
    my $bound = -1;     # narrows the search range between attempts

    until ($self->_search_($keyword, \$bound) >= 0){
        last unless $keyword =~ s/($chunk)$//;
        $tail = $1 . $tail;
    }
    return ($keyword, $tail);
}
sub forward{
    # Collect every indexed word that starts with $keyword.
    #   $word     array reference that receives the words (emptied
    #             first)
    #   $keyword  the prefix
    # Returns the list of matching words, in index order.  The
    # position of each match is remembered in
    # $self->{'cache'}->{'search'}.
    #
    # The previous version started scanning at the position returned
    # by an exact-match search, which is -1 whenever the prefix is not
    # itself an indexed word; matches were then missed.  The start is
    # now the first word that is not less than the prefix, and the
    # scan stops at the end of the index.
    my $self = shift;
    my $word = shift;
    my $keyword = shift;
    @$word = ();

    my $size = $self->{'size'};
    $size = 0 unless defined $size;

    # Lower bound: first position whose word is >= $keyword.
    my ($lo, $hi) = (0, $size);
    while ($lo < $hi){
        my $mid = ($lo + $hi) >> 1;
        my $buf = $self->getword($mid);
        if (defined($buf) && $buf lt $keyword){
            $lo = $mid + 1;
        }else{
            $hi = $mid;
        }
    }

    my $prefix = quotemeta($keyword);
    for (my $x = $lo; $x < $size; $x++){
        my $buf = $self->getword($x);
        last unless defined($buf) && $buf =~ /^$prefix/;
        $self->{'cache'}->{'search'}->{$buf} = $x;
        push(@$word, $buf);
    }
    return @$word;
}
sub search{
    # Look up $keyword and load its posting list into the hash
    # referenced by $list.  Returns what read() returns (the word), or
    # undef when the keyword is not indexed.
    my ($self, $list, $keyword) = @_;

    my $x = $self->_search_($keyword);
    return undef unless $x >= 0;

    $self->seek($x, 0);
    my $word;
    return $self->read(\$word, $list);
}
package nmzphrase;
@nmzphrase::Seed = ();
sub new{
    # Construct the phrase-index accessor (the NMZ.p file pair).
    # 'size' is fixed at 0x10000.  The hash seed table is loaded when
    # the mode contains "s" (any case).
    my ($class, $par) = @_;
    my $self = bless {}, $class;

    @{$self}{'dir', 'mode'} = @{$par}{'dir', 'mode'};
    $self->{'p'} = nmzfile->new($par, 'p');
    $self->{'offset'} = 0;
    $self->{'size'} = 0x10000;

    $self->init_seed if $self->{'mode'} =~ /s/i;
    return $self;
}
sub close{
    # Close the NMZ.p file pair.
    my ($self) = @_;
    my $phrase_file = $self->{'p'};
    $phrase_file->close;
}
sub read{
    # Read the next phrase record into the array referenced by $list
    # (emptied first).  The record stores deltas; running sums are
    # pushed so the array holds absolute values.  Advances 'offset'
    # and returns the number of values read.
    my ($self, $list) = @_;
    @$list = ();

    my $sum = 0;
    foreach my $delta ($self->{'p'}->getlist){
        $sum += $delta;
        push(@$list, $sum);
    }

    ++$self->{'offset'};
    return scalar @$list;
}
sub write{
    # Append one phrase record.  The values in the array referenced by
    # $list are written as deltas from the previous value (the first
    # one as a delta from 0).  Advances 'offset'.
    my ($self, $list) = @_;

    my $previous = 0;
    my @deltas = map {
        my $delta = $_ - $previous;
        $previous = $_;
        $delta;
    } @$list;

    $self->{'p'}->putlist(@deltas);
    ++$self->{'offset'};
}
sub seek{
    # Position the NMZ.p file pair; the result of its seek() is stored
    # in 'offset' and returned.
    my ($self, @where) = @_;
    my $position = $self->{'p'}->seek(@where);
    $self->{'offset'} = $position;
    return $position;
}
sub search{
    # Look up a phrase in the phrase index.
    #   $list    hash reference; emptied, then given value => 1 for
    #            each value read from the matching record
    #   $phrase  the phrase; one further argument, when present, is
    #            appended to it
    # The record number is a hash of the phrase: every character
    # matching [\xa1-\xfea-z\d] is looked up in one of the four seed
    # tables (selected by its running count & 3) and the results are
    # XOR-ed; the low 16 bits select the record.
    # Returns the number of values read.
    my $self = shift;
    my $list = shift;
    my $phrase = shift;
    $phrase .= shift if @_;

    my $hash = 0;
    my $count = 0;
    foreach my $char ($phrase =~ m/([\xa1-\xfea-z\d])/g){
        $hash ^= $nmzphrase::Seed[($count++) & 3][ord($char)];
    }
    $self->seek($hash & 0xffff);

    %$list = ();
    my @found = ();
    if ($self->read(\@found)){
        @{$list}{@found} = (1) x @found;
    }
    return scalar @found;
}
sub init_seed{
    # Load the hash seed tables from seed.pl into @nmzphrase::Seed,
    # unless they are already loaded.
    unless (scalar @nmzphrase::Seed){
        require 'seed.pl';
        @nmzphrase::Seed = &seed::init;
    }
}
package nmzidx;
sub new{
    # Construct an index accessor.
    #   $dir   index directory, default '.'
    #   $mode  mode string, default 'r'
    # When the mode contains "R" or "S": returns undef if
    # "<dir>/NMZ.lock" exists; otherwise tries to create
    # "<dir>/NMZ.lock2" containing the process id (failure to create
    # it is ignored).
    my $class = shift;
    my $dir = @_? shift: '.';
    my $mode = @_? shift: 'r';

    if ($mode =~ /[RS]/){
        return undef if -f "$dir/NMZ.lock";
        my $fh = IO::File->new(">$dir/NMZ.lock2");
        if (defined $fh){
            $fh->print($$);
            $fh->close;
        }
    }

    my $self = bless {}, $class;
    $self->{'dir'} = $dir;
    $self->{'mode'} = $mode;
    return $self;
}
sub close{
    # Remove "<dir>/NMZ.lock2" when the mode contains "R" or "S".
    my ($self) = @_;
    return unless $self->{'mode'} =~ /[RS]/;
    unlink("$self->{'dir'}/NMZ.lock2");
}
sub open_field{
    # Open one field file (remaining arguments are passed to
    # nmzfield::open) and return the field collection, which is
    # created on first use.
    my ($self, @args) = @_;
    $self->{'field'} ||= nmzfield->new();
    $self->{'field'}->open($self, @args);
    return $self->{'field'};
}
sub open_flist{
    # Create the file-list accessor, remember it and return it.
    my ($self) = @_;
    my $flist = nmzflist->new($self);
    $self->{'flist'} = $flist;
    return $flist;
}
sub open_word{
    # Create the word-index accessor, remember it and return it.
    my ($self) = @_;
    my $word = nmzword->new($self);
    $self->{'word'} = $word;
    return $word;
}
sub open_phrase{
    # Create the phrase-index accessor, remember it and return it.
    my ($self) = @_;
    my $phrase = nmzphrase->new($self);
    $self->{'phrase'} = $phrase;
    return $phrase;
}
sub replace_db{
    # Put the temporary files written through this object in place of
    # the real index files.
    #   $bak  when true, each existing file is first renamed to
    #         "<file>.BAK" (replacing an older backup); default 0
    # When the mode contains "W", "<dir>/NMZ.lock" is created before
    # the files are swapped and removed afterwards.
    my $self = shift;
    my $bak = @_? shift : 0;
    my $lock = "$self->{'dir'}/NMZ.lock";
    my $locking = $self->{'mode'} =~ /W/;

    if ($locking){
        my $fh = IO::File->new($lock, 'w');
        $fh->close;
    }

    for my $path (keys %{$self->{'dblist'}}){
        my $tmp = "$path.$$.tmp";
        my $backup = "$path.BAK";

        $self->{'dblist'}->{$path}->close;
        if ($bak){
            unlink $backup if (-f $path) && (-f $backup);
            rename $path, $backup;
        }
        # Remove the old file only when there is something to replace it.
        unlink $path if (-f $tmp) && (-f $path);
        rename $tmp, $path;
    }

    unlink $lock if $locking;
}
sub remove_tmpdb{
    # Discard the temporary files written through this object: every
    # registered handle is closed and its "<file>.<pid>.tmp" removed.
    my ($self) = @_;
    foreach my $path (keys %{$self->{'dblist'}}){
        my $fh = $self->{'dblist'}->{$path};
        $fh->close;
        unlink "$path.$$.tmp";
    }
}
sub write_status{
    # Copy NMZ.status and every NMZ.head* file from the index $in to
    # this index, updating the file and key counts on the way.
    #   $in  the source index object
    # Does nothing unless the mode contains "w" (any case).
    #
    # The key count comes from the opened word accessor and the file
    # count from the opened file-list accessor; a count that is not
    # available leaves the corresponding text untouched.
    #
    # Previously the comma-formatted counts were computed with
    # comma(undef) when an accessor was not open.  That yields "0", so
    # the "defined" guards below never fired and the head files had
    # their counters overwritten with 0.  The formatted values are now
    # left undefined in that case.
    my $self = shift;
    my $in = shift;

    my $key = undef;
    $key = $self->{'word'}->{'size'} if defined $self->{'word'};
    my $key_comma = defined($key) ? comma($key) : undef;

    my $file = undef;
    $file = $self->{'flist'}->{'valid'} if defined $self->{'flist'};
    my $file_comma = defined($file) ? comma($file) : undef;

    if ($self->{'mode'} =~ /w/i){
        # NMZ.status: plain "files N" / "keys N" lines.
        my $fi = &nmzlib::open_db($in, 'status');
        my $fo = &nmzlib::open_db($self, 'status');
        while (defined(my $line = $fi->getline)){
            $line = "files $file\n" if $line =~ /^files / && defined $file;
            $line = "keys $key\n" if $line =~ /^keys / && defined $key;
            $fo->print($line);
        }
        $fi->close;
        $fo->close;

        # NMZ.head and NMZ.head.*: counts sit between marker comments.
        my $dh = new DirHandle($in->{'dir'});
        while (defined(my $ent = $dh->read)){
            next if $ent =~ /\.(BAK|tmp)$/;
            if ($ent =~ /^NMZ\.(head(?:\.[-\w\.]+)?)$/){
                $fi = &nmzlib::open_db($in, $1);
                $fo = &nmzlib::open_db($self, $1);
                while (defined(my $line = $fi->getline)){
                    $line =~ s/(\<\!-- FILE --\>).*?\1/$1 $file_comma $1/ if defined $file_comma;
                    $line =~ s/(\<\!-- KEY --\>).*?\1/$1 $key_comma $1/ if defined $key_comma;
                    $fo->print($line);
                }
                $fi->close;
                $fo->close;
            }
        }
        undef $dh;
    }
}
sub log_open{
    # Open "<dir>/NMZ.log" for appending and remember the handle in
    # $self->{'log'}.  When the open succeeds the handle is put in
    # binary mode, $tag (if defined) is written on a line of its own
    # and a "Date:" line is logged.
    # Returns the handle (undef when the open failed).
    my ($self, $tag) = @_;
    my $path = "$self->{'dir'}/NMZ.log";

    my $fh = IO::File->new(">>$path");
    $self->{'log'} = $fh;

    if (defined $fh){
        binmode $fh;
        $fh->print("$tag\n") if defined $tag;
        $self->log_putline("Date:", localtime($English::BASETIME) . "");
    }
    return $self->{'log'};
}
sub log_putline{
    # Write one "label value" line to the log, the label left-aligned
    # in 20 columns.  The remaining arguments are (label, value).
    #
    # Does nothing when no log is open: log_open() leaves
    # $self->{'log'} undefined when the file cannot be opened, and
    # log_close() already tolerates that, so this method must not die
    # on it either.
    my $self = shift;
    return unless defined $self->{'log'};
    $self->{'log'}->printf("%-20s %s\n", @_);
}
sub log_close{
    # Finish the log: write the elapsed time, the OS name and the Perl
    # version, then an empty line, and close the handle.  Does nothing
    # when no log is open.
    my ($self) = @_;
    my $log = $self->{'log'};
    return unless defined $log;

    $self->log_putline("Time (sec):", (time - $English::BASETIME));
    $self->log_putline("System:", $English::OSNAME);
    $self->log_putline("Perl:", sprintf("%f", $English::PERL_VERSION));
    $log->print("\n");
    $log->close;
}
# copy from util.pl
sub comma ($) {
    # Insert a comma between every group of three digits of a number,
    # e.g. 1234567 -> "1,234,567".  An empty argument is treated as "0".
    my $text = shift;
    $text = "0" if ($text eq "");

    # from Mastering Regular Expressions
    $text =~ s<\G((?:^-)?\d{1,3})(?=(?:\d\d\d)+(?!\d))><$1,>g;
    return $text;
}
1;
namazu-2.0.21/pl/seed.pl 0000644 0000000 0000000 00000020710 07044262000 010400 #
# -*- Perl -*-
# $Id: seed.pl,v 1.5 2000-01-28 09:40:16 satoru Exp $
# Copyright (C) 1997-1999 Satoru Takabayashi All rights reserved.
# Copyright (C) 2000 Namazu Project All rights reserved.
# This is free software with ABSOLUTELY NO WARRANTY.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either versions 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA
#
# This file must be encoded in EUC-JP encoding
package seed;
#
# Dr. Knuth's ``hash'' (from UNIX MAGAZINE May, 1998)
#
# Return the hash seed tables: a list of four array references, each
# holding 256 integers.  The values are fixed data and must not be
# changed.
sub init () {
return (
[
3852, 26205, 51350, 2876, 47217, 47194, 55549, 43312,
63689, 40984, 62703, 10954, 13108, 60460, 41680, 32277,
51887, 28590, 17502, 57168, 37798, 27466, 13800, 12816,
53745, 8833, 55089, 15481, 18993, 15262, 8490, 22846,
41468, 59841, 25722, 23150, 41499, 15735, 926, 39653,
56720, 63629, 50607, 4292, 58554, 26752, 36570, 44905,
55343, 54073, 36538, 27605, 16003, 50339, 40422, 4213,
59172, 29975, 19694, 12629, 45238, 28185, 35475, 21170,
22491, 61198, 44320, 63991, 11398, 45247, 38108, 2583,
43341, 23180, 6875, 36359, 49933, 43446, 15728, 39740,
31983, 52267, 1809, 47986, 37070, 42232, 52199, 30706,
6672, 6358, 43336, 51910, 34544, 13276, 7545, 57036,
8939, 51866, 55491, 20338, 31577, 28064, 22921, 9383,
51245, 29797, 45742, 35642, 7707, 61471, 9847, 39691,
48202, 11656, 22141, 19736, 53889, 8805, 50443, 60561,
15164, 28244, 46936, 49709, 41521, 54481, 41209, 50460,
40812, 31165, 5262, 6853, 59230, 28184, 16237, 44940,
57981, 61979, 15046, 152, 57914, 24893, 39843, 40581,
36550, 61985, 60318, 24904, 5255, 45226, 19929, 20420,
7934, 1329, 4593, 49456, 55811, 45803, 34381, 31087,
11433, 39644, 37941, 5128, 2292, 54178, 50068, 60273,
50622, 65115, 60426, 43000, 24473, 34734, 18046, 61024,
31184, 12828, 20392, 36439, 58054, 40322, 56860, 453,
41651, 61453, 49909, 31927, 41721, 18754, 63015, 53155,
58398, 35421, 58283, 60691, 24063, 42816, 55428, 9149,
42395, 50319, 52150, 1332, 19517, 4661, 62357, 50701,
17489, 17213, 21605, 10008, 57535, 12929, 10462, 33651,
8847, 60371, 43, 50569, 13590, 63058, 38188, 6453,
32943, 30936, 1608, 57007, 8216, 57037, 621, 50611,
41820, 52771, 51944, 61338, 57433, 48765, 46504, 9387,
443, 2573, 19395, 57978, 15503, 29857, 26094, 24351,
24693, 26137, 9385, 38284, 23659, 47573, 44738, 56602
],
[
12974, 46347, 48074, 21190, 37848, 48695, 6266, 14133,
35931, 58211, 9935, 27828, 41440, 56440, 37215, 41883,
59014, 56610, 34326, 8982, 20932, 60420, 33333, 45626,
21021, 42718, 18375, 44681, 24756, 63113, 35748, 37730,
43924, 18286, 58920, 1445, 65187, 30371, 37376, 57862,
40307, 65205, 33766, 31211, 36884, 10114, 24689, 27959,
44441, 33671, 48892, 39326, 1469, 28982, 60348, 44188,
47357, 39493, 3408, 44935, 9705, 41138, 23324, 27992,
34523, 39562, 29437, 34174, 4397, 1278, 26500, 44705,
947, 60267, 10380, 37832, 4846, 35070, 255, 49288,
3206, 49147, 23078, 4676, 12594, 17890, 48864, 59951,
57383, 52273, 39351, 1553, 27875, 62675, 29545, 62399,
36701, 58983, 31038, 41099, 60262, 57539, 20268, 61210,
52271, 30649, 33506, 57118, 184, 33762, 40870, 3390,
17374, 63949, 8067, 29968, 16303, 56931, 24384, 8151,
43668, 63736, 6008, 60875, 39251, 2872, 32040, 32699,
33910, 7603, 27426, 25914, 27872, 23100, 12649, 58521,
56607, 4231, 58705, 24834, 45102, 62096, 42208, 43515,
4627, 6641, 59819, 61559, 31026, 2435, 39692, 29226,
12141, 45700, 24565, 51392, 48573, 56606, 18556, 16947,
64210, 45982, 42861, 26546, 3546, 55511, 19531, 60154,
59743, 12700, 19452, 39309, 9261, 61660, 17289, 13888,
2766, 11572, 9912, 33792, 14008, 49604, 63018, 26149,
29769, 22048, 12006, 12806, 13118, 30562, 29754, 11792,
11008, 7080, 38339, 14554, 62591, 57870, 9172, 56798,
5035, 28625, 30572, 14297, 24749, 47861, 27515, 59433,
38098, 61308, 7906, 22166, 58790, 34055, 51935, 15303,
46061, 64742, 28421, 11087, 28960, 40214, 22095, 36041,
13018, 36650, 33096, 5352, 45823, 24359, 10388, 8912,
54931, 24685, 33662, 37257, 52871, 61178, 31155, 25433,
56950, 39061, 47599, 50204, 7580, 33999, 65507, 53642,
33205, 28393, 64730, 62166, 3072, 21290, 32671, 16090
],
[
57940, 232, 21443, 38228, 24592, 31831, 47141, 13988,
56517, 15268, 43852, 10910, 16864, 3750, 2324, 55926,
52529, 63507, 19813, 52501, 51613, 53019, 15359, 50807,
49650, 18431, 6561, 16785, 34522, 64502, 17018, 55965,
37195, 41610, 22261, 18801, 55598, 13243, 34069, 41307,
57095, 44979, 58172, 60846, 47304, 48562, 46660, 34298,
46533, 938, 21264, 32611, 53957, 36623, 17883, 38072,
55055, 24444, 54857, 24042, 23411, 6340, 14471, 60606,
47950, 36733, 13872, 38012, 49976, 47941, 13784, 41536,
27385, 6421, 36846, 9154, 54984, 17971, 43452, 35982,
18909, 64716, 3057, 7331, 35804, 20941, 45403, 25324,
45385, 34725, 49366, 3261, 41065, 63838, 63868, 23479,
35036, 12204, 61492, 19476, 60146, 9741, 61013, 21995,
16163, 32324, 31149, 5612, 50295, 9066, 41594, 3669,
8247, 44652, 11000, 44052, 57, 56404, 3840, 45443,
25593, 53206, 48704, 1123, 51508, 47037, 24603, 21008,
59241, 20559, 40485, 53851, 30301, 35963, 10311, 46465,
2751, 41461, 52077, 53047, 50527, 28135, 56717, 58775,
7252, 2182, 37291, 7309, 58586, 41131, 52753, 18644,
28802, 35922, 19767, 14775, 17423, 44371, 35784, 11128,
64931, 10734, 64980, 29696, 46697, 9756, 10626, 49449,
51217, 36961, 36209, 25303, 28142, 29448, 32555, 30324,
1204, 39865, 23375, 42336, 27082, 42020, 5602, 63004,
61788, 20378, 14892, 40623, 56162, 26021, 40018, 1360,
25466, 4179, 48058, 35222, 14805, 31971, 20903, 11973,
3396, 57112, 37276, 31539, 21025, 4295, 61864, 22230,
44161, 19704, 64566, 5707, 61724, 4633, 3176, 57977,
25011, 18069, 33064, 15638, 44090, 7547, 16998, 4020,
11727, 65056, 39242, 26532, 31492, 38506, 34888, 51723,
10246, 891, 7213, 14542, 62756, 29443, 58703, 16924,
28473, 64411, 13112, 33107, 2052, 5554, 58118, 20121,
38618, 8220, 64212, 46166, 25219, 2696, 57893, 24740
],
[
41939, 18890, 56232, 36549, 57396, 25584, 22736, 2106,
26476, 29949, 16648, 23697, 59393, 9816, 40621, 22331,
8691, 53734, 55438, 10743, 59288, 48021, 30865, 32371,
56242, 29541, 13001, 15925, 32237, 5358, 40666, 8641,
24249, 31362, 45191, 16109, 56947, 2391, 18216, 17887,
32341, 34864, 41584, 26199, 44680, 16670, 48530, 53372,
4868, 38432, 64115, 64156, 20918, 29445, 30992, 11624,
58986, 43993, 27550, 25688, 49352, 2680, 34329, 8065,
34042, 13984, 24174, 25454, 16376, 42391, 43342, 48718,
11719, 19390, 9381, 56400, 36061, 57911, 44237, 40929,
30808, 39550, 51726, 6725, 5006, 63351, 176, 49000,
25365, 25864, 32816, 28046, 60193, 40882, 62089, 8642,
65057, 22007, 25018, 41912, 65349, 8201, 53632, 19204,
17582, 44496, 55265, 9957, 23197, 30659, 40765, 478,
4674, 26956, 7204, 9681, 24771, 7380, 58681, 50137,
33245, 25962, 12647, 27903, 1308, 9200, 36545, 829,
31207, 61564, 42741, 31021, 4229, 30837, 50225, 21812,
9798, 39955, 31769, 32996, 5078, 6999, 33475, 9753,
33956, 40679, 19434, 58727, 48060, 12579, 43328, 15770,
38541, 55975, 43673, 39849, 65176, 14683, 30848, 10711,
17884, 61869, 14941, 48722, 46559, 36753, 58520, 20978,
2987, 25981, 26057, 9987, 59456, 35810, 43943, 34600,
55244, 37135, 17124, 2288, 14928, 32895, 40829, 5368,
11032, 15143, 5008, 25715, 55822, 35856, 36427, 8171,
32190, 51369, 56893, 13214, 22587, 49878, 34193, 25575,
10323, 60250, 35562, 4243, 30525, 13970, 38843, 20234,
51106, 55968, 22523, 498, 23327, 63352, 5866, 34360,
12960, 10874, 60076, 3247, 46731, 30967, 11418, 13386,
16801, 2776, 26600, 39388, 52654, 60793, 64963, 62978,
55508, 34990, 1686, 20498, 48960, 40530, 40733, 34530,
30962, 63256, 35029, 54290, 61073, 40895, 23115, 8497,
51770, 17655, 11744, 32966, 48622, 23162, 46352, 65423
]
);
}
1;
namazu-2.0.21/pl/usage.pl 0000644 0000000 0000000 00000011246 10365420317 010577 # -*- Perl -*-
# $Id: usage.pl,v 1.25.8.11 2006-01-24 12:42:55 opengl2772 Exp $
# Copyright (C) 1997-1999 Satoru Takabayashi All rights reserved.
# Copyright (C) 2000-2006 Namazu Project All rights reserved.
# This is free software with ABSOLUTELY NO WARRANTY.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either versions 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA
#
# This file must be encoded in EUC-JP encoding
#
package usage;
require "var.pl";
# Placeholder wrapper: hands its first argument back unchanged.
# Any further arguments are ignored; with no arguments it yields undef.
sub N_ {
    my ($text) = @_;
    return $text;
}
#
# The fake "\n\" was removed because gettext 0.13 supports Perl strings.
# (2004/03/04 knok)
#
# Help text template for mknmz, passed through N_() and stored in $USAGE.
# It contains three %s placeholders: one after "mknmz" on the first line
# and two in the trailing "Report bugs to" lines; the code that fills them
# in is not part of this section of the file.
# NOTE(review): the "-e, --robots" description ends at "containing" with no
# object -- confirm that no text was lost from that line.
$USAGE = N_("mknmz %s, an indexer of Namazu.
Usage: mknmz [options] ...
Target files:
-a, --all target all files.
-t, --media-type=MTYPE set the media type for all target files to MTYPE.
-h, --mailnews same as --media-type='message/rfc822'
--mhonarc same as --media-type='text/html; x-type=mhonarc'
-F, --target-list=FILE load FILE which contains a list of target files.
--allow=PATTERN set PATTERN for file names which should be allowed.
--deny=PATTERN set PATTERN for file names which should be denied.
--exclude=PATTERN set PATTERN for pathnames which should be excluded.
-e, --robots exclude HTML files containing
-M, --meta handle HTML meta tags for field-specified search.
-r, --replace=CODE set CODE for replacing URI.
--html-split split an HTML file with anchors.
--mtime=NUM limit by mtime just like find(1)'s -mtime option.
e.g., -50 for recent 50 days, +50 for older than 50.
Morphological Analysis:
-b, --use-mecab use MeCab for analyzing Japanese.
-c, --use-chasen use ChaSen for analyzing Japanese.
-k, --use-kakasi use KAKASI for analyzing Japanese.
-m, --use-chasen-noun use ChaSen for extracting only nouns.
-L, --indexing-lang=LANG index with language specific processing.
Text Operations:
-E, --no-edge-symbol remove symbols on edge of word.
-G, --no-okurigana remove Okurigana in word.
-H, --no-hiragana ignore words consist of Hiragana only.
-K, --no-symbol remove symbols.
--decode-base64 decode base64 bodies within multipart entities.
Summarization:
-U, --no-encode-uri do not encode URI.
-x, --no-heading-summary do not make summary with HTML's headings.
Index Construction:
--update=INDEX set INDEX for updating.
-z, --check-filesize detect file size changed.
-Y, --no-delete do not detect removed documents.
-Z, --no-update do not detect update and deleted documents.
Miscellaneous:
-s, --checkpoint turn on the checkpoint mechanism.
-C, --show-config show the current configuration.
-f, --config=FILE use FILE as a config file.
-I, --include=FILE include your customization FILE.
-O, --output-dir=DIR set DIR to output the index.
-T, --template-dir=DIR set DIR having NMZ.{head,foot,body}.*.
-q, --quiet suppress status messages during execution.
-v, --version show the version of namazu and exit.
-V, --verbose be verbose.
-d, --debug be debug mode.
--help show this help and exit.
--norc do not read the personal initialization files.
-- Terminate option list.
Report bugs to <%s>
or <%s>.
");
##
## Version information
##
$VERSION_INFO = <