debian/0000755000000000000000000000000011637736761007207 5ustar debian/libstemmer-dev.install0000644000000000000000000000004211637736761013512 0ustar include/libstemmer.h /usr/include debian/source/0000755000000000000000000000000011637736761010507 5ustar debian/source/format0000644000000000000000000000001411637736761011715 0ustar 3.0 (quilt) debian/libstemmer-tools.manpages0000644000000000000000000000002311637736761014220 0ustar debian/stemwords.1 debian/changelog0000644000000000000000000000265511637736761011071 0ustar snowball (0+svn546-2) unstable; urgency=low * Pass dpkg-buildflags CFLAGS and LDFLAGS to dh_auto_build - No need to check for noopt in build-options.diff any more. * Drop unnecessary dh_auto_test override -- Stefano Rivera Mon, 26 Sep 2011 01:48:59 +0200 snowball (0+svn546-1) unstable; urgency=low * New upstream snapshot. * Binary package descriptions: - Provide the full description for all except -dbg. - Mention that -dev contains static library. * Patches: - Renamed shared-module.diff -> shared-library.diff. Removed explicit ranlib call. - Moved all compile options changes into build-options.diff. * Bump Standards-Version to 3.9.2, no changes needed. * Bump debhelper compat to 8. * Updated my e-mail address. * Update copyright format. * Sort *Depends and debhelper install file contents. * Convert for multi-arch: - libstemmer0d: Pre-Depend on ${misc:Pre-Depends} - libstemmer0d{,-dbg}: Multi-Arch: same - libstemmer-tools: Multi-Arch: foreign - Install shared and static library to multi-arch location. - Build-Depend on dpkg-dev >= 1.16. -- Stefano Rivera Sat, 10 Sep 2011 21:34:47 +0200 snowball (0+svn527-1) unstable; urgency=low [ Stefano Rivera ] * Initial release. (Closes: #572135) [ Jakub Wilk ] * Add myself to uploaders. -- Stefano Rivera Sat, 13 Mar 2010 03:52:06 +0200 debian/examples0000644000000000000000000000001511637736761010744 0ustar examples/*.c debian/stemwords.10000644000000000000000000000176111637736761011325 0ustar .TH STEMWORDS "1" "March 2010" "Snowball" "User Commands" .SH NAME stemwords \- Snowball word stemming utility .SH DESCRIPTION .B stemwords .RB [ \-l .IR language ] .RB [ \-i .IR "input file" ] .RB [ \-o .IR "output file" ] .RB [ \-c .IR "character encoding" ] .RB [ \-p [ 2 "]] [" \-h ] .PP The input file consists of a list of words to be stemmed, one per line. Words should be in lower case, but (for English) A\-Z letters are mapped to their a\-z equivalents anyway. If omitted, stdin is used. .PP If \fB\-c\fR is given, the argument is the character encoding of the input and output files. If it is omitted, the UTF\-8 encoding is used. .PP If \fB\-p\fR is given the output file consists of each word of the input file followed by "\->" followed by its stemmed equivalent. If \fB\-p2\fR is given the output file is a two column layout containing the input words in the first column and the stemmed equivalents in the second column. Otherwise, the output file consists of the stemmed words, one per line. debian/control0000644000000000000000000000757511637736761010630 0ustar Source: snowball Section: libs Priority: optional Maintainer: Stefano Rivera Uploaders: Jakub Wilk Build-Depends: debhelper (>= 8), dpkg-dev (>= 1.16) Standards-Version: 3.9.2 Homepage: http://snowball.tartarus.org/ Vcs-Bzr: http://bzr.debian.org/bzr/collab-maint/snowball/trunk/ Vcs-Browser: http://bzr.debian.org/loggerhead/collab-maint/snowball/trunk/ Package: libstemmer0d Architecture: any Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, ${shlibs:Depends} Multi-Arch: same Description: Snowball stemming algorithms for use in Information Retrieval Snowball provides access to efficient algorithms for calculating a "stemmed" form of a word. This is a form with most of the common morphological endings removed; hopefully representing a common linguistic base form. This is most useful in building search engines and information retrieval software; for example, a search with stemming enabled should be able to find a document containing "cycling" given the query "cycles". . Snowball provides algorithms for several (mainly European) languages. It also provides access to the classic Porter stemming algorithm for English: although this has been superseded by an improved algorithm, the original algorithm may be of interest to information retrieval researchers wishing to reproduce results of earlier experiments. Package: libstemmer0d-dbg Section: debug Priority: extra Architecture: any Depends: libstemmer0d (= ${binary:Version}), ${misc:Depends}, ${shlibs:Depends} Multi-Arch: same Description: Snowball stemming algorithms, debugging symbols Snowball provides access to efficient algorithms for calculating a "stemmed" form of a word. . This package contains the debugging symbols associated with Snowball. gdb will automatically use these symbols when debugging programs linked with Snowball. Package: libstemmer-dev Section: libdevel Architecture: any Depends: libstemmer0d (= ${binary:Version}), ${misc:Depends} Description: Snowball stemming algorithms, development kit Snowball provides access to efficient algorithms for calculating a "stemmed" form of a word. This is a form with most of the common morphological endings removed; hopefully representing a common linguistic base form. This is most useful in building search engines and information retrieval software; for example, a search with stemming enabled should be able to find a document containing "cycling" given the query "cycles". . Snowball provides algorithms for several (mainly European) languages. It also provides access to the classic Porter stemming algorithm for English: although this has been superseded by an improved algorithm, the original algorithm may be of interest to information retrieval researchers wishing to reproduce results of earlier experiments. . This package contains the static library and header files used in development. Package: libstemmer-tools Section: text Architecture: any Multi-Arch: foreign Depends: ${misc:Depends}, ${shlibs:Depends} Description: Simple word stemming utility using Snowball Snowball provides access to efficient algorithms for calculating a "stemmed" form of a word. This is a form with most of the common morphological endings removed; hopefully representing a common linguistic base form. This is most useful in building search engines and information retrieval software; for example, a search with stemming enabled should be able to find a document containing "cycling" given the query "cycles". . Snowball provides algorithms for several (mainly European) languages. It also provides access to the classic Porter stemming algorithm for English: although this has been superseded by an improved algorithm, the original algorithm may be of interest to information retrieval researchers wishing to reproduce results of earlier experiments. . This package contains "stemwords", a simple utility for stemming words. debian/libstemmer-tools.install0000644000000000000000000000002311637736761014073 0ustar stemwords /usr/bin debian/copyright0000644000000000000000000000525311637736761011147 0ustar Format: http://anonscm.debian.org/viewvc/dep/web/deps/dep5.mdwn?revision=174 Upstream-Name: snowball Upstream-Contact: Martin Porter Source: http://snowball.tartarus.org/ Comment: Upstream doesn't include a COPYING file, as described here: http://snowball.tartarus.org/license.php Copyright information obtained from the above link, and license text from the LICENSE file in pystemmer: http://svn.tartarus.org/snowball/trunk/pystemmer/LICENSE Files: * Copyright: 2001-2011, Dr Martin Porter and Richard Boulton License: Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Snowball project nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. . THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Files: debian/* Copyright: 2010-2011, Stefano Rivera License: GPL-2+ This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. . This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. . On Debian systems, the complete text of the GNU General Public License version 2 can be found in the /usr/share/common-licenses/GPL-2 file. debian/rules0000755000000000000000000000262611637736761010275 0ustar #!/usr/bin/make -f DEB_HOST_MULTIARCH ?= $(shell dpkg-architecture -qDEB_HOST_MULTIARCH) %: dh $@ override_dh_auto_build: dh_auto_build -- CFLAGS="$(shell dpkg-buildflags --get CFLAGS)" \ LDFLAGS="$(shell dpkg-buildflags --get LDFLAGS)" override_dh_installdocs: dh_installdocs --link-doc=libstemmer0d override_dh_strip: dh_strip --dbg-package=libstemmer0d-dbg rm -rf debian/libstemmer0d-dbg/usr/lib/debug/usr/bin/ override_dh_install: dh_install -plibstemmer0d libstemmer*.so.* /usr/lib/$(DEB_HOST_MULTIARCH) dh_install -plibstemmer-dev libstemmer.a libstemmer.so /usr/lib/$(DEB_HOST_MULTIARCH) dh_install get-orig-source: set -e; \ REV=$(shell dpkg-parsechangelog | sed -rne 's,^Version: .*svn([0-9]+).*,\1,p'); \ VER=$(shell dpkg-parsechangelog | sed -rne 's,^Version: ([^-]+).*,\1,p'); \ svn export -r $$REV svn://snowball.tartarus.org/snowball/trunk/snowball snowball-$$VER; \ svn export -r $$REV svn://snowball.tartarus.org/snowball/trunk/data snowball-$$VER/testdata; \ sed -e 's/The Regents of the University of California./2001, Dr Martin Porter, and (for the Java developments) Copyright (c) 2002, Richard Boulton/' \ -e 's/the University/the Snowball project/' \ /usr/share/common-licenses/BSD > snowball-$$VER/COPYING; \ GZIP=--best tar -cz --owner root --group root --mode a+rX \ -f snowball_$$VER.orig.tar.gz snowball-$$VER; \ rm -rf snowball-$$VER debian/libstemmer0d.symbols0000644000000000000000000000030511637736761013206 0ustar libstemmer.so.0d libstemmer0d #MINVER# sb_stemmer_delete@Base 0+svn527 sb_stemmer_length@Base 0+svn527 sb_stemmer_list@Base 0+svn527 sb_stemmer_new@Base 0+svn527 sb_stemmer_stem@Base 0+svn527 debian/README.Debian0000644000000000000000000000030311637736761011244 0ustar Snowball for Debian ------------------- Snowball upstream doesn't build shared libraries, so they are Debian-specific. -- Stefano Rivera Fri, 12 Mar 2010 20:04:32 +0200 debian/libstemmer.ver0000644000000000000000000000004311637736761012065 0ustar { global: sb_*; local: *; }; debian/docs0000644000000000000000000000005111637736761010056 0ustar AUTHORS doc/libstemmer_c_README doc/TODO debian/patches/0000755000000000000000000000000011637736761010636 5ustar debian/patches/build-options.diff0000644000000000000000000000120411637736761014255 0ustar Description: Move -Iinclude to CPPFLAGS Author: Stefano Rivera Forwarded: http://news.gmane.org/find-root.php?message_id=%3c20110821220427.GC1738%40bach.rivera.co.za%3e Last-Update: 2011-09-25 --- a/GNUmakefile +++ b/GNUmakefile @@ -69,8 +69,8 @@ JAVA_CLASSES = $(JAVA_SOURCES:.java=.class) JAVA_RUNTIME_CLASSES=$(JAVARUNTIME_SOURCES:.java=.class) -CFLAGS=-Iinclude -O2 -CPPFLAGS=-W -Wall -Wmissing-prototypes -Wmissing-declarations +CFLAGS=-O2 +CPPFLAGS=-W -Wall -Wmissing-prototypes -Wmissing-declarations -Iinclude all: snowball libstemmer.o stemwords $(C_OTHER_SOURCES) $(C_OTHER_HEADERS) $(C_OTHER_OBJECTS) debian/patches/series0000644000000000000000000000007611637736761012056 0ustar build-options.diff shared-library.diff testdata-location.diff debian/patches/testdata-location.diff0000644000000000000000000000302511637736761015107 0ustar Description: Test data location in our source is different to upstream. Author: Stefano Rivera Forwarded: not-needed Last-Update: 2011-02-05 --- a/GNUmakefile +++ b/GNUmakefile @@ -268,7 +268,7 @@ STEMWORDS=LD_LIBRARY_PATH=.:$(LD_LIBRARY_PATH) ./stemwords -check_utf8_%: ../data/% stemwords +check_utf8_%: testdata/% stemwords @echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer with UTF-8" @$(STEMWORDS) -c UTF_8 -l `echo $<|sed 's!.*/!!'` -i $ Forwarded: http://news.gmane.org/find-root.php?message_id=%3c20110821220427.GC1738%40bach.rivera.co.za%3e Last-Update: 2011-09-25 --- a/GNUmakefile +++ b/GNUmakefile @@ -72,12 +72,13 @@ CFLAGS=-O2 CPPFLAGS=-W -Wall -Wmissing-prototypes -Wmissing-declarations -Iinclude -all: snowball libstemmer.o stemwords $(C_OTHER_SOURCES) $(C_OTHER_HEADERS) $(C_OTHER_OBJECTS) +all: snowball libstemmer.so stemwords $(C_OTHER_SOURCES) $(C_OTHER_HEADERS) $(C_OTHER_OBJECTS) clean: rm -f $(COMPILER_OBJECTS) $(RUNTIME_OBJECTS) \ $(LIBSTEMMER_OBJECTS) $(LIBSTEMMER_UTF8_OBJECTS) $(STEMWORDS_OBJECTS) snowball \ - libstemmer.o stemwords \ + $(wildcard libstemmer.so*) libstemmer.a \ + stemwords \ libstemmer/modules.h \ libstemmer/modules_utf8.h \ snowball.splint \ @@ -86,7 +87,7 @@ $(JAVA_SOURCES) $(JAVA_CLASSES) $(JAVA_RUNTIME_CLASSES) \ libstemmer/mkinc.mak libstemmer/mkinc_utf8.mak \ libstemmer/libstemmer.c libstemmer/libstemmer_utf8.c - rm -rf dist + rm -rf dist .shared rmdir $(c_src_dir) || true snowball: $(COMPILER_OBJECTS) @@ -108,11 +109,16 @@ libstemmer/libstemmer.o: libstemmer/modules.h $(C_LIB_HEADERS) -libstemmer.o: libstemmer/libstemmer.o $(RUNTIME_OBJECTS) $(C_LIB_OBJECTS) - $(AR) -cru $@ $^ +libstemmer.so: libstemmer/libstemmer.o $(RUNTIME_OBJECTS) $(C_LIB_OBJECTS) + $(CC) $(CFLAGS) -shared $(LDFLAGS) \ + -Wl,--version-script=debian/libstemmer.ver,-soname,libstemmer.so.0d \ + -o $@.0d.0.0 ${^:%=.shared/%} + ln -s $@.0d.0.0 $@.0d + ln -s $@.0d.0.0 $@ + $(AR) -crs ${@:.so=.a} $^ -stemwords: $(STEMWORDS_OBJECTS) libstemmer.o - $(CC) -o $@ $^ +stemwords: $(STEMWORDS_OBJECTS) libstemmer.so + $(CC) -g -o $@ $(STEMWORDS_OBJECTS) -L. -lstemmer algorithms/%/stem_Unicode.sbl: algorithms/%/stem_ISO_8859_1.sbl cp $^ $@ @@ -146,7 +152,6 @@ ./snowball $< -o $${o} -eprefix $${l}_ISO_8859_2_ -r ../runtime $(c_src_dir)/stem_%.o: $(c_src_dir)/stem_%.c $(c_src_dir)/stem_%.h - $(CC) $(CFLAGS) -O2 -c -o $@ $< -Wall $(java_src_dir)/%Stemmer.java: algorithms/%/stem_Unicode.sbl snowball @mkdir -p $(java_src_dir) @@ -261,31 +266,38 @@ check_koi8r: $(KOI8_R_algorithms:%=check_koi8r_%) +STEMWORDS=LD_LIBRARY_PATH=.:$(LD_LIBRARY_PATH) ./stemwords + check_utf8_%: ../data/% stemwords @echo "Checking output of `echo $<|sed 's!.*/!!'` stemmer with UTF-8" - @./stemwords -c UTF_8 -l `echo $<|sed 's!.*/!!'` -i $