debian/0000755000000000000000000000000012163337464007177 5ustar debian/rules0000755000000000000000000000146212163327637010263 0ustar #!/usr/bin/make -f include /usr/share/cdbs/1/rules/debhelper.mk include /usr/share/cdbs/1/class/ant.mk PACKAGE := $(DEB_SOURCE_PACKAGE) VERSION := $(DEB_UPSTREAM_VERSION) JAVA_HOME := /usr/lib/jvm/default-java ANT_OPTS := -Dlib.dir=. DEB_JARS := xercesImpl nekohtml DEB_ANT_BUILD_TARGET := jars binary-post-install/lib$(PACKAGE)-java:: mh_installpoms -plib$(PACKAGE)-java mh_installjar -plib$(PACKAGE)-java -l debian/pom.xml dist/boilerpipe-$(VERSION).jar jh_classpath -plibboilerpipe-java clean:: mh_clean get-orig-source: -uscan --download-version $(DEB_UPSTREAM_VERSION) --force-download --rename get-orig-pom: wget -O debian/pom.xml http://boilerpipe.googlecode.com/svn/repo/de/l3s/boilerpipe/boilerpipe/$(VERSION)/boilerpipe-$(VERSION).pom debian/watch0000644000000000000000000000014012163327637010224 0ustar version=3 http://code.google.com/p/boilerpipe/downloads/list .*/boilerpipe-([\d\.]+)-src.tar.gz debian/libboilerpipe-java.classpath0000644000000000000000000000007312163327637014644 0ustar /usr/share/java/boilerpipe.jar nekohtml.jar xercesImpl.jar debian/changelog0000644000000000000000000000023312163327637011050 0ustar boilerpipe (1.2.0-1) unstable; urgency=low * Initial release (Closes: #712827) -- Emmanuel Bourg Thu, 20 Jun 2013 00:19:21 +0200 debian/libboilerpipe-java.poms0000644000000000000000000000003212163327637013633 0ustar debian/pom.xml --java-lib debian/source/0000755000000000000000000000000012163327637010500 5ustar debian/source/format0000644000000000000000000000001412163327637011706 0ustar 3.0 (quilt) debian/pom.xml0000644000000000000000000000342612163327637010522 0ustar 4.0.0 de.l3s.boilerpipe boilerpipe jar 1.2.0 http://code.google.com/p/boilerpipe/ Apache License 2.0 Boilerpipe -- Boilerplate Removal and Fulltext Extraction from HTML pages The boilerpipe library provides algorithms to detect and remove the surplus "clutter" (boilerplate, templates) around the main textual content of a web page. The library already provides specific strategies for common tasks (for example: news article extraction) and may also be easily extended for individual problem settings. Extracting content is very fast (milliseconds), just needs the input document (no global or site-level information required) and is usually quite accurate. Boilerpipe is a Java library written by Christian Kohlschütter. It is released under the Apache License 2.0. The algorithms used by the library are based on (and extending) some concepts of the paper "Boilerplate Detection using Shallow Text Features" by Christian Kohlschütter et al., presented at WSDM 2010 -- The Third ACM International Conference on Web Search and Data Mining New York City, NY USA. scm:svn:http://boilerpipe.googlecode.com/svn/trunk/ http://code.google.com/p/boilerpipe/source/browse/ Christian Kohlschütter debian/compat0000644000000000000000000000000212163327637010376 0ustar 7 debian/copyright0000644000000000000000000000127512163327637011140 0ustar Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: boilerpipe Upstream-Contact: Christian Kohlschütter Source: http://code.google.com/p/boilerpipe/ Files: * Copyright: 2009-2012, Christian Kohlschütter License: Apache-2.0 Files: src/main/org/cyberneko/html/HTMLTagBalancer.java src/main/org/cyberneko/html/HTMLElements.java Copyright: 2002-2009, Andy Clark, Marc Guillemot License: Apache-2.0 Files: debian/* Copyright: 2013, Emmanuel Bourg License: Apache-2.0 License: Apache-2.0 On Debian systems you can find the complete text of the Apache-2.0 license in '/usr/share/common-licenses/Apache-2.0' debian/control0000644000000000000000000000227712163327637010613 0ustar Source: boilerpipe Section: java Priority: optional Maintainer: Debian Java Maintainers Uploaders: Emmanuel Bourg Build-Depends: debhelper (>= 8), cdbs, default-jdk Build-Depends-Indep: maven-repo-helper, ant (>= 1.6.5), javahelper, libnekohtml-java, libxerces2-java Standards-Version: 3.9.4 Vcs-Svn: svn://anonscm.debian.org/pkg-java/trunk/boilerpipe Vcs-Browser: http://anonscm.debian.org/viewvc/pkg-java/trunk/boilerpipe Homepage: http://code.google.com/p/boilerpipe Package: libboilerpipe-java Architecture: all Depends: ${misc:Depends}, libnekohtml-java, libxerces2-java Description: Boilerplate removal and fulltext extraction from HTML pages The boilerpipe library provides algorithms to detect and remove the surplus "clutter" (boilerplate, templates) around the main textual content of a web page. . The library already provides specific strategies for common tasks (for example: news article extraction) and may also be easily extended for individual problem settings. . Extracting content is very fast (milliseconds), just needs the input document (no global or site-level information required) and is usually quite accurate.