package.xml 0000644 0001750 0001750 00000006436 12062276643 012574 0 ustar steinm steinm
LetoDMS_Lucene
pear.php.net
Fulltext search for LetoDMS
LetoDMS is a web based document management system (DMS). This is
the fulltext search engine for it, based on Lucene.
Uwe Steinmann
steinm
uwe@steinmann.cx
yes
2012-12-13
1.1.1
1.1.1
beta
beta
GPL License
catch exception if index is opened but not available
4.3.0
1.5.4
0.0.1
0.0.1
alpha
alpha
2009-04-27
BSD License
2011-11-06
1.0.1
1.0.0
beta
beta
GPL License
- New Release
2012-11-06
1.1.0
1.1.0
beta
beta
GPL License
use a configurable list of mime type converters, fixed indexing and searching
of special chars like German umlauts.
LetoDMS_Lucene-1.1.1/Lucene/Indexer.php 0000644 0001750 0001750 00000002654 12062276643 017175 0 ustar steinm steinm
* @copyright Copyright (C) 2010, Uwe Steinmann
* @version Release: 1.1.1
*/
/**
* Class for managing a lucene index.
*
* @category DMS
* @package LetoDMS_Lucene
* @version @version@
* @author Uwe Steinmann
* @copyright Copyright (C) 2011, Uwe Steinmann
* @version Release: 1.1.1
*/
class LetoDMS_Lucene_Indexer {
	/**
	 * @var string $indexname name of lucene index
	 * @access protected
	 */
	protected $indexname;

	/**
	 * Open an existing lucene index
	 *
	 * The method does not use any instance state and is declared static,
	 * so it can be called as LetoDMS_Lucene_Indexer::open() as well as on
	 * an instance.
	 *
	 * @param string $luceneDir directory handed to Zend_Search_Lucene::open()
	 * @return object|null index returned by Zend_Search_Lucene::open() or
	 *         null if opening the index threw an exception
	 */
	public static function open($luceneDir) { /* {{{ */
		try {
			return Zend_Search_Lucene::open($luceneDir);
		} catch (Exception $e) {
			// An index that cannot be opened is reported as null instead
			// of passing the exception on to the caller.
			return null;
		}
	} /* }}} */

	/**
	 * Create a new lucene index
	 *
	 * Unlike open() this method does not catch exceptions thrown by
	 * Zend_Search_Lucene::create().
	 *
	 * @param string $luceneDir directory handed to Zend_Search_Lucene::create()
	 * @return object index returned by Zend_Search_Lucene::create()
	 */
	public static function create($luceneDir) { /* {{{ */
		return Zend_Search_Lucene::create($luceneDir);
	} /* }}} */

	/**
	 * Do some initialization
	 *
	 * Installs a case insensitive utf-8 analyzer as the default analyzer.
	 * If a stop words file is passed and exists, a stop words filter
	 * loaded from that file is added to the analyzer first.
	 *
	 * @param string $stopWordsFile optional path of a file with stop words
	 */
	public static function init($stopWordsFile='') { /* {{{ */
		$analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive();
		if($stopWordsFile && file_exists($stopWordsFile)) {
			$stopWordsFilter = new Zend_Search_Lucene_Analysis_TokenFilter_StopWords();
			$stopWordsFilter->loadFromFile($stopWordsFile);
			$analyzer->addFilter($stopWordsFilter);
		}

		Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);
	} /* }}} */
}
?>
LetoDMS_Lucene-1.1.1/Lucene/Search.php 0000644 0001750 0001750 00000003262 12062276643 017000 0 ustar steinm steinm
* @copyright Copyright (C) 2010, Uwe Steinmann
* @version Release: 1.1.1
*/
/**
* Class for searching in a lucene index.
*
* @category DMS
* @package LetoDMS_Lucene
* @version @version@
* @author Uwe Steinmann
* @copyright Copyright (C) 2011, Uwe Steinmann
* @version Release: 1.1.1
*/
class LetoDMS_Lucene_Search {
	/**
	 * @var object $index lucene index
	 * @access protected
	 */
	protected $index;

	/**
	 * @var string $version version of this package
	 * @access public
	 */
	public $version;

	/**
	 * Create a new instance of the search
	 *
	 * @param object $index lucene index
	 * @return object instance of LetoDMS_Lucene_Search
	 */
	function __construct($index) { /* {{{ */
		$this->index = $index;
		$this->version = '1.1.1';
		// NOTE(review): the literal above is presumably substituted for a
		// '@version@' placeholder when the package is built; a value still
		// starting with '@' falls back to a fixed version.
		if($this->version[0] == '@')
			$this->version = '3.0.0';
	} /* }}} */

	/**
	 * Search in index
	 *
	 * Builds a query string from the passed restrictions, joined by ' && ',
	 * and runs it with find() on the index.
	 *
	 * @param string $term search term; it is only used if $fields is empty
	 * @param string $owner if set, restricts the hits to this owner
	 * @param string $status currently not used
	 * @param array $categories if not empty, a hit must match at least one
	 *        of these category names
	 * @param array $fields if not empty, $term is not added to the query
	 * @return array list of arrays with the keys 'id' and 'document_id',
	 *         one for each hit
	 */
	function search($term, $owner, $status='', $categories=array(), $fields=array()) { /* {{{ */
		// Collect the clauses first and join them afterwards. Joining by
		// testing the truthiness of the query built so far would glue a
		// query of '0' to the next clause without a separator.
		$clauses = array();
		if(!$fields && $term) {
			$trimmed = trim($term);
			if($trimmed !== '')
				$clauses[] = $trimmed;
		}
		if($owner) {
			$clauses[] = 'owner:'.$owner;
		}
		if($categories) {
			$clauses[] = '(category:"'.implode('" || category:"', $categories).'")';
		}
		$query = implode(' && ', $clauses);

		$hits = $this->index->find($query);
		$recs = array();
		foreach($hits as $hit) {
			$recs[] = array('id'=>$hit->id, 'document_id'=>$hit->document_id);
		}
		return $recs;
	} /* }}} */
}
?>
LetoDMS_Lucene-1.1.1/Lucene/IndexedDocument.php 0000644 0001750 0001750 00000007526 12062276643 020661 0 ustar steinm steinm
* @copyright Copyright (C) 2010, Uwe Steinmann
* @version Release: 1.1.1
*/
/**
* Class for managing an indexed document.
*
* @category DMS
* @package LetoDMS_Lucene
* @version @version@
* @author Uwe Steinmann
* @copyright Copyright (C) 2011, Uwe Steinmann
* @version Release: 1.1.1
*/
class LetoDMS_Lucene_IndexedDocument extends Zend_Search_Lucene_Document {
	/**
	 * Constructor. Creates our indexable document and adds all
	 * necessary fields to it using the passed in document
	 *
	 * The fields document_id, title and owner are always added. Fields for
	 * mimetype, origfilename, created, category, keywords, comment, the
	 * attributes (attr_*) and content are added if the document provides
	 * the data.
	 *
	 * @param object $dms dms object; only its property contentDir is read
	 * @param object $document document to be indexed
	 * @param array $convcmd list of commands for converting a file into
	 *        plain text, indexed by mime type. Each command contains a %s
	 *        which is replaced by the path of the file. If set, it replaces
	 *        the built-in list completely.
	 * @param boolean $nocontent set to true if neither the field 'created'
	 *        nor the content of the file shall be added
	 */
	public function __construct($dms, $document, $convcmd=null, $nocontent=false) {
		$_convcmd = array(
			'application/pdf' => 'pdftotext -enc UTF-8 -nopgbrk %s - |sed -e \'s/ [a-zA-Z0-9.]\{1\} / /g\' -e \'s/[0-9.]//g\'',
			'application/msword' => 'catdoc %s',
			'application/vnd.ms-excel' => 'ssconvert -T Gnumeric_stf:stf_csv -S %s fd://1',
			'audio/mp3' => "id3 -l -R %s | egrep '(Title|Artist|Album)' | sed 's/^[^:]*: //g'",
			'audio/mpeg' => "id3 -l -R %s | egrep '(Title|Artist|Album)' | sed 's/^[^:]*: //g'",
			'text/plain' => 'cat %s',
		);
		if($convcmd) {
			$_convcmd = $convcmd;
		}

		$version = $document->getLatestContent();
		$this->addField(Zend_Search_Lucene_Field::Keyword('document_id', $document->getID()));
		if($version) {
			$this->addField(Zend_Search_Lucene_Field::Keyword('mimetype', $version->getMimeType()));
			$this->addField(Zend_Search_Lucene_Field::Keyword('origfilename', $version->getOriginalFileName()));
			if(!$nocontent)
				$this->addField(Zend_Search_Lucene_Field::UnIndexed('created', $version->getDate()));
			$this->addAttributeFields($version->getAttributes());
		}
		$this->addField(Zend_Search_Lucene_Field::Text('title', $document->getName()));
		if($categories = $document->getCategories()) {
			$names = array();
			foreach($categories as $cat) {
				$names[] = $cat->getName();
			}
			$this->addField(Zend_Search_Lucene_Field::Text('category', implode(' ', $names)));
		}
		$this->addAttributeFields($document->getAttributes());

		$owner = $document->getOwner();
		$this->addField(Zend_Search_Lucene_Field::Text('owner', $owner->getLogin()));
		if($keywords = $document->getKeywords()) {
			$this->addField(Zend_Search_Lucene_Field::Text('keywords', $keywords));
		}
		if($comment = $document->getComment()) {
			$this->addField(Zend_Search_Lucene_Field::Text('comment', $comment));
		}

		if($version && !$nocontent) {
			$mimetype = $version->getMimeType();
			// Content is only indexed for mime types with a converter
			if(isset($_convcmd[$mimetype])) {
				$path = $dms->contentDir . $version->getPath();
				$content = $this->readConvertedContent($_convcmd[$mimetype], $path);
				if($content) {
					$this->addField(Zend_Search_Lucene_Field::UnStored('content', $content, 'utf-8'));
				}
			}
		}
	}

	/**
	 * Add one field for each of the passed attributes
	 *
	 * The field is named 'attr_' followed by the name of the attribute
	 * definition with spaces replaced by underscores. Attributes whose
	 * definition has a value set are added as keyword fields, all others
	 * as text fields.
	 *
	 * @param array $attributes list of attributes, may be empty
	 */
	private function addAttributeFields($attributes) { /* {{{ */
		if(!$attributes)
			return;
		foreach($attributes as $attribute) {
			$attrdef = $attribute->getAttributeDefinition();
			$hasValueSet = $attrdef->getValueSet() != '';
			$fieldname = 'attr_'.str_replace(' ', '_', $attrdef->getName());
			if($hasValueSet)
				$this->addField(Zend_Search_Lucene_Field::Keyword($fieldname, $attribute->getValue()));
			else
				$this->addField(Zend_Search_Lucene_Field::Text($fieldname, $attribute->getValue()));
		}
	} /* }}} */

	/**
	 * Run a converter command on a file and return its output
	 *
	 * @param string $template command containing a %s for the file path
	 * @param string $path path of the file to be converted
	 * @return string output of the command or an empty string if the
	 *         command could not be started or its output could not be read
	 */
	private function readConvertedContent($template, $path) { /* {{{ */
		// The path is quoted below. A template which already puts quotes
		// around the placeholder would otherwise end up quoted twice.
		$template = str_replace(array("'%s'", '"%s"'), '%s', $template);
		// Quote the path, so spaces or shell metacharacters in it cannot
		// break or alter the command. escapeshellarg() depends on the
		// current locale for paths with non-ASCII characters.
		$cmd = sprintf($template, escapeshellarg($path));
		$fp = popen($cmd, 'r');
		if(!$fp)
			return '';
		$content = stream_get_contents($fp);
		pclose($fp);
		return $content === false ? '' : $content;
	} /* }}} */
}
?>
LetoDMS_Lucene-1.1.1/Lucene.php 0000644 0001750 0001750 00000002405 12062276643 015571 0 ustar steinm steinm