/**
 * Lucene index management for the LetoDMS_Lucene package.
 *
 * Package information carried over from package.xml:
 *   Name:     LetoDMS_Lucene (channel pear.php.net)
 *   Summary:  Fulltext search for LetoDMS
 *   About:    LetoDMS is a web based document management system (DMS).
 *             This is the fulltext search engine for it, based on Lucene.
 *   Lead:     Uwe Steinmann (steinm, uwe@steinmann.cx)
 *   Release:  1.1.1, beta, 2012-12-13, GPL License -
 *             catch exception if index is opened but not available
 *   Requires: 4.3.0 / 1.5.4 (presumably the minimum PHP and PEAR
 *             installer versions - confirm against the real package.xml)
 *   History:  0.0.1 (alpha, 2009-04-27, BSD License)
 *             1.0.1 (beta, 2011-11-06, GPL License) - New Release
 *             1.1.0 (beta, 2012-11-06, GPL License) - use a configurable
 *             list of mime type converters, fixed indexing and searching
 *             of special chars like german umlaute.
 *
 * @copyright  Copyright (C) 2010, Uwe Steinmann
 * @version    Release: 1.1.1
 */

/**
 * Class for managing a lucene index.
 *
 * @category   DMS
 * @package    LetoDMS_Lucene
 * @version    @version@
 * @author     Uwe Steinmann
 * @copyright  Copyright (C) 2011, Uwe Steinmann
 * @version    Release: 1.1.1
 */
class LetoDMS_Lucene_Indexer
{
    /**
     * @var string $indexname name of lucene index
     * @access protected
     */
    protected $indexname;

    /**
     * Open an existing index.
     *
     * @param string $luceneDir directory handed to Zend_Search_Lucene::open()
     * @return object|null whatever Zend_Search_Lucene::open() returns, or
     *         null if opening raised an exception
     */
    public function open($luceneDir)
    {
        try {
            return Zend_Search_Lucene::open($luceneDir);
        } catch (Exception $e) {
            // Opening failed: report "no index" to the caller instead of
            // letting the exception propagate.
            return null;
        }
    }

    /**
     * Create a new index.
     *
     * @param string $luceneDir directory handed to Zend_Search_Lucene::create()
     * @return object whatever Zend_Search_Lucene::create() returns
     */
    public function create($luceneDir)
    {
        return Zend_Search_Lucene::create($luceneDir);
    }

    /**
     * Do some initialization
     *
     * Installs a case insensitive UTF-8 analyzer as the default analyzer.
     * A stop words filter is attached to it first, but only if a stop words
     * file was named and that file exists.
     *
     * @param string $stopWordsFile optional path of a stop words file
     */
    public function init($stopWordsFile = '')
    {
        $analyzer = new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive();

        $hasStopWords = $stopWordsFile && file_exists($stopWordsFile);
        if ($hasStopWords) {
            $filter = new Zend_Search_Lucene_Analysis_TokenFilter_StopWords();
            $filter->loadFromFile($stopWordsFile);
            $analyzer->addFilter($filter);
        }

        Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);
    }
}
/**
 * Searching in a lucene index (Lucene/Search.php).
 *
 * @copyright  Copyright (C) 2010, Uwe Steinmann
 * @version    Release: 1.1.1
 */

/**
 * Class for searching in a lucene index.
 *
 * @category   DMS
 * @package    LetoDMS_Lucene
 * @version    @version@
 * @author     Uwe Steinmann
 * @copyright  Copyright (C) 2011, Uwe Steinmann
 * @version    Release: 1.1.1
 */
class LetoDMS_Lucene_Search
{
    /**
     * @var object $index lucene index
     * @access protected
     */
    protected $index;

    /**
     * @var string $version release string set by the constructor. Declared
     *      explicitly (and public, as the former dynamic property was) so
     *      that assigning it is not a deprecated dynamic property creation.
     * @access public
     */
    public $version;

    /**
     * Create a new instance of the search
     *
     * @param object $index lucene index; search() calls find() on it
     */
    public function __construct($index)
    {
        $this->index = $index;
        $this->version = '1.1.1';
        // A release string starting with '@' is replaced by '3.0.0'
        // (presumably an unsubstituted packaging placeholder - confirm).
        if ($this->version[0] == '@') {
            $this->version = '3.0.0';
        }
    }

    /**
     * Search in index
     *
     * Builds a query from the given restrictions, joined with ' && ', and
     * runs it through find() of the index.
     *
     * @param string $term search term; surrounding white space is removed.
     *        It is only used if $fields is empty.
     * @param string $owner if set, hits are restricted with 'owner:<value>'
     * @param string $status accepted but not evaluated by this method
     * @param array $categories category names; if not empty, a hit must
     *        match at least one of them
     * @param array $fields if not empty, $term is left out of the query;
     *        no field specific query is built (NOTE(review): looks like an
     *        unfinished feature - confirm the intended behaviour)
     * @return array list of arrays with the keys 'id' and 'document_id',
     *         one per hit
     */
    public function search($term, $owner, $status = '', $categories = array(), $fields = array())
    {
        $clauses = array();

        if (!$fields) {
            // Compare against '' instead of relying on truthiness, so that
            // a term of "0" is searched for and not silently dropped.
            $term = trim((string) $term);
            if ($term !== '') {
                $clauses[] = $term;
            }
        }

        if ($owner) {
            $clauses[] = 'owner:' . $owner;
        }

        if ($categories) {
            $clauses[] = '(category:"' . implode('" || category:"', $categories) . '")';
        }

        $hits = $this->index->find(implode(' && ', $clauses));

        $recs = array();
        foreach ($hits as $hit) {
            $recs[] = array('id' => $hit->id, 'document_id' => $hit->document_id);
        }

        return $recs;
    }
}

/**
 * Indexed documents for the lucene index (Lucene/IndexedDocument.php).
 *
 * @copyright  Copyright (C) 2010, Uwe Steinmann
 * @version    Release: 1.1.1
 */

/**
 * Class for managing an indexed document.
 *
 * @category   DMS
 * @package    LetoDMS_Lucene
 * @version    @version@
 * @author     Uwe Steinmann
 * @copyright  Copyright (C) 2011, Uwe Steinmann
 * @version    Release: 1.1.1
 */
class LetoDMS_Lucene_IndexedDocument extends Zend_Search_Lucene_Document
{
    /**
     * Constructor. Creates our indexable document and adds all
     * necessary fields to it using the passed in document
     *
     * Fields are added in this order: document_id, then (if the document
     * has a latest version) mimetype, origfilename, created and the
     * version attributes, then title, category, the document attributes,
     * owner, keywords, comment and finally content.
     *
     * @param object $dms its contentDir property is put in front of the
     *        path of the latest version to locate the file
     * @param object $document document to index
     * @param array $convcmd optional list of shell command templates
     *        indexed by mime type. '%s' in a template is replaced by the
     *        shell quoted file path. If set, this list replaces the built
     *        in list completely.
     * @param boolean $nocontent if true, neither the 'created' nor the
     *        'content' field is added and no converter is run
     */
    public function __construct($dms, $document, $convcmd = null, $nocontent = false)
    {
        $_convcmd = array(
            'application/pdf' => 'pdftotext -enc UTF-8 -nopgbrk %s - |sed -e \'s/ [a-zA-Z0-9.]\{1\} / /g\' -e \'s/[0-9.]//g\'',
            'application/msword' => 'catdoc %s',
            'application/vnd.ms-excel' => 'ssconvert -T Gnumeric_stf:stf_csv -S %s fd://1',
            'audio/mp3' => "id3 -l -R %s | egrep '(Title|Artist|Album)' | sed 's/^[^:]*: //g'",
            'audio/mpeg' => "id3 -l -R %s | egrep '(Title|Artist|Album)' | sed 's/^[^:]*: //g'",
            'text/plain' => 'cat %s',
        );
        if ($convcmd) {
            $_convcmd = $convcmd;
        }

        $version = $document->getLatestContent();
        $this->addField(Zend_Search_Lucene_Field::Keyword('document_id', $document->getID()));
        if ($version) {
            $this->addField(Zend_Search_Lucene_Field::Keyword('mimetype', $version->getMimeType()));
            $this->addField(Zend_Search_Lucene_Field::Keyword('origfilename', $version->getOriginalFileName()));
            // 'created' is deliberately tied to $nocontent, exactly as before.
            if (!$nocontent) {
                $this->addField(Zend_Search_Lucene_Field::UnIndexed('created', $version->getDate()));
            }
            if ($attributes = $version->getAttributes()) {
                $this->addAttributeFields($attributes);
            }
        }

        $this->addField(Zend_Search_Lucene_Field::Text('title', $document->getName()));

        if ($categories = $document->getCategories()) {
            $names = array();
            foreach ($categories as $cat) {
                $names[] = $cat->getName();
            }
            $this->addField(Zend_Search_Lucene_Field::Text('category', implode(' ', $names)));
        }

        if ($attributes = $document->getAttributes()) {
            $this->addAttributeFields($attributes);
        }

        // Skip the owner field instead of failing with a fatal error if the
        // document has no owner object.
        $owner = $document->getOwner();
        if (is_object($owner)) {
            $this->addField(Zend_Search_Lucene_Field::Text('owner', $owner->getLogin()));
        }

        if ($keywords = $document->getKeywords()) {
            $this->addField(Zend_Search_Lucene_Field::Text('keywords', $keywords));
        }

        if ($comment = $document->getComment()) {
            $this->addField(Zend_Search_Lucene_Field::Text('comment', $comment));
        }

        if ($version && !$nocontent) {
            $mimetype = $version->getMimeType();
            if (isset($_convcmd[$mimetype])) {
                $path = $dms->contentDir . $version->getPath();
                $content = self::convertToText($_convcmd[$mimetype], $path);
                if ($content) {
                    $this->addField(Zend_Search_Lucene_Field::UnStored('content', $content, 'utf-8'));
                }
            }
        }
    }

    /**
     * Add one field per attribute.
     *
     * The field is named 'attr_' followed by the name of the attribute
     * definition with blanks replaced by '_'. Attributes whose definition
     * has a value set become Keyword fields, all others Text fields.
     *
     * @param array $attributes attributes of a document or a version
     */
    private function addAttributeFields($attributes)
    {
        foreach ($attributes as $attribute) {
            $attrdef = $attribute->getAttributeDefinition();
            $name = 'attr_' . str_replace(' ', '_', $attrdef->getName());
            if ($attrdef->getValueSet() != '') {
                $this->addField(Zend_Search_Lucene_Field::Keyword($name, $attribute->getValue()));
            } else {
                $this->addField(Zend_Search_Lucene_Field::Text($name, $attribute->getValue()));
            }
        }
    }

    /**
     * Run a converter command on a file and return its output.
     *
     * @param string $template shell command with '%s' as placeholder for
     *        the file
     * @param string $path file to convert
     * @return string output of the command, '' if it could not be started
     *         or its output could not be read
     */
    private static function convertToText($template, $path)
    {
        // escapeshellarg() adds its own quoting, so drop quotes a custom
        // template may already have put directly around the placeholder.
        $template = preg_replace('/([\'"])%s\1/', '%s', $template);

        // Quote the path: blanks or shell meta characters in it must not be
        // interpreted by the shell.
        $cmd = sprintf($template, escapeshellarg($path));

        $fp = popen($cmd, 'r');
        if (!$fp) {
            return '';
        }
        $content = stream_get_contents($fp);
        pclose($fp);

        return $content === false ? '' : $content;
    }
}