==> thawab-4.1/Makefile <==
APPNAME=thawab
DESTDIR?=/
DATADIR?=$(DESTDIR)/usr/share
SOURCES=$(wildcard *.desktop.in)
TARGETS=${SOURCES:.in=}
ECHO := echo
MAKE := make
PYTHON := python2
INSTALL := install
INTLTOOL_MERGE := intltool-merge
RM := $(shell which rm)
GTK_UPDATE_ICON_CACHE := $(shell which gtk-update-icon-cache)
UPDATE_DESKTOP_DATABASE := $(shell which update-desktop-database)
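# Typical invocations (a sketch; the DESTDIR value below is only an example):
#   make                            # render .desktop files and rasterize icons
#   make DESTDIR=/tmp/stage install
#   make uninstall
#   make clean
# Note: the `icons` target shells out to ImageMagick's `convert`, so it
# assumes ImageMagick is installed.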
all: $(TARGETS) icons
icons:
@for i in 96 72 64 48 36 32 24 22 16; do \
convert -background none $(APPNAME).svg -resize $${i}x$${i} $(APPNAME)-$${i}.png; \
done
pos:
$(MAKE) -C po all
install: locale
@$(ECHO) "*** Installing..."
@$(PYTHON) setup.py install -O2 --root $(DESTDIR)
@$(ECHO) "Copying: $(APPNAME).desktop -> $(DATADIR)/applications/"
@$(INSTALL) -d $(DATADIR)/applications/
@$(INSTALL) -d $(DATADIR)/$(APPNAME)/
@$(INSTALL) -m 0644 $(APPNAME).desktop $(DATADIR)/applications/
@$(INSTALL) -m 0644 -D $(APPNAME).svg $(DATADIR)/icons/hicolor/scalable/apps/$(APPNAME).svg;
@for i in 96 72 64 48 36 32 24 22 16; do \
$(INSTALL) -d $(DATADIR)/icons/hicolor/$${i}x$${i}/apps; \
$(INSTALL) -m 0644 -D $(APPNAME)-$${i}.png $(DATADIR)/icons/hicolor/$${i}x$${i}/apps/$(APPNAME).png; \
done
@$(RM) -rf build
@$(UPDATE_DESKTOP_DATABASE) --quiet $(DATADIR)/applications > /dev/null 2>&1 || :
@$(GTK_UPDATE_ICON_CACHE) --quiet $(DATADIR)/icons/hicolor > /dev/null 2>&1 || :
uninstall:
@$(ECHO) "*** Uninstalling..."
@$(ECHO) "- Removing: $(DATADIR)/applications/$(APPNAME).desktop"
@$(RM) -f $(DATADIR)/applications/$(APPNAME).desktop
@$(ECHO) "- Removing: $(DESTDIR)/usr/share/locale/*/LC_MESSAGES/$(APPNAME).mo"
@$(RM) -f $(DESTDIR)/usr/share/locale/*/LC_MESSAGES/$(APPNAME).mo
@$(ECHO) "- Removing: $(DESTDIR)/usr/bin/$(APPNAME)"
@$(RM) -f $(DESTDIR)/usr/bin/$(APPNAME)-gtk
@$(RM) -f $(DESTDIR)/usr/bin/$(APPNAME)-server
@$(ECHO) "- Removing: $(DESTDIR)/usr/lib/python*/*-packages/Thawab"
@$(RM) -rf $(DESTDIR)/usr/lib/python*/*-packages/Thawab
@$(ECHO) "- Removing: $(DESTDIR)/usr/lib/python*/*-packages/$(APPNAME)*"
@$(RM) -rf $(DESTDIR)/usr/lib/python*/*-packages/$(APPNAME)*
@$(ECHO) "- Removing: $(DESTDIR)/usr/share/$(APPNAME)"
@$(RM) -rf $(DESTDIR)/usr/share/$(APPNAME)
@$(ECHO) "- Removing: $(DESTDIR)/usr/*/share/locale/*/LC_MESSAGES/$(APPNAME).mo"
@$(RM) -f $(DESTDIR)/usr/*/share/locale/*/LC_MESSAGES/$(APPNAME).mo
@$(ECHO) "- Removing: $(DESTDIR)/usr/*/bin/$(APPNAME)"
@$(RM) -f $(DESTDIR)/usr/*/bin/$(APPNAME)-gtk
@$(RM) -f $(DESTDIR)/usr/*/bin/$(APPNAME)-server
@$(ECHO) "- Removing: $(DESTDIR)/usr/*/lib/python*/*-packages/Thawab"
@$(RM) -rf $(DESTDIR)/usr/*/lib/python*/*-packages/Thawab
@$(ECHO) "- Removing: $(DESTDIR)/usr/*/lib/python*/*-packages/$(APPNAME)*"
@$(RM) -rf $(DESTDIR)/usr/*/lib/python*/*-packages/$(APPNAME)*
@$(ECHO) "- Removing: $(DESTDIR)/usr/*/share/$(APPNAME)"
@$(RM) -rf $(DESTDIR)/usr/*/share/$(APPNAME)
@$(RM) -f $(DATADIR)/icons/hicolor/scalable/apps/$(APPNAME).svg
@$(RM) -f $(DATADIR)/icons/hicolor/*/apps/$(APPNAME).png;
@$(UPDATE_DESKTOP_DATABASE) --quiet $(DATADIR)/applications > /dev/null 2>&1 || :
@$(GTK_UPDATE_ICON_CACHE) --quiet $(DATADIR)/icons/hicolor > /dev/null 2>&1 || :
%.desktop: %.desktop.in pos
$(INTLTOOL_MERGE) -d po $< $@
clean:
@$(ECHO) "*** Cleaning..."
@$(MAKE) -C po clean
@$(ECHO) "- Removing: $(TARGETS)"
@$(RM) -f $(TARGETS)
@$(ECHO) "- Removing: locale build"
@$(RM) -rf locale build
@$(ECHO) "- Removing: *.pyc"
@$(RM) -f *.pyc
@$(ECHO) "- Removing: */*.pyc"
@$(RM) -f */*.pyc
@$(ECHO) "- Removing: $(APPNAME)-*.png"
@$(RM) -f $(APPNAME)-*.png
@$(ECHO) "- Removing Cache directories"
@$(RM) -f thawab-data/user.db
@$(RM) -rf thawab-data/cache
@$(RM) -rf thawab-data/index
@$(RM) -rf thawab-data/tmp
@$(RM) -rf thawab-data/db
@$(RM) -rf thawab-data/conf
==> thawab-4.1/TODO <==
In the name of Allah, the Most Gracious, the Most Merciful
- fix indexing after clearing the cache
- fix indexing of repeated jobs in the current session (reload search cache)
- add close button to child windows (dialogs) ....... Done
- fix tab titles
- fix FIXME lines
==> thawab-4.1/Thawab/__init__.py <==

==> thawab-4.1/Thawab/asyncIndex.py <==
# -*- coding: UTF-8 -*-
"""
The async threaded indexing class of thawab
Copyright © 2010, Muayyad Alsadi
Released under terms of Waqf Public License.
This program is free software; you can redistribute it and/or modify
it under the terms of the latest version Waqf Public License as
published by Ojuba.org.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
The Latest version of the license can be found on
"http://waqf.ojuba.org/license"
"""
from Queue import Queue
from threading import Thread, Lock
from time import sleep
class AsyncIndex():
def __init__(self, searchEngine, queueSize = 0, workers = 1):
"""
if number of workers>1 then queued jobs need not be executed in order
"""
self.searchEngine = searchEngine
self.workers_n = workers
self.running = 0
self.lock = Lock() # used to report running tasks correctly
self._q = Queue(queueSize)
self.start()
# we enqueue jobs like this
#for item in source(): self._q.put(item)
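# Example (a usage sketch; `searchEngine` is assumed to be an already-built
# whoosh search engine instance):
#   idx = AsyncIndex(searchEngine, queueSize = 0, workers = 2)
#   idx.queueIndexNew()              # enqueue one indexKitab job per unindexed kitab
#   idx.queue("indexKitab", "name")  # or enqueue a single job by hand
#   idx.join()                       # block until the queue drains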
def queueIndexNew(self):
"""
index all non-indexed
"""
self.searchEngine.indexingStart()
for n in self.searchEngine.th.getKitabList():
vr = self.searchEngine.getIndexedVersion(n)
if not vr:
self.queue("indexKitab", n)
def queue(self, method, *args, **kw):
"""
examples: queue("indexNew"); queue("indexKitab","kitab_name");
"""
self._q.put((method, args, kw))
def start(self):
self.keepworking = True
self.end_when_done = False
self.started = False
# here we create our thread pool of workers
for i in range(self.workers_n):
t = Thread(target = self._worker)
t.setDaemon(True)
t.start()
# sleep to make sure all threads are waiting for jobs (inside loop)
while not self.started: sleep(0.25)
def jobs(self, with_running = True):
"""
return number of queued jobs.
"""
if with_running:
return self._q.qsize()+self.running
else:
return self._q.qsize()
def join(self):
"""
block till queued jobs are done.
"""
return self._q.join()
def cancelQueued(self):
self.keepworking = False
self._q.join()
self.started = False
def endWhenDone(self):
self.end_when_done = True
self._q.join()
self.started = False
def _worker(self):
while self.keepworking:
self.started = True
# get a job from queue or block sleeping till one is available
item = self._q.get(not self.end_when_done)
if item:
self.lock.acquire()
self.running += 1
self.lock.release()
method, args, kw = item
f = getattr(self.searchEngine, method)
f(*args,**kw)
if self._q.qsize() == 0:
self.searchEngine.indexingEnd()
self._q.task_done()
self.lock.acquire()
self.running -= 1
self.lock.release()
elif self._q.empty():
if self.end_when_done:
self.keepworking = False
==> thawab-4.1/Thawab/baseSearchEngine.py <==
# -*- coding: UTF-8 -*-
"""
Copyright © 2009, Muayyad Alsadi
Released under terms of Waqf Public License.
This program is free software; you can redistribute it and/or modify
it under the terms of the latest version Waqf Public License as
published by Ojuba.org.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
The Latest version of the license can be found on
"http://waqf.ojuba.org/license"
"""
from meta import metaVrr
from okasha.utils import strverscmp
from tags import *
# TODO: use flags in the meta cache object to indicate that indexing was started for some kitab, so that if something goes wrong while indexing we can drop that kitab's index
class BaseSearchEngine:
def __init__(self, th, multithreading = False):
self.th = th
self.multithreading = multithreading
def getIndexedVersion(self, name):
"""
return a Version-Release string if in index, otherwise return None
"""
raise NotImplementedError
def queryIndex(self, queryString):
"""
return an iterable of field dicts
this method must be overridden in an implementation-specific way
"""
raise NotImplementedError
def indexingStart(self):
"""
should be called before any sequence of indexing Ops, reindexAll() calls this method automatically
"""
pass
def indexingEnd(self):
"""
should be called after a sequence of indexing Ops, reindexAll() calls this method automatically
"""
pass
def reload(self):
"""
called after committing changes to the index (e.g. adding to or dropping from it)
"""
pass
def dropKitabIndex(self, name):
"""
drop search index for a given Kitab name
you need to call indexingStart() before this and indexingEnd() after it
this method must be overridden in an implementation-specific way
"""
raise NotImplementedError
def addDocumentToIndex(self, name, vrr, nodeIdNum, title, content, tags):
"""
this method must be overridden in an implementation-specific way
"""
raise NotImplementedError
def dropAll(self):
raise NotImplementedError
# NOTE: the following implementation is buggy, since the index could contain documents whose kitab no longer exists
#t = []
#self.indexingStart()
#for i in self.th.getManagedUriList(): self.dropKitabIndex(i)
#self.indexingEnd()
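# A minimal subclass sketch (hypothetical, for illustration only; the real
# implementation is Thawab.whooshSearchEngine.SearchEngine):
#
#   class MemorySearchEngine(BaseSearchEngine):
#       def __init__(self, th):
#           BaseSearchEngine.__init__(self, th)
#           self.docs, self.versions = {}, {}
#       def getIndexedVersion(self, name):
#           return self.versions.get(name, None)
#       def addDocumentToIndex(self, name, vrr, nodeIdNum, title, content, tags):
#           self.versions[name] = vrr
#           self.docs[(name, nodeIdNum)] = (title, content, tags)
#       def dropKitabIndex(self, name):
#           self.versions.pop(name, None)
#           for k in [k for k in self.docs if k[0] == name]:
#               del self.docs[k]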
def dropChanged(self):
"""
drop index for all indexed kutub that got changed (updated or downgraded)
this is useful if followed by indexNew
no need to call indexingStart()/indexingEnd() around this
"""
self.indexingStart()
m = self.th.getMeta()
for n in self.th.getKitabList():
vr = self.getIndexedVersion(n)
if vr and vr != metaVrr(m.getLatestKitab(n)):
self.dropKitabIndex(n)
self.indexingEnd()
def dropOld(self):
"""
drop index for all indexed kutub that got updated
this is useful if followed by indexNew
no need to call indexingStart()/indexingEnd() around this
"""
self.indexingStart()
m = self.th.getMeta()
for n in self.th.getKitabList():
vr = self.getIndexedVersion(n)
if vr and strverscmp(vr,metaVrr(m.getLatestKitab(n))) > 0:
self.dropKitabIndex(n)
self.indexingEnd()
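# okasha.utils.strverscmp is expected to order version strings like GNU
# strverscmp(), e.g. "1.9" sorts before "1.10", which is presumably why it
# is used here instead of plain string comparison.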
def indexNew(self):
"""
index all non-indexed
no need to call indexingStart() indexingEnd() around this
"""
self.indexingStart()
for n in self.th.getKitabList():
vr = self.getIndexedVersion(n)
if not vr:
self.indexKitab(n)
self.indexingEnd()
def refresh(self):
"""
drop changed then index them along with new unindexed.
no need to call indexingStart() indexingEnd() around this
"""
self.dropChanged()
self.indexNew()
def reindexAll(self):
"""
no need to call indexingStart() indexingEnd() around this
"""
self.dropAll()
# FIXME: should be dropAll() then usual index not reindex
t = []
self.indexingStart()
for n in self.th.getKitabList():
self.indexKitab(n)
# if threading is supported by indexer it would look like
#if self.multithreading:
# for i in self.getManagedUriList():
# t.append(threading.Thread(target=self.indexKitab,args=(i,)))
# t[-1].start()
# for i in t: i.join()
self.indexingEnd()
def reindexKitab(self, name):
"""
you need to call indexingStart() before this and indexingEnd() after it
"""
self.dropKitabIndex(name)
self.indexKitab(name)
def __ix_nodeStart(self, node, name, vrr, iix):
# NOTE: benchmarks say append-then-join is faster than s += "foo"
tags = node.getTags()
tag_flags = node.getTagFlags()
# create new consuming main indexing fields [ie. headers]
# TODO: let loadToc use TAG_FLAGS_HEADER instead of hard-coding 'header'
#if node.getTagsByFlagsMask(TAG_FLAGS_HEADER):
# NOTE: for consistency, header is the only currently allowed tag having TAG_FLAGS_HEADER
if tag_flags & TAG_FLAGS_HEADER:
iix.main_f_node_idnums.append(node.idNum)
iix.main_f_content_index.append(len(iix.contents))
iix.main_f_tags_index.append(len(iix.tags))
# create new sub non-consuming indexing fields
if tag_flags & TAG_FLAGS_IX_FIELD:
iix.sub_f_node_idnums.append(node.idNum)
iix.sub_f_content_index.append(len(iix.contents))
iix.sub_f_tags_index.append(len(iix.tags))
# TODO: check for nodes that are not supposed to be indexed TAG_FLAGS_IX_SKIP
# append ix contents
iix.contents.append(node.getContent()) # TODO: append extra padding space if TAG_FLAGS_PAD_CONTENT
# append ix tags
iix.tags.extend(map(lambda t: tags[t] == None and t or u'.'.join((t,tags[t])),
node.getTagsByFlagsMask(TAG_FLAGS_IX_TAG)))
def __ix_nodeEnd(self, node, name, vrr, iix):
# index extra sub fields if any
if iix.sub_f_node_idnums and iix.sub_f_node_idnums[-1] == node.idNum:
n = iix.sub_f_node_idnums.pop()
i = iix.sub_f_content_index.pop()
j = iix.sub_f_tags_index.pop()
c = u"".join(iix.contents[i:])
T = u" ".join(iix.tags[j:])
del iix.tags[j:]
k = iix.main_f_content_index[-1] # the nearest header title index
N = iix.main_f_node_idnums[-1] # the nearest header node.idNum
# NOTE: the above two lines mean that sub ix fields should be children of some main field (header)
t = iix.contents[k]
self.addDocumentToIndex(unicode(name), vrr, N, t, c, T)
# index consuming main indexing fields if any
if iix.main_f_node_idnums and iix.main_f_node_idnums[-1] == node.idNum:
n = iix.main_f_node_idnums.pop()
i = iix.main_f_content_index.pop()
j = iix.main_f_tags_index.pop()
t = iix.contents[i]
c = (u"".join(iix.contents[i:])).strip()
del iix.contents[i:]
T = u" ".join(iix.tags[j:])
del iix.tags[j:]
self.addDocumentToIndex(unicode(name), vrr, n, t.strip(), c, T)
class __IIX(object):
"internal indexing object"
def __init__(self):
# independent arrays
self.contents = [] # array of contents to be indexed
self.tags = [] # array of ix tags
# main_f* parallel arrays
self.main_f_node_idnums = [] # array of node.idNum of consuming ix fields (ie. header)
self.main_f_content_index = [] # array of the starting index in self.contents for each main ix field (ie. header)
self.main_f_tags_index = [] # array of the starting index in self.tags for each main ix field (ie. header)
# sub_f* parallel arrays
self.sub_f_node_idnums = [] # array of node.idNum for each sub ix field
self.sub_f_content_index = [] # array of the starting index in self.contents for each sub ix field
self.sub_f_tags_index = [] # array of the starting index in self.tags for each sub ix field
# TODO: benchmark which is faster: parallel arrays or small tuples sub_field = (idNum, content_i, tag_i)
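# Illustration (hypothetical values): after __ix_nodeStart() visits a header
# node with idNum = 7 when 3 contents and 2 tags were already collected, the
# parallel arrays line up as
#   main_f_node_idnums   = [..., 7]
#   main_f_content_index = [..., 3]   # the header's own content is contents[3]
#   main_f_tags_index    = [..., 2]
# so __ix_nodeEnd() can slice contents[3:] / tags[2:] to build the document.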
def indexKitab(self, name):
"""
create search index for a given Kitab name
NOTE: you need to call indexingStart() before this and indexingEnd() after it
"""
#print "creating index for kitab with name:", name
ki = self.th.getKitab(name)
self.th.getMeta().setIndexedFlags(ki.uri, 1)
vrr = metaVrr(ki.meta)
iix = self.__IIX()
ki.root.traverser(3,
self.__ix_nodeStart,
self.__ix_nodeEnd,
name,
vrr,
iix)
self.th.getMeta().setIndexedFlags(ki.uri, 2)
==> thawab-4.1/Thawab/core.py <==
# -*- coding: UTF-8 -*-
"""
The core classes of thawab
Copyright © 2008, Muayyad Alsadi
Released under terms of Waqf Public License.
This program is free software; you can redistribute it and/or modify
it under the terms of the latest version Waqf Public License as
published by Ojuba.org.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
The Latest version of the license can be found on
"http://waqf.ojuba.org/license"
"""
import sys, os, os.path, sqlite3, re
import threading
from glob import glob
from itertools import imap,groupby
from tempfile import mkstemp
from StringIO import StringIO
from xml.sax.saxutils import escape, unescape, quoteattr # for xml rendering
from dataModel import *
from tags import *
from meta import MCache, metaDict2Hash, prettyId, makeId, metaVrr
from userDb import UserDb
from platform import guess_prefixes
from whooshSearchEngine import SearchEngine
from asyncIndex import AsyncIndex
from othman.core import othmanCore
from okasha.utils import ObjectsCache, fromFs, toFs
th_ext = u'.ki'
th_ext_glob = u'*.ki'
othman = othmanCore()
class ThawabMan (object):
def __init__(self, prefixes=None, isMonolithic = True, indexerQueueSize = 0):
"""Create a new Thawab instance given a user writable directory and an optional system-wide read-only directory
prefixes a list of directories all are read-only except the first
the first writable directory can be
os.path.expanduser('~/.thawab')
os.path.join(os.path.dirname(sys.argv[0]), '..', 'data')
isMonolithic = True if we should use locks and reconnect to sqlite
indexerQueueSize is the size of threaded index queue (0 infinite, -1 disabled)
the first thing you should do is to call loadMCache()
"""
if not prefixes:
prefixes = guess_prefixes()
try:
if not os.path.isdir(prefixes[0]):
os.makedirs(prefixes[0])
except OSError:
raise
self.prefixes = filter(lambda i:os.path.isdir(i),
[os.path.realpath(os.path.abspath(p)) for p in prefixes])
# make sure it's unique
p = self.prefixes[0]
s = set(self.prefixes[1:])
if p in s:
s.remove(p)
if len(s) < len(self.prefixes) - 1:
self.prefixes = [p] + sorted(s)
self.__meta = None
self.conf = self.prase_conf()
self.assertManagedTree()
self.searchEngine = SearchEngine(self)
self.user_db = UserDb(self, os.path.join(self.prefixes[0], 'user.db'))
if indexerQueueSize >= 0:
self.asyncIndexer = AsyncIndex(self.searchEngine, indexerQueueSize)
else:
self.asyncIndexer = None
self.isMonolithic = isMonolithic
if not self.isMonolithic:
import threading
lock1 = threading.Lock()
else:
lock1 = None
self.kutubCache = ObjectsCache(lock = lock1)
def prase_conf(self):
r = {}
fn = os.path.join(self.prefixes[0], 'conf', 'main.txt')
if not os.path.exists(fn):
return {}
try:
f = open(fn)
t = f.readlines()
f.close()
except:
return {}
for l in t:
a = l.strip().split(" = ",1)
if len(a) != 2:
continue
r[a[0].strip()] = a[1].strip()
return r
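# conf/main.txt is parsed as flat "key = value" lines, one pair per line;
# e.g. (hypothetical keys, the parser itself does not restrict them):
#   theme = default
#   lang = ar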
def assertManagedTree(self):
"""create the hierarchy inside the user-managed prefix
# db contains Kitab files [.thawab]
# index contains search index
# conf application configuration
# cache contains the metadata cache for all containers"""
P = self.prefixes[0]
if not os.access(P, os.W_OK):
return False
for i in ['db','index','conf','cache', 'tmp', 'themes']:
p = os.path.join(P,i)
if not os.path.isdir(p):
os.makedirs(p)
return True
def mktemp(self):
h, fn = mkstemp(th_ext, 'THAWAB_' ,os.path.join(self.prefixes[0], 'tmp'))
return Kitab(fn, True)
def getCachedKitab(self, uri):
"""
try to get a kitab by uri from cache,
if it's not in the cache, it will be opened and cached
"""
ki = self.kutubCache.get(uri)
if not ki:
ki = self.getKitabByUri(uri)
if ki:
self.kutubCache.append(uri, ki)
#elif not self.isMonolithic: ki.connect() # FIXME: no longer needed, kept to trace other usage of isMonolithic
return ki
def getCachedKitabByNameV(self, kitabNameV):
a = kitabNameV.split(u'-')
l = len(a)
if l == 1:
m = self.getMeta().getLatestKitab(kitabNameV)
elif l == 2:
m = self.getMeta().getLatestKitabV(*a)
else:
m = self.getMeta().getLatestKitabVr(*a)
if m:
return self.getCachedKitab(m['uri'])
return None
def getUriByKitabName(self,kitabName):
"""
return uri for the latest kitab with the given name
"""
m = self.getMeta().getLatestKitab(kitabName)
if not m:
return None
return m['uri']
def getKitab(self,kitabName):
m = self.getMeta().getLatestKitab(kitabName)
if m:
return Kitab(m['uri'], th = self, meta = m)
return None
def getKitabByUri(self,uri):
m = self.getMeta().getByUri(uri)
if m:
return Kitab(uri, th = self, meta = m)
return Kitab(uri, th=self)
def getKitabList(self):
"""
return a list of managed kitab's name
"""
return self.getMeta().getKitabList()
def getManagedUriList(self):
"""list of all managed uri (absolute filenames for a Kitab)
this is low level as the user should work with kitabName, title, and rest of meta data"""
if self.__meta:
return self.__meta.getUriList()
r = []
for i in self.prefixes:
a = glob(toFs(os.path.join(fromFs(i),u'db',th_ext_glob)))
p = map(lambda j: fromFs(j), a)
r.extend(p)
return r
def getMeta(self):
if not self.__meta:
self.loadMeta()
return self.__meta
def loadMeta(self):
self.__meta = None
p = os.path.join(self.prefixes[0],'cache','meta.db')
self.__meta = MCache(p, self.getManagedUriList())
return self.__meta
def reconstructMetaIndexedFlags(self):
# NOTE: getMeta is not used because we want to make sure we are using a fresh one
m = self.loadMeta()
l1 = m.getIndexedList()
l2 = m.getUnindexedList()
# NOTE: Dirty are kept as is
#l3 = m.getDirtyIndexList()
for i in l1:
v = self.searchEngine.getIndexedVersion(i['kitab'])
# mark as unindexed
if not v or metaVrr(i) != v:
m.setIndexedFlags(i['uri'], 0)
for i in l2:
v = self.searchEngine.getIndexedVersion(i['kitab'])
if v and metaVrr(i) == v:
# mark as indexed if same version
m.setIndexedFlags(i['uri'])
class KitabCursor:
"""
an object used to do a sequence of SQL operations
"""
def __init__(self, ki , *args, **kw):
self.ki = ki
self.__is_tailing = False
self.__is_tmp = False
self.__tmp_str = ''
self.__parents=[]
self.__c = None
self.__last_go = -1
if args or kw:
self.seek(*args, **kw)
def __lock(self):
# TODO: this is just a place holders, could be used to do "BEGIN TRANS"
pass
def __unlock(self):
pass
def seek(self, parentNodeIdNum = -1,nodesNum = -1):
"""
should be called before concatenating nodes, all descendants will be dropped
where:
parentNodeIdNum - the parent below which the concatenation will begin, -1 at the tail
nodesNum - number of nodes to be concatenated, -1 for unknown open number
seek()
appendNode(parentNodeIdNum, content, tags)
appendNode(parentNodeIdNum, content, tags)
...
flush()
"""
self.__lock()
self.__is_tailing = False
self.__is_tmp = False
self.__tmp_str = ''
self.__parents = []
self.__c = self.ki.cn().cursor()
self.__c.execute('BEGIN TRANSACTION')
if parentNodeIdNum != -1:
self.dropDescendants(parentNodeIdNum)
if nodesNum == -1:
self.__is_tmp = True
self.__tmp_str = 'tmp'
else:
# FIXME: make sure
raise IndexError, "not implemented"
else:
self.__parents = [self.ki.root]
r = self.__c.execute(SQL_GET_LAST_GLOBAL_ORDER).fetchone()
if r:
self.__last_go = r[0]
else:
self.__last_go = 0
self.__is_tailing = True
def flush(self):
"""Called after the last appendNode"""
if self.__is_tmp:
# TODO: implement using "insert into ... select tmp_nodes ...;"
raise IndexError, "not implemented"
self.__c.execute('END TRANSACTION')
#self.__c.execute('COMMIT')
#self.ki.cn.commit() # FIXME: is this needed ?
self.__unlock()
def appendNode(self, parentNode, content, tags):
parentNodeIdNum = parentNode.idNum
while(self.__parents[-1].idNum != parentNodeIdNum):
self.__parents.pop()
new_go = self.__last_go + self.ki.inc_size
newid = self.__c.execute(SQL_APPEND_NODE[self.__is_tmp],
(content,
self.__parents[-1].idNum,
new_go,
self.__parents[-1].depth + 1)).lastrowid
self.__last_go = new_go
node = Node(kitab = self.ki,
idNum = newid,
parent = self.__parents[-1].idNum,
depth = self.__parents[-1].depth + 1)
node.applyTags(tags)
self.__parents.append(node)
return node
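# globalOrder is allocated in steps of ki.inc_size (1 << 10 by default),
# leaving gaps between consecutive nodes so rows can later be inserted
# between siblings without renumbering the whole kitab.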
def dropDescendants(self,parentNodeIdNum, withParent = False):
"""remove all child nodes going deep at any depth, and optionally with their parent"""
o1, o2 = self.ki.getSliceBoundary(parentNodeIdNum)
c = self.__c
if not c:
c = self.ki.cn().cursor()
if o2 == -1:
c.execute(SQL_DROP_TAIL_NODES[withParent],(o1,))
else:
c.execute(SQL_DROP_DESC_NODES[withParent],(o1,o2))
class Kitab(object):
"""this class represents a book or an article ...etc."""
def __init__(self,uri, is_tmp = False, th = None, meta = None):
"""
open the Kitab pointed by uri (or try to create it)
is_tmp should be set to True when we are creating a new kitab from scratch in a temporary file
th is the ThawabMan instance to which this book belongs
meta is meta cache entry of this kitab
Note: don't rely on meta having uri, mtime, flags unless th is set (use uri property instead)
"""
self._cn_h = {} # per-thread sqlite connection
# node generators
self.grouped_rows_to_node = (self.grouped_rows_to_node0,
self.grouped_rows_to_node1,
self.grouped_rows_to_node2,
self.grouped_rows_to_node3)
self.row_to_node = (self.row_to_node0, self.row_to_node1)
# TODO: do we need a mode = r|w ?
self.uri = uri
self.is_tmp = is_tmp
self.th = th
self.meta = meta
if not meta: self.getMCache()
if meta and meta.get('originalKitab',None):
self.originalKi = self.th.getCachedKitabByNameV(meta['originalKitab'] + \
u"-" + \
meta['originalVersion'])
else: self.originalKi = None
# the logic to open the uri goes here
# check if fn exists, if not then set the flag sql_create_schema
if is_tmp or not os.path.exists(toFs(uri)):
sql_create_schema = True
else:
sql_create_schema = False
cn = self.cn()
# FIXME: do we really need this
cn.create_function("th_enumerate", 0, self.rowsEnumerator)
# NOTE: we have a policy: no saving of cursors in object attributes, for thread safety
c = cn.cursor()
self.toc = KitabToc(self)
# private
self.__tags = {} # a hash by of tags data by tag name
self.__tags_loaded = False
self.__counter = 0 # used to renumber rows
self.inc_size = 1 << 10
# TODO: make a decision, should the root node be saved in SQL,
# if so, add a lower-bound check to Kitab.getSliceBoundary() and an exception to Kitab.getNodeByIdNum()
self.root = Node(kitab = self,
idNum = 0,
parent = -1,
depth = 0,
content = '',
tags = {})
if sql_create_schema:
c.executescript(SQL_DATA_MODEL)
# create standard tags
for t in STD_TAGS_ARGS:
c.execute(SQL_ADD_TAG, t)
def cn(self):
"""
return an sqlite connection for the current thread
"""
n = threading.current_thread().name
if self._cn_h.has_key(n):
r = self._cn_h[n]
else:
r = sqlite3.connect(self.uri, isolation_level = None)
self._cn_h[n] = r
return r
def getMCache(self):
if not self.th:
return None # needs a manager
if self.meta:
return self.meta
self.meta = self.th.getMeta().load_from_uri(self.uri)
return self.meta
def setMCache(self, meta):
# TODO: add more checks
a = meta.get('author', None)
oa = meta.get('originalAuthor', None)
if not oa and not a:
meta['author'] = '_unset'
if not a and oa != None:
meta['author'] = oa
if not oa and a != None:
meta['originalAuthor'] = a
y = meta.get('year', None)
oy = meta.get('originalYear', None)
if not y and oy != None:
meta['year'] = oy
if not oy and y != None:
meta['originalYear'] = y
if not meta.get('cache_hash',None):
meta['cache_hash'] = metaDict2Hash(meta)
self.meta = meta
self.cn().execute(SQL_MCACHE_SET, meta)
###################################
# retrieving data from the Kitab
###################################
def getTags(self):
if not self.__tags_loaded:
self.reloadTags()
return self.__tags
def reloadTags(self):
self.__tags = dict(map(lambda r: (r[0],r[1:]),
self.cn().execute(SQL_GET_ALL_TAGS).fetchall()))
self.__tags_loaded = True
def getNodeByIdNum(self, idNum, load_content = False):
if idNum <= 0:
return self.root
r = self.cn().execute(SQL_GET_NODE_BY_IDNUM[load_content],
(idNum,)).fetchone()
if not r:
raise IndexError, "idNum not found"
return self.row_to_node[load_content](r)
def getNodesByTagValueIter(self, tagname, value, load_content = True, limit = 0):
"""an iter that retrieves all the nodes tagged with tagname having the given value"""
sql = SQL_GET_NODES_BY_TAG_VALUE[load_content]
if type(limit) == int and limit > 0:
sql += " LIMIT " + str(limit)
it = self.cn().execute(sql, (tagname, value,))
return imap(self.row_to_node[load_content], it)
def nodeFromId(self, i, load_content = False):
"""
get a node from an id, where i is one of the following:
* an integer (just call getNodeByIdNum)
* a string prefixed with "_i" followed by the idNum
* the value of a "header" tag param
"""
if type(i) == int:
return self.getNodeByIdNum(i, load_content)
elif i.startswith('_i'):
try:
j = int(i[2:])
except ValueError:
return None
return self.getNodeByIdNum(j, load_content)
else:
for node in self.getNodesByTagValueIter("header", i, load_content, 1):
return node
return None
def seek(self, *args, **kw):
"""
short hand for creating a cursor object and seeking it, returns a new cursor object used for manipulation ops
"""
return KitabCursor(self, *args, **kw)
def getSliceBoundary(self, nodeIdNum):
"""return a tuble of o1,o2 where:
o1: is the globalOrder of the given Node
o2: is the globalOrder of the next sibling of the given node, -1 if unbounded
all the descendants of the given nodes have globalOrder belongs to the interval (o1,o2)
"""
# this is a private method used by dropDescendants
if nodeIdNum == 0:
return 0,-1
cn = self.cn()
r = cn.execute(SQL_GET_GLOBAL_ORDER,(nodeIdNum,)).fetchone()
if not r:
raise IndexError
o1 = r[0]
depth = r[1]
r = cn.execute(SQL_GET_DESC_UPPER_BOUND, (o1, depth)).fetchone()
if not r:
o2 = -1
else:
o2 = r[0]
return o1, o2
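# Illustration (hypothetical globalOrder values): for a node with
# globalOrder 1024 at depth 2 whose next sibling-or-uncle starts at
# globalOrder 4096, getSliceBoundary() returns (1024, 4096), and every
# descendant has a globalOrder strictly inside that interval.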
# node generators
def row_to_node0(self,r):
return Node(kitab=self,
idNum = r[0],
parent = r[1],
depth = r[2],
globalOrder = r[3])
def row_to_node1(self,r):
return Node(kitab=self,
idNum = r[0],
parent = r[1],
depth = r[2],
globalOrder = r[3],
content = r[4])
def grouped_rows_to_node0(self,l):
r = list(l[1])
return Node(kitab=self,
idNum = r[0][0],
parent = r[0][1],
depth = r[0][2],
globalOrder = r[0][3])
def grouped_rows_to_node1(self,l):
r = list(l[1])
return Node(kitab=self,
idNum = r[0][0],
parent = r[0][1],
depth = r[0][2],
globalOrder = r[0][3],
content = r[0][4])
def grouped_rows_to_node2(self,l):
r = list(l[1])
return Node(kitab=self,
idNum = r[0][0],
parent = r[0][1],
depth = r[0][2],
globalOrder = r[0][3],
tags = dict(map(lambda i: (i[4],i[5]),r)),
tag_flags = reduce(lambda a,b: a|b[6],r,0))
def grouped_rows_to_node3(self,l):
r = list(l[1])
return Node(kitab=self,
idNum = r[0][0],
parent = r[0][1],
depth = r[0][2],
globalOrder = r[0][3],
content = r[0][4],
tags = dict(map(lambda i: (i[5],i[6]),r)),
tag_flags = reduce(lambda a,b: a|b[7],r, 0))
def getChildNodesIter(self, idNum, preload = WITH_CONTENT_AND_TAGS):
"""
an iter that retrieves all direct children of a node by its IdNum,
just one level deeper, content and tags will be pre-loaded by default.
where preload can be:
0 WITH_NONE
1 WITH_CONTENT
2 WITH_TAGS
3 WITH_CONTENT_AND_TAGS
"""
it = self.cn().execute(SQL_GET_CHILD_NODES[preload],(idNum,))
# will work but having the next "if" is faster
# return imap(self.grouped_rows_to_node[preload], groupby(it,lambda i:i[0]))
if preload & 2:
return imap(self.grouped_rows_to_node[preload],
groupby(it,lambda i:i[0]))
return imap(self.row_to_node[preload], it)
def getTaggedChildNodesIter(self, idNum, tagName, load_content = True):
"""
an iter that retrieves all direct children of a node having tagName by its IdNum,
just one level deeper, content will be preloaded by default.
"""
it = self.cn().execute(SQL_GET_TAGGED_CHILD_NODES[load_content], (idNum,tagName,))
return imap(self.row_to_node[load_content], it)
# FIXME: do we really need this
def rowsEnumerator(self):
"""private method used internally"""
self.__counter += self.inc_size
return self.__counter
class KitabToc(object):
def __init__(self, kitab):
self.ki = kitab
def breadcrumbs(self, node):
l = []
n = node
p = self.ki.getNodeByIdNum(n.parent, True)
while(p.idNum):
# TODO: do some kind of cache like this if p.idNum in cache: l = cached + l else: ...
l.insert(0, (p.idNum, p.getContent()))
p = self.ki.getNodeByIdNum(p.parent, True)
return l
def getNodePrevUpNextChildrenBreadcrumbs(self, i):
"""
an optimized way to get a tuple of node, prev, up, next, children, breadcrumbs
where i is nodeIdNum or preferably the node it self
"""
if type(i) == int:
n = self.ki.getNodeByIdNum(i, True)
elif isinstance(i,basestring):
n = self.ki.nodeFromId(i, True)
else:
n = i
return (n,
self.prev(n),
self.ki.getNodeByIdNum(n.parent, True),
self.next(n),
self.children(n.idNum),
self.breadcrumbs(n))
def children(self, i):
"""
return list of Node that are direct children of i
where i is idNum of the node
"""
return list(self.ki.getTaggedChildNodesIter(i, 'header', True))
def up(self, i):
if type(i) == int:
n = self.ki.getNodeByIdNum(i, True)
else:
n = i
return self.ki.getNodeByIdNum(n.parent, True)
def prev(self, i):
if type(i) == int:
n = self.ki.getNodeByIdNum(i, True)
else:
n = i
return n.getPrevTaggedNode('header')
def next(self, i):
if type(i) == int:
n = self.ki.getNodeByIdNum(i, True)
else:
n = i
return n.getNextTaggedNode('header')
class Node (object):
"""
A node class returned by some Kitab methods, avoid creating your own
it has the following properties:
kitab the Kitab instance to which this node belongs, None if floating
parent the parent node idNum, -1 if root
idNum the node idNum, -1 if floating or not yet saved
depth the depth of node, -1 for floating, 0 for root
tags the applied tags, {tagname:param,...}, None if not loaded
and the following methods:
getContent() return node's content, loading it from back-end if needed
reloadContent() force reloading content
unloadContent() unload content to save memory
"""
_footnote_s_re = re.compile(r'(\^\[([^\[\]]+)\])', re.M)
_footnote_t_re = re.compile(r'^( \* *\(([^\(\)]+)\))', re.M)
_href_named_re = re.compile(r'\[\[([^ \[\]]+) ([^\[\]]+)\]\]', re.M)
_href_re = re.compile(r'\[\[([^ \[\]]+)\]\]', re.M)
def __init__(self, **args):
self.kitab = args.get('kitab')
self.parent = args.get('parent', -1)
self.idNum = args.get('idNum', -1)
self.depth = args.get('depth', -1)
self.globalOrder = args.get('globalOrder', -1)
# TODO: should globalOrder be a property?
try:
self.__content = args['content']
self.__content_loaded = True
except KeyError:
self.__content_loaded = False
# TODO: should tags be called tagDict
try:
self.__tags=args['tags']
self.__tags_loaded = True
except KeyError:
self.__tags_loaded = False
try:
self.__tag_flags = args['tag_flags']
self.__tag_flags_loaded = True
except KeyError:
self.__tag_flags_loaded = False
# tags related methods
def getTags(self):
"""
return tag dictionary applied to the node, loading it from back-end if needed
"""
if not self.__tags_loaded:
self.reloadTags()
return self.__tags
def getTagFlags(self):
"""
return the "or" summation of flags of all tags applied to this node
"""
if not self.__tag_flags_loaded:
self.reloadTags()
return self.__tag_flags
def getTagsByFlagsMask(self, mask):
"""
return tag names having flags masked with mask,
used like this node.getTagsByFlagsMask(TAG_FLAGS_IX_TAG)
"""
# return filter(lambda t: STD_TAGS_HASH[t][2]&mask, self.getTags())
return filter(lambda t: self.kitab.getTags()[t][0]&mask, self.getTags())
def reloadTags(self):
"""force reloading of Tags"""
self.__tags = dict(self.kitab.cn().execute(SQL_GET_NODE_TAGS,(self.idNum,)).fetchall())
self.__tags_loaded = True
T = map(lambda t: self.kitab.getTags()[t][0], self.__tags.keys())
self.__tag_flags = reduce(lambda a,b: a|b,T, 0)
self.__tag_flags_loaded = True
def unloadTags(self):
"""unload content to save memory"""
self.__tags_loaded = False
self.__tags = None
self.__tag_flags = 0
self.__tag_flags_loaded = False
# content related methods
def getContent(self):
"""return node's content, loading it from back-end if needed"""
if not self.__content_loaded:
self.reloadContent()
return self.__content
def reloadContent(self):
"""force reloading content"""
r = self.kitab.cn().execute(SQL_GET_NODE_CONTENT,(self.idNum,)).fetchone()
if not r:
self.__content = None
self.__content_loaded = False
raise IndexError, 'node not found, could be a floating node'
self.__content = r[0]
self.__content_loaded = True
def unloadContent(self):
"""unload content to save memory"""
self.__content_loaded = False
self.__content = None
# tags editing
def tagWith(self,tag,param = None):
"""
apply a single tag to this node,
if the node is already tagged with it, just update the param
the tag should already be in the kitab.
"""
r = self.kitab.cn().execute(SQL_TAG,(self.idNum,param,tag)).rowcount
if not r:
raise IndexError, "tag not found"
def applyTags(self,tags):
"""
apply a set of tags to this node,
if the node is already tagged with them, just update the params
each tag should already be in the kitab.
"""
for k in tags:
self.tagWith(k, tags[k])
def clearTags(self):
"""clear all tags applyed to this node"""
self.kitab.cn().execute(SQL_CLEAR_TAGS_ON_NODE, (self.idNum,))
def getPrevTaggedNode(self, tagName, load_content = True):
if self.idNum <= 0:
return None
r = self.kitab.cn().execute(SQL_GET_PREV_TAGGED_NODE[load_content],
(self.globalOrder, tagName)).fetchone()
if not r:
return None
return self.kitab.row_to_node[load_content](r)
def getNextTaggedNode(self, tagName, load_content = True):
r = self.kitab.cn().execute(SQL_GET_NEXT_TAGGED_NODE[load_content],
(self.globalOrder, tagName)).fetchone()
if not r:
return None
return self.kitab.row_to_node[load_content](r)
# methods that give nodes
def childrenIter(self, preload = WITH_CONTENT_AND_TAGS):
"""
an iter that retrieves all direct children of this node,
just one level deeper, content and tags will be pre-loaded by default.
where preload can be:
0 WITH_NONE
1 WITH_CONTENT
2 WITH_TAGS
3 WITH_CONTENT_AND_TAGS
"""
return self.kitab.getChildNodesIter(self.idNum, preload)
def descendantsIter(self,preload = WITH_CONTENT_AND_TAGS, upperBound = -1):
"""
an iter that retrieves all the descendants of this node,
going deeper in a flat-fashion, pre-loading content and tags by default.
where preload can be:
0 WITH_NONE
1 WITH_CONTENT
2 WITH_TAGS
3 WITH_CONTENT_AND_TAGS
"""
o1, o2 = self.kitab.getSliceBoundary(self.idNum)
if upperBound != -1 and (o2 == -1 or o2 > upperBound):
o2 = upperBound
if o2 == -1:
sql = SQL_GET_UNBOUNDED_NODES_SLICE[preload]
args = (o1,)
else:
sql = SQL_GET_NODES_SLICE[preload]
args = (o1, o2)
it = self.kitab.cn().execute(sql, args)
# will work but having the next "if" is faster
# return imap(self.kitab.grouped_rows_to_node[preload], groupby(it,lambda i:i[0]))
if preload & 2:
return imap(self.kitab.grouped_rows_to_node[preload],
groupby(it, lambda i:i[0]))
return imap(self.kitab.row_to_node[preload], it)
def childrenWithTagNameIter(self, tagname, load_content = True):
"""
an iter that retrieves all direct children tagged with tagname, just one level deeper
"""
it = self.kitab.cn().execute(SQL_GET_TAGGED_CHILD_NODES[load_content],
(self.idNum, tagname))
return imap(self.kitab.row_to_node[load_content], it)
def descendantsWithTagNameIter(self, tagname,load_content = True):
"""
an iter that retrieves all the descendants tagged with tagname,
going deeper in a flat-fashion
"""
o1, o2 = self.kitab.getSliceBoundary(self.idNum)
if o2 == -1:
sql = SQL_GET_UNBOUNDED_TAGGED_NODES_SLICE[load_content]
args=(tagname, o1,)
else:
sql = SQL_GET_TAGGED_NODES_SLICE[load_content]
args=(tagname, o1, o2)
it = self.kitab.cn().execute(sql, args)
return imap(self.kitab.row_to_node[load_content], it)
# recursive non-optimized code
# def traverser_(self, nodeStart, nodeEnd,preload = WITH_CONTENT_AND_TAGS,*args):
# """recursively traverser nodes calling nodeStart and nodeEnd"""
# nodeStart(self,*args)
# for i in self.childrenIter(preload):
# i.traverser_(nodeStart,nodeEnd,*args)
# nodeEnd(self,*args)
def traverser(self, preload, nodeStart, nodeEnd, *args):
"""
recursively traverse nodes, calling nodeStart and nodeEnd
Note: the implementation is a non-recursive optimized code with a single query
"""
dummy = lambda *args: None
if not nodeStart:
nodeStart = dummy
if not nodeEnd:
nodeEnd = dummy
stack = [self]
nodeStart(self, *args)
for i in self.descendantsIter(preload):
while(i.parent != stack[-1].idNum):
nodeEnd(stack[-1], *args)
stack.pop()
stack.append(i)
nodeStart(i, *args)
while(stack):
nodeEnd(stack[-1], *args)
stack.pop()
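# Example (a sketch; `ki` is an assumed already-open Kitab):
#   ids = []
#   ki.root.traverser(WITH_NONE, lambda n, acc: acc.append(n.idNum), None, ids)
#   # ids now holds every node idNum in document order, starting at the root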
def traverserWithStack(self, preload, nodeStart, nodeEnd, *args):
"""
recursively traverse nodes, calling nodeStart
and nodeEnd, passing the nodes stack to them
Note: the implementation is a non-recursive optimized code with a single query
"""
dummy = lambda *args: None
if not nodeStart:
nodeStart = dummy
if not nodeEnd:
nodeEnd = dummy
stack = [self]
nodeStart(stack, *args)
for i in self.descendantsIter(preload):
while(i.parent != stack[-1].idNum):
nodeEnd(stack, *args)
stack.pop()
stack.append(i)
nodeStart(stack, *args)
while(stack):
nodeEnd(stack, *args)
stack.pop()
def sTraverser(self, preload, nodeStart, nodeEnd,upperBound = -1, *args):
"""
recursively traverse nodes, calling nodeStart and nodeEnd
and concatenating the return values
"""
stack = [self]
s = nodeStart(self, *args)
for i in self.descendantsIter(preload, upperBound):
while(i.parent != stack[-1].idNum):
s += nodeEnd(stack[-1],*args)
stack.pop()
s += nodeStart(i, *args)
stack.append(i)
while(stack):
s += nodeEnd(stack[-1], *args)
stack.pop()
return s
def toWiki(self):
"""export the node and its descendants into a wiki-like string"""
return self.sTraverser(3,
lambda n: n.getTags().has_key('header') and \
''.join((u'\n',
((7-n.depth)*u' = '),
n.getContent(),
((7-n.depth)*u' = '),u'\n')) or \
n.getContent(),
lambda n: u'')
def toHtml_cb(self, n):
# trivial implementation:
#return n.getTags().has_key('header') and \
# u'<h%d>%s</h%d>\n' % (n.depth, escape(n.getContent()), n.depth) or \
# u"<p>%s</p>" % escape(n.getContent())
r = u""
if n.getTags().has_key('header'):
r = u'<h%d>%s</h%d>\n' % (n.depth, escape(n.getContent()), n.depth)
else:
r = u"<p>%s</p>" % self._wiki2html(escape(n.getContent()))
if n.getTags().has_key('quran.tafseer.ref'):
sura, aya, na = n.getTags()['quran.tafseer.ref'].split('-')
#r += u'<div class="quran">نص من القرآن %s:%s:%s</div>\n\n' % (sura, aya, na)
# tanween fix u'\u064E\u064E', u'\u064E\u200C\u064E'
r += u'<div class="quran">%s</div>\n\n' % \
"".join(map(lambda i: (i[0] + u'\u202C').replace(u' \u06dd',
u' \u202D\u06dd'),
othman.getAyatIter(othman.ayaIdFromSuraAya(int(sura),
int(aya)),
int(na))))
if n.kitab and n.kitab.th:
if n.kitab.originalKi and n.getTags().has_key('embed.original.section'):
xref = n.getTags()['embed.original.section']
matnKi = n.kitab.originalKi
embd_class = "quote_orignal"
embd = u"تعليقا على"
elif n.getTags().has_key('embed.section.ref'):
try:
matn, xref = n.getTags()['embed.section.ref'].split(u'/', 1)
except ValueError:
embd = None # malformed reference, skip embedding
else:
matnKi = n.kitab.th.getCachedKitabByNameV(matn)
embd_class = "quote_external"
embd = u"اقتباس"
else:
embd = None
if embd:
matnNode = list(matnKi.getNodesByTagValueIter("header", xref, False, 1))
if matnNode:
matnNode = matnNode[0]
s = u'<div class="%s">%s:<br/>' % (embd_class, embd)
nx = matnKi.toc.next(matnNode)
if nx:
ub = nx.globalOrder
else:
ub = -1
# pass an option to disable embed to avoid endless recursion
s += matnNode.toHtml(upperBound = ub)
s += u' -- من كتاب <a href="/view/%s/%s">%s</a><br/>' % (matnKi.meta['kitab'],
"_i" + str(matnNode.idNum),
prettyId(matnKi.meta['kitab']))
s += u'</div>'
r += s
return r
def _wiki2html(self, txt):
# TODO: split from "^__________$"
# FIXME: when an embedded quoted section gets footnotes there would be duplicated ids
txt = self._footnote_s_re.sub(r'''<sup class="fn_pointer"><a href="#fn_\2" id="fn_source_\2">(\2)</a></sup>''', txt)
txt = self._footnote_t_re.sub(r'''<a class="fn_target" href="#fn_source_\2" id="fn_\2"> * (\2)</a>''', txt)
txt = self._href_named_re.sub(r'''<a href="\1">\2</a>''', txt)
txt = self._href_re.sub(r'''<a href="\1">\1</a>''', txt)
return txt
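# Example: u"see [[http://ojuba.org ojuba]] ^[1]" comes out roughly as
#   u'see <a href="http://ojuba.org">ojuba</a> <sup ...><a ...>(1)</a></sup>'
# (the exact footnote anchor attributes are a best-effort sketch)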
def toHtml(self, upperBound = -1):
"""export the node and its descendants into an HTML string"""
# TODO: escape special chars
# TODO: replace ^$ with '<br/>' or '<p/>'
# trivial implementation:
#return self.sTraverser(3, lambda n: n.getTags().has_key('header') and u'<h%d>%s</h%d>\n' % (n.depth, escape(n.getContent()), n.depth) or u"<p>%s</p>" % escape(n.getContent()), lambda n: u'', upperBound)
return self.sTraverser(3, self.toHtml_cb, lambda n: u'', upperBound)
def toText(self, upperBound = -1):
"""
export node and its descendants into plain text string,
can be used for generating excerpts of search results
"""
return self.sTraverser(3, lambda n: n.getContent(), lambda n: u'', upperBound)
def __toXmlStart(self, node, ostream):
margin = u' ' * node.depth
tags = u' '.join(map(lambda d: d[1] == None and \
d[0] or \
d[0] + u'=' + quoteattr(unicode(d[1])),
node.getTags().items()))
ostream.write(u''.join((margin, u'<node ', tags, u'>')))
ostream.write(escape(node.getContent()))
ostream.write(u'\n')
def __toXmlEnd(self, node, ostream):
margin = u' ' * node.depth
ostream.write(margin + u'</node>\n')
def toXml(self,ostream):
"""
export the node and its descendants into an xml-like string using ostream as output
"""
# TODO: escape special chars
self.traverser(3, self.__toXmlStart, self.__toXmlEnd,ostream)
# def toXml(self, ostream):
# # fixme
# margin = u' '*self.depth
# tags = u' '.join(map(lambda d: d[1] == None and d[0] or d[0]+u'='+unicode(d[1]), self.getTags().items()))
# ostream.write(u''.join((margin, u'<node ', tags, u'>\n',)))
# ostream.write(self.getContent())
# for i in descendantsIter():
# margin = u' '*i.depth
# tags = u' '.join(map(lambda d: d[1] == None and d[0] or d[0]+u'='+unicode(d[1]), i.getTags().items()))
# ostream.write(u''.join((margin, u'<node ', tags, u'>\n',)))
# ostream.write(i.getContent())
# ostream.write(u'\n'+margin+u'</node>\n')
# ostream.write(u'\n'+margin+u'</node>\n')
####################################
if __name__ == '__main__':
# quick manual test; assumes wiki_filename is defined and a wiki2th()
# converter has been imported by whoever runs this block
th = ThawabMan([os.path.expanduser('~/.thawab')])
ki = th.mktemp()
wiki = open(wiki_filename, "r")
ki.seek(-1, -1)
wiki2th(ki, wiki)
ki.flush()
==> thawab-4.1/Thawab/dataModel.py <==
# -*- coding: UTF-8 -*-
"""
The string constants to handle the data model
Copyright © 2008, Muayyad Alsadi
Released under terms of Waqf Public License.
This program is free software; you can redistribute it and/or modify
it under the terms of the latest version Waqf Public License as
published by Ojuba.org.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
The Latest version of the license can be found on
"http://waqf.ojuba.org/license"
"""
from tags import *
MCACHE_BASE_FIELDS = [
'cache_hash','repo','lang','kitab','version', 'releaseMajor', 'releaseMinor', 'type',
'author', 'year', 'originalAuthor', 'originalYear', 'originalKitab', 'originalVersion',
'classification', 'keywords'
]
MCACHE_FIELDS = MCACHE_BASE_FIELDS + ['uri', 'mtime', 'flags']
SQL_MCACHE_SET = 'INSERT OR REPLACE INTO meta (rowid, %s) VALUES (1, %s)' % \
(', '.join(MCACHE_BASE_FIELDS),
', '.join(map(lambda i: ":"+i,MCACHE_BASE_FIELDS)))
SQL_MCACHE_ADD = 'INSERT OR REPLACE INTO meta (%s) VALUES (%s)' % \
(', '.join(MCACHE_FIELDS),
', '.join(map(lambda i: ":"+i,MCACHE_FIELDS)))
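# SQL_MCACHE_SET expands to roughly:
#   INSERT OR REPLACE INTO meta (rowid, cache_hash, repo, ..., keywords)
#   VALUES (1, :cache_hash, :repo, ..., :keywords)
# i.e. the single meta row is always rewritten at rowid 1 with named params.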
SQL_MCACHE_DROP = 'DELETE FROM meta WHERE uri=?'
MCACHE_BASE = """\
CREATE TABLE "meta" (
"cache_hash" TEXT,
"repo" TEXT,
"lang" TEXT,
"kitab" TEXT,
"version" TEXT,
"releaseMajor" INTEGER,
"releaseMinor" INTEGER,
"type" INTEGER,
"author" TEXT,
"year" INTEGER,
"originalAuthor" TEXT,
"originalYear" INTEGER,
"originalKitab" TEXT,
"originalVersion" TEXT,
"classification" TEXT,
"keywords" TEXT
);"""
SQL_MCACHE_DATA_MODEL = MCACHE_BASE[:MCACHE_BASE.find('\n)')] + \
""",\n\
"uri" TEXT UNIQUE,
"mtime" FLOAT,
"flags" INTEGER DEFAULT 0
);
CREATE INDEX MetaURIIndex on meta (uri);
CREATE INDEX MetaRepoIndex on meta (repo);
CREATE INDEX MetaLangIndex on meta (lang);
CREATE INDEX MetaKitabIndex on meta (kitab);
CREATE INDEX MetaKitabTypeIndex on meta (type);
CREATE INDEX MetaKitabVersionIndex on meta (repo,kitab,version);
CREATE INDEX MetaAuthorIndex on meta (author);
CREATE INDEX MetaYearIndex on meta (year);
CREATE INDEX MetaOriginalAuthorIndex on meta (originalAuthor);
CREATE INDEX MetaOriginalYearIndex on meta (originalYear);
CREATE INDEX MetaClassificationIndex on meta (classification);
CREATE INDEX MetaFlagsIndex on meta (flags);
CREATE TABLE "directories" (
"abspath" TEXT,
"mtime" FLOAT
);
"""
SQL_MCACHE_GET = """SELECT rowid,* FROM meta"""
SQL_MCACHE_GET_BY_KITAB = """SELECT rowid,* FROM meta ORDER BY kitab"""
SQL_MCACHE_GET_UNINDEXED = """SELECT rowid,* FROM meta WHERE flags=0"""
SQL_MCACHE_GET_DIRTY_INDEX = """SELECT rowid,* FROM meta WHERE flags=1"""
SQL_MCACHE_GET_INDEXED = """SELECT rowid,* FROM meta WHERE flags=2"""
SQL_MCACHE_SET_INDEXED = """UPDATE OR IGNORE meta SET flags=? WHERE uri=?"""
SQL_MCACHE_SET_ALL_INDEXED = """UPDATE OR IGNORE meta SET flags=? WHERE flags>0"""
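# flags lifecycle (as used by indexKitab() and the statements above):
# 0 = not indexed, 1 = indexing started but not finished (dirty), 2 = indexed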
SQL_DATA_MODEL = """\
%s
CREATE TABLE "nodes" (
"idNum" INTEGER PRIMARY KEY NOT NULL,
"content" TEXT,
"parent" INTEGER,
"globalOrder" INTEGER,
"depth" INTEGER NOT NULL
);
CREATE TABLE "tags" (
"idNum" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
"name" VARCHAR NOT NULL,
"flags" INTEGER NOT NULL,
"comment" VARCHAR,
"parent" INTEGER,
"relation" INTEGER
);
CREATE TABLE "nodesTags" (
"tagIdNum" INTEGER NOT NULL,
"nodeIdNum" INTEGER NOT NULL,
"param" VARCHAR,
PRIMARY KEY ("tagIdNum", "nodeIdNum")
);
CREATE INDEX NodesParentIndex on nodes (parent);
CREATE INDEX NodesNodesGlobalOrderIndex on nodes (globalOrder);
CREATE INDEX NodesDepthIndex on nodes (depth);
CREATE INDEX NodesTagTagIdNumIndex on nodesTags(tagIdNum);
CREATE INDEX NodesTagNodeIdNumIndex on nodesTags(nodeIdNum);
CREATE INDEX NodesTagParamIndex on nodesTags(param);
CREATE INDEX TagsName on tags (name);
""" % MCACHE_BASE
#################################################
# arguments to make the built-in tags
STD_TAGS_ARGS = ( \
# (name, comment, flags, parent, relation)
("header", "an anchor that marks header in TOC.", TAG_FLAGS_FLOW_BLOCK | TAG_FLAGS_HEADER),
("request.fix.head", "a tag that marks an error in content.", 0),
("request.fix.footnote", "a tag that marks an error in content footnotes.", 0),
("textbody", "a tag that marks a typical text.",0),
("quran.tafseer.ref", 'a reference to some Ayat in tafseer (in the form of "Sura-Aya-number").', 0),
("embed.section.ref", 'a reference to some section in another kitab to embed (in the form of "kitabName-version/section").', 0),
("embed.original.section", 'a reference to some section in the original kitab to embed. (used in commentary books)', 0),
# the following index-tags marks the header
("hadith.authenticity", "marks the authenticity of the hadith, param values are Sahih, Hasan, weak, fabricated", TAG_FLAGS_IX_TAG),
# new index field for rawi
("hadith.ruwah.rawi", "marks a rawi", TAG_FLAGS_IX_FIELD),
# the following index-tags marks the rawi field
("hadith.ruwah.authenticity", "marks the authenticity of the rawi, param values are thiqah, ...,kathoob", TAG_FLAGS_IX_TAG),
("hadith.ruwah.tabaqa", "marks the tabaqa of the rawi, param values are sahabi,tabii,...", TAG_FLAGS_IX_TAG)
)
STD_TAGS_HASH = dict(map(lambda i: (i[0],i),STD_TAGS_ARGS))
# ENUMs
WITH_NONE = 0
WITH_CONTENT = 1
WITH_TAGS = 2
WITH_CONTENT_AND_TAGS = 3
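# These values double as indexes into the SQL statement tuples below, e.g.
# SQL_GET_CHILD_NODES[WITH_CONTENT_AND_TAGS] selects the node columns plus
# content, tag names, params and flags in a single query.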
#################################################
# SQL statements for manipulating the dataModel
SQL_GET_ALL_TAGS = """SELECT name,flags,comment,parent,relation FROM tags"""
SQL_GET_NODE_CONTENT = """SELECT content from nodes WHERE idNum=? LIMIT 1"""
SQL_GET_NODE_TAGS = """SELECT tags.name,nodesTags.param FROM nodesTags LEFT OUTER JOIN tags on nodesTags.tagIdNum=tags.idNum WHERE nodesTags.nodeIdNum=?"""
# FIXME: all sql that uses SQL_NODE_ARGS should be revised to check the shift after adding globalOrder
SQL_NODE_ARGS = "nodes.idNum, nodes.parent, nodes.depth, nodes.globalOrder"
SQL_NODE_COLS = (SQL_NODE_ARGS, SQL_NODE_ARGS+", nodes.content",
SQL_NODE_ARGS+", tags.name, nodesTags.param, tags.flags",
SQL_NODE_ARGS+", nodes.content"+", tags.name, nodesTags.param, tags.flags")
SQL_GET_CHILD_NODES = ( \
"""SELECT %s FROM nodes WHERE parent=? ORDER BY globalOrder""" % SQL_NODE_ARGS,
"""SELECT %s FROM nodes WHERE parent=? ORDER BY globalOrder""" % SQL_NODE_COLS[WITH_CONTENT],
"""SELECT %s FROM nodes LEFT OUTER JOIN nodesTags ON nodes.idNum = nodesTags.nodeIdNum LEFT OUTER JOIN tags on nodesTags.tagIdNum=tags.idNum WHERE nodes.parent=? ORDER BY nodes.globalOrder""" % SQL_NODE_COLS[WITH_TAGS],
"""SELECT %s FROM nodes LEFT OUTER JOIN nodesTags ON nodes.idNum = nodesTags.nodeIdNum LEFT OUTER JOIN tags on nodesTags.tagIdNum=tags.idNum WHERE nodes.parent=? ORDER BY nodes.globalOrder""" % SQL_NODE_COLS[WITH_CONTENT_AND_TAGS]
)
SQL_TAG = """INSERT OR REPLACE INTO nodesTags (tagIdNum,nodeIdNum,param) SELECT tags.IdNum,?,? FROM tags WHERE tags.name = ? LIMIT 1"""
SQL_CLEAR_TAGS_ON_NODE = """DELETE FROM nodesTags WHERE nodeIdNum = ?"""
SQL_GET_NODE_BY_IDNUM = ( \
"""SELECT %s FROM nodes WHERE idNum=? ORDER BY globalOrder""" % SQL_NODE_ARGS,
"""SELECT %s FROM nodes WHERE idNum=? ORDER BY globalOrder""" % SQL_NODE_COLS[1],
)
# node slices
SQL_GET_NODES_SLICE = ( \
"""SELECT %s FROM nodes WHERE globalOrder>? AND globalOrder ORDER BY globalOrder""" % SQL_NODE_ARGS,
"""SELECT %s FROM nodes WHERE globalOrder>? AND globalOrder ORDER BY globalOrder""" % SQL_NODE_COLS[WITH_CONTENT],
"""SELECT %s FROM nodes LEFT OUTER JOIN nodesTags ON nodes.idNum = nodesTags.nodeIdNum LEFT OUTER JOIN tags on nodesTags.tagIdNum=tags.idNum WHERE nodes.globalOrder>? AND nodes.globalOrder ORDER BY nodes.globalOrder""" % SQL_NODE_COLS[WITH_TAGS],
"""SELECT %s FROM nodes LEFT OUTER JOIN nodesTags ON nodes.idNum = nodesTags.nodeIdNum LEFT OUTER JOIN tags on nodesTags.tagIdNum=tags.idNum WHERE nodes.globalOrder>? AND nodes.globalOrder ORDER BY nodes.globalOrder""" % SQL_NODE_COLS[WITH_CONTENT_AND_TAGS]
)
SQL_GET_UNBOUNDED_NODES_SLICE = (
"""SELECT %s FROM nodes WHERE globalOrder>? ORDER BY globalOrder""" % SQL_NODE_ARGS,
"""SELECT %s FROM nodes WHERE globalOrder>? ORDER BY globalOrder""" % SQL_NODE_COLS[WITH_CONTENT],
"""SELECT %s FROM nodes LEFT OUTER JOIN nodesTags ON nodes.idNum = nodesTags.nodeIdNum LEFT OUTER JOIN tags on nodesTags.tagIdNum=tags.idNum WHERE nodes.globalOrder>? ORDER BY nodes.globalOrder""" % SQL_NODE_COLS[WITH_TAGS],
"""SELECT %s FROM nodes LEFT OUTER JOIN nodesTags ON nodes.idNum = nodesTags.nodeIdNum LEFT OUTER JOIN tags on nodesTags.tagIdNum=tags.idNum WHERE nodes.globalOrder>? ORDER BY nodes.globalOrder""" % SQL_NODE_COLS[WITH_CONTENT_AND_TAGS]
)
# tagged children node
SQL_GET_TAGGED_CHILD_NODES = ( \
"""SELECT %s FROM nodes LEFT OUTER JOIN nodesTags ON nodes.idNum = nodesTags.nodeIdNum LEFT OUTER JOIN tags on nodesTags.tagIdNum=tags.idNum WHERE nodes.parent=? AND tags.name=? ORDER BY nodes.globalOrder""" % SQL_NODE_ARGS,
"""SELECT %s FROM nodes LEFT OUTER JOIN nodesTags ON nodes.idNum = nodesTags.nodeIdNum LEFT OUTER JOIN tags on nodesTags.tagIdNum=tags.idNum WHERE nodes.parent=? AND tags.name=? ORDER BY nodes.globalOrder""" % SQL_NODE_COLS[1]
)
# tagged node slices
SQL_GET_TAGGED_NODES_SLICE = ( \
"""SELECT %s FROM nodes LEFT OUTER JOIN nodesTags ON nodes.idNum = nodesTags.nodeIdNum LEFT OUTER JOIN tags on nodesTags.tagIdNum=tags.idNum WHERE tags.name=? AND nodes.globalOrder>? AND nodes.globalOrder ORDER BY nodes.globalOrder""" % SQL_NODE_ARGS,
"""SELECT %s FROM nodes LEFT OUTER JOIN nodesTags ON nodes.idNum = nodesTags.nodeIdNum LEFT OUTER JOIN tags on nodesTags.tagIdNum=tags.idNum WHERE tags.name=? AND nodes.globalOrder>? AND nodes.globalOrder ORDER BY nodes.globalOrder""" % SQL_NODE_COLS[1]
)
SQL_GET_UNBOUNDED_TAGGED_NODES_SLICE = ( \
"""SELECT %s FROM nodes LEFT OUTER JOIN nodesTags ON nodes.idNum = nodesTags.nodeIdNum LEFT OUTER JOIN tags on nodesTags.tagIdNum=tags.idNum WHERE tags.name=? AND nodes.globalOrder>? ORDER BY nodes.globalOrder""" % SQL_NODE_ARGS,
"""SELECT %s FROM nodes LEFT OUTER JOIN nodesTags ON nodes.idNum = nodesTags.nodeIdNum LEFT OUTER JOIN tags on nodesTags.tagIdNum=tags.idNum WHERE tags.name=? AND nodes.globalOrder>? ORDER BY nodes.globalOrder""" % SQL_NODE_COLS[1])
# get tagged node slices by param value
SQL_GET_NODES_BY_TAG_VALUE = ( \
"""SELECT %s FROM nodes LEFT OUTER JOIN nodesTags ON nodes.idNum = nodesTags.nodeIdNum LEFT OUTER JOIN tags on nodesTags.tagIdNum=tags.idNum WHERE tags.name=? AND nodesTags.param=? ORDER BY nodes.globalOrder""" % SQL_NODE_ARGS,
"""SELECT %s FROM nodes LEFT OUTER JOIN nodesTags ON nodes.idNum = nodesTags.nodeIdNum LEFT OUTER JOIN tags on nodesTags.tagIdNum=tags.idNum WHERE tags.name=? AND nodesTags.param=? ORDER BY nodes.globalOrder""" % SQL_NODE_COLS[1])
# get prev/next tagged node
SQL_GET_PREV_TAGGED_NODE = ( \
"""SELECT %s FROM nodes LEFT OUTER JOIN nodesTags ON nodes.idNum = nodesTags.nodeIdNum LEFT OUTER JOIN tags on nodesTags.tagIdNum=tags.idNum WHERE nodes.globalOrder and tags.name=? ORDER BY nodes.globalOrder DESC LIMIT 1""" % SQL_NODE_ARGS,
"""SELECT %s FROM nodes LEFT OUTER JOIN nodesTags ON nodes.idNum = nodesTags.nodeIdNum LEFT OUTER JOIN tags on nodesTags.tagIdNum=tags.idNum WHERE nodes.globalOrder and tags.name=? ORDER BY nodes.globalOrder DESC LIMIT 1""" % SQL_NODE_COLS[1])
SQL_GET_NEXT_TAGGED_NODE = ( \
"""SELECT %s FROM nodes LEFT OUTER JOIN nodesTags ON nodes.idNum = nodesTags.nodeIdNum LEFT OUTER JOIN tags on nodesTags.tagIdNum=tags.idNum WHERE nodes.globalOrder>? and tags.name=? ORDER BY nodes.globalOrder LIMIT 1""" % SQL_NODE_ARGS,
"""SELECT %s FROM nodes LEFT OUTER JOIN nodesTags ON nodes.idNum = nodesTags.nodeIdNum LEFT OUTER JOIN tags on nodesTags.tagIdNum=tags.idNum WHERE nodes.globalOrder>? and tags.name=? ORDER BY nodes.globalOrder LIMIT 1""" % SQL_NODE_COLS[1])
# get tagged child nodes
SQL_GET_TAGGED_CHILD_NODES = ( \
"""SELECT %s FROM nodes LEFT OUTER JOIN nodesTags ON nodes.idNum = nodesTags.nodeIdNum LEFT OUTER JOIN tags on nodesTags.tagIdNum=tags.idNum WHERE nodes.parent=? and tags.name=? ORDER BY nodes.globalOrder""" % SQL_NODE_ARGS,
"""SELECT %s FROM nodes LEFT OUTER JOIN nodesTags ON nodes.idNum = nodesTags.nodeIdNum LEFT OUTER JOIN tags on nodesTags.tagIdNum=tags.idNum WHERE nodes.parent=? and tags.name=? ORDER BY nodes.globalOrder""" % SQL_NODE_COLS[1])
SQL_GET_GLOBAL_ORDER = """SELECT globalOrder,depth FROM nodes WHERE idNum=? LIMIT 1"""
SQL_GET_DESC_UPPER_BOUND = """SELECT globalOrder FROM nodes WHERE globalOrder>? AND depth<=? ORDER BY globalOrder LIMIT 1"""
SQL_GET_SIBLING_GLOBAL_ORDER = """SELECT globalOrder FROM nodes WHERE parent=? and globalOrder>? ORDER BY globalOrder LIMIT 1"""
SQL_GET_LAST_GLOBAL_ORDER = """SELECT globalOrder FROM nodes ORDER BY globalOrder DESC LIMIT 1"""
SQL_DROP_DESC_NODES = ["""DELETE FROM nodes WHERE globalOrder>? AND globalOrder""",
"""DELETE FROM nodes WHERE globalOrder>=? AND globalOrder"""]
SQL_DROP_TAIL_NODES = ["""DELETE FROM nodes WHERE globalOrder>?""",
"""DELETE FROM nodes WHERE globalOrder>=?"""]
SQL_APPEND_NODE = ["""INSERT INTO nodes (content,parent,globalOrder,depth) VALUES (?,?,?,?)""",
"""INSERT INTO tmp_nodes (content,parent,globalOrder,depth) VALUES (?,?,?,?)"""]
# SQL tags commands
SQL_ADD_TAG = "INSERT OR REPLACE INTO tags (name, comment, flags, parent,relation) VALUES (?,?,?,-1,-1)"
# modified:
# SQL_GET_NODE_BY_IDNUM
# SQL_GET_CHILD_NODES
# SQL_GET_NODES_SLICE
# SQL_GET_UNBOUNDED_NODES_SLICE
# SQL_GET_TAGGED_CHILD_NODES
# SQL_GET_TAGGED_NODES_SLICE
# SQL_GET_UNBOUNDED_TAGGED_NODES_SLICE
# removed:
# SQL_GET_CHILD_NODES_AND_TAGS
# SQL_GET_NODES_SLICE_AND_TAGS
# SQL_GET_UNBOUNDED_NODES_SLICE_AND_TAGS
# TODO:
# make SQL_GET_NODE_BY_IDNUM capable of pre-loading tags (is this really needed??)
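# usage sketch (hypothetical cursor `c` and variable `global_order`; each
# pair above holds the same statement formatted with two column lists):
#   c.execute(SQL_GET_NEXT_TAGGED_NODE[1], (global_order, u'header'))
#   row = c.fetchone()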
thawab-4.1/Thawab/gtkUi.py 0000664 0000000 0000000 00000120366 13052627552 0015506 0 ustar 00root root 0000000 0000000 # -*- coding: UTF-8 -*-
"""
gtkUi - gtk interface for thawab
Copyright © 2009-2010, Muayyad Alsadi
Released under terms of Waqf Public License.
This program is free software; you can redistribute it and/or modify
it under the terms of the latest version Waqf Public License as
published by Ojuba.org.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
The Latest version of the license can be found on
"http://waqf.ojuba.org/license"
"""
import sys, os, os.path, time, re, sqlite3
import shutil, tempfile
import threading, socket
import gettext
import gi
gi.require_version("Gtk", "3.0")
gi.require_version("WebKit", "3.0")
from gi.repository import Gtk, Gdk, GObject, WebKit, Pango, GLib
from subprocess import Popen, PIPE
from urllib import unquote
import Thawab.core
from Thawab.webApp import webApp, get_theme_dirs
from Thawab.shamelaUtils import ShamelaSqlite, shamelaImport
from Thawab.platform import uri_to_filename
from paste import httpserver
setsid = getattr(os, 'setsid', None)
if not setsid: setsid = getattr(os, 'setpgrp', None)
_ps = []
def run_in_bg(cmd):
global _ps
setsid = getattr(os, 'setsid', None)
if not setsid: setsid = getattr(os, 'setpgrp', None)
_ps = filter(lambda x: x.poll() is None, _ps) # keep only still-running processes; terminated ones are dropped
_ps.append(Popen(cmd,0,'/bin/sh',shell = True, preexec_fn = setsid))
def get_exec_full_path(fn):
a = filter(lambda p: os.access(p, os.X_OK),
map(lambda p: os.path.join(p, fn),
os.environ['PATH'].split(os.pathsep)))
if a:
return a[0]
return None
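# e.g. get_exec_full_path("xdg-open") returns something like "/usr/bin/xdg-open",
# or None when the executable is not found anywhere on PATH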
def guess_browser():
e = get_exec_full_path("xdg-open")
if not e:
e = get_exec_full_path("firefox")
if not e:
e = "start"
return e
browser = guess_browser()
def sure(msg, parent = None):
dlg = Gtk.MessageDialog(parent,
Gtk.DialogFlags.MODAL,
Gtk.MessageType.QUESTION,
Gtk.ButtonsType.YES_NO,
msg)
dlg.connect("response", lambda *args: dlg.hide())
r = dlg.run()
dlg.destroy()
return r == Gtk.ResponseType.YES
def info(msg, parent = None):
dlg = Gtk.MessageDialog(parent,
Gtk.DialogFlags.MODAL,
Gtk.MessageType.INFO,
Gtk.ButtonsType.OK,
msg)
dlg.connect("response", lambda *args: dlg.hide())
r = dlg.run()
dlg.destroy()
def error(msg, parent = None):
dlg = Gtk.MessageDialog(parent,
Gtk.DialogFlags.MODAL,
Gtk.MessageType.ERROR,
Gtk.ButtonsType.OK,
msg)
dlg.connect("response", lambda *args: dlg.hide())
r = dlg.run()
dlg.destroy()
class ThWV(WebKit.WebView):
def __init__(self):
WebKit.WebView.__init__(self)
self.set_full_content_zoom(True)
self.connect_after("populate-popup", self.populate_popup)
self.connect("navigation-requested", self._navigation_requested_cb)
def _navigation_requested_cb(self, view, frame, networkRequest):
uri = networkRequest.get_uri()
if not uri.startswith('http://127.0.0.1') and not uri.startswith('http://localhost'):
run_in_bg("%s '%s'" % (broswer ,uri))
return 1
return 0
def reload_if_index(self, *a, **kw):
if self.get_property('uri').endswith('/index/'):
self.reload()
def _eval_js(self, e):
"""
can be used to eval a javascript expression
eg. to obtain value of a javascript variable given its name
"""
self.execute_script('thawab_eval_js_oldtitle=document.title;document.title=%s;' % e)
r = self.get_main_frame().get_title()
self.execute_script('document.title=thawab_eval_js_oldtitle;')
return r
def populate_popup(self, view, menu):
menu.append(Gtk.SeparatorMenuItem.new())
i = Gtk.ImageMenuItem.new_from_stock(Gtk.STOCK_ZOOM_IN, None)
i.connect('activate', lambda m,v,*a,**k: v.zoom_in(), view)
menu.append(i)
i = Gtk.ImageMenuItem.new_from_stock(Gtk.STOCK_ZOOM_OUT, None)
i.connect('activate', lambda m,v,*a,**k: v.zoom_out(), view)
menu.append(i)
i = Gtk.ImageMenuItem.new_from_stock(Gtk.STOCK_ZOOM_100, None)
i.connect('activate', lambda m,v,*a,**k: v.get_zoom_level() == 1.0 or v.set_zoom_level(1.0), view)
menu.append(i)
menu.show_all()
return False
targets = Gtk.TargetList.new([])
targets.add_uri_targets((1 << 5) -1)
class ThImportWindow(Gtk.Window):
def __init__(self, main):
Gtk.Window.__init__(self)
self.progress_dict = { }
self.progress_phase = 0
self.progress_books_in_file = 0
self.progress_element = 0
self.add_dlg = None
self.set_size_request(-1, 400)
## prepare dnd
self.drag_dest_set(Gtk.DestDefaults.ALL, [], Gdk.DragAction.COPY)
self.drag_dest_set_target_list(targets)
self.connect('drag-data-received', self.drop_data_cb)
self.set_title(_('Import Shamela .bok files'))
self.set_type_hint(Gdk.WindowTypeHint.DIALOG)
self.set_modal(True)
self.set_transient_for(main)
self.main = main
self.connect('delete-event', self.close_cb)
self.connect('destroy', self.close_cb)
vb = Gtk.VBox(False,2)
self.add(vb)
hb0 = Gtk.HBox(False,2)
vb.pack_start(hb0,False, False, 2)
self.tool = hb = Gtk.HBox(False,2)
hb0.pack_start(hb,False, False, 2)
b = Gtk.Button(stock = Gtk.STOCK_ADD)
b.connect('clicked', self.add_cb, self)
hb.pack_start(b, False, False, 2)
b = Gtk.Button(stock = Gtk.STOCK_REMOVE)
b.connect('clicked', self.rm)
hb.pack_start(b, False, False, 2)
b = Gtk.Button(stock = Gtk.STOCK_CLEAR)
b.connect('clicked', lambda *a: self.ls.clear())
hb.pack_start(b, False, False, 2)
b = Gtk.Button(stock = Gtk.STOCK_CONVERT)
b.connect('clicked', self.start)
hb.pack_start(b, False, False, 2)
self.progress = Gtk.ProgressBar()
self.progress.set_fraction(0.0)
hb0.pack_start(self.progress, True, True, 2)
self.cancel_b = b = Gtk.Button(stock = Gtk.STOCK_CANCEL)
b.connect('clicked', self.stop)
b.set_sensitive(False)
hb0.pack_start(b, False, False, 2)
self.close_b = b = Gtk.Button(stock = Gtk.STOCK_CLOSE)
b.connect('clicked', self.close_cb)
hb0.pack_start(b, False, False, 2)
self.ls = Gtk.ListStore(str,str,float,int,str) # fn, basename, percent, pulse, label
self.lsv = Gtk.TreeView(self.ls)
#self.lsv.set_size_request(250, -1)
cells = []
cols = []
cells.append(Gtk.CellRendererText())
cols.append(Gtk.TreeViewColumn('Files', cells[-1], text = 1))
cols[-1].set_sizing(Gtk.TreeViewColumnSizing.AUTOSIZE)
cols[-1].set_resizable(True)
cols[-1].set_expand(True)
cells.append(Gtk.CellRendererProgress())
cols.append(Gtk.TreeViewColumn('%', cells[-1], value = 2,pulse = 3,text = 4))
cols[-1].set_expand(False)
self.lsv.set_headers_visible(True)
self.lsv.get_selection().set_mode(Gtk.SelectionMode.MULTIPLE)
for i in cols:
self.lsv.insert_column(i, -1)
scroll = Gtk.ScrolledWindow()
scroll.set_policy(Gtk.PolicyType.NEVER,Gtk.PolicyType.AUTOMATIC)
scroll.add(self.lsv)
vb.pack_start(scroll,True, True, 2)
self.x = x = Gtk.Expander.new(_("Advanced options"))
vb.pack_start(x, False, False, 2)
xvb = Gtk.VBox(False,2); x.add(xvb)
f = Gtk.Frame.new(_('Performance tuning:'))
xvb.add(f)
fvb = Gtk.VBox(False,2)
f.add(fvb)
hb = Gtk.HBox(False,2)
fvb.add(hb)
self.in_mem = Gtk.CheckButton(_('in memory'))
self.in_mem.set_tooltip_text(_("faster but consumes more memory and harder to debug."))
hb.pack_start(self.in_mem, False, False, 2)
f = Gtk.Frame.new('Version Control:')
xvb.add(f)
fvb = Gtk.VBox(False,2); f.add(fvb)
hb = Gtk.HBox(False,2); fvb.add(hb)
hb.pack_start(Gtk.Label(_('Release Major:')), False, False, 2)
adj = Gtk.Adjustment(0, 0, 10000, 1, 10, 0)
self.releaseMajor = s = Gtk.SpinButton()
s.set_adjustment(adj)
hb.pack_start(self.releaseMajor, False, False, 2)
hb.pack_start(Gtk.Label(_('Release Minor:')), False, False, 2)
self.releaseMinor = s = Gtk.SpinButton()
s.set_adjustment(adj)
hb.pack_start(self.releaseMinor, False, False, 2)
f = Gtk.Frame.new('Footnotes:'); xvb.add(f)
fvb = Gtk.VBox(False,2); f.add(fvb)
hb = Gtk.HBox(False,2); fvb.add(hb)
hb.pack_start(Gtk.Label(_('Prefix:')), False, False, 2)
self.ft_prefix = Gtk.Entry()
self.ft_prefix.set_text('(')
self.ft_prefix.set_width_chars(3)
hb.pack_start(self.ft_prefix, False, False, 2)
hb.pack_start(Gtk.Label(_('Suffix:')), False, False, 2)
self.ft_suffix = Gtk.Entry()
self.ft_suffix.set_text(')')
self.ft_suffix.set_width_chars(3)
hb.pack_start(self.ft_suffix, False, False, 2)
self.ft_at_line_start = Gtk.CheckButton(_('only at line start'))
hb.pack_start(self.ft_at_line_start, False, False, 2)
hb = Gtk.HBox(False,2); fvb.add(hb)
hb.pack_start(Gtk.Label(_('in between spaces:')), False, False, 2)
self.ft_sp = [Gtk.RadioButton(group = None, label = _('no spaces'))]
self.ft_sp.append(Gtk.RadioButton(group=self.ft_sp[0], label = _('optional white-space')))
self.ft_sp.append(Gtk.RadioButton(group=self.ft_sp[0], label = _('optional white-spaces')))
for i in self.ft_sp: hb.pack_start(i, False, False, 2)
f = Gtk.Frame.new('Footnote anchors in body:'); xvb.add(f)
fvb = Gtk.VBox(False,2); f.add(fvb)
hb = Gtk.HBox(False,2); fvb.add(hb)
hb.pack_start(Gtk.Label(_('Prefix:')), False, False, 2)
self.bft_prefix = Gtk.Entry()
self.bft_prefix.set_text('(')
self.bft_prefix.set_width_chars(3)
hb.pack_start(self.bft_prefix, False, False, 2)
hb.pack_start(Gtk.Label(_('Suffix:')), False, False, 2)
self.bft_suffix = Gtk.Entry()
self.bft_suffix.set_text(')')
self.bft_suffix.set_width_chars(3)
hb.pack_start(self.bft_suffix, False, False, 2)
hb = Gtk.HBox(False,2); fvb.add(hb)
hb.pack_start(Gtk.Label(_('in between spaces:')), False, False, 2)
self.bft_sp = [Gtk.RadioButton(group = None, label = _('no spaces'))]
self.bft_sp.append(Gtk.RadioButton(group=self.bft_sp[0], label = _('optional white-space')))
self.bft_sp.append(Gtk.RadioButton(group=self.bft_sp[0], label = _('optional white-spaces')))
for i in self.bft_sp: hb.pack_start(i, False, False, 2)
# TODO: add options to specify version and revision
# TODO: add options to specify wither to break by hno
# TODO: add options for handling existing files (overwrite?)
ft_at_line_start = False
ft_prefix = u'('
ft_suffix = u')'
ft_sp = u'' # can be ur'\s?' or ur'\s*'
body_footnote_re = re.escape(ft_prefix)+ft_sp+ur'(\d+)'+ft_sp+re.escape(ft_suffix)
footnote_re = (ft_at_line_start and u'^\s*' or u'') + body_footnote_re
ft_prefix_len = len(ft_prefix)
ft_suffix_len = len(ft_suffix)
#shamelaImport(cursor, sh, bkid, footnote_re = ur'\((\d+)\)', body_footnote_re = ur'\((\d+)\)', ft_prefix_len = 1, ft_suffix_len = 1):
#self.show_all()
def close_cb(self, *w):
return self.hide() or True
def element_pulse_cb(self, i):
self.ls[(i,)][2] = 0
self.ls[(i,)][3] = int(abs(self.ls[(i,)][3])+1)
Gtk.main_iteration()
def element_progress_cb(self, i, percent, text = None):
l = self.ls[(i,)]
if percent >= 0.0:
l[2] = percent
if text != None and not 'working' in text:
l[4] = text
else:
l[4] = '%s%%' % str(int(percent))
Gtk.main_iteration()
def progress_cb(self, msg, p, *d, **kw):
# print " ** progress phase %d: [%g%% completed] %s" % (self.progress_phase, p, msg)
i = self.progress_element
N = len(self.ls)
j = self.progress_book_in_file
n = self.progress_books_in_file
if n == 0 or N == 0:
return
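# phase 1 (the .bok -> sqlite conversion) maps to the first 25% of the
# element's bar; phase 2 (the thawab import) spreads the remaining 75%
# over the n books contained in the file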
if self.progress_phase == 1:
percent = p*0.25
else:
percent = (75.0/n)*j + p*0.75/n + 25.0
if not kw.has_key('show_msg'):
msg = _("working ...")
self.element_progress_cb(i, percent, msg)
self.progress.set_fraction( float(i)/N + percent/100.0/N )
Gtk.main_iteration()
def start_cb(self):
self.tool.set_sensitive(False)
self.x.set_sensitive(False)
self.cancel_b.set_sensitive(True)
self.progress_dict['cancel'] = False
def start(self, b):
self.start_cb()
self.progress.set_text(_("working ..."))
ft_at_line_start = self.ft_at_line_start.get_active()
ft_prefix = self.ft_prefix.get_text()
ft_prefix_len = len(ft_prefix)
ft_suffix=self.ft_suffix.get_text()
ft_suffix_len = len(ft_suffix)
ft_sp = [u'', ur'\s?' , ur'\s*'][ [i.get_active() for i in self.ft_sp].index(True) ]
footnote_re = (ft_at_line_start and u'^\s*' or u'') + \
re.escape(ft_prefix) + \
ft_sp+ur'(\d+)' + \
ft_sp + \
re.escape(ft_suffix)
bft_prefix=self.bft_prefix.get_text()
bft_suffix=self.bft_suffix.get_text()
bft_sp = [u'', ur'\s?' , ur'\s*'][ [i.get_active() for i in self.bft_sp].index(True) ]
body_footnote_re = re.escape(bft_prefix) + \
bft_sp + \
ur'(\d+)' + \
bft_sp + \
re.escape(bft_suffix)
if not self.in_mem.get_active():
fh, db_fn = tempfile.mkstemp(suffix = '.sqlite', prefix = 'th_shamela_tmp')
os.close(fh) # the raw fd is not needed; sqlite3 reopens the file by name
else:
db_fn = None
for i,l in enumerate(self.ls):
self.progress_element = i
self.progress_book_in_file = 0
self.progress_books_in_file = 1
fn = l[0]
if db_fn:
f = open(db_fn, "w")
f.truncate(0)
f.close()
cn = sqlite3.connect(db_fn, isolation_level = None)
else:
cn = None
self.progress_phase = 1
try:
sh = ShamelaSqlite(fn,
cn,
int(self.releaseMajor.get_value()),
int(self.releaseMinor.get_value()),
self.progress_cb,
progress_dict = self.progress_dict)
except TypeError:
print "not a shamela file"
continue
except OSError:
print "mdbtools is not installed"
break
if not sh.toSqlite():
# canceled
self.progress.set_text(_("Canceled"))
self.element_progress_cb(self.progress_element, -1.0, _("Canceled"))
return
self.progress_phase = 2
ids = sh.getBookIds()
self.progress_books_in_file = len(ids)
for j, bkid in enumerate(ids):
self.progress_book_in_file = j
ki = self.main.th.mktemp()
c = ki.seek(-1,-1)
m = shamelaImport(c,
sh,
bkid,
footnote_re,
body_footnote_re,
ft_prefix_len,
ft_suffix_len)
if m == None:
# canceled
self.progress.set_text(_("Canceled"))
self.element_progress_cb(self.progress_element, -1.0, _("Canceled"))
return
c.flush()
t_fn = os.path.join(self.main.th.prefixes[0],
'db',
u"".join((m['kitab'] + \
u"-" + \
m['version'] + \
Thawab.core.th_ext,)))
#print "moving %s to %s" % (ki.uri, t_fn)
try:
shutil.move(ki.uri, t_fn)
except OSError:
print "unable to move converted file." # windows can't move an opened file
# FIXME: close ki in a clean way so the above code works in windows
self.progress_cb(_("Done"), 100.0, show_msg = True)
if db_fn and os.path.exists(db_fn):
try:
os.unlink(db_fn)
except OSError:
pass
#self.element_progress_cb(0, 25.0, "testing")
self.tool.set_sensitive(True)
self.x.set_sensitive(True)
self.cancel_b.set_sensitive(False)
self.main.th.loadMeta()
self.main._do_in_all_views('reload_if_index')
self.progress.set_text(_("Done"))
info(_("Convert Book, Done"), self.main)
self.ls.clear()
self.progress.set_text("")
self.progress.set_fraction(0.0)
self.hide()
def stop(self, b):
self.tool.set_sensitive(True)
self.x.set_sensitive(True)
self.cancel_b.set_sensitive(False)
self.progress_dict['cancel'] = True
def add_cb(self, b, parent=None):
if self.run_add_dlg(parent) == Gtk.ResponseType.ACCEPT:
for i in self.add_dlg.get_filenames():
self.add_fn(i)
def run_add_dlg(self, parent=None):
if self.add_dlg:
return self.add_dlg.run()
self.add_dlg = Gtk.FileChooserDialog(_("Select files to import"),
parent = parent,
buttons=(Gtk.STOCK_CANCEL,
Gtk.ResponseType.REJECT,
Gtk.STOCK_OK,
Gtk.ResponseType.ACCEPT))
ff = Gtk.FileFilter()
ff.set_name(_('Shamela BOK files'))
ff.add_pattern('*.[Bb][Oo][Kk]')
self.add_dlg.add_filter(ff)
ff = Gtk.FileFilter()
ff.set_name('All files')
ff.add_pattern('*')
self.add_dlg.add_filter(ff)
self.add_dlg.set_select_multiple(True)
self.add_dlg.connect('delete-event', lambda w,*a: w.hide() or True)
self.add_dlg.connect('response', lambda w,*a: w.hide() or True)
return self.add_dlg.run()
def rm(self, b):
l, ls_p = self.lsv.get_selection().get_selected_rows()
r = map(lambda p: Gtk.TreeRowReference.new(self.ls, p), ls_p)
for i in r:
self.ls.remove(self.ls.get_iter(i.get_path()))
def add_fn(self, fn):
self.ls.append([fn, os.path.basename(fn), float(0), -1, "Not started"])
def add_uri(self, i):
if i.startswith('file://'):
f = uri_to_filename(unquote(i[7:]))
self.add_fn(f)
else:
print "Protocol not supported in [%s]" % i
def drop_data_cb(self, widget, dc, x, y, selection_data, info, t):
for i in selection_data.get_uris():
self.add_uri(i)
#dc.drop_finish(True, t)
class TabLabel(Gtk.HBox):
"""A class for Tab labels"""
__gsignals__ = {
"close": (GObject.SIGNAL_RUN_FIRST,
GObject.TYPE_NONE,
(GObject.TYPE_OBJECT,))
}
def __init__ (self, title, child):
"""initialize the tab label"""
Gtk.HBox.__init__(self, False, 4)
self.title = title
self.child = child
self.label = Gtk.Label(title)
self.label.props.max_width_chars = 30
self.label.set_ellipsize(Pango.EllipsizeMode.MIDDLE)
self.label.set_alignment(0.0, 0.5)
# FIXME: use another icon
icon = Gtk.Image.new_from_icon_name("thawab", Gtk.IconSize.MENU)
close_image = Gtk.Image.new_from_stock(Gtk.STOCK_CLOSE, Gtk.IconSize.MENU)
close_button = Gtk.Button()
close_button.set_relief(Gtk.ReliefStyle.NONE)
close_button.connect("clicked", self._close_tab, child)
close_button.add(close_image)
self.pack_start(icon, False, False, 0)
self.pack_start(self.label, True, True, 0)
self.pack_start(close_button, False, False, 0)
#self.set_data("label", self.label)
#self.set_data("close-button", close_button)
self.connect("style-set", tab_label_style_set_cb)
def set_label_text (self, text):
"""sets the text of this label"""
if text:
self.label.set_label(text)
def _close_tab (self, widget, child):
self.emit("close", child)
def tab_label_style_set_cb (tab_label, style):
#context = tab_label.get_pango_context()
#font_desc = Pango.font_description_from_string(tab_label.label.get_label())
#metrics = context.get_metrics(font_desc, context.get_language())
#metrics = context.get_metrics(tab_label.style.font_desc, context.get_language())
#char_width = metrics.get_approximate_digit_width()
#(icons, width, height) = Gtk.icon_size_lookup_for_settings(tab_label.get_settings(),
# Gtk.IconSize.MENU)
#tab_label.set_size_request(20 * char_width + 2 * width, -1)
tab_label.set_size_request(230, -1)
#button = tab_label.get_data("close-button")
#button.set_size_request(width + 4, height + 4)
class ContentPane (Gtk.Notebook):
__gsignals__ = {
"focus-view-title-changed": (GObject.SIGNAL_RUN_FIRST,
GObject.TYPE_NONE,
(GObject.TYPE_OBJECT,
GObject.TYPE_STRING,)),
"focus-view-load-committed": (GObject.SIGNAL_RUN_FIRST,
GObject.TYPE_NONE,
(GObject.TYPE_OBJECT,
GObject.TYPE_OBJECT,)),
"new-window-requested": (GObject.SIGNAL_RUN_FIRST,
GObject.TYPE_NONE,
(GObject.TYPE_OBJECT,))
}
def __init__ (self, default_url = None,
default_title = None,
hp = Gtk.PolicyType.NEVER,
vp = Gtk.PolicyType.ALWAYS):
"""initialize the content pane"""
Gtk.Notebook.__init__(self)
self.set_scrollable(True)
self.default_url = default_url
self.default_title = default_title
self.hp = hp
self.vp = vp
self.props.scrollable = True
#self.props.homogeneous = True
self.connect("switch-page", self._switch_page)
self.show_all()
self._hovered_uri = None
def load (self, uri):
"""load the given uri in the current web view"""
child = self.get_nth_page(self.get_current_page())
wv = child.get_child()
wv.open(uri)
def new_tab_with_webview (self, webview):
"""creates a new tab with the given webview as its child"""
self._construct_tab_view(webview)
def new_tab (self, url = None):
"""creates a new page in a new tab"""
# create the tab content
wv = ThWV()
self._construct_tab_view(wv, url)
return wv
def _construct_tab_view (self, wv, url = None, title = None):
wv.connect("hovering-over-link", self._hovering_over_link_cb)
wv.connect("populate-popup", self._populate_page_popup_cb)
wv.connect("load-committed", self._view_load_committed_cb)
wv.connect("load-finished", self._view_load_finished_cb)
wv.connect("create-web-view", self._new_web_view_request_cb)
# load the content
self._hovered_uri = None
if not url:
url=self.default_url
if url:
wv.open(url)
scrolled_window = Gtk.ScrolledWindow()
scrolled_window.props.hscrollbar_policy = self.hp
scrolled_window.props.vscrollbar_policy = self.vp
scrolled_window.add(wv)
scrolled_window.show_all()
# create the tab
if not title: title=self.default_title
if not title: title = url
label = TabLabel(title, scrolled_window)
label.connect("close", self._close_tab)
label.show_all()
new_tab_number = self.append_page(scrolled_window, label)
self.set_tab_reorderable(scrolled_window, True)
#self.set_tab_label_packing(scrolled_window, False, False, Gtk.PACK_START)
self.set_tab_label(scrolled_window, label)
# hide the tab if there's only one
self.set_show_tabs(self.get_n_pages() > 1)
self.show_all()
self.set_current_page(new_tab_number)
def _populate_page_popup_cb(self, view, menu):
# misc
if self._hovered_uri:
open_in_new_tab = Gtk.MenuItem(_("Open Link in New Tab"))
open_in_new_tab.connect("activate", self._open_in_new_tab, view)
menu.insert(open_in_new_tab, 0)
menu.show_all()
def _open_in_new_tab (self, menuitem, view):
self.new_tab(self._hovered_uri)
def _close_tab (self, label, child):
page_num = self.page_num(child)
if page_num != -1:
view = child.get_child()
view.destroy()
self.remove_page(page_num)
self.set_show_tabs(self.get_n_pages() > 1)
def _switch_page (self, notebook, page, page_num):
child = self.get_nth_page(page_num)
view = child.get_child()
frame = view.get_main_frame()
self.emit("focus-view-load-committed", view, frame)
def _hovering_over_link_cb (self, view, title, uri):
self._hovered_uri = uri
def _view_load_committed_cb (self, view, frame):
self.emit("focus-view-load-committed", view, frame)
def _view_load_finished_cb(self, view, frame):
child = self.get_nth_page(self.get_current_page())
label = self.get_tab_label(child)
title = frame.get_title()
if not title:
title = frame.get_uri()
label.set_label_text(title)
def _new_web_view_request_cb (self, web_view, web_frame):
view = self.new_tab()
view.connect("web-view-ready", self._new_web_view_ready_cb)
return view
def _new_web_view_ready_cb (self, web_view):
self.emit("new-window-requested", web_view)
class ThIndexerWindow(Gtk.Window):
def __init__(self, main):
Gtk.Window.__init__(self)
self.main = main
self.connect('delete-event', lambda w,*a: w.hide() or True)
self.set_title(_('Manage search index'))
self.set_type_hint(Gdk.WindowTypeHint.DIALOG)
self.set_modal(True)
self.set_transient_for(main)
self.main = main
self.set_position(Gtk.WindowPosition.CENTER_ON_PARENT)
vb = Gtk.VBox(False,2); self.add(vb)
hb = Gtk.HBox(False,2); vb.pack_start(hb, False, False, 0)
self.progress = Gtk.ProgressBar()
self.progress.set_show_text(True)
self.progress.set_text("")
self.start_b = b = Gtk.Button(_("Queue new books"))
b.connect('clicked', self.indexNew)
hb.pack_start(b, False, False, 0)
hb.pack_start(self.progress, False, False, 0)
self.cancel_b = b = Gtk.Button(stock = Gtk.STOCK_CLOSE)
#b.connect('clicked', self.cancel_cb)
b.connect('clicked', lambda w,*a: self.hide() or True)
hb.pack_start(b, False, False, 0)
#b.set_sensitive(False)
#self.update()
def cancel_cb(self, *w):
return False # disabled: the cancel button is not wired up, so the code below is unreachable
if self.main.th.asyncIndexer.started:
self.main.th.asyncIndexer.cancelQueued()
self.start_b.set_sensitive(True)
self.cancel_b.set_sensitive(False)
self.progress.set_text(_("Indexing jobs canceled"))
return False
def indexNew(self, *a):
self.start_b.set_sensitive(False)
#self.cancel_b.set_sensitive(True)
self.main.th.asyncIndexer.queueIndexNew()
if not self.main.th.asyncIndexer.started:
self.main.th.asyncIndexer.start()
self.update()
#GLib.timeout_add(250, self.update)
def update(self, *a):
#if not self.get_property('visible'):
# return True
jj = j = self.main.th.asyncIndexer.jobs()
while (j > 0 and self.main.get_property('visible')):
self.progress.set_text (_("Indexing ... (%d left)") % j)
self.progress.pulse()
j = self.main.th.asyncIndexer.jobs()
Gtk.main_iteration()
#Gtk.main_iteration_do(True)
self.progress.set_text (_("No indexing jobs left"))
self.start_b.set_sensitive(True)
if j <= 0 and jj > 0:
info(_("Indexing %d jobs, Done") % jj, self.main)
#self.cancel_b.set_sensitive(False)
return True
class ThFixesWindow(Gtk.Window):
def __init__(self, main):
Gtk.Window.__init__(self)
self.set_title(_('Misc. Fixes'))
self.set_type_hint(Gdk.WindowTypeHint.DIALOG)
self.set_modal(True)
self.set_transient_for(main)
self.main = main
self.set_position(Gtk.WindowPosition.CENTER_ON_PARENT)
self.connect('delete-event', lambda w,*a: w.hide() or True)
self.set_deletable(True)
vb = Gtk.VBox(False,2); self.add(vb)
hb = Gtk.HBox(False,2); vb.pack_start(hb, False, False, 0)
l = Gtk.Label()
l.set_markup(_("""Those procedures are to be used in case of emergency only,
for example to recover power failure."""))
hb.pack_start(l , False, False, 0)
hb = Gtk.HBox(False,2); vb.pack_start(hb, False, False, 0)
b = Gtk.Button(_('remove search index'))
b.set_tooltip_text(_('you will need to re-index all books'))
hb.pack_start(b , False, False, 0)
b.connect('clicked', self.rm_index_cb)
hb = Gtk.HBox(False,2); vb.pack_start(hb, False, False, 0)
b = Gtk.Button(_('remove meta data cache to generate a fresh one'))
b.set_tooltip_text(_('instead of incremental meta data gathering'))
hb.pack_start(b , False, False, 0)
b.connect('clicked', self.rm_mcache_cb)
b = Gtk.Button(stock = Gtk.STOCK_CLOSE)
hb.pack_end(b , False, False, 0)
b.connect('clicked', lambda w,*a: self.hide() or True)
#self.show_all()
def rm_index_cb(self, b):
if not sure(_("You will need to recreate search index in-order to search again.\nAre you sure you want to remove search index?"), self.main): return
p = os.path.join(self.main.th.prefixes[0], 'index')
try:
shutil.rmtree(p)
except OSError:
error(_("unable to remove folder [%s]" % p), self.main)
else:
info(_("Done"), self.main)
def rm_mcache_cb(self, b):
if not sure(_("Are you sure you want to remove search meta data cache?"), self.main): return
p = os.path.join(self.main.th.prefixes[0], 'cache', 'meta.db')
try:
os.unlink(p)
except OSError:
error(_("unable to remove file [%s]" % p), self.main)
else:
self.main.th.reconstructMetaIndexedFlags()
info(_("Done"), self.main)
class ThMainWindow(Gtk.Window):
def __init__(self, th, port, server):
self.th = th
self.port = port
self.server = server # we need this to quit the server when closing main window
Gtk.Window.set_default_icon_name('thawab')
Gtk.Window.__init__(self)
self.set_title(_('Thawab'))
self.set_default_size(600, 480)
self.maximize()
self.fixes_w = ThFixesWindow(self)
self.import_w = ThImportWindow(self)
self.ix_w = ThIndexerWindow(self)
vb = Gtk.VBox(False,0); self.add(vb)
tools = Gtk.Toolbar()
vb.pack_start(tools, False, False, 2)
self._content = ContentPane("http://127.0.0.1:%d/" % port, _("Thawab"))
vb.pack_start(self._content,True, True, 2)
self.axl = Gtk.AccelGroup()
self.add_accel_group(self.axl)
ACCEL_CTRL_KEY, ACCEL_CTRL_MOD = Gtk.accelerator_parse("<Control>")
ACCEL_SHFT_KEY, ACCEL_SHFT_MOD = Gtk.accelerator_parse("<Shift>")
b = Gtk.ToolButton.new_from_stock(Gtk.STOCK_NEW)
b.connect('clicked', lambda bb: self._content.new_tab())
b.add_accelerator("clicked", self.axl, ord('n'), ACCEL_CTRL_MOD, Gtk.AccelFlags.VISIBLE)
b.set_tooltip_text("{}\t{}".format(_("Open a new tab"), "(Ctrl+N)" ))
tools.insert(b, -1)
# TODO: add navigation buttons (back, forward ..etc.) and zoom buttons
tools.insert(Gtk.SeparatorToolItem(), -1)
img = Gtk.Image()
img.set_from_stock(Gtk.STOCK_CONVERT, Gtk.IconSize.BUTTON)
b = Gtk.ToolButton.new(icon_widget = img, label = _("Import"))
b.set_tooltip_text(_("Import .bok files"))
b.connect('clicked', self.import_cb)
tools.insert(b, -1)
img = Gtk.Image()
img.set_from_stock(Gtk.STOCK_FIND_AND_REPLACE, Gtk.IconSize.BUTTON)
b = Gtk.ToolButton(icon_widget = img, label = _("Index"))
b.set_is_important(True)
b.set_tooltip_text(_("Create search index"))
b.connect('clicked', lambda *a: self.ix_w.show_all())
tools.insert(b, -1)
tools.insert(Gtk.SeparatorToolItem(), -1)
img = Gtk.Image()
img.set_from_stock(Gtk.STOCK_ZOOM_IN, Gtk.IconSize.BUTTON)
b = Gtk.ToolButton(icon_widget = img, label = _("Zoom in"))
b.add_accelerator("clicked", self.axl, Gdk.KEY_equal, ACCEL_CTRL_MOD, Gtk.AccelFlags.VISIBLE)
b.add_accelerator("clicked", self.axl, Gdk.KEY_plus, ACCEL_CTRL_MOD, Gtk.AccelFlags.VISIBLE)
b.add_accelerator("clicked", self.axl, Gdk.KEY_KP_Add, ACCEL_CTRL_MOD, Gtk.AccelFlags.VISIBLE)
b.set_is_important(True)
b.set_tooltip_text("{}\t{}".format(_("Makes things appear bigger"), "(Ctrl++)"))
b.connect('clicked', lambda a: self._do_in_current_view("zoom_in"))
tools.insert(b, -1)
img = Gtk.Image()
img.set_from_stock(Gtk.STOCK_ZOOM_OUT, Gtk.IconSize.BUTTON)
b = Gtk.ToolButton(icon_widget = img, label = _("Zoom out"))
b.add_accelerator("clicked", self.axl, Gdk.KEY_minus, ACCEL_CTRL_MOD, Gtk.AccelFlags.VISIBLE)
b.add_accelerator("clicked", self.axl, Gdk.KEY_KP_Subtract, ACCEL_CTRL_MOD, Gtk.AccelFlags.VISIBLE)
b.set_tooltip_text("{}\t{}".format(_("Makes things appear smaller"), "(Ctrl+-)"))
b.connect('clicked', lambda a: self._do_in_current_view("zoom_out"))
tools.insert(b, -1)
img = Gtk.Image()
img.set_from_stock(Gtk.STOCK_ZOOM_100, Gtk.IconSize.BUTTON)
b = Gtk.ToolButton(icon_widget = img, label = _("1:1 Zoom"))
b.add_accelerator("clicked", self.axl, ord('0'), ACCEL_CTRL_MOD, Gtk.AccelFlags.VISIBLE)
b.add_accelerator("clicked", self.axl, Gdk.KEY_KP_0, ACCEL_CTRL_MOD, Gtk.AccelFlags.VISIBLE)
b.set_tooltip_text("{}\t{}".format(_("Restore original zoom factor"), "(Ctrl+0)"))
b.connect('clicked', lambda a: self._do_in_current_view("set_zoom_level",1.0))
tools.insert(b, -1)
tools.insert(Gtk.SeparatorToolItem(), -1)
img = Gtk.Image()
img.set_from_stock(Gtk.STOCK_PREFERENCES, Gtk.IconSize.BUTTON)
b = Gtk.ToolButton(icon_widget = img, label = _("Fixes"))
b.set_is_important(True)
b.set_tooltip_text(_("Misc Fixes"))
b.connect('clicked', self.fixes_cb)
tools.insert(b, -1)
tools.insert(Gtk.SeparatorToolItem(), -1)
img = Gtk.Image()
img.set_from_stock(Gtk.STOCK_HELP, Gtk.IconSize.BUTTON)
b = Gtk.ToolButton(icon_widget = img, label = _("Help"))
b.set_tooltip_text(_("Show user manual"))
b.connect('clicked', lambda a: self._content.new_tab ("http://127.0.0.1:%d/_theme/manual/manual.html" % port))
tools.insert(b, -1)
self._content.new_tab()
self.connect("delete_event", self.quit)
self.connect("destroy", self.quit)
## prepare dnd
self.drag_dest_set(Gtk.DestDefaults.ALL, [], Gdk.DragAction.COPY)
self.drag_dest_set_target_list(targets)
self.connect('drag-data-received', self.drop_data_cb)
self.show_all()
def _do_in_current_view (self, action, *a, **kw):
n = self._content.get_current_page()
if n < 0:
return
view = self._content.get_nth_page(n).get_child()
getattr(view, action)(*a,**kw)
def _do_in_all_views (self, action, *a, **kw):
for n in range(self._content.get_n_pages()):
view = self._content.get_nth_page(n).get_child()
getattr(view, action)(*a,**kw)
def fixes_cb(self, b):
if not self.fixes_w:
self.fixes_w = ThFixesWindow(self)
self.fixes_w.show_all()
def drop_data_cb(self, widget, dc, x, y, selection_data, info, t):
if not self.import_w:
self.import_w = ThImportWindow(self)
for i in selection_data.get_uris():
self.import_w.add_uri(i)
self.import_w.show_all()
#dc.drop_finish (True, t);
def import_cb(self, b):
if not self.import_w:
self.import_w = ThImportWindow(self)
self.import_w.show_all()
def quit(self,*args):
#if self.import_w.cancel_b.get_sensitive():
# self.import_w.show()
# return True
#if not self.ix_w.start_b.get_sensitive():
# self.ix_w.show_all()
# return True
self.server.running = False
Gtk.main_quit()
return False
THAWAB_HIGH_PORT = 18080
def launchServer():
exedir = os.path.dirname(sys.argv[0])
th = Thawab.core.ThawabMan(isMonolithic = False)
lookup = [
os.path.join(exedir,'thawab-themes'),
os.path.join(exedir,'..','share','thawab','thawab-themes'),
]
lookup.extend(map(lambda i: os.path.join(i, 'themes'), th.prefixes))
app = webApp(th,
'app',
lookup,
th.conf.get('theme', 'default'),
'/_theme/',)
launched = False
port = THAWAB_HIGH_PORT
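# probe ports upward from THAWAB_HIGH_PORT until one can be bound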
while(not launched):
try:
server = httpserver.serve(app,
host = '127.0.0.1',
port = port,
start_loop = False)
except socket.error:
port += 1
else:
launched = True
return th, port, server
def onlyterminal(): # run thawab from a terminal only (used by thawab-server)
exedir = os.path.dirname(sys.argv[0])
ld = os.path.join(exedir,'..','share','locale')
if not os.path.isdir(ld):
ld = os.path.join(exedir, 'locale')
gettext.install('thawab', ld, unicode = 0)
th, port, server = launchServer()
try:
thread=threading.Thread(target=server.serve_forever, args=())
thread.daemon=True
thread.start()
while True: time.sleep(100)
except (KeyboardInterrupt, SystemExit):
print '\nHope you had a nice time. Ojuba team.\n'
os._exit(0)
def main():
exedir = os.path.dirname(sys.argv[0])
ld = os.path.join(exedir,'..','share','locale')
if not os.path.isdir(ld):
ld = os.path.join(exedir, 'locale')
gettext.install('thawab', ld, unicode = 0)
th, port, server = launchServer()
GObject.threads_init()
Gdk.threads_init()
threading.Thread(target=server.serve_forever, args=()).start()
while(not server.running):
time.sleep(0.25)
Gdk.threads_enter()
w = ThMainWindow(th, port,server)
Gtk.main()
Gdk.threads_leave()
if __name__ == "__main__":
main()
thawab-4.1/Thawab/meta.py 0000664 0000000 0000000 00000021233 13052627552 0015342 0 ustar 00root root 0000000 0000000 # -*- coding: UTF-8 -*-
"""
The meta handling classes of thawab
Copyright © 2008, Muayyad Alsadi
Released under terms of Waqf Public License.
This program is free software; you can redistribute it and/or modify
it under the terms of the latest version Waqf Public License as
published by Ojuba.org.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
The Latest version of the license can be found on
"http://waqf.ojuba.org/license"
"""
import os
import os.path
import sqlite3
import threading
import time
import hashlib
from itertools import imap,groupby
from dataModel import *
from okasha.utils import fromFs, toFs, strverscmp
import re
def prettyId(i, empty_for_special = True):
"""convert the id into a more human form"""
if empty_for_special and i.startswith('_'):
return ''
return i.replace('_',' ')
def makeId(i):
"""convert the id into a canonical form"""
return i.strip().replace(' ','_').replace('/','_')
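# e.g. makeId(u' Sahih Bukhari ') -> u'Sahih_Bukhari'
# and prettyId(u'Sahih_Bukhari') -> u'Sahih Bukhari' (ids starting with '_'
# are special and pretty-print as an empty string by default)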
def metaVr(m):
return m[u"version"] + u"-" + unicode(m[u"releaseMajor"])
def metaVrr(m):
return u"-".join((m[u"version"],
unicode(m[u"releaseMajor"]),
unicode(m[u"releaseMinor"])))
def metaDict2Hash(meta, suffix = None):
k = filter(lambda i: i != 'cache_hash', meta.keys())
k.sort()
l = []
for i in k:
l.append(u"%s:%s" % (i,meta[i]))
l.append(u"timestamp:%d" % int(time.time()))
if suffix:
l.append(suffix)
return hashlib.sha256((u"-".join(l)).encode('utf-8')).digest().encode('base64').strip()[:-1]
class MCache(object):
"""a class holding metadata cache"""
def __init__(self, mcache_db, uri_list, smart = -1):
self.db_fn = mcache_db
if not os.path.exists(mcache_db):
create_new = True
else:
create_new = False
self._cn = {}
cn = self._getConnection()
if create_new:
cn.executescript(SQL_MCACHE_DATA_MODEL)
cn.commit()
self.__reload()
if self.__create_cache(uri_list, smart) > 0:
self.__reload()
def _getConnection(self):
n = threading.current_thread().name
if self._cn.has_key(n):
r = self._cn[n]
else:
r = sqlite3.connect(self.db_fn)
r.row_factory = sqlite3.Row
self._cn[n] = r
return r
def __reload(self):
self.__meta = map(lambda i: dict(i), self._getConnection().execute(SQL_MCACHE_GET_BY_KITAB))
self.__meta_by_uri = (dict(map(lambda a: (a[1]['uri'], a[0]), enumerate(self.__meta))))
self.__meta_uri_list = self.__meta_by_uri.keys()
self.__meta_by_kitab = {}
for k,G in groupby(enumerate(self.__meta), lambda a: a[1]['kitab']):
g = list(G)
self.__meta_by_kitab[k] = map(lambda i: i[0], g)
def load_from_uri(self, uri):
"""extract meta object from kitab's uri and return it"""
cn = sqlite3.connect(uri)
cn.row_factory=sqlite3.Row
c = cn.cursor()
try:
r = c.execute(SQL_MCACHE_GET).fetchone()
except sqlite3.OperationalError:
return None
if not r:
return None
return dict(r)
def __cache(self, c, uri, meta = None):
if not meta:
meta = self.load_from_uri(uri)
if not meta:
return 0
#if drop_old_needed:
meta['uri'] = uri
meta['mtime'] = os.path.getmtime(toFs(uri))
meta['flags'] = 0
c.execute(SQL_MCACHE_ADD, meta)
return 1
def __create_cache(self, uri_list, smart = -1):
"""
create cache and return the number of newly created meta caches
smart is how fast you want to do that:
* 0 force regeneration of entire meta cache
* 1 regenerate cache when hash differs (it would need to open every kitab)
* 2 regenerate when mtime differs
* -1 do not update cache for existing meta (even if the file is changed)
"""
cn = self._getConnection()
c = cn.cursor()
r = 0
uri_set = set(uri_list)
#c.execute('BEGIN TRANSACTION')
# remove meta for kitab that no longer exists
deleted = filter(lambda i: i not in uri_set, self.__meta_uri_list)
for uri in deleted:
c.execute(SQL_MCACHE_DROP, (uri,))
r += 1
# update meta for the rest (in a smart way)
for uri in uri_list:
if not os.access(toFs(uri), os.R_OK): continue
if smart == 0:
# force recreation of cache, drop all, then create all
r += self.__cache(c, uri) # re-extract meta from the kitab itself
continue
meta = None
drop_old_needed = False
cache_needed = False
if uri not in self.__meta_uri_list:
cache_needed = True
else:
drop_old_needed = True
cache_needed = True
if smart == -1: continue # don't replace existing cache
elif smart == 2: # rely on mtime
if abs(os.path.getmtime(toFs(uri)) - self.getByUri(uri)['mtime']) < 1e-5:
continue
elif smart == 1: # rely on a hash saved inside the database
old_meta = self.getByUri(uri)
meta = self.load_from_uri(uri)
if not meta or old_meta['hash'] == meta['hash']:
continue
if cache_needed:
r += self.__cache(c, uri, meta)
#c.execute('END TRANSACTION')
cn.commit()
return r
def getKitabList(self):
return self.__meta_by_kitab.keys()
def getUriList(self):
return self.__meta_by_uri.keys()
def getByUri(self, uri):
"""return meta object for uri"""
i = self.__meta_by_uri.get(uri,None)
if i == None: return None
return self.__meta[i]
def getByKitab(self, kitab):
"""return a list of meta objects for a kitab"""
a = self.__meta_by_kitab.get(kitab,None)
if not a:
return None
return map(lambda i: self.__meta[i], a)
def _latest(self, a):
lm = a[0]
l = metaVrr(lm)
for m in a[1:]:
v = metaVrr(m)
if strverscmp(v, l) > 0:
lm = m
l = v
return lm
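# e.g. given metas whose version-major-minor strings are u'1.0-2-1' and
# u'1.0-2-3', _latest returns the latter (strverscmp ordering)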
def getLatestKitab(self, kitab):
"""return a meta object for latest kitab (based on version)"""
a = self.__meta_by_kitab.get(kitab, None)
if not a:
return None
return self._latest([self.__meta[i] for i in a])
def getLatestKitabV(self, kitab, v):
"""
given kitab name and version
return a meta object for latest kitab (based on version)
"""
a = self.__meta_by_kitab.get(kitab, None)
if not a:
return None
ma = filter(lambda m: m[u'version'] == v,[self.__meta[i] for i in a])
if not ma:
return None
return self._latest(ma)
def getLatestKitabVr(self, kitab, v, r):
"""
given kitab name and version and major release
return a meta object for latest kitab (based on version)
"""
if type(r) != int:
r = int(r)
a = self.__meta_by_kitab.get(kitab, None)
ma = filter(lambda m: m[u'version'] == v and m[u'releaseMajor'] == r,
[self.__meta[i] for i in a])
if not ma:
return None
return self._latest(ma)
def setIndexedFlags(self, uri, flags=2):
cn = self._getConnection()
cn.execute(SQL_MCACHE_SET_INDEXED, (flags, uri,))
cn.commit()
def setAllIndexedFlags(self, flags=0):
cn = self._getConnection()
cn.execute(SQL_MCACHE_SET_ALL_INDEXED, (flags,))
cn.commit()
def getUnindexedList(self):
"""
return a list of meta dicts for Kutub that are likely to be unindexed
"""
return map(lambda i: dict(i), self._getConnection().execute(SQL_MCACHE_GET_UNINDEXED))
def getDirtyIndexList(self):
"""
return a list of meta dicts for Kutub that are likely to have broken index
"""
return map(lambda i: dict(i), self._getConnection().execute(SQL_MCACHE_GET_DIRTY_INDEX))
def getIndexedList(self):
"""
return a list of meta dicts for Kutub that are already in index.
"""
return map(lambda i: dict(i), self._getConnection().execute(SQL_MCACHE_GET_INDEXED))
thawab-4.1/Thawab/platform.py 0000664 0000000 0000000 00000004613 13052627552 0016243 0 ustar 00root root 0000000 0000000 # -*- coding: UTF-8 -*-
"""
Platform specific routines of thawab
Copyright © 2008-2010, Muayyad Alsadi
Released under terms of Waqf Public License.
This program is free software; you can redistribute it and/or modify
it under the terms of the latest version Waqf Public License as
published by Ojuba.org.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
The Latest version of the license can be found on
"http://waqf.ojuba.org/license"
"""
import sys, os, os.path
from glob import glob
if sys.platform == 'win32':
def uri_to_filename(u):
if len(u) <= 1:
return u
return u[1:].replace('/','\\')
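# e.g. uri_to_filename(u'/C:/thawab/db/x.ki') -> u'C:\\thawab\\db\\x.ki'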
def get_drives():
return filter(lambda j: os.path.exists(j), [chr(i)+':\\' for i in range(67,91)])
try:
from winpaths import get_appdata as application_data
except ImportError:
try:
from winshell import application_data
except ImportError:
try:
from win32com.shell import shell
def application_data():
return shell.SHGetFolderPath(0, 26, 0, 0) # 26 = CSIDL_APPDATA
except ImportError:
application_data = None
if application_data:
app_data = application_data()
th_conf = os.path.join(app_data, u"thawab", "conf", "main.conf")
else:
app_data = u"C:\\"
th_conf = u"C:\\thawab.conf"
else:
app_data = u"/usr/share/"
application_data = None
def uri_to_filename(u):
return u
def get_drives():
return []
th_conf = os.path.expanduser('~/.thawab/conf/main.conf')
def guess_prefixes():
l = []
ed = os.path.join(os.path.dirname(sys.argv[0]), u'thawab-data')
ed_1st = False
if os.path.isdir(ed) and os.access(ed, os.W_OK):
l.append(ed)
ed_1st = True
if sys.platform == 'win32':
l.append(os.path.join(app_data,'thawab'))
if not ed_1st:
l.append(ed)
l.extend([os.path.join(d, 'thawab-data') for d in get_drives()])
else:
l.append(os.path.expanduser('~/.thawab'))
if not ed_1st:
l.append(ed)
l.append(u'/usr/local/share/thawab')
l.append(u'/usr/share/thawab')
return l
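# e.g. on Linux this typically yields [exedir/thawab-data, ~/.thawab,
# /usr/local/share/thawab, /usr/share/thawab]; the exedir entry comes
# first only when it exists and is writable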
thawab-4.1/Thawab/shamelaUtils.py 0000664 0000000 0000000 00000123723 13052627552 0017056 0 ustar 00root root 0000000 0000000 # -*- coding: UTF-8 -*-
"""
The shamela related tools for thawab
Copyright © 2008-2009, Muayyad Saleh Alsadi
Released under terms of Waqf Public License.
This program is free software; you can redistribute it and/or modify
it under the terms of the latest version Waqf Public License as
published by Ojuba.org.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
The Latest version of the license can be found on
"http://waqf.ojuba.org/license"
"""
import sys, os, os.path
import re
import sqlite3
import bisect
from okasha.utils import cmp_bisect_right
from subprocess import Popen,PIPE
from itertools import groupby,imap
from meta import MCache, prettyId, makeId
schema = {
'main':"bkid INTEGER, bk TEXT, shortname TEXT, cat INTEGER, betaka TEXT, inf TEXT, bkord INTEGER DEFAULT -1, authno INTEGER DEFAULT 0, auth TEXT, authinfo TEXT, higrid INTEGER DEFAULT 0, ad INTEGER DEFAULT 0, islamshort INTEGER DEFAULT 0, blnk TEXT",
'men': "id INTEGER, arrname TEXT, isoname TEXT, dispname TEXT",
'shorts': "bk INTEGER, ramz TEXT, nass TEXT",
'mendetail': "spid INTEGER PRIMARY KEY, manid INTEGER, bk INTEGER, id INTEGER, talween TEXT",
'shrooh': "matn INTEGER, matnid INTEGER, sharh INTEGER, sharhid INTEGER, PRIMARY KEY (sharh, sharhid)",
'cat':"id INTEGER PRIMARY KEY, name Text, catord INTEGER, lvl INTEGER",
'book':"id, nass TEXT, part INTEGER DEFAULT 0, page INTEGER DEFAULT 0, hno INTEGER DEFAULT 0, sora INTEGER DEFAULT 0, aya INTEGER DEFAULT 0, na INTEGER DEFAULT 0, blnk TEXT",
'toc': "id INTEGER, tit TEXT, lvl INTEGER DEFAULT 1, sub INTEGER DEFAULT 0"
}
schema_index = {
'main':"CREATE INDEX MainBkIdIndex on main (bkid);",
'men': "CREATE INDEX MenIdIndex on men (id); CREATE INDEX MenIsoNameIndex on men (isoname);",
'shorts': "CREATE INDEX ShortsIndex on shorts (bk,ramz);",
'mendetail': "CREATE INDEX MenDetailSpIdIndex on mendetail (spid);",
'shrooh': "CREATE INDEX ShroohIndex on shrooh (sharhid);",
'book':"CREATE INDEX Book%(table)sIdIndex on %(table)s (id);",
'toc': "CREATE INDEX Toc%(table)sIdIndex on %(table)s (id);"
}
hashlen = 32 # must be divisible by 4
# some mark to know how and where to cut
mark = "-- CUT HERE STUB (%s) BUTS EREH TUC --\n" % \
os.urandom(hashlen*3/4).encode('base64')[:hashlen]
table_cols = dict(map(lambda tb: (tb,
map(lambda i: i.split()[0],
schema[tb].split(','))),
schema.keys()))
table_col_defs = dict(map(lambda tb: (tb,
dict(map(lambda i: (i.strip().split()[0],
i.strip()),
schema[tb].split(',')))),
schema.keys()))
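# e.g. table_cols['toc'] == ['id', 'tit', 'lvl', 'sub'] and
# table_col_defs['toc']['lvl'] == 'lvl INTEGER DEFAULT 1'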
# transformations
dos2unix_tb = {13: 10}
normalize_tb = {
65: 97, 66: 98, 67: 99, 68: 100, 69: 101, 70: 102, 71: 103, 72: 104, 73: 105, 74: 106, 75: 107, 76: 108, 77: 109, 78: 110, 79: 111, 80: 112, 81: 113, 82: 114, 83: 115, 84: 116, 85: 117, 86: 118, 87: 119, 88: 120, 89: 121, 90: 122,
1600: None, 1569: 1575, 1570: 1575, 1571: 1575, 1572: 1575, 1573: 1575, 1574: 1575, 1577: 1607, 1611: None, 1612: None, 1613: None, 1614: None, 1615: None, 1616: None, 1617: None, 1618: None, 1609: 1575}
spaces='\t\n\r\f\v'
spaces_d = dict(map(lambda s: (ord(s),32),list(spaces)))
schema_fix_del = re.compile('\(\d+\)') # match digits in parenthesis (after types) to be removed
schema_fix_text = re.compile('Memo/Hyperlink',re.I)
schema_fix_int = re.compile('(Boolean|Byte|Byte|Numeric|Replication ID|(\w+ )?Integer)',re.I)
sqlite_cols_re = re.compile("\((.*)\)",re.M | re.S)
no_sql_comments=re.compile('^--.*$',re.M)
shamela_footers_re = re.compile(u'^(¬?_{4,})$',re.M)
digits_re = re.compile(r'\d+')
no_w_re = re.compile(ur'[^A-Za-zابتثجحخدذرزسشصضطظعغفقكلمنهوي\s]')
# one to one transformations that does not change chars order
sh_digits_to_spaces_tb = {
48:32, 49:32, 50:32, 51:32, 52:32,
53:32, 54:32, 55:32, 56:32, 57:32
}
sh_normalize_tb = {
65: 97, 66: 98, 67: 99, 68: 100, 69: 101, 70: 102, 71: 103, 72: 104, 73: 105, 74: 106, 75: 107, 76: 108, 77: 109, 78: 110, 79: 111, 80: 112, 81: 113, 82: 114, 83: 115, 84: 116, 85: 117, 86: 118, 87: 119, 88: 120, 89: 121, 90: 122,
1569: 1575, 1570: 1575, 1571: 1575, 1572: 1575, 1573: 1575, 1574: 1575, 1577: 1607, 1609: 1575,
8: 32, 1600:32, 1632: 48, 1633: 49, 1634: 50, 1635: 51, 1636: 52, 1637: 53, 1638: 54, 1639: 55, 1640: 56, 1641: 57, 1642:37, 1643:46
}
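# usage sketch: u_text.translate(sh_normalize_tb) folds hamza/alef variants
# to bare alef, taa marbuta to haa, and Arabic-Indic digits to ASCII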
# TODO: remove unused variables and methods
# shorts
std_shorts={
u'A': u'صلى الله عليه وسلم',
u'B': u'رضي الله عن',
u'C': u'رحمه الله',
u'D': u'عز وجل',
u'E': u'عليه الصلاة و السلام',
}
footnotes_cnd = [] # candidate, in the form of (footnote_mark, footnote_text) tuples
footnotes =[]
def footer_shift_cb(mi):
global footnotes_cnd, footnotes
if footnotes_cnd and footnotes_cnd[0][0] == mi.group(1):
# int(mi.group(1))
footnotes.append(footnotes_cnd.pop(0))
return " ^[" + str(len(footnotes)) + "]"
return mi.group(0)
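# sketch of the intended call site (hypothetical): each page body is passed
# through re.sub(footnote_re, footer_shift_cb, body_txt), so a matched anchor
# is replaced with ' ^[n]' once its text is queued in footnotes_cnd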
class ShamelaSqlite(object):
mode=None
def __init__(self,
src,
cn = None,
releaseMajor = 0,
releaseMinor = 0,
progress = None,
progress_args = [],
progress_kw = {},
progress_dict = None):
"""import the bok file into sqlite"""
self.releaseMajor = releaseMajor
self.releaseMinor = releaseMinor
self.progress = progress
self.progress_args = progress_args
self.progress_kw = progress_kw
self.progress_dict = progress_dict
self.tables = None
self.tablesFn = {}
self.src_is_dir = False
if os.path.isdir(src):
self.sh_prefix = src
self.src_is_dir = True
elif os.path.isfile(src):
self.bok_fn = src
else:
raise OSError
self.metaById = {}
self._blnk = {}
self.xref = {}
self.encoding_fix_needed = None # True/False or None ie. not yet checked
self.__bkids = None
self.__commentaries = None
self.version, self.tb, self.bkids = self.identify()
if self.progress_dict == None:
self.progress_dict = {}
# note: the difference between tb and self.tables is that self.tables keeps the case reported by mdbtools while the tb keys are lower-cased
self.cn = cn or sqlite3.connect(':memory:', isolation_level = None)
self.cn.row_factory = sqlite3.Row
self.c = self.cn.cursor()
self.imported_tables = []
self.__meta_by_bkid = {}
def set_xref(self, bkid, pg_id, xref):
if self.xref.has_key(bkid):
self.xref[bkid].append( (pg_id, xref,) )
else:
self.xref[bkid] = [ (pg_id, xref,) ]
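# the per-book xref list is assumed to be appended in ascending pg_id order,
# which lets get_xref below locate the nearest preceding anchor by bisection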
def get_xref(self, bkid, pg_id):
if self.xref.has_key(bkid):
i = cmp_bisect_right( lambda a,b: cmp(a[0], b), self.xref[bkid], pg_id)
if i > 0:
return self.xref[bkid][i-1][1]
return None
def identify(self):
tables = self.getTables() # Note: would raise OSError or TypeError
if len(tables) == 0:
raise TypeError
tables.sort()
tb = dict(map(lambda s: (s.lower(),s), tables))
if 'book' in tables and 'title' in tables:
return (2,tb,[])
bkid = map(lambda i:int(i[1:]),
filter(lambda i: i[0] == 'b' and i[1:].isdigit(),
tables))
bkid.sort()
return (3, tb, bkid)
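# e.g. a Shamela 3 archive yields (3, tb, [bkid, ...]) where tb maps
# lower-cased table names to their original case; version 2 archives
# (having 'book' and 'title' tables) yield (2, tb, [])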
def _getTablesInFile(self, fn):
try:
p = Popen(['mdb-tables', '-1', fn],
0,
stdout = PIPE,
env = {'MDB_JET3_CHARSET':'cp1256',
'MDB_ICONV':'UTF-8'})
except OSError:
raise
try:
tables = p.communicate()[0].replace('\r','').strip().split('\n')
except OSError:
raise
r = p.returncode
del p
if r != 0:
raise TypeError
tables = filter(lambda t: not t.isdigit(), tables)
return tables
def _getTablesInBok(self):
if self.tables:
return self.tables
self.tables = self._getTablesInFile(self.bok_fn)
self.tablesFn = dict(((t,self.bok_fn) for t in self.tables))
return self.tables
def _getTablesInDir(self):
if self.tables:
return self.tables
self.tables = []
for f in ("main.mdb", "special.mdb"):
fn = os.path.join(self.sh_prefix, "Files", f)
tb = self._getTablesInFile(fn)
self.tables.extend(tb)
self.tablesFn.update(dict(((t,fn) for t in tb)))
return self.tables
def _getTableFile(self, tb):
if self.src_is_dir:
return self.tablesFn[tb]
return self.bok_fn
def getTables(self):
if self.tables:
return self.tables
if self.src_is_dir:
return self._getTablesInDir()
return self._getTablesInBok()
def __shamela3_fix_insert(self, sql_cmd, prefix = "OR IGNORE INTO tmp_"):
"""Internal function used by importTable"""
if prefix and sql_cmd[0].startswith('INSERT INTO '):
sql_cmd[0] = 'INSERT INTO ' + prefix + sql_cmd[0][12:]
sql = ''.join(sql_cmd)
self.c.execute(sql)
def __schemaGetCols(self, r):
"""used internally by importTableSchema"""
m = sqlite_cols_re.search( no_sql_comments.sub('', r) )
if not m:
return []
return map(lambda i: i.split()[0], m.group(1).split(','))
def importTableSchema(self, Tb, tb, is_tmp = False,prefix = 'tmp_'):
"""create schema for table"""
if is_tmp:
temp = 'temp'
else:
temp = ''
fn = self._getTableFile(Tb)
opts=['mdb-schema', '-S','-T', Tb, fn]
e=""
if self.mode==None or self.mode=='0.6':
self.mode='0.6'
print "MODE 0.6"
pipe = Popen(opts, 0, stdout = PIPE, stderr = PIPE, env = {'MDB_JET3_CHARSET':'cp1256','MDB_ICONV':'UTF-8'})
r,e = pipe.communicate()
print e
r=r.replace('\r', '')
#if pipe.returncode != 0:
#raise TypeError
if self.mode=='0.7' or ((e.startswith("mdb-schema: invalid option") or e.startswith("option parsing failed: Unknown option")) and opts[1]=='-S'):
print "MODE 0.7"
del opts[1]
self.mode='0.7'
pipe = Popen(opts, 0, stdout = PIPE, env = {'MDB_JET3_CHARSET':'cp1256','MDB_ICONV':'UTF-8'})
r = pipe.communicate()[0].replace('\r', '').replace('[', '').replace(']', '')
if pipe.returncode != 0:
raise TypeError
sql = schema_fix_text.sub('TEXT',
schema_fix_int.sub('INTEGER',
schema_fix_del.sub('',r))).lower()
sql = sql.replace('create table ',
' '.join(('create ',
temp,
' table ',
prefix,)))
sql = sql.replace('drop table ',
'drop table if exists ' + prefix)
cols = self.__schemaGetCols(sql)
if table_cols.has_key(tb):
missing = filter(lambda i: not i in cols,table_cols[tb])
missing_def = u', '.join(map(lambda i: table_col_defs[tb][i], missing))
else:
missing = []
missing_def = u''
if missing_def:
sql = sql.replace('\n)',',' + missing_def + '\n)')
sql += schema_index.get(tb,'') % {'table': Tb.lower()}
sql_l = no_sql_comments.sub('', sql).split(';')
for l in sql_l:
l = l.strip()
if l:
try:
self.c.execute(l)
except:
print l
raise
def importTable(self,
Tb,
tb,
tb_prefix = None,
is_tmp = False,
is_ignore = False,
is_replace = False):
"""
import a table where:
* Tb is the case-sesitive table name found reported in mdbtools.
* tb is the name in our standard schema,
usually tb = Tb.lower() except for book and toc where its Tb is b${bok_id},
t${bok_id}
* tb_prefix a prefix added to tb [default is tmp_ if is_tmp otherwise it's '']
"""
tb_prefix = is_tmp and 'tmp_' or ''
if Tb in self.imported_tables:
return
self.importTableSchema(Tb, tb, is_tmp, tb_prefix)
fn = self._getTableFile(Tb)
if self.mode=='0.6': opts=['mdb-export', '-R',';\n'+mark,'-I', fn, Tb]
else: opts=['mdb-export', '-R','\n'+mark,'-I', 'postgres', fn, Tb]
print "** opts: ",opts
print "** mode: ", self.mode
pipe = Popen(opts,
0,
stdout = PIPE,
env = {'MDB_JET3_CHARSET':'cp1256',
'MDB_ICONV':'UTF-8'})
sql_cmd = []
prefix = ""
if is_ignore:
prefix = "OR IGNORE INTO "
elif is_replace:
prefix = "OR REPLACE INTO "
prefix += tb_prefix
for l in pipe.stdout:
l = l.replace('\r','\n')
# output encoding in mdbtools in windows is cp1256, this is a bug in it
if self.encoding_fix_needed == None:
try:
l.decode('UTF-8')
except:
self.encoding_fix_needed = True
l = l.decode('cp1256')
else:
self.encoding_fix_needed = False
elif self.encoding_fix_needed:
l = l.decode('cp1256')
if l == mark:
self.__shamela3_fix_insert(sql_cmd,prefix)
sql_cmd = []
else:
sql_cmd.append(l)
if len(sql_cmd):
self.__shamela3_fix_insert(sql_cmd,prefix); sql_cmd = []
pipe.wait() # TODO: why is this needed
if pipe.returncode != 0:
raise TypeError
del pipe
self.imported_tables.append(Tb)
def toSqlite(self, in_transaction = True, bkids=None):
"""
return True if success, or False if canceled
"""
if in_transaction:
self.c.execute('BEGIN TRANSACTION')
tables = self.getTables()
is_special = lambda t: (t.lower().startswith('t') or \
t.lower().startswith('b')) and \
t[1:].isdigit()
is_not_special = lambda t: not is_special(t)
s_tables = filter(is_special, tables)
g_tables = filter(is_not_special, tables)
if bkids:
# filter bkids in s_tables
s_tables =filter(lambda t: int(t[1:]) in bkids, s_tables)
progress_delta = 1.0 / (len(s_tables) + len(g_tables)) * 100.0
progress = 0.0
for t in g_tables:
if self.progress_dict.get('cancel', False):
return False
if self.progress:
self.progress("importing table [%s]" % t,
progress,
*self.progress_args,
**self.progress_kw)
progress += progress_delta
self.importTable(t, t.lower())
for t in s_tables:
if self.progress_dict.get('cancel', False):
return False
if self.progress:
self.progress("importing table [%s]" % t,
progress,
*self.progress_args,
**self.progress_kw)
progress += progress_delta
if t.lower().startswith('t'):
self.importTable(t, 'toc')
else:
self.importTable(t, 'book')
progress = 100.0
if self.progress:
self.progress("finished, committing ...",
progress,
*self.progress_args,
**self.progress_kw)
if in_transaction:
self.c.execute('END TRANSACTION')
self.__getCommentariesHash()
return True
def __getCommentariesHash(self):
if self.__commentaries != None:
return self.__commentaries
self.__commentaries={}
for a in self.c.execute('SELECT DISTINCT matn, sharh FROM shrooh'):
try:
r = (int(a[0]),int(a[1])) # some books have string bkids rather than integers; coerce them
except ValueError:
continue # skip non integer book ids
if self.__commentaries.has_key(r[0]):
self.__commentaries[r[0]].append(r[1])
else:
self.__commentaries[r[0]] = [r[1]]
for i in self.getBookIds():
if not self.__commentaries.has_key(i):
self.__commentaries[i] = []
return self.__commentaries
def authorByID(self, authno, main_tb = {}):
# TODO: use authno to search shamela specific database
a, y = '_unset',0
if main_tb:
a = makeId(main_tb.get('auth','') or '')
y = main_tb.get('higrid',0) or 0
if not y:
y = main_tb.get('ad',0) or 0
if isinstance(y,basestring) and y.isdigit():
y = int(y)
else:
m = digits_re.search(unicode(y))
if m:
y = int(m.group(0))
else:
y = 0
return a, y
def classificationByBookId(self, bkid):
return '_unset'
def getBookIds(self):
if self.__bkids != None:
return self.__bkids
r = self.c.execute('SELECT bkid FROM main')
self.__bkids = map(lambda a: a[0],r.fetchall() or [])
if self.__commentaries != None:
# sort to make sure we import the book before its commentary
self.__bkids.sort(lambda a,b: (int(a in self.__commentaries.get(b,[])) << 1) - 1)
return self.__bkids
def _is_tafseer(self, bkid):
r = self.c.execute('''SELECT sora, aya FROM b%d WHERE sora>0 and sora <115 and aya>0 LIMIT 1''' % bkid).fetchone()
return bool(r)
def _get_matn(self, sharh_bkid):
r = self.c.execute('''SELECT matn, matnid, sharh, sharhid FROM shrooh WHERE sharh = ? LIMIT 1''', (sharh_bkid, ) ).fetchone()
if not r:
return -1
return int(r['matn'])
def getBLink(self, bkid):
if not self._blnk.has_key(bkid):
r = self.c.execute('SELECT blnk FROM main WHERE bkid = ?', (bkid,)).fetchone()
self._blnk[bkid] = r['blnk']
return self._blnk[bkid]
def getBookMeta(self, bkid):
if self.__meta_by_bkid.has_key(bkid):
return self.__meta_by_bkid[bkid]
else:
r = self.c.execute('SELECT bk, shortname, cat, betaka, inf, bkord, authno, auth, higrid, ad, islamshort FROM main WHERE bkid = ?', (bkid,)).fetchone()
if not r:
m = None
else:
r = dict(r)
# FIXME: make "releaseMajor" "releaseMinor" integers
m = {
"repo":"_user", "lang":"ar", "type": int(self._is_tafseer(bkid)),
"version":"0."+str(bkid), "releaseMajor":0, "releaseMinor":0,
'originalKitab':None, 'originalVersion':None,
'originalAuthor':None, 'originalYear':None
}
m['kitab'] = makeId(r['bk'])
m['author'], m['year'] = self.authorByID(r['authno'], r)
m['classification'] = self.classificationByBookId(bkid)
m['keywords'] = u''
matn_bkid = self._get_matn(bkid)
#print "%d is sharh for %d" % (bkid, matn_bkid)
if matn_bkid>0:
matn_m = self.getBookMeta(matn_bkid)
if matn_m:
m['originalKitab'] = matn_m['kitab']
m['originalVersion'] = matn_m['version']
m['originalAuthor'] = matn_m['author']
m['originalYear'] = matn_m['year']
self.__meta_by_bkid[bkid] = m
return m
class _foundShHeadingMatchItem():
def __init__(self, start, end = -1, txt = '', depth = -1, fuzzy = -1):
self.start = start
self.end = end
self.txt = txt
self.depth = depth
self.fuzzy = fuzzy
self.suffix = ''
def __repr__(self):
return (u"<start=%d end=%d txt=%s>" % (self.start,
self.end,
self.txt)).encode('utf-8')
def overlaps_with(self,b):
return b.end > self.start and self.end > b.start
def __cmp__(self, b):
return cmp(self.start, b.start)
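# A doctest-style sketch of the matching semantics above (made-up offsets):
# items order by their start offset and collide when the [start, end) ranges intersect.
#   >>> a = _foundShHeadingMatchItem(0, 10)
#   >>> a.overlaps_with(_foundShHeadingMatchItem(5, 15))
#   True
#   >>> a.overlaps_with(_foundShHeadingMatchItem(10, 15)) # touching ends don't overlap
#   False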
def _fixHeadBounds(pg_txt, found):
for i, f in enumerate(found):
if f.fuzzy >= 4:
# then the heading is part of some text
f.end = f.start
f.suffix = u'\u2026'
if f.fuzzy >= 7:
#then move f.start to the last \n
f.end = max(pg_txt[:f.end].rfind('\n'), 0)
if i > 0:
f.end = max(f.end,found[i-1].end)
f.start = min(f.start, f.end)
def reformat(txt, shorts_t, shorts_dict):
txt = txt.replace('\n', '\n\n')
if shorts_t & 1:
for k in std_shorts:
txt = txt.replace(k, std_shorts[k])
for k in shorts_dict:
txt = txt.replace(k, "\n====== %s ======\n\n" % shorts_dict[k])
return txt
def set_get_xref(xref, h_tags, sh, bkid, pg_id, matn, matnid):
h_tags['header'] = xref
sh.set_xref(bkid, pg_id, xref)
if matn and matnid and sh.metaById.has_key(matn):
m = sh.metaById[matn]
xref = sh.get_xref(matn, matnid)
if xref:
h_tags['embed.original.section'] = xref
ss_re = re.compile(" +")
re_ss_re = re.compile("( \*){2,}")
def ss(txt):
"""squeeze spaces"""
return ss_re.sub(" ", txt)
def re_ss(txt):
"""squeeze spaces in re"""
return re_ss_re.sub(" *", ss(txt))
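# Doctest-style sketch of the two helpers (illustrative strings only):
#   >>> ss(u"a  b   c")
#   u'a b c'
#   >>> re_ss(u"x *  * * y") # collapses the repeated " *" runs used in the fuzzy heading REs
#   u'x * y'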
def shamelaImport(cursor,
sh,
bkid,
footnote_re = ur'\((\d+)\)',
body_footnote_re = ur'\((\d+)\)',
ft_prefix_len = 1,
ft_suffix_len = 1):
"""
import a ShamelaSqlite book as thawab kitab object, where
* cursor - a cursor for an empty thawab kitab object
* sh - ShamelaSqlite object
* bkid - the id of the shamela book to be imported
this function returns the cached meta dictionary
"""
global footnotes_cnd, footnotes
shamela_footer_re = re.compile(footnote_re, re.M | re.U)
shamela_shift_footers_re = re.compile(body_footnote_re, re.M | re.U)
ki = cursor.ki
# NOTE: page id refers to the number used as id in shamela not thawab
c = sh.c
# step 0: prepare shorts
shorts_t = c.execute("SELECT islamshort FROM main WHERE bkid = ?",
(bkid,)).fetchone()
if shorts_t:
shorts_t = shorts_t[0] or 0
else:
shorts_t = 0
if shorts_t > 1:
shorts_dict = dict(c.execute("SELECT ramz,nass FROM shorts WHERE bk = ?",
(bkid,)).fetchall())
else:
shorts_dict = {}
# step 1: import meta
meta = sh.getBookMeta(bkid)
ki.setMCache(meta)
# step 2: prepare topics hashed by page_id
r = c.execute("SELECT id,tit,lvl FROM t%d ORDER BY id,sub" % bkid).fetchall()
# NOTE: we only need page_id,title and depth, sub is only used to sort them
toc_ls = filter(lambda i: i[2] and i[1], [list(i) for i in r])
if not toc_ls:
raise TypeError # no text in the book
if toc_ls[0][0] != 1:
toc_ls.insert(0, [1, sh.getBookMeta(bkid)['kitab'].replace('_',' '), toc_ls[0][2]])
toc_hash = map(lambda i: (i[1][0],i[0]),enumerate(toc_ls))
# toc_hash.sort(lambda a,b: cmp(a[0],b[0])) # FIXME: this is not needed!
toc_hash = dict(map(lambda j: (j[0],map(lambda k:k[1], j[1])),
groupby(toc_hash, lambda i: i[0])))
# NOTE: toc_hash[pg_id] holds list of indexes in toc_ls
found = []
parents = [ki.root]
depths = [-1] # -1 is used to indicate depth or level as shamela could use 0
last = u''
started = False
rm_fz4_re = re.compile(ur'(?:[^\w\n]|[_ـ])',re.M | re.U) # [\W_ـ] without \n
rm_fz7_re = re.compile(ur'(?:[^\w\n]|[\d_ـ])',re.M | re.U) # [\W\d_ـ] without \n
def _shamelaFindHeadings(page_txt,
page_id,
d,
h,
headings_re,
heading_ix,
j,
fuzzy):
# fuzzy is saved because it could be used later to figure out whether to add a newline
# or to move the start point
for m in headings_re.finditer(page_txt): #
# NOTE: since this is not exact, make it ends at start. FIXME: it was m.end()
candidate = _foundShHeadingMatchItem(m.start(), m.start(), h, d, fuzzy)
ii = bisect.bisect_left(found, candidate) # only check for overlaps in found[ii:]
# skip matches that overlaps with previous headings
if any(imap(lambda mi: mi.overlaps_with(candidate),found[ii:])):
continue
bisect.insort(found, candidate) # add the candidate to the found list
toc_hash[page_id][j] = None
return True
return False
def _shamelaFindExactHeadings(page_txt, page_id, f, d, heading, heading_ix,j, fuzzy):
shift = 0
s = f % page_txt
h = f % heading
#print "*** page:", s
#print "*** h:", page_id, heading_ix, fuzzy, "[%s]" % h.encode('utf-8')
l = len(heading)
while(True):
i = s.find(h)
if i >= 0:
# print "found"
candidate = _foundShHeadingMatchItem(i+shift, i+shift+l, h, d, fuzzy)
# only check for overlaps in found[ii:]
ii = bisect.bisect_left(found, candidate)
# skip matches that overlaps with previous headings
if not any(imap(lambda mi: mi.overlaps_with(candidate),found[ii:])):
# add the candidate to the found list
bisect.insort(found, candidate)
toc_hash[page_id][j] = None
return True
# skip to i+l
s = s[i+l:]
shift += i + l
# not found:
return False
return False
def _shamelaHeadings(page_txt, page_id):
l = toc_hash.get(page_id, [])
if not l:
return
txt = None
txt_no_d = None
# for each heading
for j, ix in enumerate(l):
h, d = toc_ls[ix][1:3]
# search for entire line matches
# (exact, then only letters and digits then only letters: 1,2,3)
# search for leading matches
# (exact, then only letters and digits then only letters: 4,5,6)
# search for matches anywhere
# (exact, then only letters and digits then only letters: 7,8,9)
if _shamelaFindExactHeadings(page_txt, page_id, "\n%s\n", d, h, ix, j, 1):
continue
if not txt:
txt = no_w_re.sub(' ', page_txt.translate(sh_normalize_tb))
h_p = no_w_re.sub(' ', h.translate(sh_normalize_tb)).strip()
if h_p: # if normalized h_p is not empty
# NOTE: no need for map h_p on re.escape() because it does not contain special chars
h_re_entire_line = re.compile(re_ss(ur"^\s*%s\s*$" % ur" *".join(list(h_p))), re.M)
if _shamelaFindHeadings(txt, page_id, d, h, h_re_entire_line, ix, j, 2):
continue
if not txt_no_d:
txt_no_d = txt.translate(sh_digits_to_spaces_tb)
h_p_no_d = h_p.translate(sh_digits_to_spaces_tb).strip()
if h_p_no_d:
h_re_entire_line_no_d = re.compile(re_ss(ur"^\s*%s\s*$" % \
ur" *".join(list(h_p_no_d))),
re.M)
if _shamelaFindHeadings(txt_no_d,
page_id,
d,
h,
h_re_entire_line_no_d,
ix,
j,
3):
continue
# at the beginning of the line
if _shamelaFindExactHeadings(page_txt, page_id, "\n%s", d, h, ix, j, 4):
continue
if h_p:
h_re_line_start = re.compile(re_ss(ur"^\s*%s\s*" % ur" *".join(list(h_p))), re.M)
if _shamelaFindHeadings(txt, page_id, d, h, h_re_line_start, ix, j, 5):
continue
if h_p_no_d:
h_re_line_start_no_d = re.compile(re_ss(ur"^\s*%s\s*" % \
ur" *".join(list(h_p_no_d))),
re.M)
if _shamelaFindHeadings(txt_no_d,
page_id,
d,
h,
h_re_line_start_no_d,
ix,
j,
6):
continue
# any where in the line
if _shamelaFindExactHeadings(page_txt, page_id, "%s", d, h, ix,j, 7):
continue
if h_p:
h_re_any_ware = re.compile(re_ss(ur"\s*%s\s*" % \
ur" *".join(list(h_p))),
re.M)
if _shamelaFindHeadings(txt, page_id, d, h, h_re_any_ware, ix, j, 8):
continue
if h_p_no_d:
h_re_any_ware_no_d = re.compile(re_ss(ur"\s*%s\s*" % \
ur" *".join(list(h_p_no_d))),
re.M)
if _shamelaFindHeadings(txt_no_d, page_id, d, h, h_re_any_ware_no_d, ix, j, 9):
continue
# if we reached here then head is not found
# place it just after last one
if found:
last_end = found[-1].end
#try: last_end += page_txt[last_end:].index('\n')+1
#except ValueError: last_end = len(page_txt); print "*"
#print "last_end = ",last_end
else:
last_end = 0
candidate = _foundShHeadingMatchItem(last_end, last_end, h, d, 0)
bisect.insort(found, candidate) # add the candidate to the found list
del toc_hash[page_id]
return
footnotes_cnd = []
footnotes = []
h_tags = {}
t_tags0 = {'textbody':None}
t_tags = t_tags0.copy()
last_hno = None
hno_pop_needed = False
def pop_footers(ft):
s = "\n\n".join(map(lambda (i,a): " * (%d) %s" % (i + 1, a[1]), enumerate(ft)))
del ft[:]
return s
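# e.g. pop_footers([[u'1', u'first'], [u'2', u'second']]) yields
# " * (1) first\n\n * (2) second" and empties the passed list in place;
# note the numbers are regenerated from the position, not from the stored mark.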
# step 3: walk through pages, accumulating contents
# NOTE: in some books the id column need not be unique
#
blnk_base = sh.getBLink(bkid)
blnk = ""
blnk_old = ""
r = c.execute('SELECT rowid FROM b%d ORDER BY rowid DESC LIMIT 1' % bkid).fetchone()
r_max = float(r['rowid'])/100.0
for r in c.execute('SELECT b%d.rowid,id,nass,part,page,hno,sora,aya,na,matn,matnid,blnk FROM b%d LEFT OUTER JOIN shrooh ON shrooh.sharh = %d AND id=shrooh.sharhid ORDER BY id' % (bkid,bkid,bkid,)):
if sh.progress_dict.get('cancel', False):
return None
# FIXME: since we are using ORDER BY id, then using rowid for progress is not always correct
sh.progress("importing book [%d]" % bkid,
r['rowid']/r_max,
*sh.progress_args,
**sh.progress_kw)
if r['nass']:
pg_txt = r['nass'].translate(dos2unix_tb).strip()
else:
pg_txt = u""
pg_id = r['id']
hno = r['hno']
blnk_old = blnk
blnk = r['blnk']
try:
matn = r['matn'] and int(r['matn'])
matnid = r['matnid'] and int(r['matnid'])
except ValueError:
matn,matnid = None,None
except TypeError:
matn,matnid = None,None
sura, aya, na = 0, 0, 0
if r['sora'] and r['aya'] and r['sora'] > 0 and r['aya'] > 0:
sura, aya, na = r['sora'], r['aya'], r['na']
if not na or na <= 0:
na = 1
h_tags['quran.tafseer.ref'] = "%03d-%03d-%03d" % (sura, aya, na)
# split pg_txt into pg_body and pg_footers_txt
m = shamela_footers_re.search(pg_txt)
if m:
i = m.start()
pg_body = pg_txt[:i].strip()
pg_footers_txt = pg_txt[m.end()+1:].strip()
# A = [(mark, offset_of_num, offset_of_text)]
A = [(fm.group(1),
fm.start(),
fm.start() + \
len(fm.group(1)) + \
ft_prefix_len + \
ft_suffix_len) \
for fm in shamela_footer_re.finditer(pg_footers_txt)] # fixme it need not be +2
if A:
pg_footers_continue = pg_footers_txt[:A[0][1]].strip()
B = []
for i, (j, k, l) in enumerate(A[:-1]):
# TODO: do we need to check if j is in right order
B.append([j, pg_footers_txt[l:A[i + 1][1]].strip()])
j, k, l = A[-1]
B.append([j,pg_footers_txt[l:].strip()])
last_digit = 0
for i, j in B:
if i.isdigit():
if int(i) == last_digit + 1:
footnotes_cnd.append([i, j])
last_digit = int(i)
elif footnotes_cnd:
footnotes_cnd[-1][1] += " (%s) %s" % (i, j)
else:
pg_footers_continue += "(%s) %s" % (i, j)
else:
footnotes_cnd.append([i, j])
if pg_footers_continue:
# FIXME: should this be footnotes or footnotes_cnd
if footnotes:
footnotes[-1][1] += " " + pg_footers_continue
else:
# NOTE: an excess footnote without previous footnotes to add it to
print " * warning: an excess text in footnotes in pg_id = ", pg_id
pg_body += "\n\n==========\n\n" + \
pg_footers_continue + \
"\n\n==========\n\n"
# NOTE: t_tags is used since h_tags was already committed
t_tags["request.fix.footnote"] = "shamela import warning: excess text in footnotes"
else:
pg_body = pg_txt
# debug stubs
#if pg_id == 38:
# print "pg_body = [%s]\n" % pg_body
# for j,k in footnotes_cnd:
# print "j = [%s] k = [%s]" % (j,k)
# # raise KeyError
if toc_hash.has_key(pg_id):
hno_pop_needed = False
elif hno != None and hno != last_hno:
# FIXME: make it into a new head
last_hno = hno
# commit anything not commited
if footnotes:
last += "\n\n__________\n" + pop_footers(footnotes)
cursor.appendNode(parents[-1], reformat(last, shorts_t, shorts_dict), t_tags)
t_tags = t_tags0.copy()
last = ""
# create a new node
set_get_xref(unicode(hno), h_tags, sh, bkid, pg_id, matn, matnid)
h_tags[u'request.fix.head'] = u'shamela import warning: automatically generated head'
# FIXME: handle the case of a new hno on the beginning of a chapter
if hno_pop_needed:
parents.pop()
depths.pop() # FIXME: how many time to pop ?
else:
hno_pop_needed = True
parent = cursor.appendNode(parents[-1], unicode(hno), h_tags)
h_tags = {}
parents.append(parent)
depths.append(depths[-1] + 0.5) # FIXME: does this hack work?
# TODO: set the value of header tag to be a unique reference
# TODO: keep part,page,hno,sora,aya,na somewhere in the imported document
# TODO: add special handling for hadeeth number and tafseer info
found = []
# step 4: for each page content try to find all headings
_shamelaHeadings(pg_body, pg_id)
# now we got all headings in found
# step 5: add the found headings and its content
# splitting page text pg_body into [:f0.start] [f0.end:f1.start] [f1.end:f2.start]...[fn.end:]
# step 5.1: add [:f0.start] to the last heading contents and push it
if not found:
# if no new heading in this page, add it to be committed later
last += shamela_shift_footers_re.sub(footer_shift_cb, pg_body)
if footnotes_cnd:
print " * fixing stall footnotes at pg_id = ", pg_id
last += " ".join(map(lambda (j,k): "(%s) %s" % (j,k),footnotes_cnd))
del footnotes_cnd[:]
continue
# here some new headings were found
_fixHeadBounds(pg_body, found)
# commit the body of previous heading first
if started:
if blnk_old and blnk_base:
last += u"\n\n[[%s]]\n\n" % (blnk_base+blnk_old)
blnk_old = None
last += shamela_shift_footers_re.sub(footer_shift_cb, pg_body[:found[0].start])
if footnotes_cnd:
print " ** stall footnotes at pg_id = ", pg_id
#for j,k in footnotes_cnd:
# print "j = [%s] k = [%s]" % (j,k)
#raise
if footnotes:
last += "\n\n__________\n" + pop_footers(footnotes)
cursor.appendNode(parents[-1], reformat(last, shorts_t, shorts_dict), t_tags)
t_tags = t_tags0.copy()
last = ""
# step 5.2: same for all rest segments [f0.end:f1.start],[f1.end:f2.start]...[f(n-1).end:fn.start]
for i,f in enumerate(found[:-1]):
while(depths[-1] >= f.depth):
depths.pop()
parents.pop()
started = True
# FIXME: pg_id won't be unique, add a counter like "_p5", "_p5.2", ..etc
set_get_xref(u"_p" + unicode(pg_id), h_tags, sh, bkid, pg_id, matn, matnid)
if f.fuzzy == 0:
h_tags[u'request.fix.head'] = u'shamela import error: missing head'
parent = cursor.appendNode(parents[-1], f.txt+f.suffix, h_tags)
h_tags = {}
parents.append(parent)
depths.append(f.depth)
last = shamela_shift_footers_re.sub(footer_shift_cb, pg_body[f.end:found[i+1].start])
if footnotes:
last += "\n\n__________\n" + pop_footers(footnotes)
parent = cursor.appendNode(parent, reformat(last, shorts_t, shorts_dict), t_tags)
t_tags = t_tags0.copy()
# step 5.3: save [fn.end:] as last heading
f = found[-1]
while(depths[-1] >= f.depth):
depths.pop()
parents.pop()
# FIXME: pg_id won't be unique, add a counter like "_p5", "_p5.2", ..etc
set_get_xref(u"_p"+unicode(pg_id), h_tags, sh, bkid, pg_id, matn, matnid)
txt_start = f.end
if f.fuzzy == 0:
h_tags[u'request.fix.head'] = u'shamela import error: missing header'
parent = cursor.appendNode(parents[-1], f.txt+f.suffix,h_tags)
h_tags={}
started = True
parents.append(parent)
depths.append(f.depth)
#last = pg_body[f.end:]+'\n'
last = shamela_shift_footers_re.sub(footer_shift_cb, pg_body[f.end:]+'\n')
if footnotes_cnd:
last += "\n==========[\n" + \
pop_footers(footnotes_cnd) + \
"\n]==========\n"
if not started:
raise TypeError
if blnk and blnk_base:
last += u"\n\n[[%s]]\n\n" % (blnk_base + blnk)
blnk = None
if last:
if footnotes:
last += "\n\n__________\n" + pop_footers(footnotes)
cursor.appendNode(parents[-1], reformat(last, shorts_t, shorts_dict), t_tags)
t_tags=t_tags0.copy()
# l should be empty because we have managed missing headers
#l = filter(lambda i: i,toc_hash.values())
#for j in l: print j
#print "*** headings left: ",len(l)
sh.metaById[bkid] = meta
sh.progress("importing book [%d]" % bkid,
100.0,
*sh.progress_args,
**sh.progress_kw)
return meta
if __name__ == '__main__':
# minimal manual-test stub; assumes invocation as: python shamelaUtils.py BOK_FILE DST_DIR
import sys, shutil
from Thawab.core import ThawabMan
bok_fn, dst = sys.argv[1], sys.argv[2]
th = ThawabMan(os.path.expanduser('~/.thawab'))
sh = ShamelaSqlite(bok_fn)
sh.toSqlite()
for bok_id in sh.getBookIds():
ki = th.mktemp()
c = ki.seek(-1,-1)
meta = shamelaImport(c, sh, bok_id)
c.flush()
o = ki.uri
n = meta['kitab']
del ki
shutil.move(o, os.path.join(dst, n))
thawab-4.1/Thawab/stemming.py 0000664 0000000 0000000 00000007404 13052627552 0016243 0 ustar 00root root 0000000 0000000 # -*- coding: UTF-8 -*-
"""
Copyright © 2008, Muayyad Alsadi
Released under terms of Waqf Public License.
This program is free software; you can redistribute it and/or modify
it under the terms of the latest version Waqf Public License as
published by Ojuba.org.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
The Latest version of the license can be found on
"http://waqf.ojuba.org/license"
"""
import sys, os, os.path, re
#harakat = "ًٌٍَُِّْـ".decode('utf-8')
#normalize_tb = dict(map(lambda i: (ord(i),None),list(harakat)))
#normalize_tb[ord('ة'.decode('utf-8'))] = ord('ه'.decode('utf-8'))
#for i in list("ىئإؤأآء".decode('utf-8')):
# normalize_tb[ord(i)] = ord('ا'.decode('utf-8'))
normalize_tb = {
65: 97, 66: 98, 67: 99, 68: 100, 69: 101, 70: 102,
71: 103, 72: 104, 73: 105, 74: 106, 75: 107, 76: 108,
77: 109, 78: 110, 79: 111, 80: 112, 81: 113, 82: 114,
83: 115, 84: 116, 85: 117, 86: 118, 87: 119, 88: 120,
89: 121, 90: 122, 1600: None, 1569: 1575, 1570: 1575, 1571: 1575,
1572: 1575, 1573: 1575, 1574: 1575, 1577: 1607, # teh marboota -> haa
1611: None, 1612: None, 1613: None, 1614: None, 1615: None,
1616: None, 1617: None, 1618: None, 1609: 1575}
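# What the table does, on a made-up sample: ASCII upper-case folds to lower-case,
# hamza seats and alef maqsura fold to bare alef, teh marboota to haa, and
# tatweel/harakat are dropped:
#   >>> print u"أُمَّة Book".translate(normalize_tb)
#   امه book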
rm_prefix = re.compile(u"^(?:ا?[وف]?((?:[بك]?ال|لل?)|[اينت])?)")
# TODO: reconsider the suffix re
rm_suffix = re.compile(u"(?:ا[نت]|[يهة]|ها|ي[هنة]|ون)$")
#rm_prefix = u"^(?:ا?[وف]?((?:[بك]?ال|لل?)|[اينت])?)"
#rm_suffix = u"(?:ا[نت]|[يهة]|ها|ي[هنة]|ون)$"
#stem_re = rm_prefix+"(\w{3,}?)"+rm_suffix
# e.g. أواستقدمتموني (a single verb carrying the full run of prefixes and suffixes)
# interrogative hamza, conjunction, preposition and definite article (examples: "أفككتابي تؤلف ؟", "وللآخرة فلنعد العدة", "فالاستغفار") or a present-tense prefix
# the preposition and definite-article prefixes never combine with the present-tense prefix
prefix_re = u''.join( (
u"^\u0627?" , # optional hamza
u"[\u0648\u0641]?", # optional Atf (with Waw or Faa)
u"(?:" , # nouns specific prefixes (Jar and definite article)
u"[\u0628\u0643]?\u0627\u0644?|" , # optional Jar (with ba or kaf) with optional AL
u"\u0644\u0644|" , # optional LL (Jar with Lam and article )
u"\u0644" , # optional LL (Jar with Lam and article)
u")?" , # end nouns specific prefixes
u"(\\w{2,})$" ) ) # the stem is grouped
# [اتني]|نا|ان|تا|ون|ين|تما
verb_some_subject_re = u"[\u0627\u062a\u0646\u064a]|\u0646\u0627|\u0627\u0646|\u062a\u0627|\u0648\u0646|\u064a\u0646|\u062a\u0645\u0627"
# [هن]|ني|نا|ها|هما|هم|هن|كما|كم|كن
verb_object_re = u"(?:[\u0647\u0646]|\u0646\u064a|\u0646\u0627|\u0647\u0627|\u0647\u0645\u0627|\u0647\u0645|\u0647\u0646|\u0643\u0645\u0627|\u0643\u0645|\u0643\u0646)"
verb_suffix_re = u''.join( [
u"(?:(?:\u0648\u0627|\u062a\u0645)|" , # وا|تم
u"(?:",
u"(?:",
verb_some_subject_re,
u'|\u0648|\u062a\u0645\u0648', # و|تمو
u")",
verb_object_re,u'{1,2}'
u")|(?:",
verb_some_subject_re,
u"))?$"])
def removeArabicSuffix(word):
if len(word) > 4:
w = rm_suffix.sub("", word, 1)
if len(w) > 2:
return w
return word
def removeArabicPrefix(word):
if len(word) > 3:
w = rm_prefix.sub("", word, 1)
if len(w)>2:
return w
return word
def stemArabic(word):
return removeArabicPrefix(removeArabicSuffix(unicode(word).translate(normalize_tb)))
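# Minimal usage sketch: normalize first, then strip one suffix and one prefix,
# keeping each reduction only while enough letters remain:
#   >>> print stemArabic(u"والمسلمون")
#   مسلم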
thawab-4.1/Thawab/tags.py 0000664 0000000 0000000 00000002537 13052627552 0015360 0 ustar 00root root 0000000 0000000 # the following flags are Or-ed in a node-wide (not tag-wide)
TAG_FLAGS_EXTERNAL_SOURCE = 1 # some external source pointed by param
TAG_FLAGS_BYBOT = 2 # the content and descendant nodes are generated by a bot, CHANGES WILL BE LOST
TAG_FLAGS_HEADER = 4 # index content in a separated document then consume content
TAG_FLAGS_IX_TAG = 8 # add this tag name into index tags list, if it has a param append it to the tag name
TAG_FLAGS_IX_FIELD = 16 # index content (again) in a separated document without consuming content
TAG_FLAGS_IX_SKIP = 32 # don't index content
TAG_FLAGS_PAD_CONTENT = 64 # append a space/LF after content
TAG_FLAGS_FLOW_BLOCK = 128 # in a separated block, e.g. a paragraph
TAG_FLAGS_FLOW_FLOAT = 256 # marked text does not flow normally, but float in a box
TAG_FLAGS_FLOW_FOOTER = 512 # marked text does not flow normally, but is accumulated in the tail
TAG_FLAGS_FLOW_HIDDEN = 1024 # marked text does not appear in usual cases
# NOTE: validity of data: a node of type TAG_FLAGS_HEADER can't be a child of TAG_FLAGS_IX_FIELD
# NOTE: validity of data: both TAG_FLAGS_IX_SKIP and TAG_FLAGS_IX_FIELD should not be applied to the same node
# NOTE: validity of data: both TAG_FLAGS_HEADER and TAG_FLAGS_IX_FIELD will cause redundancy if applied to the same node (as things will be indexed twice without any benefit)
thawab-4.1/Thawab/userDb.py 0000664 0000000 0000000 00000014545 13052627552 0015650 0 ustar 00root root 0000000 0000000 # -*- coding: UTF-8 -*-
"""
The user-data handling classes of thawab
Copyright © 2008-2010, Muayyad Alsadi
Released under terms of Waqf Public License.
This program is free software; you can redistribute it and/or modify
it under the terms of the latest version Waqf Public License as
published by Ojuba.org.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
The Latest version of the license can be found on
"http://waqf.ojuba.org/license"
"""
import sys, os, os.path, re, sqlite3, time, threading
#################################################
USER_DB_SCHEMA = """\
CREATE TABLE "starred" (
"kitab" TEXT PRIMARY KEY,
"time" FLOAT
);
CREATE INDEX StarredTimeIndex on starred(time);
CREATE TABLE "bookmarks" (
"kitab" TEXT,
"version" TEXT,
"globalOrder" INTEGER,
"nodeIdNum" INTEGER,
"nodeId" TEXT,
"title" TEXT,
"time" FLOAT,
PRIMARY KEY ("kitab", "version", nodeId)
);
CREATE INDEX BookmarksKitabIndex on bookmarks(kitab);
CREATE INDEX BookmarksNodeIdNumIndex on bookmarks(nodeIdNum);
CREATE INDEX BookmarksGlobalOrderIndex on bookmarks(globalOrder);
CREATE INDEX BookmarksTimeIndex on bookmarks(time);
CREATE TABLE "comments" (
"kitab" TEXT,
"version" TEXT,
"globalOrder" INTEGER,
"nodeIdNum" INTEGER,
"nodeId" TEXT,
"title" TEXT,
"comment" TEXT,
"time" FLOAT,
PRIMARY KEY ("kitab", "version", nodeId)
);
CREATE INDEX CommentsKitabIndex on comments(kitab);
CREATE INDEX CommentsNodeIdNumIndex on comments(nodeIdNum);
CREATE INDEX CommentsGlobalOrderIndex on comments(globalOrder);
CREATE INDEX CommentsTimeIndex on comments(time);
"""
SQL_GET_ALL_STARRED = """SELECT kitab FROM starred ORDER BY time"""
SQL_GET_STARRED_TIME = """SELECT time FROM starred WHERE kitab=?"""
SQL_SET_STARRED = 'INSERT OR REPLACE INTO starred (kitab, time) VALUES (?, ?)'
SQL_UNSET_STARRED = 'DELETE FROM starred WHERE kitab=?'
# NOTE: globalOrder is used to get the right book order
# NOTE: nodeIdNum is used for consistency checking and optimization
SQL_GET_ALL_BOOKMARKS = """SELECT * FROM bookmarks ORDER BY kitab"""
SQL_GET_BOOKMARKED_KUTUB = """SELECT DISTINCT kitab FROM bookmarks ORDER BY kitab"""
SQL_GET_KITAB_BOOKMARKS = """SELECT * FROM bookmarks WHERE kitab=? ORDER BY time"""
SQL_ADD_BOOKMARK = 'INSERT OR REPLACE INTO bookmarks (kitab, version, globalOrder, nodeIdNum, nodeId, title, time) VALUES (?,?,?,?,?,?,?)'
SQL_GET_ALL_COMMENTS = """SELECT * FROM comments ORDER BY kitab"""
SQL_GET_COMMENTED_KUTUB = """SELECT DISTINCT kitab FROM comments ORDER BY kitab"""
SQL_GET_KITAB_COMMENTS = """SELECT * FROM comments WHERE kitab=? ORDER BY time"""
SQL_ADD_COMMENT = 'INSERT OR REPLACE INTO comments (kitab, version, globalOrder, nodeIdNum, nodeId, title, comment, time) VALUES (?,?,?,?,?,?,?,?)'
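# NOTE: bookmarks and comments use INSERT OR REPLACE against their
# (kitab, version, nodeId) primary key, so re-adding an existing entry
# updates its title/comment and timestamp instead of duplicating the row.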
#################################
class UserDb(object):
"""a class holding per-user data: starred books, bookmarks and comments"""
def __init__(self, th, user_db):
self.th = th
self.db_fn = user_db
create_new = not os.path.exists(self.db_fn)
self._cn = {}
cn = self._getConnection()
if create_new:
cn.executescript(USER_DB_SCHEMA)
cn.commit()
def _getConnection(self):
n = threading.current_thread().name
if self._cn.has_key(n):
r = self._cn[n]
else:
r = sqlite3.connect(self.db_fn)
r.row_factory = sqlite3.Row
self._cn[n] = r
return r
def getStarredTime(self, kitab):
"""
return None if not starred, can be used to check if starred
"""
r = self._getConnection().execute(SQL_GET_STARRED_TIME, (kitab,)).fetchone()
if not r:
return None
return r['time']
def getStarredList(self):
r = self._getConnection().execute(SQL_GET_ALL_STARRED).fetchall()
return map(lambda i: i['kitab'], r)
def starKitab(self, kitab):
self._getConnection().execute(SQL_SET_STARRED , (kitab, float(time.time())))
def unstarKitab(self, kitab):
self._getConnection().execute(SQL_UNSET_STARRED, (kitab,))
def getAllBookmarks(self):
r = self._getConnection().execute(SQL_GET_ALL_BOOKMARKS).fetchall()
return map(lambda i: dict(i), r)
def getBookmarkedKutub(self):
r = self._getConnection().execute(SQL_GET_BOOKMARKED_KUTUB).fetchall()
return map(lambda i: i['kitab'], r)
def getKitabBookmarks(self, kitab):
r = self._getConnection().execute(SQL_GET_KITAB_BOOKMARKS, (kitab, )).fetchall()
return map(lambda i: dict(i), r)
def addBookmark(self, kitab, version, globalOrder, nodeIdNum, nodeId, title):
self._getConnection().execute(SQL_ADD_BOOKMARK,
(kitab,
version,
globalOrder,
nodeIdNum,
nodeId,
title,
float(time.time()) ))
def getAllComments(self):
r = self._getConnection().execute(SQL_GET_ALL_COMMENTS).fetchall()
return map(lambda i: dict(i), r)
def getCommentedKutub(self):
r = self._getConnection().execute(SQL_GET_COMMENTED_KUTUB).fetchall()
return map(lambda i: i['kitab'], r)
def getKitabComments(self, kitab):
r = self._getConnection().execute(SQL_GET_KITAB_COMMENTS, (kitab, )).fetchall()
return map(lambda i: dict(i), r)
def addComment(self, kitab, version, globalOrder, nodeIdNum, nodeId, title, comment):
self._getConnection().execute(SQL_ADD_COMMENT,
(kitab,
version,
globalOrder,
nodeIdNum,
nodeId,
title,
comment,
float(time.time()) ))
thawab-4.1/Thawab/webApp.py 0000664 0000000 0000000 00000031570 13052627552 0015637 0 ustar 00root root 0000000 0000000 # -*- coding: UTF-8 -*-
"""
Copyright © 2009, Muayyad Alsadi
Released under terms of Waqf Public License.
This program is free software; you can redistribute it and/or modify
it under the terms of the latest version Waqf Public License as
published by Ojuba.org.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
The Latest version of the license can be found on
"http://waqf.ojuba.org/license"
"""
import sys, os, os.path
import hashlib
import time
import bisect
from cgi import escape # for html escaping
from meta import prettyId, makeId, metaVrr
from stemming import normalize_tb
from okasha.utils import ObjectsCache
from okasha.baseWebApp import *
from okasha.bottleTemplate import bottleTemplate
# fixme move this to okasha.utils
def tryInt(s, d = 0):
try:
return int(s)
except ValueError:
pass
except TypeError:
pass
return d
class webApp(baseWebApp):
_emptyViewResp = {
'apptype': 'web',
'content': '', 'childrenLinks': '',
'prevUrl': '', 'prevTitle': '',
'upUrl': '', 'upTitle': '',
'nextUrl': '', 'nextTitle': '',
'breadcrumbs': ''
}
def __init__(self, th, typ = 'web', *args, **kw):
"""
th is an instance of ThawabMan
allowByUri = True for desktop, False for server
"""
self.th = th
self.isMonolithic = th.isMonolithic
self.stringSeed = "S3(uR!r7y"
self._typ = typ
self._allowByUri = (typ == 'app')
self._emptyViewResp[u"apptype"]=self._typ
# FIXME: move ObjectsCache of kitab to routines to core.ThawabMan
if not self.isMonolithic:
import threading
lock1 = threading.Lock();
else:
lock1 = None
self.searchCache = ObjectsCache(lock = lock1)
baseWebApp.__init__(self, *args, **kw)
def _safeHash(self,o):
"""
a URL-safe hash: returns a 22-character string derived from an md5 digest
if isinstance(o,unicode):
o = o.encode('utf8')
return hashlib.md5(self.stringSeed+o).digest().encode('base64').replace('+','-').replace('/','_')[:22]
def _root(self, rq, *args):
if args:
if args[0] == 'favicon.ico':
raise redirectException(rq.script+'/_files/img/favicon.ico')
elif args[0] == 'robots.txt':
return self._robots(rq, *args)
elif args[0] == 'sitemap.xml':
return self._sitemap(rq, *args)
raise forbiddenException()
raise redirectException(rq.script+'/index/')
@expose(contentType = 'text/plain; charset = utf-8')
def _robots(self, rq, *args):
return """Sitemap: http://%s/sitemap.xml
User-agent: *
Allow: /
""" % (rq.environ['HTTP_HOST']+rq.script)
@expose(contentType = 'text/xml; charset = utf-8')
def _sitemap(self, rq, *args):
t = time.gmtime() # FIXME: use meta to get mime of meta.db
d = time.strftime("%Y-%m-%dT%H:%M:%S+00:00", t)
tmp = "\t<url>\n\t\t<loc>http://"+rq.environ['HTTP_HOST']+rq.script+"/static/%s/_i0.html</loc>\n\t\t<lastmod>"+d+"</lastmod>\n\t\t<changefreq>daily</changefreq>\n\t\t<priority>0.5</priority>\n\t</url>"
l=self.th.getMeta().getKitabList()
urls=[]
for k in l:
urls.append(tmp % (k))
return """<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
\t<url>
\t\t<loc>http://thawab.ojuba.org/index/</loc>
\t\t<lastmod>%s</lastmod>
\t\t<changefreq>daily</changefreq>
\t\t<priority>0.8</priority>
\t</url>
%s
</urlset>
""" % (d,"\n".join(urls))
@expose(bottleTemplate,["main"])
def index(self, rq, *args):
rq.q.title = "الرئيسية"
l = self.th.getMeta().getKitabList()
htmlLinks = []
l = sorted(l)
for k in l:
# FIXME: it currenly offers only one version for each kitab (the first one)
htmlLinks.append('\t<li><a href="view/%s">%s</a></li>' % (k,
prettyId(self.th.getMeta().getByKitab(k)[0]['kitab'])))
htmlLinks = (u"\n".join(htmlLinks))
return {
u"lang":u"ar", u"dir":u"rtl",
u"kutublinks": htmlLinks,
"args":'/'.join(args)}
@expose(percentTemplate,["stem.html"])
def stem(self, rq, *args):
from stemming import stemArabic
w = rq.q.getfirst('word','').decode('utf-8')
s = ''
if w:
s = " ".join([stemArabic(i) for i in w.split()])
return {u"script":rq.script, u"word":w, u"stem":s}
def _getKitabObject(self, rq, *args):
# FIXME: cache KitabObjects and update last access
if not args: raise forbiddenException() # TODO: make it a redirect to index
k = args[0]
if k == '_by_uri':
if self._allowByUri:
uri = rq.q.getfirst('uri',None)
if not uri:
raise fileNotFoundException()
m = self.th.getMeta().getByUri(uri)
else:
raise forbiddenException()
else:
m = self.th.getMeta().getLatestKitab(k)
if not m:
raise forbiddenException()
uri = m['uri']
ki = self.th.getCachedKitab(uri)
return ki, m
def _view(self, ki, m, i, d = '#', s = ""):
r = self._emptyViewResp.copy()
node, p, u, n, c, b = ki.toc.getNodePrevUpNextChildrenBreadcrumbs(i)
if n:
ub = n.globalOrder
else:
ub = -1
if not node or i == "_i0":
r['content'] = "<h1>%s</h1>" % escape(prettyId(m['kitab']))
else:
r['content'] = node.toHtml(ub)
if c:
cLinks = ''.join(map(lambda cc: '<li><a href="%s">%s</a></li>\n' % \
(d + "_i" + str(cc.idNum) + s,
escape(cc.getContent())),
c))
cLinks = "<ul>\n%s</ul>" % cLinks
else:
cLinks = ''
r['childrenLinks'] = cLinks
if n:
r['nextUrl'] = d + '_i' + str(n.idNum) + s
r['nextTitle'] = escape(n.getContent())
if p:
r['prevUrl'] = d + '_i' + str(p.idNum) + s
r['prevTitle'] = escape(p.getContent())
if u:
r['upUrl'] = d + '_i' + str(u.idNum) + s
r['upTitle'] = escape(u.getContent())
if b:
r['breadcrumbs'] = " > ".join(map(lambda (i,t): ('<a href="%s_i%s%s">%s</a>' % \
(d, i, s, escape(t))), b))
vrr = metaVrr(ki.meta)
#self.th.searchEngine.related(m['kitab'], vrr, node.idNum)
return r
def _get_kitab_details(self, rq, *args):
ki, m = self._getKitabObject(rq, *args)
if not ki or not m:
return None, None, {}
lang = m.get('lang', 'ar')
if lang in ('ar', 'fa', 'he'):
d = 'rtl'
else:
d = 'ltr'
kitabId = escape(makeId(m['kitab']))
t = escape(prettyId(m['kitab']))
r = self._emptyViewResp.copy()
r.update({
u"script": rq.script,
u"kitabTitle": t,
u"kitabId": kitabId,
u"headingId": u"_i0",
u"app": u"Thawab", u"version": u"3.0.1",
u"lang": lang, u"dir": d,
u"title": t,
u"content": t,
"args": '/'.join(args)})
return ki, m, r
@expose(bottleTemplate,["view"])
def static(self, rq, *args):
l = len(args)
if l < 1:
raise forbiddenException() # TODO: make it show a list of books
elif l == 1:
raise redirectException(rq.script + '/static/' + args[0] + "/_i0.html")
elif l != 2:
raise forbiddenException()
ki, m, r = self._get_kitab_details(rq, *args)
if not ki:
raise fileNotFoundException()
h = args[1]
if h.endswith(".html"):
h = h[:-5]
r.update(self._view(ki, m, h, './', ".html"))
if self.th.searchEngine.getIndexedVersion(m['kitab']):
rq.q.is_indexed = 1
r['is_indexed'] = 1
else:
rq.q.is_indexed = 0
r['is_indexed'] = 0
r['is_static'] = 1
r['d'] = './'
r['s'] = '.html'
return r
@expose(bottleTemplate,["view"])
def view(self, rq, *args):
if len(args) != 1:
raise forbiddenException()
ki, m, r = self._get_kitab_details(rq, *args)
if not ki:
raise fileNotFoundException()
if self.th.searchEngine.getIndexedVersion(m['kitab']):
rq.q.is_indexed = 1
r['is_indexed'] = 1
else:
rq.q.is_indexed = 0
r['is_indexed'] = 0
r['is_static'] = 0
r['d'] = '#'
r['s'] = ''
return r
@expose()
def ajax(self, rq, *args):
if not args:
raise forbiddenException()
if args[0] == 'searchExcerpt' and len(args) == 3:
h = args[1]
try:
i = int(args[2])
except (TypeError, ValueError):
raise forbiddenException()
R = self.searchCache.get(h)
if R == None:
return 'انتهت صلاحية هذا البحث'
try :
r = self.th.searchEngine.resultExcerpt(R, i)
except OSError, e:
print '** webapp.ajax: %s' % e
return ''
#r = escape(self.th.searchEngine.resultExcerpt(R,i)).replace('\0','').replace('\010','').replace(u"\u2026",u"\u2026").encode('utf8')
return r
elif args[0] == 'kutub' and len(args) == 1:
q = rq.q.getfirst('q','').decode('utf-8').strip().translate(normalize_tb)
r = []
l = self.th.getMeta().getKitabList()
l = sorted(l)
for k in l:
n = prettyId(k)
if not q or q in n.translate(normalize_tb):
r.append('\t<li><a href="view/%s">%s</a></li>' % (k, n))
return '<ul>\n%s\n</ul>' % "\n".join(r)
raise forbiddenException()
@expose(jsonDumps)
def json(self, rq, *args):
# use rq.rhost to impose host-based limits on searching
if not args: raise forbiddenException()
ki = None
r = {}
if args[0] == 'view':
a = args[1:]
ki, m = self._getKitabObject(rq, *a)
if len(a) == 2:
r = self._view(ki, m, a[1])
elif args[0] == 'search':
q = rq.q.getfirst('q','')
h = self._safeHash(q)
# FIXME: check to see if one already search for that before
q = q.decode('utf8')
R = self.th.searchEngine.queryIndex(q)
# print R
if not R:
return {'t': 0, 'c': 0, 'h': ''}
self.searchCache.append(h,R)
r = {'t': R.runtime, 'c': len(R), 'h': h}
elif args[0] == 'searchResults':
h = rq.q.getfirst('h','')
try:
i = int(rq.q.getfirst('i', '0'))
except (TypeError, ValueError):
i = 0
try:
c = int(rq.q.getfirst('c', '0'))
except (TypeError, ValueError):
c = 0
R = self.searchCache.get(h)
if R == None:
return {'c': 0}
C = len(R)
if i >= C:
return {'c': 0}
c = min(c, C-i)
r = {'c': c, 'a': []}
n = 100.0 / R[0].score
j = 0
for j in range(i, i + c):
name = R[j]['kitab']
v = R[j]['vrr'].split('-')[0]
m = self.th.getMeta().getLatestKitabV(name,v)
if not m:
continue # book is removed
k = m['kitab'] #.replace('_', ' ')
r['a'].append({
'i':j,'n':'_i'+R[j]['nodeIdNum'],
'k':k, 'a':prettyId(m['author']), 'y':tryInt(m['year']),
't':R[j]['title'], 'r':'%4.1f' % (n*R[j].score)})
j += 1
r['c'] = j
else:
r = {}
return r
thawab-4.1/Thawab/whooshSearchEngine.py 0000664 0000000 0000000 00000026317 13052627552 0020207 0 ustar 00root root 0000000 0000000 # -*- coding: UTF-8 -*-
"""
Copyright © 2008, Muayyad Alsadi
Released under terms of Waqf Public License.
This program is free software; you can redistribute it and/or modify
it under the terms of the latest version Waqf Public License as
published by Ojuba.org.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
The Latest version of the license can be found on
"http://waqf.ojuba.org/license"
"""
import sys, os, os.path, re
import shutil
from tags import *
from meta import prettyId,makeId
from whoosh import query
from whoosh.index import EmptyIndexError, create_in, open_dir, IndexVersionError
from whoosh.highlight import highlight, SentenceFragmenter, BasicFragmentScorer, FIRST, HtmlFormatter
from whoosh.filedb.filestore import FileStorage
from whoosh.fields import Schema, ID, IDLIST, TEXT
from whoosh.formats import Frequency
from whoosh.qparser import QueryParserError
from whoosh.lang.porter import stem
from whoosh.analysis import StandardAnalyzer, StemFilter
try:
from whoosh.index import _CURRENT_TOC_VERSION as whoosh_ix_ver
except ImportError:
from whoosh.filedb.fileindex import _INDEX_VERSION as whoosh_ix_ver
from stemming import stemArabic
def stemfn(word): return stemArabic(stem(word))
# word_re = ur"[\w\u064e\u064b\u064f\u064c\u0650\u064d\u0652\u0651\u0640]"
analyzer = StandardAnalyzer(expression = ur"[\w\u064e\u064b\u064f\u064c\u0650\u064d\u0652\u0651\u0640]+(?:\.?[\w\u064e\u064b\u064f\u064c\u0650\u064d\u0652\u0651\u0640]+)*") | StemFilter(stemfn)
from whoosh.qparser import FieldAliasPlugin
from whooshSymbolicQParser import MultifieldSQParser
class ExcerptFormatter(object):
def __init__(self, between = "..."):
self.between = between
def _format_fragment(self, text, fragment):
output = []
index = fragment.startchar
for t in fragment.matches:
if t.startchar > index:
output.append(text[index:t.startchar])
ttxt = text[t.startchar:t.endchar]
if t.matched:
ttxt = "\0" + ttxt.upper() + "\010"
output.append(ttxt)
index = t.endchar
output.append(text[index:fragment.endchar])
return "".join(output)
def __call__(self, text, fragments):
return self.between.join((self._format_fragment(text, fragment)
for fragment in fragments))
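# Sketch of the format this produces (made-up hit): a matched term comes back
# wrapped in control markers, e.g. "foo \0BAR\010 baz", which the web layer is
# expected to swap for real markup later.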
from baseSearchEngine import BaseSearchEngine
class SearchEngine(BaseSearchEngine):
def __init__(self, th):
BaseSearchEngine.__init__(self, th, False)
self.__ix_writer = None
ix_dir = os.path.join(th.prefixes[0],'index', "ix_" + str(whoosh_ix_ver))
if not os.path.isdir(ix_dir):
os.makedirs(ix_dir)
# try to load a pre-existing index
try:
self.indexer = open_dir(ix_dir)
except (EmptyIndexError, IndexVersionError):
# create a new one
try:
shutil.rmtree(ix_dir, True)
os.makedirs(ix_dir)
except OSError:
pass
schema = Schema(
kitab = ID(stored = True),
vrr = ID(stored = True, unique = False), # version release
nodeIdNum = ID(stored = True, unique = False),
title = TEXT(stored = True, field_boost = 1.5, analyzer = analyzer),
content = TEXT(stored = False,analyzer = analyzer),
#content = TEXT(stored = False,analyzer = analyzer,
#vector = Frequency(analyzer = analyzer)), # with term vector
tags=IDLIST(stored = False)
)
self.indexer = create_in(ix_dir, schema)
#self.__ix_qparser = ThMultifieldParser(self.th, ("title","content",), schema=self.indexer.schema)
self.__ix_qparser = MultifieldSQParser(("title","content",), self.indexer.schema)
self.__ix_qparser.add_plugin(FieldAliasPlugin({
u"kitab":(u"كتاب",),
u"title":(u"عنوان",),
u"tags":(u"وسوم",)})
)
#self.__ix_pre = whoosh.query.Prefix
self.__ix_searcher = self.indexer.searcher()
def __del__(self):
if self.__ix_writer: self.__ix_writer.commit()
def getIndexedVersion(self, name):
"""
return a Version-Release string if in index, otherwise return None
"""
try:
d = self.__ix_searcher.document(kitab = unicode(makeId(name)))
except TypeError:
return None
except KeyError:
return None
if d:
return d['vrr']
return None
def queryIndex(self, queryString):
"""return an iterable of field dicts"""
# FIXME: the return should not be implementation specific
try:
r = self.__ix_searcher.search(self.__ix_qparser.parse(queryString), limit = 500)
except QueryParserError:
return None
return r
def resultExcerpt(self, results, i, ki = None):
# FIXME: this should not be implementation specific
if not ki:
r = results[i]
name = r['kitab']
v = r['vrr'].split('-')[0]
m = self.th.getMeta().getLatestKitabV(name,v)
ki = self.th.getCachedKitab(m['uri'])
num = int(results[i]['nodeIdNum'])
node = ki.getNodeByIdNum(num)
n = ki.toc.next(node)
if n:
ub = n.globalOrder
else:
ub = -1
txt = node.toText(ub)
s = set()
#results.query.all_terms(s) # return (field,term) pairs
# return (field,term) pairs # self.self.__ix_searcher.reader()
results.q.existing_terms(self.indexer.reader(), s, phrases = True)
terms = dict(
map(lambda i: (i[1],i[0]),
filter(lambda j: j[0] == 'content' or j[0] == 'title', s))).keys()
#print "txt = [%s]" % len(txt)
snippet = txt[:min(len(txt),512)] # dummy summary
snippet = highlight(txt,
terms,
analyzer,
SentenceFragmenter(sentencechars = ".!?؟\n"),
HtmlFormatter(between = u"\u2026\n"),
top = 3,
scorer = BasicFragmentScorer,
minscore = 1,
order = FIRST)
#snippet = highlight(txt, terms, analyzer,
# SentenceFragmenter(sentencechars = ".!?"), ExcerptFormatter(between = u"\u2026\n"), top = 3,
# scorer = BasicFragmentScorer, minscore = 1,
# order = FIRST)
return snippet
def indexingStart(self):
"""
should be called before any sequence of indexing Ops, reindexAll() calls this method automatically
"""
if not self.__ix_writer:
try:
self.__ix_writer = self.indexer.writer()
except OSError, e:
print '*** whooshSearchEngine.indexingStart: %s' % e
def indexingEnd(self):
"""
should be called after a sequence of indexing Ops, reindexAll() calls this method automatically
"""
self.__ix_writer.commit(optimize = True)
# self.indexer.optimize() # no need for this with optimize in previous line
self.reload()
def reload(self):
"""
called after committing changes to the index (eg. adding or dropping from index)
"""
self.__ix_searcher = self.__ix_searcher.refresh() # no need to obtain new one with self.indexer.searcher()
self.__ix_writer = None
def dropKitabIndex(self, name):
"""
drop search index for a given Kitab by its uri
if you call indexingStart() before this
then you must call indexingEnd() after it
"""
# FIXME: it seems that this used to not work correctly without a commit() just after the drop; this means reindex needs a commit in-between
ki = self.th.getKitab(name)
if ki:
self.th.getMeta().setIndexedFlags(ki.uri, 1)
print "dropping index for kitab name:", name,
w, c = self.__ix_writer, False
if not w:
w, c = self.indexer.writer(), True # creates a writer internally if one is not defined
# NOTE: because the searcher could be limited do a loop that keeps deleting till the query is empty
while(w.delete_by_term('kitab', name)):
print "*",
print
if c:
w.commit()
if ki:
self.th.getMeta().setIndexedFlags(ki.uri, 0)
def dropAll(self):
# FIXME: it would be more efficient to delete the directory
# NOTE: see http://groups.google.com/group/whoosh/browse_thread/thread/35b1700b4e4a3d5d
self.th.getMeta().setAllIndexedFlags(1)
self.indexingStart()
reader = self.indexer.reader() # also self.__ix_searcher.reader()
for docnum in reader.all_doc_ids(): # iterate doc numbers, not stored fields, so delete_document() gets an int
self.__ix_writer.delete_document(docnum)
self.indexingEnd()
self.th.getMeta().setAllIndexedFlags(0)
def reindexKitab(self,name):
"""
you need to call indexingStart() before this and indexingEnd() after it
"""
# NOTE: this method is overridden here because we need to commit
# between dropping and creating a new index.
# NOTE: can't use updateDocument because each Kitab contains many documents
self.dropKitabIndex(name)
self.__ix_writer.commit()
self.indexKitab(name)
def addDocumentToIndex(self, name, vrr, nodeIdNum, title, content, tags):
"""
this method must be overridden in implementation specific way
"""
if content:
self.__ix_writer.add_document(kitab = name,
vrr = vrr,
nodeIdNum = unicode(nodeIdNum),
title = title,
content = content,
tags = tags)
def keyterms(self, kitab, vrr, nodeIdNum):
s = self.indexer.searcher()
dn = s.document_number(kitab = kitab, vrr = vrr, nodeIdNum = unicode(nodeIdNum))
if dn == None:
return None, []
print " ## ", dn
r = s.key_terms([dn], "content", numterms = 5)
return dn, r
def related(self, kitab, vrr, nodeIdNum):
dn, kt = self.keyterms(kitab, vrr, nodeIdNum)
if not dn:
return None
for t, r in kt:
print "term = ", t, " @ rank = ",r
q = query.Or([query.Term("content", t) for (t, r) in kt])
results = self.indexer.searcher().search(q, limit = 10)
for i, fields in enumerate(results):
if results.docnum(i) != dn:
print fields['kitab'],"\t\t",str(fields['nodeIdNum']),"\t\t",fields['title']
thawab-4.1/Thawab/whooshSymbolicQParser.py 0000664 0000000 0000000 00000000705 13052627552 0020724 0 ustar 00root root 0000000 0000000 # -*- coding: UTF-8 -*-
"""
Copyright © 2010, Muayyad Alsadi
"""
import sys, os, os.path, re
from whoosh import query
from whoosh.qparser import *
def MultifieldSQParser(fieldnames, schema = None, fieldboosts=None, **kwargs):
p = MultifieldParser(fieldnames, schema, fieldboosts, **kwargs)
cp = OperatorsPlugin(And = r"&", Or = r"\|", AndNot = r"&!", AndMaybe = r"&~", Not = r'!')
p.replace_plugin(cp)
return p
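# Usage sketch (hypothetical index ix): with the operators remapped above,
# "&" reads as AND, "|" as OR and "!" as NOT, so something like
#   MultifieldSQParser(("title", "content"), ix.schema).parse(u'kitab:x & !tags:y')
# builds a query over both default fields using the symbolic operators.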
thawab-4.1/Thawab/wiki.py 0000664 0000000 0000000 00000006366 13052627552 0015371 0 ustar 00root root 0000000 0000000 # -*- coding: UTF-8 -*-
"""
Copyright © 2008, Muayyad Alsadi
Released under terms of Waqf Public License.
This program is free software; you can redistribute it and/or modify
it under the terms of the latest version Waqf Public License as
published by Ojuba.org.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
The Latest version of the license can be found on
"http://waqf.ojuba.org/license"
"""
import time, re
####################################
header_re = re.compile(r'^\s*(=+)\s*(.+?)\s*\1\s*$')
def importFromWiki(c, wiki):
"""import a wiki-like into a thawab"""
ki = c.ki
txt = ""
parents = [ki.root]
wikidepths = [0]
title = None
wiki_started = 0
meta = {
'cache_hash': time.time(),
'repo': u'_local',
'lang': None,
'kitab': None,
'version': u'1',
'releaseMajor': u'0',
'releaseMinor': u'0',
'author': None,
'year': 0,
'originalAuthor': None,
'originalYear': 0,
'originalKitab': None,
'originalVersion': None,
'classification': u'_misc'}
for l in wiki:
#l = l.decode('utf-8')
if wiki_started == 0:
if l.startswith('@'):
kv = l.split(' = ',1)
key = kv[0][1:].strip()
if len(kv) == 2:
value = kv[1].strip()
meta[key] = value
continue
else:
wiki_started = 1
m = header_re.match(l)
if not m:
# textbody line: add the line to the accumulated textbody variable
txt += l
else:
# new header:
# add the accumulated textbody of a previous header (if exists) to the Kitab
if txt and title:
c.appendNode(parents[-1], txt, {'textbody': None})
# elif txt and not title: pass # it's leading noise, as title can't be empty because of + in the RE
# reset the accumulated textbody
txt = ""
# now get the title matched by the RE
title = m.group(2)
newwikidepth = 7 - len(m.group(1))
# several methods, first one is to use:
while(wikidepths[-1] >= newwikidepth):
wikidepths.pop()
parents.pop()
wikidepths = wikidepths + [newwikidepth]
parent = c.appendNode(parents[-1], title, {'header': None})
parents = parents + [parent]
if (txt):
c.appendNode(parents[-1], txt, {'textbody': None})
ki.setMCache(meta)
def wiki2th(w, dst):
import os
import os.path
import Thawab.core
import shutil
n = os.path.basename(w)
if n.endswith('.txt'):
n = n[:-4] + ".ki"
th = Thawab.core.ThawabMan(os.path.expanduser('~/.thawab'))
ki = th.mktemp()
wiki = open(w, "rt").read().decode('utf-8').splitlines()
c = ki.seek(-1, -1)
importFromWiki(c, wiki)
c.flush()
o = ki.uri
del ki
shutil.move(o, os.path.join(dst, n))
thawab-4.1/bok2ki.py 0000775 0000000 0000000 00000007617 13052627552 0014404 0 ustar 00root root 0000000 0000000 #! /usr/bin/python
# -*- coding: UTF-8 -*-
"""
Script to import .bok files
Copyright © 2008-2010, Muayyad Alsadi
Released under terms of Waqf Public License.
This program is free software; you can redistribute it and/or modify
it under the terms of the latest version Waqf Public License as
published by Ojuba.org.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
The Latest version of the license can be found on
"http://waqf.ojuba.org/license"
"""
import sys, os, os.path, glob, shutil, re
import sqlite3
from getopt import getopt, GetoptError
# TODO: take shamela prefix
# import Files/special.mdb Files/main.mdb first
# then take bokids eg. -f /opt/emu/apps/shamela-r1/ 100 15001 ..etc.
# if first arg of ShamelaSqlite is a directory,
# getTables should generate tb:fn
#
def usage():
print '''\
Usage: %s [-i] [-m DIR] FILES ...
Where:
\t-i\t\t- in-memory
\t-m DIR\t\t- move successfully imported BOK files into DIR
\t--ft-prefix=FOOTER_PREFIX default is "(¬"
\t--ft-suffix=FOOTER_SUFFIX default is ")"
\t--ft-leading=[0|1] should the footnote be matched at line start only, default is 0
\t--ft-sp=[0|1|2] no, single or many whitespaces, default is 0
\t--bft-prefix=FOOTER_PREFIX footnote anchor in body prefix, default is "(¬"
\t--bft-suffix=FOOTER_SUFFIX footnote anchor in body suffix, default is ")"
\t--bft-sp=[0|1|2] no, single or many whitespaces, default is 0
the generated files will be moved into db in thawab prefix (usually ~/.thawab/db/)
''' % os.path.basename(sys.argv[0])
try:
opts, args = getopt(sys.argv[1:], "im:", ["help", 'ft-prefix=', 'ft-suffix=', 'bft-prefix=', 'bft-suffix=', 'ft-leading=', 'ft-sp=', 'bft-sp='])
except GetoptError, err:
print str(err) # will print something like "option -a not recognized"
usage()
sys.exit(1)
if not args:
print "please provide at least one .bok files"
usage()
sys.exit(1)
opts=dict(opts)
def progress(msg, p, *a, **kw): print " ** [%g%% completed] %s" % (p,msg)
from Thawab.core import ThawabMan
from Thawab.shamelaUtils import ShamelaSqlite,shamelaImport
th=ThawabMan()
thprefix=th.prefixes[0]
if not opts.has_key('-i'): db_fn=os.path.expanduser('~/bok2sql.db')
else: db_fn=None
# ¬ U+00AC NOT SIGN
ft_prefix=opts.get('--ft-prefix','(¬').decode('utf-8'); ft_prefix_len=len(ft_prefix)
ft_suffix=opts.get('--ft-suffix',')').decode('utf-8'); ft_suffix_len=len(ft_suffix)
ft_sp=[u'', ur'\s?' , ur'\s*'][int(opts.get('--ft-sp','0'))]
ft_at_line_start=int(opts.get('--ft-leading','0'))
footnote_re=(ft_at_line_start and u'^\s*' or u'') + re.escape(ft_prefix)+ft_sp+ur'(\d+)'+ft_sp+re.escape(ft_suffix)
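# With the defaults this yields a pattern matching a literal "(¬" then digits then ")",
# e.g. the footnote mark "(¬12)" captures "12"; --ft-leading=1 anchors it to line start.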
bft_prefix=opts.get('--bft-prefix','(¬').decode('utf-8');
bft_suffix=opts.get('--bft-suffix',')').decode('utf-8');
bft_sp=[u'', ur'\s?' , ur'\s*'][int(opts.get('--bft-sp','0'))]
body_footnote_re=re.escape(bft_prefix)+bft_sp+ur'(\d+)'+bft_sp+re.escape(bft_suffix)
for fn in args:
if db_fn:
if os.path.exists(db_fn): os.unlink(db_fn)
cn=sqlite3.connect(db_fn, isolation_level=None)
else: cn=None
sh=ShamelaSqlite(fn, cn, 0 , 0, progress)
sh.toSqlite()
for bkid in sh.getBookIds():
ki=th.mktemp()
c=ki.seek(-1,-1)
m=shamelaImport(c, sh, bkid, footnote_re, body_footnote_re, ft_prefix_len, ft_suffix_len)
c.flush()
print "moving %s to %s" % (ki.uri, os.path.join(thprefix,'db', m['kitab']+u"-"+m['version']+u".ki"))
shutil.move(ki.uri, os.path.join(thprefix,'db', m['kitab']+u"-"+m['version']+u".ki"))
if opts.has_key('-m'):
dd=opts['-m']
if not os.path.isdir(dd):
try: os.makedirs(dd)
except OSError: pass
if os.path.isdir(dd):
dst=os.path.join(dd,os.path.basename(fn))
print "moving %s to %s" % (fn,dst)
shutil.move(fn, dst)
else: print "could not move .bok files, target directory does not exist"
thawab-4.1/po/ 0000775 0000000 0000000 00000000000 13052627552 0013251 5 ustar 00root root 0000000 0000000 thawab-4.1/po/Makefile 0000664 0000000 0000000 00000002276 13052627552 0014720 0 ustar 00root root 0000000 0000000 APPNAME := thawab
POFILES := $(wildcard *.po)
MOFILES := $(patsubst %.po,%.mo,$(POFILES))
CRE_POTFILESin := for i in $(shell cat POTFILES.am ); do echo ../$${i} | sed 's/\s/\n/g; s/\.\.\///g' ; done > POTFILES.in
CAT := cat
ECHO := echo
MKDIR := mkdir
MSGFMT := msgfmt
INTLTOOL_UPDATE := intltool-update
RM := $(shell which rm | egrep '/' | sed 's/\s*//g')
MV := $(shell which mv | egrep '/' | sed 's/\s*//g')
all: $(APPNAME).pot $(MOFILES)
$(APPNAME).pot:
@$(CRE_POTFILESin)
@$(ECHO) "*** Building $(APPNAME).pot: $(SOURCES)"
@$(CAT) POTFILES.in
@$(INTLTOOL_UPDATE) -g $(APPNAME) -p
%.mo: %.po
@$(ECHO) "- Merging translations into $*.mo"
@$(MSGFMT) $*.po -o $*.mo
@$(MKDIR) -p ../locale/$*/LC_MESSAGES/ || :
@$(ECHO) "- Moving: $*.mo -> ../locale/$*/LC_MESSAGES/$(APPNAME).mo"
@$(MV) $*.mo ../locale/$*/LC_MESSAGES/$(APPNAME).mo
@$(RM) -f *.tmp
%.po: $(APPNAME).pot
@$(ECHO) "- Updating: $*.po"
@$(INTLTOOL_UPDATE) -g $(APPNAME) -d $*
clean:
@$(ECHO) "*** Cleaning pos..."
@$(ECHO) "- Removing: $(APPNAME).pot"
@$(RM) -f $(APPNAME).pot
@$(ECHO) "- Removing: *.tmp"
@$(RM) -f *.tmp
@$(ECHO) "- Removing: *.mo"
@$(RM) -f *.mo
@$(ECHO) "- Removing: POTFILES.in"
@$(RM) -f POTFILES.in
thawab-4.1/po/POTFILES.am 0000664 0000000 0000000 00000000037 13052627552 0015015 0 ustar 00root root 0000000 0000000 thawab.desktop.in
Thawab/*.py
thawab-4.1/po/ar.po 0000664 0000000 0000000 00000015434 13052627552 0014222 0 ustar 00root root 0000000 0000000 # Translation of thawab templates to Arabic
# Copyright (C) 2008-2010, ojuba.org
# This file is distributed under the same license as the thawab package.
# Muayyad Saleh Alsadi , 2010
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2014-02-16 04:06+0200\n"
"PO-Revision-Date: 2010-06-12 19:35+0300\n"
"Last-Translator: Muayyad Saleh Alsadi \n"
"Language-Team: thawab@ojuba.org\n"
"Language: \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
#: ../thawab.desktop.in.h:1 ../Thawab/gtkUi.py:837 ../Thawab/gtkUi.py:849
msgid "Thawab"
msgstr "ثواب"
#: ../thawab.desktop.in.h:2
msgid "Electronic Arabic/Islamic Encyclopedia"
msgstr "موسوعة ثواب العربية الإسلامية"
#: ../Thawab/gtkUi.py:154
msgid "Import Shamela .bok files"
msgstr "استيراد ملفات bok من الشاملة"
#: ../Thawab/gtkUi.py:216
msgid "Advanced options"
msgstr "خيارات متقدمة"
#: ../Thawab/gtkUi.py:221
msgid "Performance tuning:"
msgstr "معايرة الأداء"
#: ../Thawab/gtkUi.py:228
msgid "in memory"
msgstr "في الذاكرة"
#: ../Thawab/gtkUi.py:229
msgid "faster but consumes more memory and harder to debug."
msgstr "أسرع لكنها تستهلك الكثير من الذاكرة وصعبة التمحيص"
#: ../Thawab/gtkUi.py:238
msgid "Release Major:"
msgstr "الإصدار الأكبر:"
#: ../Thawab/gtkUi.py:243
msgid "Release Minor:"
msgstr "الإصدار الأصغر:"
#: ../Thawab/gtkUi.py:252 ../Thawab/gtkUi.py:278
msgid "Prefix:"
msgstr "السابقة:"
#: ../Thawab/gtkUi.py:258 ../Thawab/gtkUi.py:284
msgid "Suffix:"
msgstr "اللاحقة:"
#: ../Thawab/gtkUi.py:264
msgid "only at line start"
msgstr "فقط على بداية الكلمة"
#: ../Thawab/gtkUi.py:268 ../Thawab/gtkUi.py:291
msgid "in between spaces:"
msgstr "المسافات البينية:"
#: ../Thawab/gtkUi.py:269 ../Thawab/gtkUi.py:292
msgid "no spaces"
msgstr "دون مسافات"
#: ../Thawab/gtkUi.py:270 ../Thawab/gtkUi.py:293
msgid "optional white-space"
msgstr "مسافة واحدة اختيارية"
#: ../Thawab/gtkUi.py:271 ../Thawab/gtkUi.py:294
msgid "optional white-spaces"
msgstr "مسافات اختيارية"
#: ../Thawab/gtkUi.py:344 ../Thawab/gtkUi.py:357
msgid "working ..."
msgstr "يجري العمل ..."
#. canceled
#: ../Thawab/gtkUi.py:413 ../Thawab/gtkUi.py:414 ../Thawab/gtkUi.py:432
#: ../Thawab/gtkUi.py:433
msgid "Canceled"
msgstr "ملغي"
#. windows can't move an opened file
#. FIXME: close ki in a clean way so the above code works in windows
#: ../Thawab/gtkUi.py:448 ../Thawab/gtkUi.py:461 ../Thawab/gtkUi.py:817
#: ../Thawab/gtkUi.py:828
msgid "Done"
msgstr "تم"
#: ../Thawab/gtkUi.py:462
msgid "Convert Book, Done"
msgstr ""
#: ../Thawab/gtkUi.py:482
msgid "Select files to import"
msgstr "اختر الملفات كي تستورد"
#: ../Thawab/gtkUi.py:489
msgid "Shamela BOK files"
msgstr "ملفات BOK الخاصة بالشاملة"
#: ../Thawab/gtkUi.py:667
msgid "Open Link in New Tab"
msgstr "فتح الرابط في لسان جديد"
#: ../Thawab/gtkUi.py:716
msgid "Manage search index"
msgstr "إدارة فهارس البحث"
#: ../Thawab/gtkUi.py:727
msgid "Queue new books"
msgstr "دفع الكتب الجديدة"
#: ../Thawab/gtkUi.py:744
msgid "Indexing jobs canceled"
msgstr "تم إلغاء مهمات الفهرسة"
#: ../Thawab/gtkUi.py:761
#, python-format
msgid "Indexing ... (%d left)"
msgstr "يجري الفهرسة ... (بقي %d)"
#. Gtk.main_iteration_do(True)
#: ../Thawab/gtkUi.py:766
msgid "No indexing jobs left"
msgstr "لم يتبق أي مهمات فهرسة"
#: ../Thawab/gtkUi.py:769
#, python-format
msgid "Indexing %d jobs, Done"
msgstr "إنتهى %d مهمات فهرسة"
#: ../Thawab/gtkUi.py:776
msgid "Misc. Fixes"
msgstr "إصلاحات متنوعة"
#: ../Thawab/gtkUi.py:788
msgid ""
"Those procedures are to be used in case of "
"emergency only,\n"
"for example to recover power failure."
msgstr ""
"تستخدم هذه الإجراءات في الحالات الطارئة فقط,\n"
"مثل عطل ناتج عن انقطاع الطاقة الكهربائية."
#: ../Thawab/gtkUi.py:793
msgid "remove search index"
msgstr "حذف فهرس البحث"
#: ../Thawab/gtkUi.py:794
msgid "you will need to re-index all books"
msgstr "ستحتاج للقيام بإعادة فعرسة كل الكتب"
#: ../Thawab/gtkUi.py:799
msgid "remove meta data cache to generate a fresh one"
msgstr "إزالة نسخة الميتا الخبيئة وتوليد واحدة جديدة"
#: ../Thawab/gtkUi.py:800
msgid "instead of incremental meta data gathering"
msgstr "عوضا عن جمعها تزايديا"
#: ../Thawab/gtkUi.py:810
msgid ""
"You will need to recreate search index in-order to search again.\n"
"Are you sure you want to remove search index?"
msgstr ""
"يتوجب عليك إعادة توليد فهارس البحث حتى يعمل البحث مجددا.\n"
"هل أنت متأكد من رغبتك في حذف فهارس البحث؟"
#: ../Thawab/gtkUi.py:815
#, python-format
msgid "unable to remove folder [%s]"
msgstr "غير قادر على حذف المجلد [%s]"
#: ../Thawab/gtkUi.py:820
msgid "Are you sure you want to remove search meta data cache?"
msgstr "هل أنت متاكد من رغبتك في إزالة خبيئة الميتا؟"
#: ../Thawab/gtkUi.py:825
#, python-format
msgid "unable to remove file [%s]"
msgstr "غير قادر على إزالة الملف [%s]"
#: ../Thawab/gtkUi.py:860
msgid "Open a new tab"
msgstr "فتح لسان جديد"
#: ../Thawab/gtkUi.py:868
msgid "Import"
msgstr "استيراد"
#: ../Thawab/gtkUi.py:869
msgid "Import .bok files"
msgstr "استيراد ملف bok"
#: ../Thawab/gtkUi.py:875
msgid "Index"
msgstr "فهرسة"
#: ../Thawab/gtkUi.py:877
msgid "Create search index"
msgstr "إنشاء فهرس البحث"
#: ../Thawab/gtkUi.py:885
msgid "Zoom in"
msgstr "تكبير"
#: ../Thawab/gtkUi.py:890
msgid "Makes things appear bigger"
msgstr "تجعل الأشياء تبدو أكبر"
#: ../Thawab/gtkUi.py:896
msgid "Zoom out"
msgstr "تصغيير"
#: ../Thawab/gtkUi.py:899
msgid "Makes things appear smaller"
msgstr "تجعل الأشياء تبدو أصغر"
#: ../Thawab/gtkUi.py:905
msgid "1:1 Zoom"
msgstr "تكبير 1:1"
#: ../Thawab/gtkUi.py:908
msgid "Restore original zoom factor"
msgstr "تعيد التكبير الأصلي"
#: ../Thawab/gtkUi.py:916
msgid "Fixes"
msgstr "إصلاحات"
#: ../Thawab/gtkUi.py:918
msgid "Misc Fixes"
msgstr "إصلاحات متنوعة"
#: ../Thawab/gtkUi.py:926
msgid "Help"
msgstr "مساعدة"
#: ../Thawab/gtkUi.py:927
msgid "Show user manual"
msgstr "إظهار دليل المستخدم"
thawab-4.1/po/de.po 0000664 0000000 0000000 00000014556 13052627552 0014204 0 ustar 00root root 0000000 0000000 # Translation of thawab templates to German
# Copyright (C) 2008-2010, ojuba.org
# This file is distributed under the same license as the thawab package.
# cegerxwin , 2010
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2014-02-16 04:06+0200\n"
"PO-Revision-Date: 2010-08-27 23:53+0100\n"
"Last-Translator: cegerxwin \n"
"Language-Team: LANGUAGE \n"
"Language: \n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=utf-8\n"
"Content-Transfer-Encoding: 8bit\n"
#: ../thawab.desktop.in.h:1 ../Thawab/gtkUi.py:837 ../Thawab/gtkUi.py:849
msgid "Thawab"
msgstr "Thawab"
#: ../thawab.desktop.in.h:2
msgid "Electronic Arabic/Islamic Encyclopedia"
msgstr "Elektronische Arabisch/Islamische Ezyklopädie"
#: ../Thawab/gtkUi.py:154
msgid "Import Shamela .bok files"
msgstr "Importiere Shamela .bok Dateien"
#: ../Thawab/gtkUi.py:216
msgid "Advanced options"
msgstr "Erweiterte Optionen:"
#: ../Thawab/gtkUi.py:221
msgid "Performance tuning:"
msgstr "Geschwindigkeits Feineinstellungen"
#: ../Thawab/gtkUi.py:228
msgid "in memory"
msgstr "im Speicher"
#: ../Thawab/gtkUi.py:229
msgid "faster but consumes more memory and harder to debug."
msgstr ""
"schneller aber verbraucht mehr Speicherplatz und die Fehlersuche ist "
"aufwändiger"
#: ../Thawab/gtkUi.py:238
msgid "Release Major:"
msgstr "Hauptveröffentlichung:"
#: ../Thawab/gtkUi.py:243
msgid "Release Minor:"
msgstr "Kleinere Veröffentlichungen:"
#: ../Thawab/gtkUi.py:252 ../Thawab/gtkUi.py:278
msgid "Prefix:"
msgstr "Präfix:"
#: ../Thawab/gtkUi.py:258 ../Thawab/gtkUi.py:284
msgid "Suffix:"
msgstr "Suffix:"
#: ../Thawab/gtkUi.py:264
msgid "only at line start"
msgstr "nur am Zeilenanfang"
#: ../Thawab/gtkUi.py:268 ../Thawab/gtkUi.py:291
msgid "in between spaces:"
msgstr "in den Zwischenräumen:"
#: ../Thawab/gtkUi.py:269 ../Thawab/gtkUi.py:292
msgid "no spaces"
msgstr "keine Zwischenräume"
#: ../Thawab/gtkUi.py:270 ../Thawab/gtkUi.py:293
msgid "optional white-space"
msgstr "optional Leerzeile"
#: ../Thawab/gtkUi.py:271 ../Thawab/gtkUi.py:294
msgid "optional white-spaces"
msgstr "optional Leerzeilen"
#: ../Thawab/gtkUi.py:344 ../Thawab/gtkUi.py:357
msgid "working ..."
msgstr "läuft..."
#. canceled
#: ../Thawab/gtkUi.py:413 ../Thawab/gtkUi.py:414 ../Thawab/gtkUi.py:432
#: ../Thawab/gtkUi.py:433
msgid "Canceled"
msgstr "Abgebrochen"
#. windows can't move an opened file
#. FIXME: close ki in a clean way so the above code works in windows
#: ../Thawab/gtkUi.py:448 ../Thawab/gtkUi.py:461 ../Thawab/gtkUi.py:817
#: ../Thawab/gtkUi.py:828
msgid "Done"
msgstr "Erledigt"
#: ../Thawab/gtkUi.py:462
msgid "Convert Book, Done"
msgstr ""
#: ../Thawab/gtkUi.py:482
msgid "Select files to import"
msgstr "Wähle zu importierende Dateien aus"
#: ../Thawab/gtkUi.py:489
msgid "Shamela BOK files"
msgstr "Shamela BOK Dateien"
#: ../Thawab/gtkUi.py:667
msgid "Open Link in New Tab"
msgstr "Öffne Link im neuen Tab"
#: ../Thawab/gtkUi.py:716
msgid "Manage search index"
msgstr "Verwalte Suchindex"
#: ../Thawab/gtkUi.py:727
msgid "Queue new books"
msgstr "Neue Bücher in der Warteschlange aufstellen"
#: ../Thawab/gtkUi.py:744
msgid "Indexing jobs canceled"
msgstr "Indizierungsaufgaben abgebrochen"
#: ../Thawab/gtkUi.py:761
#, python-format
msgid "Indexing ... (%d left)"
msgstr "Indizierung...(%d left)"
#. Gtk.main_iteration_do(True)
#: ../Thawab/gtkUi.py:766
msgid "No indexing jobs left"
msgstr "Keine Indizierungsaufgaben mehr vorhanden"
#: ../Thawab/gtkUi.py:769
#, python-format
msgid "Indexing %d jobs, Done"
msgstr "%d Indizierungsaufgaben abgeschlossen"
#: ../Thawab/gtkUi.py:776
msgid "Misc. Fixes"
msgstr "Misc. Fixes"
#: ../Thawab/gtkUi.py:788
msgid ""
"Those procedures are to be used in case of "
"emergency only,\n"
"for example to recover power failure."
msgstr ""
"Diese Prozeduren sollten nur in Notfällen benutzt "
"werden,\n"
"z.B. um nach einem Stromausfall die Daten wiederherzustellen."
#: ../Thawab/gtkUi.py:793
msgid "remove search index"
msgstr "entferne Suchindex"
#: ../Thawab/gtkUi.py:794
msgid "you will need to re-index all books"
msgstr "Neu-Indizierung aller Bücher notwendig"
#: ../Thawab/gtkUi.py:799
msgid "remove meta data cache to generate a fresh one"
msgstr "entferne meta Daten Speicher um eine neue zu generieren"
#: ../Thawab/gtkUi.py:800
msgid "instead of incremental meta data gathering"
msgstr "anstatt der inkrementellen meta Daten ansammlung"
#: ../Thawab/gtkUi.py:810
msgid ""
"You will need to recreate search index in-order to search again.\n"
"Are you sure you want to remove search index?"
msgstr ""
"Du musst den Suchindex noch einmal um danach eine Suche durchführen zu "
"können.\n"
"Bist du sicher, das du den Suchindex entfernen möchtest?"
#: ../Thawab/gtkUi.py:815
#, python-format
msgid "unable to remove folder [%s]"
msgstr "Fehler beim entfernen des Ordners [%s]"
#: ../Thawab/gtkUi.py:820
msgid "Are you sure you want to remove search meta data cache?"
msgstr "Sind sie sicher, das sie die Suchmetadaten Speicher entfernen möchten?"
#: ../Thawab/gtkUi.py:825
#, python-format
msgid "unable to remove file [%s]"
msgstr "Fehler beim entfernen der Datei [%s]"
#: ../Thawab/gtkUi.py:860
msgid "Open a new tab"
msgstr "Öffne im neuen Tab"
#: ../Thawab/gtkUi.py:868
msgid "Import"
msgstr "Importieren"
#: ../Thawab/gtkUi.py:869
msgid "Import .bok files"
msgstr "Importiere .bok Dateien"
#: ../Thawab/gtkUi.py:875
msgid "Index"
msgstr "Index"
#: ../Thawab/gtkUi.py:877
msgid "Create search index"
msgstr "Erzeuge Suchindex"
#: ../Thawab/gtkUi.py:885
msgid "Zoom in"
msgstr "Einzoomen"
#: ../Thawab/gtkUi.py:890
msgid "Makes things appear bigger"
msgstr "Macht das Dinge größer wirken"
#: ../Thawab/gtkUi.py:896
msgid "Zoom out"
msgstr "Auszoomen"
#: ../Thawab/gtkUi.py:899
msgid "Makes things appear smaller"
msgstr "Macht das Dinge kleiner wirken"
#: ../Thawab/gtkUi.py:905
msgid "1:1 Zoom"
msgstr "1:1 Zoom"
#: ../Thawab/gtkUi.py:908
msgid "Restore original zoom factor"
msgstr "Ursprünglichen Zoomfaktor wiederherstellen"
#: ../Thawab/gtkUi.py:916
msgid "Fixes"
msgstr "Fixes"
#: ../Thawab/gtkUi.py:918
msgid "Misc Fixes"
msgstr "Misc Fixes"
#: ../Thawab/gtkUi.py:926
msgid "Help"
msgstr "Hilfe"
#: ../Thawab/gtkUi.py:927
msgid "Show user manual"
msgstr "Zeige Benutzerhandbuch"
thawab-4.1/readme 0000664 0000000 0000000 00000000044 13052627552 0014011 0 ustar 00root root 0000000 0000000 إقرأني أولا
Read me first
thawab-4.1/setup.py 0000664 0000000 0000000 00000002351 13052627552 0014346 0 ustar 00root root 0000000 0000000 #! /usr/bin/python
import sys, os, os.path
from distutils.core import setup
from glob import glob
# to install type:
# python setup.py install --root=/
def no_empty(l):
    # keep only (directory, files) pairs whose file list is non-empty
    return filter(lambda (i,j): j, l)
def recursive_data_dir(to, src, l=None):
    # walk src recursively, collecting (install-dir, [files]) pairs for data_files
    D = glob(os.path.join(src, '*'))
    files = filter(lambda i: os.path.isfile(i), D)
    dirs = filter(lambda i: os.path.isdir(i), D)
    if l == None: l = []
    l.append((to, files))
    for d in dirs:
        recursive_data_dir(os.path.join(to, os.path.basename(d)), d, l)
    return l
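# For illustration only (hypothetical tree, not part of the original script):
#   recursive_data_dir('share/thawab/', 'thawab-data')
#   -> [('share/thawab/', ['thawab-data/some-file']),
#       ('share/thawab/themes', [...]), ...]  # one pair per nested directory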
locales=map(lambda i: ('share/'+i,[''+i+'/thawab.mo',]),glob('locale/*/LC_MESSAGES'))
data_files = no_empty(recursive_data_dir('share/thawab/', 'thawab-data'))
data_files.extend(locales)
setup(name='thawab', version='4.1',
description='Thawab Arabic/Islamic encyclopedia system',
author='Muayyad Saleh Alsadi',
author_email='alsadi@ojuba.org',
url='http://thawab.ojuba.org/',
license='Waqf',
packages=['Thawab'],
scripts=['thawab-gtk','thawab-server'],
classifiers=[
'Development Status :: 4 - Beta',
'Intended Audience :: End Users/Desktop',
'Operating System :: POSIX',
'Programming Language :: Python',
],
data_files=data_files
)
thawab-4.1/th-set-meta.py 0000775 0000000 0000000 00000004446 13052627552 0015350 0 ustar 00root root 0000000 0000000 #! /usr/bin/python
# -*- coding: UTF-8 -*-
"""
Setting meta data for thawab files
Copyright © 2010, Muayyad Alsadi
Released under terms of Waqf Public License.
This program is free software; you can redistribute it and/or modify
it under the terms of the latest version Waqf Public License as
published by Ojuba.org.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
The Latest version of the license can be found on
"http://waqf.ojuba.org/license"
"""
import sys, os, os.path
import Thawab.core
from getopt import getopt, GetoptError
def usage():
    # print a help message listing every supported flag
    print '''\
Usage: %s [plbvRrtayAYBVck] VALUE ... FILES ...
Where:
\t-p VALUE\t set repo name to VALUE
\t-l VALUE\t set language name to VALUE
\t-b VALUE\t set kitab name to VALUE
\t-v VALUE\t set version name to VALUE
\t-R VALUE\t set release major name to VALUE
\t-r VALUE\t set release minor name to VALUE
\t-t VALUE\t set kitab type to VALUE
\t-a VALUE\t set author name to VALUE
\t-y VALUE\t set author death year to VALUE
\t-A VALUE\t set original kitab author name to VALUE
\t-Y VALUE\t set original kitab author death year to VALUE
\t-B VALUE\t set original kitab name to VALUE
\t-V VALUE\t set original kitab version to VALUE
\t-c VALUE\t set classification to VALUE
\t-k VALUE\t set keywords to VALUE
''' % os.path.basename(sys.argv[0])
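# Example invocation (hypothetical book file; flags as documented above):
#   th-set-meta.py -a 'Some Author' -y 1420 -c fiqh /path/to/example.ki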
meta_keys={
'-p':'repo', '-l':'lang', '-b':'kitab',
'-v':'version', '-R':'releaseMajor', '-r':'releaseMinor',
'-t':'type', '-a':'author', '-y':'year',
'-A':'originalAuthor', '-Y':'originalYear', '-B':'originalKitab', '-V':'originalVersion',
'-c':'classification', '-k':'keywords'
}
metas=set(meta_keys.values())
try:
    opts, args = getopt(sys.argv[1:], "hp:l:b:v:r:R:t:a:y:A:Y:B:V:c:k:", ["help"])
except GetoptError, err:
    print str(err) # will print something like "option -a not recognized"
    usage()
    sys.exit(1)
opts=dict([(meta_keys.get(i,i),j) for i,j in opts])
if opts.has_key("-h") or opts.has_key("--help") or len(opts)==0 or not args:
    usage()
    sys.exit(1)
th=Thawab.core.ThawabMan()
for uri in args:
    ki = th.getKitabByUri(uri)
    #print ki.meta
    for i in opts:
        ki.meta[i] = opts[i]
    #print ki.meta
    ki.setMCache(ki.meta)
thawab-4.1/thApiTest.py 0000775 0000000 0000000 00000001633 13052627552 0015120 0 ustar 00root root 0000000 0000000 #! /usr/bin/python
# -*- coding: UTF-8 -*-
import os, os.path, Thawab.core
th=Thawab.core.ThawabMan()
th.searchEngine.reindexAll()
# th.loadMeta() # to detect new files and add them, etc.
meta=th.getMeta()
print meta.getUriList()
th.searchEngine.reindexKitab('/home/alsadi/.thawab/db/uthaymine.ki')
## export to xml
#from cStringIO import StringIO
#s=StringIO()
#ki=Thawab.core.Kitab('/home/alsadi/.thawab/tmp/THAWAB_xqkca0.ki3001')
#n=ki.root.toXml(s)
#print s.getvalue()
## export to HTML or wiki
#import Thawab.core
#ki=Thawab.core.Kitab('/home/alsadi/.thawab/tmp/THAWAB_xqkca0.ki3001')
#s=ki.root.toHTML()
#print s
##searching the index
#for i in th.searchEngine.queryIndex('إنشاء'.decode('utf-8')): print i['title']
#for i in th.searchEngine.queryIndex('إنشاء kitab:pyqt4'.decode('utf-8')): print i['title']
#for i in th.searchEngine.queryIndex('إنشاء kitab:test'.decode('utf-8')): print i['title']
thawab-4.1/thawab-data/ 0000775 0000000 0000000 00000000000 13052627552 0015010 5 ustar 00root root 0000000 0000000 thawab-4.1/thawab-data/themes/ 0000775 0000000 0000000 00000000000 13052627552 0016275 5 ustar 00root root 0000000 0000000 thawab-4.1/thawab-data/themes/default/ 0000775 0000000 0000000 00000000000 13052627552 0017721 5 ustar 00root root 0000000 0000000 thawab-4.1/thawab-data/themes/default/static/ 0000775 0000000 0000000 00000000000 13052627552 0021210 5 ustar 00root root 0000000 0000000 thawab-4.1/thawab-data/themes/default/static/fx.css 0000664 0000000 0000000 00000002515 13052627552 0022342 0 ustar 00root root 0000000 0000000 /* fx */
#async_tips_div {
background-color:rgba(255,255,200,0.9);
}
#overlay {
opacity:0.7;
}
.showOnFocus {
opacity:0.4;
}
.showOnFocus:active, .showOnFocus:focus {
opacity:0.75;
}
.showOnFocus:hover {
opacity:1;
}
.blurOnFocus {
opacity:1.0;
}
.blurOnFocus:active, .blurOnFocus:focus {
opacity:0.75;
}
.blurOnFocus:hover {
opacity:0.4;
}
#minisearch input {
background:rgba(255,255,255,0.6);
}
#absnav {
-webkit-border-radius:20px;
-moz-border-radius:20px;
border-radius:20px;
padding:0 20px;
-webkit-box-shadow:0 0 8px rgba(240,240,240,0.4);
-moz-box-shadow:0 0 8px 8px rgba(240,240,240,0.4);
}
#absnav2, #absnav3{
-webkit-border-radius:20px;
-moz-border-radius:20px;
border-radius:20px;
padding:0 20px;
-webkit-box-shadow:0 0 8px rgba(240,240,240,0.4);
-moz-box-shadow:0 0 8px 8px rgba(240,240,240,0.4);
}
#absnav3{
padding:0 5px;
}
#minisearch {
-webkit-border-bottom-left-radius:8px;
-moz-border-radius-bottomleft:8px;
border-bottom-left-radius:8px;
-webkit-border-bottom-right-radius:32px;
-moz-border-radius-bottomright:32px;
border-bottom-right-radius:32px;
padding:0 8px 2px 0;
}
#container{
-webkit-border-radius:10px;
-moz-border-radius:10px;
border-radius:10px;
/* -webkit-box-shadow:0 0 8px rgba(128,128,128,0.4); */
-moz-box-shadow:0 0 8px 4px rgba(128,128,128,0.4);
}
#absnav3 { position: fixed; }
thawab-4.1/thawab-data/themes/default/static/ie-fx.css 0000664 0000000 0000000 00000000413 13052627552 0022730 0 ustar 00root root 0000000 0000000 /* fx-ie */
#overlay {
filter:alpha(opacity=70);
}
.showOnFocus {
filter:alpha(opacity=50);
}
.showOnFocus:hover {
filter:alpha(opacity=100);
}
.blurOnFocus{
filter:alpha(opacity=100);
}
.blurOnFocus:hover {
filter:alpha(opacity=40);
}
#absnav3{ display:none; }
thawab-4.1/thawab-data/themes/default/static/img/ 0000775 0000000 0000000 00000000000 13052627552 0021764 5 ustar 00root root 0000000 0000000 thawab-4.1/thawab-data/themes/default/static/img/about.gif 0000664 0000000 0000000 00000002157 13052627552 0023572 0 ustar 00root root 0000000 0000000 GIF89a :;<