==== lightproof-1.5+git20140515/make.py ====
# -*- encoding: UTF-8 -*-
import sys, os, zipfile, traceback, Dialog
import configparser as cp
import pythonpath.lightproof_compile___implname__
from string import Template
def dist(fn, a):
    a['locales'] = a["locales"].replace("_", "-")
    a['loc'] = str(dict([[i, [i[0:2], i[3:5], ""]] for i in a["locales"].split(" ")]))
    distname = a['implname'] + "-" + a['version'] + '.oxt'
    z = zipfile.ZipFile(distname, mode='w', compression = zipfile.ZIP_DEFLATED)
    f = open(fn + ".dat", 'r', encoding="utf-8")
    code = pythonpath.lightproof_compile___implname__.c(f.read(), a['lang'])
    a["code"] = code["code"]
    a['data'] = code["rules"]
    for i in ["META-INF/manifest.xml", "description.xml", "Linguistic.xcu", "Lightproof.py",
              "pythonpath/lightproof_handler___implname__.py", "pythonpath/lightproof_impl___implname__.py",
              "pythonpath/lightproof___implname__.py"]:
        z.writestr(i.replace("__implname__", a["implname"]), Template(open(i, "r", encoding="utf-8").read()).safe_substitute(a))
    for i in a["extras"].split(","):
        z.writestr(i.strip().replace("../", "").replace("__implname__", a["implname"]),
                   open(fn[:fn.rfind("/")+1] + i.strip(), 'rb').read())
    try:
        d = open(fn + ".dlg", "r", encoding="utf-8").readlines()
        Dialog.c(a["implname"], d, z, a["lang"])
    except:
        z.writestr("pythonpath/lightproof_opts_%s.py"%a["implname"], "")
if len(sys.argv) == 1:
    print("""Synopsis: python make.py config_file
eg. python make.py src/en/en.cfg""")
    sys.exit(0)
fArgs = cp.SafeConfigParser()
for i in sys.argv[1:]:
    try:
        fArgs.read(i)
        dist(i[:-4], fArgs._sections['args'])
    except:
        print(traceback.format_exc())
        print("missing config file or options: " + str(i))
        sys.exit(0)
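For reference, a hedged sketch of the [args] section such a configuration file is expected to provide. The key names are inferred from dist() above and from the ${...} placeholders in the bundled templates; the values are only examples, not a shipped configuration:

# illustrative only: the [args] keys that make.py and the templates consume
import configparser

example_cfg = """
[args]
implname = lightproof_en_US
version = 0.1
lang = en
locales = en_US en_GB
name = Lightproof grammar checker (English)
author = Jane Doe
extras = README_en.txt
"""
p = configparser.ConfigParser()
p.read_string(example_cfg)
print(dict(p["args"]))  # the same mapping dist() receives via fArgs._sections['args']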
==== lightproof-1.5+git20140515/NEWS ====
2012-08-23: Version 1.5
- capitalization based on the underlined group (on 2012-07-09)
- fix positioning of the blue underline (on 2012-07-09)
- alternative separator (|) for suggestions and URL suggestions
- rule fixes
2012-02-04: Version 1.5 beta 3
- bug fix in suggestion formatting
2012-02-02: Version 1.5 beta 2
- rule editor (Writer extension)
- code cleanups, only Python dependency in config system
2011-12-05: Version 1.5 beta (1.4.3)
- Improved English and Hungarian rules
- See ChangeLog
2011-07-12: Version 1.4.1
- fix option saving in LibreOffice
- fix localization of the name of the grammar checker in OpenOffice.org/LibreOffice Options
- multiple options in the same line in the Options dialog
- new functions: stem(), generate()
- improved Hungarian rules
2010-02-19 Version 1.4
- user code support ([code] sections in the rule files)
- fix Mac OS X PyUNO problem
2009-12-16 Version 1.3
- Lightproof is a grammar checker extension generator (see doc/manual.txt):
the result of the generation is a single Lightproof or vendor specific
grammar checker language package, eg. lightproof-en_US.oxt or
your-grammar-checker-en_US.oxt.
- native OpenOffice.org Options support (see Options->Language Settings
after the installation of en_US or hu_HU oxt packages of the Lightproof
distribution). Documentation: doc/dialog.txt
- new, simplified rule syntax (see the compile sketch after this entry):
- rule sections ([Word], [word], [Char], [char]):
- word rules: patterns with default word boundaries
- [Word]: ignore case
- [word]: case sensitive
simplified syntax:
[Word] (default)
foo -> bar # bar is far better
instead of the old
(?i)\bfoo\b -> bar # bar is far better
- character rules: old default
- [Char]: ignore case
- [char]: case sensitive
- expressions in suggestions: specified by a starting equal sign
foo\w+ -> =\0.upper() # suggest with uppercase letters
- new library functions:
spell: spelling (Boolean function)
suggest: suggestion
affix: morph variant (only with affix fields)
calc: Calc function access (see NUMBERTEXT example in data/hu_HU.dat)
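The new syntax can be exercised outside the office suite by calling the bundled compiler directly (the same entry point make.py uses), run from the source tree root. A minimal sketch; the rule text and the "en" language code are only examples:

# minimal sketch: compile two rules written in the simplified syntax above
import pythonpath.lightproof_compile___implname__ as compiler

rules = r"""[Word]
foo -> bar # bar is far better
foo\w+ -> =\0.upper() # suggest with uppercase letters
"""
compiled = compiler.c(rules, "en")
print(len(compiled["rules"]), "rule(s) compiled")  # compiled["rules"] is what make.py embeds as ${data}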
2009-10-23 Version 1.2.2
- a small improvement in Hungarian grammar checking
2009-10-20 Version 1.2.1
- small improvements in Hungarian grammar checking
2009-10-19 Version 1.2
- Hunspell integration and
- extended syntax for grammar checking based on
morphological analysis (see data/hu_HU.dat)
- multi-line rules
- extended Hungarian rules
2009-09-03 Version 1.1
- fixed multiple space detection between sentences
- Translated and extended Russian rule sets from Yakov Reztsov
- small improvements (see ChangeLog)
==== lightproof-1.5+git20140515/Linguistic.xcu ====
(XCU configuration markup not preserved in this dump; only the ${locales} placeholder text survives:)
${locales}
==== lightproof-1.5+git20140515/VERSION ====
1.5
==== lightproof-1.5+git20140515/THANKS ====
Thanks to
András Tímár
Olivier Ronez
Michael Meeks
See ChangeLog for other contributors.
==== lightproof-1.5+git20140515/.gitignore ====
*.pyc
*.oxt

==== lightproof-1.5+git20140515/ChangeLog ====
2014-05-15 László Németh :
- Python 3.3 port of the measurement conversion in
the English and Hungarian modules
2013-11-20 László Németh :
- Python 3.3 port of build environment of the editor
2013-10-14 Olivier Ronez :
- Python 3.3 port of the build environment
- Workaround for paragraph chunking in LO 4, see
http://nabble.documentfoundation.org/Grammar-checker-Undocumented-change-in-the-API-for-LO-4-td4030639.html
2012-12-04 László Németh :
- Python 3.3 port
2012-11-23 László Németh :
- fix strange false positives resulting from a synchronization problem (missing
alternatives with an unloaded dictionary), reported by Olivier Ronez
2012-09-18 László Németh :
- fix suggest() splitting
2012-08-24 László Németh :
- fix URL splitting
- update URLs in English rules
2012-08-23 László Németh :
- fix fdo#46542 ("an usability" instead of "a usability") reported by Roman Eisele
- fix fdo#46549 (allow "an habitual", too), reported by jacobwwood at fdo
- fixes for English and Hungarian modules
2009-08-21 Yakov Reztsov
* src/ru_RU/*: fixes and new Russian grammar rules
2012-07-09 László Németh :
- fix positions of underlining of part of the matched pattern:
(foo) (bar) -2> baz # Did you mean?
- "|" as alternative separator of suggestions
foo -> bar|baz # Did you mean?
- print output depends on PYUNO_DEBUG_LEVEL (fix for Windows)
- print() for future Python 3 support
2012-05-27 Olivier Ronez :
- fix: suggestion with capital is based on selected back reference group
2012-02-15 Olivier Ronez :
- new: error positioning, underline a back reference group instead of the full pattern
2012-02-12 Olivier Ronez :
- back references in explanations, eg. # \1 should be...
- display message without suggestion, eg. foo -> _ # Message
- function word() returns '' instead of None
- add .gitignore
2012-02-04 László Németh :
- fix multiple suggestions, the problem reported by Yakov Reztsov
2012-02-02 László Németh :
- Lightproof editor (rule development extension for LibreOffice)
- use template and config system, based on
the work of Olivier Ronez, author of French
grammar checker Grammalecte.
- code cleanups
- move user code to different module to speed up
its load (submodules use Python pyc format)
2011-12-16 László Németh :
- add FullCommentURL support
- remove " [" from reversed space and punctuation
- remove traceback.print_exc (problems on Windows reported by Olivier Ronez)
- license: LGPL -> MPL/GPL/LGPL
- remove tabulators for LibreOffice Git
- options only in extension manager (temporarily)
- help separator in dlg: now "\n" instead of "/"
- long comments (\n), eg. # Did you mean:\nExplanation...
- expressions in comments, eg. # =expression({word})
- \u00AD (soft hyphen) is not a word boundary
- print traceback in LibreOffice
- LOCALE, TEXT in rules, see doc.
* lightproof_py.py: fix hasLocale() for LibreOffice 3.5
- data/en_US.dat: improved English (a/an) rules
* Compile.py: support "_" in pattern names.
- add ^ for sentence beginning
2011-07-12 László Németh :
* lightproof_handler_py.py: fix for LibreOffice
Problem with option settings reported by Péter Benedek on
openscope.org and others
* Dialog.py: support for multiple options in a line.
syntax in dlg files: space and comma separated list, comma means new line:
item1 item2 ..., itemx itemz ..., itemA itemB ...
* Dialog.py: tooltip support, syntax in dlg files: slash separation
item = option caption/tooltip
* Dialog.py: fix localization of the extension name in the Options
* lightproof_py.py:
- stem(): new function for stemming, see doc/syntax.txt.
Problems with the experimental version reported by Olivier Ronez.
- generate(): new function for morphological generation, see doc/syntax.txt
- extended functions: optional "all" parameter of functions morph() and affix(),
see doc/syntax.txt
* Compile.py: fix for [code] (handling "\n" in strings)
2010-02-19 László Németh :
* Compile.py, make.py: add user code support
by [code] sections in the rule files,
see end of the doc/manual.txt
* Lightproof_handler.py: fix Mac OS X PyUNO problem
(problem with helper classes in the same file)
2009-12-16 László Németh :
* make.py: vendor/language specific grammar checker generator
* *_{py,xcu,xml}.py:
- template files for code generation
* Makefile, Compile.py:
- modified for code generation
* Dialog.py:
- Options dialog data generator (see doc/dialog.txt)
* data/{en_US,hu_HU}.dat:
- optional rules: word duplication etc.
* Lightproof.py:
- fix getAlternatives() handling (morphological analysis of unknown
words resulted in unchecked sentences)
- extended, simplified syntax:
rule sections ([Word], [word], [Char], [char]): see NEWS
- expressions in suggestions: specified by a starting equal sign
(see doc/syntax.txt):
- new library functions (see doc/syntax.txt):
option(): return the value of the option (see doc/dialog.txt)
spell(): spelling (Boolean function)
suggest(): suggestion
affix(): morph variant (only with affix fields)
calc(): Calc function access
2009-10-23 László Németh :
* data/hu_HU.dat:
- fix rule article "a". The problem with "13-a óta" reported by kiazaki
at openscope.hu.
2009-10-20 László Németh :
* data/hu_HU.dat:
- add the rule "mellet" for irregular nouns, too (eg. "a ház mellet")
- add -ban/-ben affixes to the rule "helyett kap", reported by
József Barna.
- fix digit separator rules for decimal digits, reported by joghurt at
OOo Wiki.
* Lightproof.py:
- add words with dots to word(). The problem with version numbers
(eg. in OpenOffice.org 3.1) reported by Kami.
2009-10-19 László Németh :
* Lightproof.py:
- Hunspell integration
- processing conditional rules by eval()
- helper functions: morph() and word() (see doc/syntax.txt)
- add try-except around proofing rule compilation (bad regexes in the
conditional rules are still detected only in the loading phase; see the
stdout of OpenOffice.org)
* Compile.py:
- conditional rules (see doc/syntax.txt and hu_HU.dat)
- multi-line rules
* data/hu_HU.dat:
- new Hungarian rules
2009-09-03 László Németh :
* Lightproof.py:
- fix: multiple space detection between sentences
- data/*.dat: modified rule for space detection between sentences
* data/en*.dat:
- fix: de *juro -> de jure, also extended by "de luxe"
- using "typographic apostrophe" message instead of "apostrophe".
Ambiguous warning message reported by Dave Pitts (davidmpitts at gmail)
* data/hu_HU.dat:
- add "döntetlenre" to the suggestions for "egyelőre"
* doc/manual.txt: fixes for issues reported by Olivier Ronez
- replace Convert.py with Compile.py
- suggesting yourname.dat argument instead of parameter substitution
for manual compilation under Windows
2009-09-01 Yakov Reztsov
* data/ru_RU.dat: Russian translation of template.dat
2009-04-25 László Németh :
* first release
==== lightproof-1.5+git20140515/pythonpath/lightproof___implname__.py ====
# -*- encoding: UTF-8 -*-
from __future__ import unicode_literals
dic = ${data}
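For reference, a hedged, hand-written illustration of the kind of value ${data} expands to. Each entry holds the rule pattern, replacement, message, condition and back-reference group number (plus the original rule line in debug builds); compile_rules() in lightproof_impl___implname__.py later compiles the pattern in place and appends a case-handling flag:

# illustrative only -- not generated output
dic_example = [
    # pattern,         replacement, message,             condition, ngroup
    [r"(?iu)\bfoo\b",  "bar",       "bar is far better", False,     0],
]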
==== lightproof-1.5+git20140515/pythonpath/lightproof_impl___implname__.py ====
# -*- encoding: UTF-8 -*-
from __future__ import unicode_literals
import uno, re, sys, os, traceback
from com.sun.star.text.TextMarkupType import PROOFREADING
from com.sun.star.beans import PropertyValue
pkg = "${implname}"
lang = "${lang}"
locales = ${loc}
version = "${version}"
author = "${author}"
name = "${name}"
import lightproof_handler_${implname}
# loaded rules (check for Update mechanism of the editor)
try:
    langrule
except NameError:
    langrule = {}
# ignored rules
ignore = {}
# cache for morphogical analyses
analyses = {}
stems = {}
suggestions = {}
# assign Calc functions
calcfunc = None
# check settings
def option(lang, opt):
    return lightproof_handler_${implname}.get_option(lang.Language + "_" + lang.Country, opt)
# filtering affix fields (ds, is, ts etc.)
def onlymorph(st):
    if st != None:
        st = re.sub(r"^.*(st:|po:)", r"\\1", st) # keep last word part
        st = re.sub(r"\\b(?=[dit][sp]:)","@", st) # and its affixes
        # third substitution: the rest of this line was lost to markup stripping in the dump
        # st = re.sub(r"(? ...
    return st

# morphological analysis helper for morph()/affix(); the def line and the caching
# check below are reconstructed from the callers, and the spell-query string
# literals (XML markup) were stripped from this dump
def _morph(rLoc, word, pattern, all, onlyaffix):
    global analyses
    if not word:
        return None
    if word not in analyses:
        x = spellchecker.spell(u"" + word + "", rLoc, ())
        if not x:
            return None
        t = x.getAlternatives()
        if not t:
            if not analyses: # fix synchronization problem (missing alternatives with unloaded dictionary)
                return None
            t = [""]
        analyses[word] = t[0].split("")[:-1]
    a = analyses[word]
    result = None
    p = re.compile(pattern)
    for i in a:
        if onlyaffix:
            i = onlymorph(i)
        result = p.search(i)
        if result:
            result = result.group(0)
            if not all:
                return result
        elif all:
            return None
    return result
def morph(rLoc, word, pattern, all=True):
    return _morph(rLoc, word, pattern, all, False)

def affix(rLoc, word, pattern, all=True):
    return _morph(rLoc, word, pattern, all, True)

def spell(rLoc, word):
    if not word:
        return None
    return spellchecker.isValid(word, rLoc, ())
# get the tuple of the stem of the word or an empty array
def stem(rLoc, word):
    global stems
    if not word:
        return []
    if not word in stems:
        x = spellchecker.spell(u"" + word + "", rLoc, ())
        if not x:
            return []
        t = x.getAlternatives()
        if not t:
            t = []
        stems[word] = list(t)
    return stems[word]
# get the tuple of the morphological generation of a word or an empty array
def generate(rLoc, word, example):
    if not word:
        return []
    x = spellchecker.spell(u"" + word + "" + example + "", rLoc, ())
    if not x:
        return []
    t = x.getAlternatives()
    if not t:
        t = []
    return list(t)
# get suggestions
def suggest(rLoc, word):
    global suggestions
    if not word:
        return word
    if word not in suggestions:
        x = spellchecker.spell("_" + word, rLoc, ())
        if not x:
            return word
        t = x.getAlternatives()
        suggestions[word] = "|".join(t)
    return suggestions[word]
# get the nth word of the input string or None
def word(s, n):
    a = re.match("(?u)( [-.\w%%]+){" + str(n-1) + "}( [-.\w%%]+)", s)
    if not a:
        return ''
    return a.group(2)[1:]

# get the (-)nth word of the input string or None
def wordmin(s, n):
    a = re.search("(?u)([-.\w%%]+ )([-.\w%%]+ ){" + str(n-1) + "}$", s)
    if not a:
        return ''
    return a.group(1)[:-1]
def calc(funcname, par):
    global calcfunc
    global SMGR
    if calcfunc == None:
        calcfunc = SMGR.createInstance( "com.sun.star.sheet.FunctionAccess")
        if calcfunc == None:
            return None
    return calcfunc.callFunction(funcname, par)
def proofread( nDocId, TEXT, LOCALE, nStartOfSentencePos, nSuggestedSentenceEndPos, rProperties ):
    global ignore
    aErrs = []
    s = TEXT[nStartOfSentencePos:nSuggestedSentenceEndPos]
    for i in get_rule(LOCALE).dic:
        # 0: regex, 1: replacement, 2: message, 3: condition, 4: ngroup, (5: oldline), 6: case sensitive ?
        if i[0] and not str(i[0]) in ignore:
            for m in i[0].finditer(s):
                try:
                    if not i[3] or eval(i[3]):
                        aErr = uno.createUnoStruct( "com.sun.star.linguistic2.SingleProofreadingError" )
                        aErr.nErrorStart = nStartOfSentencePos + m.start(i[4]) # nStartOfSentencePos
                        aErr.nErrorLength = m.end(i[4]) - m.start(i[4])
                        aErr.nErrorType = PROOFREADING
                        aErr.aRuleIdentifier = str(i[0])
                        iscap = (i[-1] and m.group(i[4])[0:1].isupper())
                        if i[1][0:1] == "=":
                            aErr.aSuggestions = tuple(cap(eval(i[1][1:]).replace('|', "\n").split("\n"), iscap, LOCALE))
                        elif i[1] == "_":
                            aErr.aSuggestions = ()
                        else:
                            aErr.aSuggestions = tuple(cap(m.expand(i[1]).replace('|', "\n").split("\n"), iscap, LOCALE))
                        comment = i[2]
                        if comment[0:1] == "=":
                            comment = eval(comment[1:])
                        else:
                            comment = m.expand(comment)
                        aErr.aShortComment = comment.replace('|', '\n').replace('\\n', '\n').split("\n")[0].strip()
                        aErr.aFullComment = comment.replace('|', '\n').replace('\\n', '\n').split("\n")[-1].strip()
                        if "://" in aErr.aFullComment:
                            p = PropertyValue()
                            p.Name = "FullCommentURL"
                            p.Value = aErr.aFullComment
                            aErr.aFullComment = aErr.aShortComment
                            aErr.aProperties = (p,)
                        else:
                            aErr.aProperties = ()
                        aErrs = aErrs + [aErr]
                except Exception as e:
                    if len(i) == 7:
                        raise Exception(str(e), i[5])
                    raise
    return tuple(aErrs)
def cap(a, iscap, rLoc):
    if iscap:
        for i in range(0, len(a)):
            if a[i][0:1] == "i":
                if rLoc.Language == "tr" or rLoc.Language == "az":
                    a[i] = u"\u0130" + a[i][1:]
                elif a[i][1:2] == "j" and rLoc.Language == "nl":
                    a[i] = "IJ" + a[i][2:]
                else:
                    a[i] = "I" + a[i][1:]
            else:
                a[i] = a[i].capitalize()
    return a
def compile_rules(dic):
    # compile regular expressions
    for i in dic:
        try:
            if re.compile("[(][?]iu[)]").match(i[0]):
                i += [True]
                i[0] = re.sub("[(][?]iu[)]", "(?u)", i[0])
            else:
                i += [False]
            i[0] = re.compile(i[0])
        except:
            if 'PYUNO_LOGLEVEL' in os.environ:
                print("Lightproof: bad regular expression: " + str(traceback.format_exc()))
            i[0] = None
def get_rule(loc):
    try:
        return langrule[pkg]
    except:
        langrule[pkg] = __import__("lightproof_" + pkg)
        compile_rules(langrule[pkg].dic)
        return langrule[pkg]

def get_path():
    return os.path.join(os.path.dirname(sys.modules[__name__].__file__), __name__ + ".py")
# [code]
${code}
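Note that the module above references two globals it never assigns, spellchecker and SMGR; the main Lightproof.py component (listed in make.py but not included in this dump) is expected to inject them before proofread() is called. A hedged sketch of that wiring, assuming the stock UNO LinguServiceManager is used; the placement and variable names are illustrative, not the packaged implementation:

# sketch only: assumed wiring inside Lightproof.py
import lightproof_impl_${implname}
lightproof_impl_${implname}.SMGR = ctx.ServiceManager  # consumed by calc()
lsm = ctx.ServiceManager.createInstanceWithContext(
    "com.sun.star.linguistic2.LinguServiceManager", ctx)
lightproof_impl_${implname}.spellchecker = lsm.getSpellChecker()  # consumed by spell()/morph()/stem()/suggest()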
==== lightproof-1.5+git20140515/pythonpath/lightproof_handler___implname__.py ====
import uno
import unohelper
import lightproof_opts_${implname}
from lightproof_impl_${implname} import pkg
from com.sun.star.lang import XServiceInfo
from com.sun.star.awt import XContainerWindowEventHandler
# options
options = {}
def load(context):
    try:
        l = LightproofOptionsEventHandler(context)
        for i in lightproof_opts_${implname}.lopts:
            l.load(i)
    except:
        pass

def get_option(page, option):
    try:
        return options[page + "," + option]
    except:
        try:
            return options[page[:2] + "," + option]
        except:
            return 0

def set_option(page, option, value):
    options[page + "," + option] = int(value)
class LightproofOptionsEventHandler( unohelper.Base, XServiceInfo, XContainerWindowEventHandler ):
    def __init__( self, ctx ):
        p = uno.createUnoStruct( "com.sun.star.beans.PropertyValue" )
        p.Name = "nodepath"
        p.Value = "/org.openoffice.Lightproof_%s/Leaves"%pkg
        self.xConfig = ctx.ServiceManager.createInstance( 'com.sun.star.configuration.ConfigurationProvider' )
        self.node = self.xConfig.createInstanceWithArguments( 'com.sun.star.configuration.ConfigurationUpdateAccess', (p, ) )
        self.service = "org.openoffice.comp.pyuno.LightproofOptionsEventHandler." + pkg
        self.ImplementationName = self.service
        self.services = (self.service, )

    # XContainerWindowEventHandler
    def callHandlerMethod(self, aWindow, aEventObject, sMethod):
        if sMethod == "external_event":
            return self.handleExternalEvent(aWindow, aEventObject)

    def getSupportedMethodNames(self):
        return ("external_event", )

    def handleExternalEvent(self, aWindow, aEventObject):
        sMethod = aEventObject
        if sMethod == "ok":
            self.saveData(aWindow)
        elif sMethod == "back" or sMethod == "initialize":
            self.loadData(aWindow)
        return True
    def load(self, sWindowName):
        child = self.getChild(sWindowName)
        for i in lightproof_opts_${implname}.lopts[sWindowName]:
            sValue = child.getPropertyValue(i)
            if sValue == '':
                if i in lightproof_opts_${implname}.lopts_default[sWindowName]:
                    sValue = 1
                else:
                    sValue = 0
            set_option(sWindowName, i, sValue)

    def loadData(self, aWindow):
        sWindowName = self.getWindowName(aWindow)
        if (sWindowName == None):
            return
        child = self.getChild(sWindowName)
        for i in lightproof_opts_${implname}.lopts[sWindowName]:
            sValue = child.getPropertyValue(i)
            if sValue == '':
                if i in lightproof_opts_${implname}.lopts_default[sWindowName]:
                    sValue = 1
                else:
                    sValue = 0
            xControl = aWindow.getControl(i)
            xControl.State = sValue
            set_option(sWindowName, i, sValue)
    def saveData(self, aWindow):
        sWindowName = self.getWindowName(aWindow)
        if (sWindowName == None):
            return
        child = self.getChild(sWindowName)
        for i in lightproof_opts_${implname}.lopts[sWindowName]:
            xControl = aWindow.getControl(i)
            sValue = xControl.State
            child.setPropertyValue(i, str(sValue))
            set_option(sWindowName, i, sValue)
        self.commitChanges()

    def getWindowName(self, aWindow):
        sName = aWindow.getModel().Name
        if sName in lightproof_opts_${implname}.lopts:
            return sName
        return None

    # XServiceInfo method implementations
    def getImplementationName (self):
        return self.ImplementationName

    def supportsService(self, ServiceName):
        return (ServiceName in self.services)

    def getSupportedServiceNames (self):
        return self.services

    def getChild(self, name):
        return self.node.getByName(name)

    def commitChanges(self):
        self.node.commitChanges()
        return True
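The handler only needs two dictionaries from the generated lightproof_opts_${implname} module (written by Dialog.c() during make.py, or an empty module when no .dlg file ships). A hedged illustration of their expected shape; the window key and option ids are invented for the example:

# illustrative only: shape of a generated lightproof_opts_* module
lopts = {"en": ["comma", "hyphen", "dup"]}   # checkbox ids shown on the options page
lopts_default = {"en": ["comma"]}            # ids whose checkbox is checked by default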
==== lightproof-1.5+git20140515/pythonpath/lightproof_compile___implname__.py ====
from __future__ import unicode_literals
import sys, re, traceback
repl = {}
tests = []
comp = []
modes = ["[Word]", "[word]", "[Char]", "[char]", "[code]"]
mode = "[Word]"
code = u""
language = ""
oldlinenums = {}
def prepare_for_eval(s):
    s = re.sub("(affix|spell|morph|stem|option|suggest|generate)\(", r'\1(LOCALE,', s)
    s = re.sub(r"word\(\s*(\d)", r'word(s[m.end():],\1', s) # word(n)
    s = re.sub(r"word\(\s*-(\d)", r'wordmin(s[:m.start()],\1', s) # word(-n)
    s = re.sub(r"[\\](\d)", r'm.group(\1)', s)
    s = re.sub("[{]([^}]+)}", r'm.group("\1_1")', s)
    return s
def mysplit(s, line, oldline, debug):
    global repl
    global tests
    global comp
    global modes
    global mode
    orig = s
    if s[0:1] == '[':
        if s.strip() in modes:
            mode = s.strip()
            return None
        elif re.match(r"\[\w+\]$", s.strip()):
            #sys.stderr.write("Unknown mode: " + s + "\n")
            return oldline
    dec = 0
    exprep = 0 # replacement is a Python expression (beginning with sign =)
    condition = False
    ngroup = 0 # back reference group number that will be used for error positioning
    # description
    c = re.search("\s#\s", s)
    com = u""
    if c:
        try:
            c = c.start(c.lastindex - 1)
        except:
            c = c.start()
        com = s[c+2:].strip()
        if com[0:1] == "=":
            com = prepare_for_eval(com)
        s = s[:c]
    m1 = re.search("<-", s)
    m2 = re.search("-\d*>", s)
    if m1 and m2:
        condition = prepare_for_eval(s[m1.end(0): m2.start(0)].strip())
        s = s[0:m1.start(0)] + s[m2.start(0):]
if s[0:1] == '"':
# quoted
pos = s[1:].find('"')
while pos > 0 and s[pos] == '\\':
pos = s[pos:].find('"')
s1 = s[1:pos+1]
s2 = s[pos+2:].strip()
else:
m = re.compile("-\d*>").search(s)
if not m:
m = re.compile("[_a-zA-Z][_a-zA-Z0-9]*").match(s)
if not m:
# syntax error
return oldline
s1 = m.group(0)
s2 = s[m.end(0):].strip()
# replace previous definitions
for i in repl:
ire = re.compile("[{]" + i + "}")
if re.search(ire, s2):
s2 = ire.sub(repl[i], s2)
# make named group
s2 = "(?P<" + m.group(0) + ">" + s2 + ")"
dec = 1
else:
s1 = s[0:m.start(0)].strip()
if re.match("TEST: ", s1): # deprecated
tests += [[s1[5:].strip(), s[m.end(0):].strip(), oldline]]
return None
s2 = s[m.start(0):].strip()
m = re.compile("-(\d*)>").match(s2)
if dec!= 1 and m:
s2 = s2[m.end(0):].strip()
if m.group(1):
ngroup = int(m.group(1))
elif dec!=1:
# syntax error
return oldline
if s2[0:1] == '=':
exprep = 1
if s2[0:1] == '"' and s2[-1:]=='"':
s2 = s2[1:-1]
if dec==1:
repl[s1] = s2
return None
else:
for i in repl:
s1 = re.sub("[{]" + i + "}", repl[i], s1)
# modes
if mode == "[Word]" or mode == "[word]":
if s1[0] == '^':
s1 = r"((?<=[!?.] )|^)" + s1[1:] + r"(?![-\w\u2013\u00AD])"
else:
s1 = r"(? 3 and s1[i-3:i]=="(?P":
state = 2
if state == 2 and c == ">":
state = 0
if c == "?" and i > 0 and s1[i-1:i]=="(":
state = 5
if state == 5 and c == ")":
state = 0
if lu.match(c) and c.islower() and state == 0:
if c=="i" and (language == "tr" or language == "az"):
s3 += u"[\u0130" + c + "]"
else:
s3 += "[" + c.upper() + c + "]"
elif lu.match(c) and c.islower() and state == 1 and s1[i+1:i+2] != "-":
if s1[i-1:i] == "-" and s1[i-2:i-1].islower(): # [a-z] -> [a-zA-Z]
s3 += c + s1[i-2:i-1].upper() + "-" + c.upper()
elif c=="i" and (language == "tr" or language == "az"):
s3 += u"\u0130" + c
else:
s3 += c.upper() + c
else:
s3 += c
if c == "\\":
state = 4
elif state == 4:
state = 0
s1 = s3
    s1 = renum("[?]P<([^<_]*)>", s1, "?P<")
    if exprep == 0:
        s2 = re.sub("[{]([_a-zA-Z][_a-zA-Z0-9]*)}", r"\\g<\1>", s2)
        s2 = renum(r"\\g<([^<_]*)>", s2, r"\\g<")
    else:
        s2 = prepare_for_eval(s2)
    # check
    if re.compile("[(][?]iu[)]").match(s1):
        cap = True
        sc = re.sub("[(][?]iu[)]", "(?u)", s1)
    else:
        cap = False
        sc = s1
    try:
        compr = re.compile(sc)
        if not condition:
            comp += [[compr, s2, com, cap, line]]
    except Exception as e:
        raise Exception(str(e), oldline)
    if debug:
        return [s1, s2, com, condition, ngroup, oldline]
    return [s1, s2, com, condition, ngroup]
# group renum ( -> etc.)
def renum(regex, s1, beg):
    j = {}
    mr = re.compile(regex)
    m = mr.search(s1)
    nl = s1.find("\\n")
    while m:
        # restart numbering in new lines
        if nl > -1 and m.start(0) > (nl + 1):
            j = {}
            nl = s1[m.start(0):].find("\\n")
            if nl > -1:
                nl = m.start(0) + nl
        n = m.group(1)
        if n in j:
            j[n] += 1
        else:
            j[n] = 1
        s1 = re.sub(mr, beg + n + "_" + str(j[n]) + ">", s1, 1)
        m = mr.search(s1)
    return s1
def cap(a, iscap):
    global language
    if iscap:
        for i in range(0, len(a)):
            if a[i][0:1] == "i":
                if language == "tr" or language == "az":
                    a[i] = u"\u0130" + a[i][1:]
                elif a[i][1:2] == "j" and language == "nl":
                    a[i] = "IJ" + a[i][2:]
                else:
                    a[i] = "I" + a[i][1:]
            else:
                a[i] = a[i].capitalize()
    return a
def c(rules, lang, debug = False):
    global language
    global code
    global oldlinenums
    language = lang
    r = re.compile("[\n#]")
    code = ""
    dic = []
    oldlinenums = {}
    lines = rules.split("\n")
    lines2 = []
    result = {}
    cm = 0
    lnums = 1
    for i in lines:
        if i.strip() in modes:
            if i.strip() == "[code]":
                cm = 1
                continue
            else:
                cm = 0
        if cm == 1:
            code = code + i + "\n"
        elif len(i.strip()) > 0:
            lines2 = lines2 + [i]
            oldlinenums[i] = lnums
        lnums = lnums + 1
    lines = lines2
    # concatenate multiline commands
    # last tabulator + comment is the message
    l = u""
    comment = 0
    for i in range(len(lines)-1, -1, -1):
        if re.match("\t", lines[i]):
            if not (comment and re.match("\t+#", lines[i])):
                l = lines[i].strip() + " " + l
                if re.search("#", lines[i]):
                    comment = 1
            del lines[i]
        elif l != "":
            lnums = oldlinenums[lines[i]]
            lines[i] = lines[i].strip() + " " + l
            oldlinenums[lines[i]] = lnums
            l = ""
            comment = 0
    # processing
    for i in range(0, len(lines)):
        if not r.match(lines[i]):
            item = mysplit(lines[i].strip(), i + 1, oldlinenums[lines[i]], debug)
            if item != None:
                if type(item) == type(1):
                    raise Exception("Syntax error in line ", item)
                dic = dic + [item]
    result["rules"] = dic
code = re.sub(r"(?