Creoleparser-0.7.4/0000755000000000000000000000000011742546414012656 5ustar rootrootCreoleparser-0.7.4/Creoleparser.egg-info/0000755000000000000000000000000011742546414016776 5ustar rootrootCreoleparser-0.7.4/Creoleparser.egg-info/top_level.txt0000666000000000000000000000001511633076440021524 0ustar rootrootcreoleparser Creoleparser-0.7.4/Creoleparser.egg-info/SOURCES.txt0000666000000000000000000000052611633076440020665 0ustar rootrootCHANGES.txt INSTALL.txt LICENSE.txt MANIFEST.in README.txt setup.py Creoleparser.egg-info/PKG-INFO Creoleparser.egg-info/SOURCES.txt Creoleparser.egg-info/dependency_links.txt Creoleparser.egg-info/requires.txt Creoleparser.egg-info/top_level.txt creoleparser/__init__.py creoleparser/core.py creoleparser/dialects.py creoleparser/elements.pyCreoleparser-0.7.4/Creoleparser.egg-info/requires.txt0000666000000000000000000000001311633076440021370 0ustar rootrootGenshi>=0.4Creoleparser-0.7.4/Creoleparser.egg-info/PKG-INFO0000666000000000000000000000332011633076440020071 0ustar rootrootMetadata-Version: 1.0 Name: Creoleparser Version: 0.7.4 Summary: Parser for the Creole common wiki markup language Home-page: http://code.google.com/p/creoleparser/ Author: Stephen Day Author-email: stephen.h.day@gm**l.com License: MIT Download-URL: http://pypi.python.org/pypi/Creoleparser Description: What is Creoleparser? --------------------- Creoleparser is a Python library for converting Creole wiki markup for output on the web. It is a full implementation of the Creole 1.0 specification and aims to follow the spec exactly. Find out more about Creoleparser at What is Creole? --------------- From wikicreole.org: Creole is a common wiki markup language to be used across different wikis. It's not replacing existing markup but instead enabling wiki users to transfer content seamlessly across wikis, and for novice users to contribute more easily. 
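Creoleparser exposes two ready-made parsers for this markup (both defined in creoleparser/__init__.py below); a minimal usage sketch::

    from creoleparser import creole2html, text2html

    creole2html("Some **creole 1.0** markup")  # strict Creole 1.0
    text2html("Some **creole 1.0** markup")    # Creole 1.0 plus proposed additions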
Find out more about Creole at Platform: UNKNOWN Classifier: Development Status :: 4 - Beta Classifier: Environment :: Web Environment Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: MIT License Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content Classifier: Topic :: Software Development :: Libraries :: Python Modules Classifier: Topic :: Text Processing :: Markup Classifier: Topic :: Text Processing :: Markup :: HTML Classifier: Topic :: Text Processing :: Markup :: XML Creoleparser-0.7.4/Creoleparser.egg-info/dependency_links.txt0000666000000000000000000000000111633076440023044 0ustar rootroot Creoleparser-0.7.4/creoleparser/0000755000000000000000000000000011742546414015344 5ustar rootrootCreoleparser-0.7.4/creoleparser/__init__.py0000666000000000000000000000220011633074776017461 0ustar rootroot# __init__.py # -*- coding: utf-8 -*- # # Copyright © Stephen Day # # This module is part of Creoleparser and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php # import string import keyword from core import Parser, ArgParser from dialects import (creole11_base, creole10_base, creepy10_base, create_dialect, parse_args) __docformat__ = 'restructuredtext en' __version__ = '0.7.4' creole2html = Parser(dialect=create_dialect(creole10_base), method='html') """This is a pure Creole 1.0 parser created for convenience""" text2html = Parser(dialect=create_dialect(creole11_base), method='html') """This is a Creole 1.0 parser (+ additions) created for convenience""" #parse_args = ArgParser(dialect=creepy10_base(),key_func=string.lower, # illegal_keys=keyword.kwlist + ['macro_name', # 'arg_string', 'body', 'isblock', 'environ', 'macro']) #"""Function for parsing macro arg_strings using a relaxed xml style""" def _test(): import doctest doctest.testmod() if __name__ == "__main__": _test() Creoleparser-0.7.4/creoleparser/elements.py0000666000000000000000000016777111633074776017567 0ustar rootroot# elements.py # -*- coding: utf-8 -*- # # Copyright © Stephen Day # # This module is part of Creoleparser and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php # import re import urlparse import urllib import keyword import warnings import traceback import unicodedata import genshi.builder as bldr from genshi.core import Stream, Markup from core import (escape_char, esc_neg_look, fragmentize, ImplicitList, AttrDict, MacroError) BLOCK_ONLY_TAGS = ['h1','h2','h3','h4','h5','h6', 'ul','ol','dl', 'pre','hr','blockquote','address', 'p','div','form','fieldset','table', 'noscript'] BLOCK_TAGS = BLOCK_ONLY_TAGS + ['ins','del','script'] MACRO_NAME = r'(?P[a-zA-Z][a-zA-Z0-9]*([-.][a-zA-Z0-9]+)*)' """allows any number of non-repeating hyphens or periods. 
Underscore is not included because hyphen is""" # use Genshi's HTMLSanitizer if possible (i.e., not on Google App Engine) try: from genshi.filters import HTMLSanitizer except: SAFE_SCHEMES = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None]) class HTMLSanitizer(object): def is_safe_uri(self,uri): if ':' not in uri: return True # This is a relative URI chars = [char for char in uri.split(':', 1)[0] if char.isalnum()] return ''.join(chars).lower() in SAFE_SCHEMES sanitizer = HTMLSanitizer() __docformat__ = 'restructuredtext en' class WikiElement(object): """Baseclass for all wiki elements.""" append_newline = False """Determines if newlines are appended to Element(s) during processing. Should only affect readability of source xml. """ def __init__(self, tag, token, child_elements=None): """Constructor for WikiElement objects. Subclasses may have other keyword arguments. :parameters: tag The xhtml tag associated with the element. token The character string (or strings) that identifies the element in wiki markup. child_elements A list of wiki_elements that will be searched for in the body of the element. The order of these elements matters, because if an element is found before the element that encloses it, the enclosing element will never be found. In cases where this imposes limits (e.g, ``strong`` and ``em`` should be allowed to nest each other), place the conflicting elements in a sublist. The parser will then find which comes first. """ self.tag = tag self.token = token if child_elements is None: child_elements = [] self.child_elements = child_elements def _build(self,mo,element_store, environ): """Returns a genshi Element that has ``self.tag`` as the outermost tag. This methods if called exclusively by ``_process`` :parameters: mo match object, usually the one returned by self.regexp.search(s) """ return bldr.tag.__getattr__(self.tag)(fragmentize(mo.group(1), self.child_elements, element_store, environ)) def re_string(self): """The regular expression pattern that is compiled into ``self.regexp``. The regular expression must consume the entire wiki element, including the tokens. For block elements, the newline on the last line must be consumed also. group(1) should normally be the entire string inside the tokens. If not, a custom ``_build`` method will be needed. """ pass def __repr__(self): return "<"+self.__class__.__name__ + " " + str(self.tag)+">" def _process(self, mos, text, wiki_elements,element_store, environ): """Returns genshi Fragments (Elements and text) This is mainly for block level markup. See InlineElement for the other method. """ frags = [] end = 0 for mo in mos: if end != mo.start(): # call again for leading text and extend the result list frags.extend(fragmentize(text[end:mo.start()],wiki_elements[1:], element_store, environ)) # append the found wiki element to the result list built = self._build(mo,element_store, environ) if built is not None: frags.append(built) # make the source output easier to read if self.append_newline: frags.append('\n') end = mo.end() # call again for trailing text and extend the result list if end < len(text): if not isinstance(wiki_elements[0],(list,tuple)): wiki_elements = wiki_elements[1:] frags.extend(fragmentize(text[end:],wiki_elements, element_store, environ)) return frags class BlockElement(WikiElement): """Block elements inherit form this class Wiki elements wanting ``append_newline = True`` should use this as the base also. 
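For illustration, here is a concrete subclass in action (``LoneElement``, defined later in this module); its regexp consumes the matched line whole, surrounding whitespace included:

    >>> hr = LoneElement('hr', '----')
    >>> hr.regexp.search('  ----  ').group(0)
    '  ----  '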
""" append_newline = True class InlineElement(WikiElement): r"""For finding generic inline elements >>> em = InlineElement('em','//') >>> mo1 = em.regexp.search('a //word// in a line') >>> mo2 = em.regexp.search('a //word in a line\n or two\n') >>> mo1.group(0),mo1.group(1) ('//word//', 'word') >>> mo2.group(0),mo2.group(1) ('//word in a line\n or two', 'word in a line\n or two') Use a list for the ``token`` argument to have different start and end strings. These must be closed. >>> foo = InlineElement('foo',['<<','>>']) >>> mo = foo.regexp.search('blaa <>\n') >>> mo.group(1) 'here it is ' """ def __init__(self, tag='', token=''): super(InlineElement,self).__init__(tag,token) self.regexp = re.compile(self.re_string(),re.DOTALL) def re_string(self): if isinstance(self.token,str): content = '(.+?)' end = '(' + esc_neg_look + re.escape(self.token) + r'|$)' return esc_neg_look + re.escape(self.token) + content + end else: content = '(.+?)' return esc_neg_look + re.escape(self.token[0]) + content + esc_neg_look + re.escape(self.token[1]) def _process(self, mos, text, wiki_elements, element_store, environ): """Returns genshi Fragments (Elements and text)""" parts = [] end = 0 for mo in mos: processed = self._build(mo,element_store, environ) store_id = str(id(processed)) element_store[store_id] = processed parts.append(''.join([text[end:mo.start()],'<<<',store_id,'>>>'])) end = mo.end() # call again for trailing text and extend the result list if end < len(text): parts.append(text[end:]) new_text = ''.join(parts) if not isinstance(wiki_elements[0],(list,tuple)): wiki_elements = wiki_elements[1:] frags = fragmentize(new_text,wiki_elements,element_store, environ) return frags class SimpleElement(InlineElement): r"""For finding generic inline elements like ``strong`` and ``em``. >>> em = SimpleElement({'//':'em'}) >>> mo1 = em.regexp.search('a //word// in a line') >>> mo2 = em.regexp.search('a //word in a line\n or two\n') >>> mo1.group(0),mo1.group(2) ('//word//', 'word') >>> mo2.group(0),mo2.group(2) ('//word in a line\n or two', 'word in a line\n or two') """ def __init__(self, token_dict={}): self.token_dict = token_dict self.tokens = token_dict.keys() super(SimpleElement,self).__init__('','') self.regexp = re.compile(self.re_string(),re.DOTALL) def re_string(self): if isinstance(self.token,basestring): tokens = '(' + '|'.join([re.escape(token) for token in self.tokens]) + ')' content = '(.+?)' end = '(' + esc_neg_look + r'\1|$)' return esc_neg_look + tokens + content + end def _build(self,mo,element_store, environ): return bldr.tag.__getattr__(self.token_dict[mo.group(1)])(fragmentize(mo.group(2), self.child_elements, element_store, environ)) class CustomElement(InlineElement): """Finds markup defined by provided custom regexp.""" def __init__(self, reg_exp, func): super(CustomElement,self).__init__('','') if isinstance(reg_exp, basestring): self.regexp = re.compile(esc_neg_look + re.escape(reg_exp), re.DOTALL) else: self.regexp = reg_exp self.func = func def _build(self,mo,element_store, environ): if isinstance(self.func, basestring) and not isinstance(self.func, bldr.Markup): value = bldr.tag(bldr.Markup(self.func)) else: value = self.func(mo, environ) if isinstance(value, basestring) and not isinstance(value, bldr.Markup): raise Exception("Custom markup functions can only return Genshi objects.") return value class LinkElement(InlineElement): """Superclass for AnchorLinks and ImageLinks. Parses internal, external, and interwiki links. 
""" def __init__(self,tag, token, delimiter, interwiki_delimiter,base_urls,links_funcs, interwiki_class_funcs,default_space_char,space_chars, base_url,space_char,class_func,path_func,fragment_pattern): super(LinkElement,self).__init__(tag,token) self.regexp = re.compile(self.re_string(),re.DOTALL) self.delimiter = delimiter self.interwiki_delimiter = interwiki_delimiter self.base_urls = base_urls self.links_funcs = links_funcs self.class_funcs = interwiki_class_funcs self.default_space_char = default_space_char self.space_chars = space_chars self.base_url = base_url self.space_char = space_char self.class_func = class_func self.path_func = path_func self.fragment_pattern = fragment_pattern self.content_regexp = re.compile(self.content_re_string(),re.DOTALL) self.interwikilink_regexp = re.compile(self.interwikilink_re_string()) self.urllink_regexp = re.compile(self.urllink_re_string(), re.DOTALL) self.wikilink_regexp = re.compile(self.wikilink_re_string()) def content_re_string(self): return r'(?P.*?)(' + re.escape(self.delimiter) + '(?P.*?))?$' def interwikilink_re_string(self): all_wikis = set(self.links_funcs.keys() + self.base_urls.keys()) wiki_id = '(?P' + '|'.join(all_wikis) + ')' optional_spaces = ' *' page_name = r'(?P\S+?( \S+?)*)' #allows any number of single spaces return '^' + optional_spaces + wiki_id + \ re.escape(self.interwiki_delimiter) + ' *' + page_name + \ optional_spaces + '$' def urllink_re_string(self): protocol = r'^\s*((\w+?:|/)' rest_of_url = r'[\S\n]*?)\s*$' return protocol + rest_of_url def wikilink_re_string(self): optional_spaces = ' *' if self.fragment_pattern: page_name = r'(?P\S*?( \S+?)*?)(?P' + \ self.fragment_pattern + ')?' else: page_name = r'(?P\S+?( \S+?)*?)' #allows any number of single spaces return '^' + optional_spaces + page_name + optional_spaces + '$' def page_name(self,mo): if 'wiki_id' in mo.groupdict(): space_char = self.space_chars.get(mo.group('wiki_id'),self.default_space_char) else: space_char = self.space_char return mo.group('page_name').replace(' ',space_char) def _build(self,mo,element_store, environ): content_mo = self.content_regexp.match(mo.group(1)) body = content_mo.group('body') arg_string = content_mo.group('arg_string') the_class = None if self.interwikilink_regexp.match(body): interwikilink_mo = self.interwikilink_regexp.match(body) link_type = 'interwiki' base_url = self.base_urls.get(interwikilink_mo.group('wiki_id')) link_func = self.links_funcs.get(interwikilink_mo.group('wiki_id')) class_func = self.class_funcs.get(interwikilink_mo.group('wiki_id')) page_name = self.page_name(interwikilink_mo) if link_func: url = link_func(page_name) else: url = urllib.quote(page_name.encode('utf-8')) if base_url: url = urlparse.urljoin(base_url, url) if class_func: the_class = class_func(page_name) elif self.urllink_regexp.match(body): urllink_mo = self.urllink_regexp.match(body) link_type = 'external' if sanitizer.is_safe_uri(urllink_mo.group(1)): url = urllink_mo.group(1) else: url = None elif self.wikilink_regexp.match(body): wikilink_mo = self.wikilink_regexp.match(body) link_type = 'wiki' page_name = self.page_name(wikilink_mo) if self.path_func and page_name: the_path = self.path_func(page_name) else: the_path = urllib.quote(page_name.encode('utf-8')) if wikilink_mo.groupdict().get('fragment'): the_path = ''.join([the_path,wikilink_mo.group('fragment')]) if self.class_func: the_class = self.class_func(page_name) url = urlparse.urljoin(self.base_url, the_path) else: url = None if not url: return mo.group(0) else: if arg_string is 
not None: args, kw_args = [arg_string.strip()], {} else: args, kw_args = [], {} try: return self.emit(element_store, environ,link_type,body,url,the_class, *args, **kw_args) except TypeError: return mo.group(0) class AnchorElement(LinkElement): """Finds and builds internal, external, and interwiki links. >>> link = AnchorElement('a',('[[',']]'),delimiter='|', ... interwiki_delimiter=':', ... base_urls=dict(somewiki='http://somewiki.org/', ... bigwiki='http://bigwiki.net/'), ... links_funcs={},interwiki_class_funcs={},default_space_char='-', ... space_chars={'bigwiki':' '},base_url='http://somewiki.org/', ... space_char='_',class_func=None,path_func=None,external_links_class=None, ... fragment_pattern=r'#![a-z0-9-_]+') >>> mo = link.regexp.search("[[http://www.google.com| here]]") >>> link._build(mo,{},None).generate().render() 'here' >>> mo = link.regexp.search(" [[somewiki:Home Page|steve]] ") >>> link._build(mo,{},None).generate().render() 'steve' >>> mo = link.regexp.search(" [[bigwiki:Home Page]] ") >>> link._build(mo,{},None).generate().render() 'bigwiki:Home Page' >>> mo = link.regexp.search(" [[Home Page |Home]]") >>> link._build(mo,{},None).generate().render() 'Home' >>> mo = link.regexp.search(" [[#!my-header|Subpage]]") >>> link._build(mo,{},None).generate().render() 'Subpage' >>> mo = link.regexp.search(" [[Home Page#!a-header |Home]]") >>> link._build(mo,{},None).generate().render() 'Home' """ def __init__(self, tag, token, external_links_class, *args, **kw_args): super(AnchorElement,self).__init__(tag, token, *args, **kw_args) self.external_links_class = external_links_class def emit(self,element_store, environ,link_type,body,url,the_class, alias=None): if alias: alias = fragmentize(alias,self.child_elements,element_store, environ) else: alias = body.strip() if link_type == 'external': the_class = self.external_links_class return bldr.tag.__getattr__(self.tag)(alias, href=url, class_=the_class) class ImageElement(LinkElement): """Processes image elements. >>> img = ImageElement('img',('{{','}}'), delimiter='|', ... interwiki_delimiter=':', ... base_urls=dict(somewiki='http://somewiki.org/', ... bigwiki='http://bigwiki.net/'), ... links_funcs={},interwiki_class_funcs={},default_space_char='-', ... space_chars={'bigwiki':' '},base_url='/images/', ... space_char='_',class_func=None,path_func=None,disable_external=False, ... fragment_pattern=False) >>> mo = img.regexp.search('{{ picture.jpg | An image of a house }}') >>> img._build(mo,{},None).generate().render() 'An image of a house' """ def __init__(self, tag, token, disable_external, *args, **kw_args): super(ImageElement,self).__init__(tag, token, *args, **kw_args) self.disable_external = disable_external def emit(self,element_store, environ,link_type,body,url,the_class, alt=None): if self.disable_external and link_type == 'external': return bldr.tag.span('External images are disabled', class_='external_image') if alt is None: if link_type == 'external': alt = urlparse.urlsplit(url).path.split('/')[-1] else: alt = body.strip() return bldr.tag.__getattr__(self.tag)(src=url ,alt=alt, title=alt, #class_=the_class ) class Link(InlineElement): """DEPRECIATED """ def __init__(self,tag, token): super(Link,self).__init__(tag,token) self.regexp = re.compile(self.re_string(),re.DOTALL) warnings.warn(""" Use of elements.Link is depreciated. 
""" ) def _build(self,mo,element_store, environ): for tag in self.child_elements: m = tag.regexp.search(mo.group(1)) if m: link = tag._build(m,element_store, environ) if link: break else: link = None if link: return bldr.tag(link) else: return mo.group(0) class Macro(WikiElement): r"""Finds and processes inline macro elements.""" def __init__(self, tag, token, func, macros, arg_parser): super(Macro,self).__init__(tag,token , []) self.func = func self.macros = macros self.arg_parser = arg_parser self.regexp = re.compile(self.re_string()) def _process(self, mos, text, wiki_elements,element_store, environ): """Returns genshi Fragments (Elements and text)""" assert len(mos) == 1 mo = mos[0] processed = self._build(mo,element_store, environ) if isinstance(processed, list): tail = processed[1] processed = processed[0] else: tail = '' if isinstance(processed, basestring) and not isinstance(processed,Markup): text = ''.join([text[:mo.start()],processed,tail, text[mo.end():]]) else: store_id = str(id(processed)) element_store[store_id] = processed text = ''.join([text[:mo.start()],'<<<',store_id,'>>>',tail, text[mo.end():]]) frags = fragmentize(text,wiki_elements,element_store, environ) return frags def re_string(self): content = '(.*?)' return esc_neg_look + re.escape(self.token[0]) + r'(' + MACRO_NAME + \ content + ')' + esc_neg_look + re.escape(self.token[1]) def _macro_func(self,macro_name,arg_string,body,isblock,environ): func = self.macros[macro_name] arg_parser = func.func_dict.get('arg_parser') or self.arg_parser if arg_parser: pos, kw = arg_parser(arg_string) else: pos, kw = [], {} def parsed_body(context='auto'): if context == 'auto': if isblock: child_elements = self.dialect.block_elements else: child_elements = self.dialect.inline_elements elif context == 'inline': child_elements = self.dialect.inline_elements elif context == 'block': child_elements = self.dialect.block_elements else: child_elements = context return bldr.tag(fragmentize(body, child_elements, {}, environ)) macro = AttrDict(name=macro_name,arg_string=arg_string, body=body, isblock=isblock, parsed_body=parsed_body) try: value = func(macro,environ,*pos,**kw) except TypeError , detail: tag = isblock and 'pre' or 'code' e = str(detail) msg = re.sub(r"^\w*\(\) ", '', e) mo = re.match(r'(.+)(\d+)(.+)(\d+) given\)$',msg) if mo: #re.search(r'given\)$',msg): #mo = re.match(r'(.+)(\d+)(.+)(\d+)(.+)$',msg) msg = mo.group(1) + str(int(mo.group(2))-2) \ + re.sub('arguments?','argument(s)',mo.group(3)) \ + str(int(mo.group(4))-2) + ' given)' value = bldr.tag.__getattr__(tag)("Macro error: '%s' %s"% (macro_name, msg),class_='macro_error') except MacroError, detail: tag = isblock and 'pre' or 'code' value = bldr.tag.__getattr__(tag)("Error in macro '%s': %s"% (macro_name,str(detail.value)),class_='macro_error') except: value = bldr.tag.pre('Unexpected error during macro execution!\n' + traceback.format_exc(20) , class_='macro_error') return value trailing_slash = re.compile(r'(?<=[ "\'\]])/$') def _build(self,mo,element_store, environ): arg_string = re.sub(self.trailing_slash,'',mo.group(4)) if mo.group('name') in self.macros: value = self._macro_func(mo.group('name'),arg_string,None,False,environ) elif self.func: value = self.func(mo.group('name'),arg_string,None,False,environ) else: value = None if value is None: return bldr.tag.code(self.token[0],bldr.tag.span(mo.group('name'),class_="macro_name"), bldr.tag.span(arg_string,class_="macro_arg_string"), self.token[1],class_="unknown_macro") elif isinstance(value, 
(basestring,bldr.Fragment,bldr.Element, Stream)): return value else: raise Exception("macros can only return strings and genshi objects") class BodiedMacro(Macro): """Finds and processes macros with bodies. Does not span across top level block markup (see BodiedBlockMacro's for that).""" def __init__(self, tag, token, func, macros, arg_parser): super(BodiedMacro,self).__init__(tag,token , func, macros, arg_parser) self.regexp = re.compile(self.re_string(),re.DOTALL) def re_string(self): content = r'(?P[ \S]*?)' body = '(?P.+)' return esc_neg_look + re.escape(self.token[0]) + MACRO_NAME + \ content + '(?[ \S]*?)', re.escape(self.token[1])]) end = ''.join([esc_neg_look, re.escape(self.token[0]), '/', re.escape(mo.group('name')), re.escape(self.token[1])]) count = 0 for mo2 in re.finditer(start + '|' + end, mo.group('body')): if re.match(end,mo2.group(0)): count = count + 1 else: count = count - 1 if count > 0: body = mo.group('body')[:mo2.start()] tail = ''.join([mo.group('body')[mo2.end():], self.token[0], '/', mo.group('name'), self.token[1]]) break else: body = mo.group('body') tail = '' if mo.group('name') in self.macros: value = self._macro_func(mo.group('name'),mo.group('arg_string'),body,False,environ) elif self.func: value = self.func(mo.group('name'),mo.group('arg_string'),body,False,environ) else: value = None if value is None: content_out = [self.token[0],bldr.tag.span(mo.group('name'),class_="macro_name"), bldr.tag.span(mo.group('arg_string'),class_="macro_arg_string"), self.token[1],bldr.tag.span(mo.group('body'),class_="macro_body"), self.token[0] + '/' + mo.group('name') + self.token[1]] output = bldr.tag.code(content_out,class_="unknown_macro", style="white-space:pre-wrap") elif isinstance(value, (basestring,bldr.Fragment, Stream)): output = value else: raise Exception("macros can only return strings and genshi objects") return [output, tail] class BodiedBlockMacro(Macro): """Finds and processes block macros with bodies. The opening and closing tokens must be are each on a line alone without leading spaces. 
These macros can enclose other block level markup including pre blocks and other BodiedBlockMacro's.""" def __init__(self, tag, token, func, macros, arg_parser): super(BodiedBlockMacro,self).__init__(tag,token , func, macros, arg_parser) self.regexp = re.compile(self.re_string(),re.DOTALL+re.MULTILINE) def re_string(self): arg_string = r'(?P(?![^\n]*>>[^\n]*>>)[ \S]*?)' start = '^' + re.escape(self.token[0]) body = r'(?P.*\n)' end = re.escape(self.token[0]) + \ r'/(?P=name)' + '(?(?![^\n]*>>[^\n]*>>)[ \S]*?)', re.escape(self.token[1]),r'\s*?\n']) end = ''.join(['^', re.escape(self.token[0]), '/', re.escape(mo.group('name')), re.escape(self.token[1]),r'\s*?$']) count = 0 for mo2 in re.finditer(start + '|' + end, mo.group('body'),re.MULTILINE): if re.match(end,mo2.group(0)): count = count + 1 else: count = count - 1 if count > 0: body = mo.group('body')[:mo2.start()] tail = ''.join([mo.group('body')[mo2.end():], self.token[0], '/', mo.group('name'), self.token[1],'\n']) break else: body = mo.group('body') tail = '' if mo.group('name') in self.macros: value = self._macro_func(mo.group('name'),mo.group('arg_string'),body,True,environ) elif self.func: value = self.func(mo.group('name'),mo.group('arg_string'),body,True,environ) else: value = None if value is None: output = bldr.tag.pre(self.token[0],bldr.tag.span(mo.group('name'),class_="macro_name"), bldr.tag.span(mo.group('arg_string'),class_="macro_arg_string"), self.token[1],'\n',bldr.tag.span(mo.group('body'),class_="macro_body"), self.token[0] + '/' + mo.group('name') + self.token[1], class_="unknown_macro") elif (isinstance(value, (Stream, basestring)) or (isinstance(value,bldr.Element) and value.tag in BLOCK_TAGS)): output = value # Add a p tag if the value is a Fragment or Element that needs one elif isinstance(value, bldr.Fragment): output = bldr.tag.p(value) else: raise Exception("macros can only return strings and genshi objects") return [output, tail] class RawLink(InlineElement): """Used to find raw urls in wiki text and build xml from them. >>> raw_link = RawLink(tag='a') >>> mo = raw_link.regexp.search(" a http://www.google.com url ") >>> raw_link.href(mo) 'http://www.google.com' >>> raw_link._build(mo,{},None).generate().render() 'http://www.google.com' """ linking_protocols = ['http','https'] def __init__(self, tag): super(RawLink,self).__init__(tag=tag, token=None) self.regexp = re.compile(self.re_string()) def re_string(self): escape = '(' + re.escape(escape_char) + ')?' protocol = '((https?|ftp)://' rest_of_url = r'\S+?)' #allow one punctuation character or '**' or '//'. Don't include a placeholder. look_ahead = r'(?=([>)}\]]?[,.?!:;"\']?(([^a-zA-Z0-9])\6)?(\s|$))|<<<)' return escape + protocol + rest_of_url + look_ahead def _build(self,mo,element_store, environ): if (not mo.group(1)) and (mo.group(3) in self.linking_protocols): return bldr.tag.__getattr__(self.tag)(self.alias(mo,element_store), href=self.href(mo)) else: return self.href(mo) def href(self,mo): """Returns the string for the href attribute of the Element.""" if sanitizer.is_safe_uri(mo.group(2)): return mo.group(2) else: return "unsafe_uri_detected" def alias(self,mo,element_store): """Returns the string for the content of the Element.""" return self.href(mo) class URLLink(WikiElement): """DEPRECIATED """ def __init__(self, tag,delimiter): super(URLLink,self).__init__(tag, '') self.delimiter = delimiter self.regexp = re.compile(self.re_string(),re.DOTALL) warnings.warn(""" Use of elements.URLLink is depreciated. 
""" ) def re_string(self): protocol = r'^\s*((\w+?:|/)' rest_of_url = r'[\S\n]*?)\s*' alias = r'(' + re.escape(self.delimiter) + r' *(.*?))? *$' return protocol + rest_of_url + alias def _build(self,mo,element_store, environ): if not self.href(mo): return None return bldr.tag.__getattr__(self.tag)(self.alias(mo,element_store, environ), href=self.href(mo)) def href(self,mo): """Returns the string for the href attribute of the Element.""" if sanitizer.is_safe_uri(mo.group(1)): return mo.group(1) else: return None def alias(self,mo,element_store, environ): """Returns the string for the content of the Element.""" if not mo.group(4): return self.href(mo) else: return fragmentize(mo.group(4),self.child_elements,element_store, environ) class InterWikiLink(WikiElement): """DEPRECIATED """ def __init__(self, tag, delimiter1, delimiter2,base_urls,links_funcs,default_space_char,space_chars): super(InterWikiLink,self).__init__(tag, '') self.delimiter1 = delimiter1 self.delimiter2 = delimiter2 #self.regexp = re.compile(self.re_string()) self.base_urls = base_urls self.links_funcs = links_funcs self.default_space_char = default_space_char self.space_chars = space_chars self.regexp = re.compile(self.re_string()) warnings.warn(""" Use of elements.InterWikiLink is depreciated. """ ) def re_string(self): #all_wikis = set(self.links_funcs.keys() + self.base_urls.keys()) #wiki_id = '(' + '|'.join(all_wikis) + ')' wiki_id = r'(\w+)' optional_spaces = ' *' page_name = r'(\S+?( \S+?)*)' #allows any number of single spaces alias = r'(' + re.escape(self.delimiter2) + r' *(.*?))? *$' return '^' + optional_spaces + wiki_id + optional_spaces + \ re.escape(self.delimiter1) + optional_spaces + page_name + \ optional_spaces + alias def page_name(self,mo): space_char = self.space_chars.get(mo.group(1),self.default_space_char) return mo.group(2).replace(' ',space_char) def href(self,mo): linktype = mo.group(1) base_url = self.base_urls.get(linktype) link_func = self.links_funcs.get(linktype) if not (link_func or base_url): return None else: href = self.page_name(mo) if link_func: href = link_func(href) else: href = urllib.quote(href.encode('utf-8')) if base_url: href = urlparse.urljoin(base_url, href) return href def _build(self,mo,element_store, environ): if not self.href(mo): return '[[' + mo.group(0) + ']]' return bldr.tag.__getattr__(self.tag)(self.alias(mo,element_store, environ), href=self.href(mo)) def alias(self,mo,element_store, environ): """Returns the string for the content of the Element.""" if not mo.group(5): return ''.join([mo.group(1),self.delimiter1,mo.group(2)]) else: return fragmentize(mo.group(5),self.child_elements,element_store, environ) class WikiLink(WikiElement): """DEPRECIATED """ def __init__(self, tag, delimiter, base_url,space_char,class_func,path_func): super(WikiLink,self).__init__(tag, '') self.delimiter = delimiter self.base_url = base_url self.space_char = space_char self.class_func = class_func self.path_func = path_func self.regexp = re.compile(self.re_string()) warnings.warn(""" Use of elements.WikiLink is depreciated. """ ) def re_string(self): optional_spaces = ' *' page_name = r'(\S+?( \S+?)*?)' #allows any number of single spaces alias = r'(' + re.escape(self.delimiter) + r' *(.*?))? 
*$' return '^' + optional_spaces + page_name + optional_spaces + \ alias def page_name(self,mo): return mo.group(1).replace(' ',self.space_char) def href(self,mo): if self.path_func: the_path = self.path_func(self.page_name(mo)) else: the_path = urllib.quote(self.page_name(mo).encode('utf-8')) return urlparse.urljoin(self.base_url, the_path) def _build(self,mo,element_store, environ): if self.class_func: the_class = self.class_func(self.page_name(mo)) else: the_class = None return bldr.tag.__getattr__(self.tag)(self.alias(mo,element_store, environ), href=self.href(mo), class_=the_class) def alias(self,mo,element_store, environ): """Returns the string for the content of the Element.""" if not mo.group(3): return mo.group(1) else: return fragmentize(mo.group(4),self.child_elements,element_store, environ) class List(BlockElement): """Finds list (ordered, unordered, and definition) wiki elements. group(1) of the match object includes all lines from the list including newline characters. """ def __init__(self, tag, token,stop_tokens=None): self.stop_tokens = stop_tokens super(List,self).__init__(tag, token) #self.stop_tokens = stop_tokens self.regexp = re.compile(self.re_string(),re.DOTALL+re.MULTILINE) def re_string(self): """This re_string is for finding generic block elements like lists (ordered, unordered, and definition) that start with a single token. """ leading_whitespace = r'^([ \t]*' only_one_token = re.escape(self.token)+ '(?!' + re.escape(self.token) + ')' rest_of_list = r'.*?(?:\n|\Z))' only_one_stop_token = '([' + re.escape(self.stop_tokens) + r'])(?!\3)' look_ahead = '(?=([ \t]*' + only_one_stop_token + '|$))' return leading_whitespace + only_one_token + rest_of_list + \ look_ahead class ListItem(BlockElement): r"""Matches the current list item. Everything up to the next same-level list item is matched. >>> list_item = ListItem('li','#*') >>> mo = list_item.regexp.search("*one\n**one.1\n**one.2\n*two\n") >>> mo.group(3) 'one\n**one.1\n**one.2\n' >>> mo.group(0) '*one\n**one.1\n**one.2\n' """ append_newline = False def __init__(self, tag, list_tokens): """Constructor for list items. :parameters" list_tokens A string that includes the tokens used for lists """ self.list_tokens = list_tokens super(ListItem,self).__init__(tag, None) self.regexp = re.compile(self.re_string(),re.DOTALL) def re_string(self): whitespace = r'[ \t]*' item_start = '(([' + self.list_tokens + r'])\2*)' rest_of_item = r'(.*?(?:\n|\Z))' start_of_same_level_item = r'\1(?!\2)' look_ahead = r'(?=(' + whitespace + start_of_same_level_item + '|$))' return whitespace + item_start + whitespace + \ rest_of_item + look_ahead def _build(self,mo,element_store, environ): return bldr.tag.__getattr__(self.tag)(fragmentize(mo.group(3), self.child_elements, element_store, environ)) class NestedList(WikiElement): r"""Finds a list in the current list item. >>> nested_ul = NestedList('ul','*') >>> mo = nested_ul.regexp.search('one\n**one.1\n**one.2\n') >>> mo.group(1) '**one.1\n**one.2\n' >>> mo.group(0) == mo.group(1) True """ def __init__(self, tag, token): super(NestedList,self).__init__(tag, token) self.regexp = re.compile(self.re_string(),re.DOTALL+re.MULTILINE) def re_string(self): look_behind = r'(?<=\n)' # have to avoid finding a list on the first line whitespace = r'(\s*' rest_of_list = '.*$)' return look_behind + '^' + whitespace + re.escape(self.token) + \ rest_of_list class DefinitionTerm(BlockElement): r"""Processes definition terms. 
>>> term = DefinitionTerm('dt',';',stop_token=':') >>> mo1,mo2 = term.regexp.finditer(";term1\n:def1\n;term2:def2\n") >>> mo1.group(1), mo2.group(1) ('term1', 'term2') >>> mo1.group(0), mo2.group(0) (';term1\n', ';term2') group(1) of the match object is the term line or up to the first ':' """ def __init__(self, tag, token,stop_token): super(DefinitionTerm,self).__init__(tag, token) self.stop_token = stop_token self.regexp = re.compile(self.re_string(),re.DOTALL+re.MULTILINE) def re_string(self): look_ahead = r'(\n|(?=(' + esc_neg_look + re.escape(self.stop_token) + r'|$)))' return r'^[ \t]*' + re.escape(self.token) + r'[ \t]*(.*?' + \ re.escape(self.stop_token) + '?)\s*' + look_ahead class DefinitionDef(BlockElement): r"""Processes definitions. >>> definition = DefinitionDef('dd',':') >>> mo1,mo2 = definition.regexp.finditer(":def1a\ndef1b\n:def2\n") >>> mo1.group(1), mo2.group(1) ('def1a\ndef1b', 'def2') >>> mo1.group(0), mo2.group(0) (':def1a\ndef1b\n', ':def2\n') group(1) of the match object includes all lines from the defintion up to the next definition. """ def __init__(self, tag, token): super(DefinitionDef,self).__init__(tag, token) self.regexp = re.compile(self.re_string(),re.DOTALL+re.MULTILINE) def re_string(self): look_ahead = r'(?=(^[ \t]*' + re.escape(self.token) + r')|\Z)' return r'^[ \t]*' + re.escape(self.token) + r'?[ \t]*(.+?)\s*' + look_ahead class Paragraph(BlockElement): """"This should be the last outer level wiki element to be searched. Anything that is left over will be placed in a paragraphs unless it looks like block content according to xhtml1 strict. Block content is defined here as valid children of the element (see BLOCK_TAGS). Only genshi Element objects will be evaluated (see BLOCK_TAGS). Fragments and stings are treated as inline while Streams are block content. """ def __init__(self, tag): super(Paragraph,self).__init__(tag,None) self.regexp = re.compile(self.re_string(),re.DOTALL)#+re.MULTILINE) def re_string(self): return r'^(.*?)\n?$' def _build(self,mo,element_store, environ): content = fragmentize(mo.group(1), self.child_elements, element_store, environ) # Check each list item and record those that are block only block_only_frags = [] for i,element in enumerate(content): if ((isinstance(element, bldr.Element) and element.tag in BLOCK_ONLY_TAGS) or isinstance(element,(Stream,Markup))): block_only_frags.append(i) # Build a new result list if needed if block_only_frags: new_content = [] last_i = -1 for i in block_only_frags: if content[last_i+1:i]: if not (len(content[last_i+1:i])==1 and content[last_i+1] == '\n'): new_content.append(bldr.tag.__getattr__(self.tag)(content[last_i+1:i])) else: new_content.append('\n') new_content.append(content[i]) last_i = i if content[last_i+1:]: new_content.append(bldr.tag.__getattr__(self.tag)(content[last_i+1:])) return bldr.tag(new_content) else: return bldr.tag.__getattr__(self.tag)(content) class Heading(BlockElement): r"""Processes heading wiki elements. Optionally adds id attributes. 
>>> h1 = Heading(['h1','h2'],'=',id_prefix=False) >>> mo = h1.regexp.search('before\n = An important thing = \n after') >>> mo.group(2) 'An important thing' >>> mo.group(0) ' = An important thing = \n' >>> h1 = Heading(['h1','h2'],'=',id_prefix='!') >>> mo = h1.regexp.search(u'before\n = An important thing = \n after') >>> mo.group(2) u'An important thing' >>> h1.make_id('!',mo.group(2),[]) '!an-important-thing' >>> h1.make_id('!',mo.group(2),['!an-important-thing']) '!an-important-thing_1' >>> h1.make_id('!',mo.group(2),{}) '!an-important-thing' >>> h1.make_id('!',mo.group(2) + '!@#$%&*()_[]+=/?|-- TO SAY',{}) '!an-important-thing-to-say' """ def __init__(self, tag, token, id_prefix): super(Heading,self).__init__('',token) self.id_prefix = id_prefix self.tags = tag self.regexp = re.compile(self.re_string(),re.MULTILINE) def make_id(self, prefix, heading_text,used_ids): slug = unicodedata.normalize('NFKD', heading_text).encode('ascii', 'ignore').lower() slug = re.sub('-+', '-', re.sub('[^a-z0-9-]+', '-', slug)).strip('-') slug = ''.join([prefix,slug]) or '!' # Restrict length without breaking words. while len(slug) > 40: if slug.find('-') == -1: slug = slug[:40] else: slug = slug.rsplit('-', 1)[0] i = 1 original_slug = slug while True: if slug not in used_ids: break slug = '%s_%d' % (original_slug, i) i += 1 return slug def re_string(self): whitespace = r'[ \t]*' tokens = '(' + re.escape(self.token) + '{1,' + str(len(self.tags)) +'})' content = '(.*?)' trailing_markup = '(' + re.escape(self.token) + r'+[ \t]*)?(\n|\Z)' return '^' + whitespace + tokens + \ whitespace + content + whitespace + trailing_markup def _build(self,mo,element_store, environ): heading_tag = self.tags[len(mo.group(1))-1] heading_body = fragmentize(mo.group(2), self.child_elements, element_store, environ) if self.id_prefix is False: id_=None else: heading_text = bldr.tag(heading_body).generate().render(method='text',encoding=None) used_ids = environ.setdefault('ids', []) id_ = self.make_id(self.id_prefix,heading_text,used_ids) used_ids.append(id_) #toc = environ.setdefault('toc', []) #toc.append((heading_tag, bldr.tag(heading_body), id_)) return bldr.tag.__getattr__(heading_tag)(heading_body, id_=id_) class Table(BlockElement): r"""Find tables. >>> table = Table('table','|') >>> mo = table.regexp.search("before\n | one | two |\n|one|two \n hi") >>> mo.group(1) ' | one | two |\n|one|two \n' >>> mo.group(0) == mo.group(1) True """ def __init__(self, tag, token): super(Table,self).__init__(tag,token) self.regexp = re.compile(self.re_string(),re.MULTILINE) def re_string(self): whitespace = r'[ \t]*' rest_of_line = r'.*?(\n|\Z)' return '^((' + whitespace + re.escape(self.token) + \ rest_of_line + ')+)' class TableRow(BlockElement): r"""Finds rows in a table. >>> row = TableRow('tr','|') >>> mo = row.regexp.search(' | one | two |\n|one|two \n') >>> mo.group(1) '| one | two ' >>> mo.group(0) ' | one | two |\n' """ def __init__(self, tag, token): super(TableRow,self).__init__(tag,token) self.regexp = re.compile(self.re_string(),re.MULTILINE) def re_string(self): whitespace = r'[ \t]*' content = '(' + re.escape(self.token) + '.*?)' trailing_token = re.escape(self.token) + '?' return '^' + whitespace + content + trailing_token + \ whitespace + r'(\n|\Z)' class TableCell(WikiElement): r"""Finds cells in a table row. 
>>> cell = TableCell('td','|') >>> mo = cell.regexp.search('| one | two ') >>> mo.group(1) 'one' >>> mo.group(0) '| one ' """ def __init__(self, tag, token): super(TableCell,self).__init__(tag,token ) self.regexp = re.compile(self.re_string()) def re_string(self): whitespace = r'[ \t]*' content = '(.*?)' look_ahead = '((?=' + esc_neg_look + re.escape(self.token[0]) + ')|$)' return esc_neg_look + re.escape(self.token) + whitespace + \ content + whitespace + look_ahead class Image(InlineElement): """DEPRECIATED """ def __init__(self, tag, token, delimiter): super(Image,self).__init__(tag,token ) self.regexp = re.compile(self.re_string()) self.delimiter = delimiter self.src_regexp = re.compile(r'^\s*(\S+)\s*$') warnings.warn(""" Use of elements.Image is depreciated. """ ) def _build(self,mo,element_store, environ): body = mo.group(1).split(self.delimiter,1) src_mo = self.src_regexp.search(body[0]) if not src_mo: return bldr.tag.span('Bad Image src') if sanitizer.is_safe_uri(src_mo.group(1)): link = src_mo.group(1) else: link = "unsafe_uri_detected" if len(body) == 1: alias = link else: alias = body[1].strip() return bldr.tag.__getattr__(self.tag)(src=link ,alt=alias, title=alias) class NoWikiElement(InlineElement): """Inline no-wiki. When two or more end tokens are found together, only last marks the end of the element. """ def __init__(self, tag, token): super(NoWikiElement,self).__init__(tag,token ) self.regexp = re.compile(self.re_string(),re.DOTALL) def _build(self,mo,element_store, environ): if self.tag: return bldr.tag.__getattr__(self.tag)( fragmentize(mo.group(1), self.child_elements, element_store,environ, remove_escapes=False)) else: return bldr.tag(fragmentize(mo.group(1),self.child_elements, element_store, environ, remove_escapes=False)) def re_string(self): if isinstance(self.token,str): content = '(.+?' + re.escape(self.token[-1]) + '*)' return esc_neg_look + re.escape(self.token) + \ content + re.escape(self.token) else: content = '(.+?' + re.escape(self.token[1][-1]) + '*)' return esc_neg_look + re.escape(self.token[0]) + \ content + re.escape(self.token[1]) class PreBlock(BlockElement): """A preformatted block. If a closing token is found on a line with a space as the first character, the space will be removed from the output. """ def __init__(self, tag, token ): super(PreBlock,self).__init__(tag,token ) self.regexp = re.compile(self.re_string(),re.DOTALL+re.MULTILINE) self.regexp2 = re.compile(self.re_string2(),re.MULTILINE) def re_string(self): if isinstance(self.token,str): return '^' + re.escape(self.token) + r'\s*?\n(.*?\n)' + \ re.escape(self.token) + r'\s*?\n' else: start = '^' + re.escape(self.token[0]) + r'\s*?\n' content = r'(.+?\n)' end = re.escape(self.token[1]) + r'\s*?$' return start + content + end def re_string2(self): """Finds a closing token with a space at the start of the line.""" if isinstance(self.token,str): return r'^ (\s*?' + re.escape(self.token) + r'\s*?\n)' else: return r'^ (\s*?' + re.escape(self.token[1]) + r'\s*?\n)' def _build(self,mo,element_store, environ): match = self.regexp2.sub(r'\1',mo.group(1)) return bldr.tag.__getattr__(self.tag)( fragmentize(match,self.child_elements, element_store, environ,remove_escapes=False)) class IndentedBlock(BlockElement): """An indented block. 
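A sketch of how the regexp gathers consecutive token-prefixed lines (the 'blockquote' tag and '>' token here are illustrative; the dialect that wires this element up supplies its own values):

    >>> quote = IndentedBlock('blockquote', '>', class_=None, style=None)
    >>> quote.regexp.search('> some quoted text').group(1)
    '> some quoted text'

``_build`` strips the leading tokens (via ``regexp2``) before the remaining text is fragmentized.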
""" def __init__(self, tag, token, class_ , style ): super(IndentedBlock,self).__init__(tag,token ) self.regexp = re.compile(self.re_string(),re.MULTILINE) self.regexp2 = re.compile(self.re_string2(),re.MULTILINE) self.class_ = class_ self.style = style def re_string(self): return r'^((' + re.escape(self.token) \ + r'.*?(\n|\Z))+)' def re_string2(self): """Finds a token at the start of the line.""" return r'^' + re.escape(self.token) def _build(self,mo,element_store, environ): match = self.regexp2.sub(r'',mo.group(1)) # removes tokens during processing return bldr.tag.__getattr__(self.tag)( fragmentize(match,self.child_elements, element_store, environ),class_=self.class_,style=self.style) class LoneElement(BlockElement): """Element on a line by itself with no content (e.g.,
)""" def __init__(self, tag, token): super(LoneElement,self).__init__(tag,token ) self.regexp = re.compile(self.re_string(),re.DOTALL+re.MULTILINE) def re_string(self): return r'^(\s*?' + re.escape(self.token) + r'\s*?(\n|\Z))' def _build(self,mo,element_store, environ): return bldr.tag.__getattr__(self.tag)() class BlankLine(WikiElement): """Blank lines divide elements but don't add any output.""" def __init__(self): super(BlankLine,self).__init__(tag=None,token='' , child_elements=[]) self.regexp = re.compile(self.re_string(),re.MULTILINE) def re_string(self): return r'^\s*(\Z|\n)' # r'^(\s*\n)+' def _build(self,mo,element_store, environ): return None def _process(self, mos, text, wiki_elements,element_store, environ): """Returns genshi Fragments (Elements and text) Custom _process method here just to avoid unnecessary calling of _build. """ frags = [] end = 0 for mo in mos: if end != mo.start(): # call again for leading text and extend the result list frags.extend(fragmentize(text[end:mo.start()],wiki_elements[1:], element_store, environ)) end = mo.end() # call again for trailing text and extend the result list if end < len(text): if not isinstance(wiki_elements[0],(list,tuple)): wiki_elements = wiki_elements[1:] frags.extend(fragmentize(text[end:],wiki_elements, element_store, environ)) return frags class LineBreak(InlineElement): """An inline line break.""" def __init__(self,tag, token, blog_style=False): self.blog_style = blog_style super(LineBreak,self).__init__(tag,token ) self.regexp = re.compile(self.re_string(),re.DOTALL) def re_string(self): if self.blog_style: return '(' + esc_neg_look + re.escape(self.token) + r'\n?|\n(?!$))' else: return esc_neg_look + re.escape(self.token) def _build(self,mo,element_store, environ): return bldr.tag.__getattr__(self.tag)() ############################################################################# # The WikeElement classes below are used for parsing macro argument strings # ############################################################################# class ArgString(WikiElement): """Base class for elements used on argument strings""" def __init__(self, tag='', token=''): super(ArgString,self).__init__(tag,token) self.regexp = re.compile(self.re_string(),re.DOTALL) class KeywordArg(ArgString): """Finds keyword arguments""" def re_string(self): return r'(?P\w[\w0-9]*) *'+re.escape(self.token) + \ r' *(?P.*?) *(?=\w[\w0-9]* *' + re.escape(self.token) +'|$)' def _build(self,mo,element_store, environ): if mo.group('body') == '': value = '' else: value = fragmentize(mo.group('body'),self.child_elements, element_store, environ) if len(value) == 1: value = value[0] else: value = ImplicitList(value) name = mo.group('key') return (name, value) class QuotedArg(InlineElement): """Finds quoted arguments""" def re_string(self): return esc_neg_look + r'(?P['+ re.escape(self.token) \ +'])(?P.*?)' + esc_neg_look + '(?P=quote)' def _build(self,mo,element_store, environ): if mo.group('body') == '': value = '' else: frags = fragmentize(mo.group('body'),self.child_elements,element_store, environ) assert len(frags) == 1 value = frags[0] return value class ListArg(ArgString): """Finds lists in argument strings This is used for positional arguments only. 
""" def re_string(self): return esc_neg_look + re.escape(self.token[0]) + r'(?P.*?)' + esc_neg_look + re.escape(self.token[1]) def _build(self,mo,element_store, environ): if mo.group('body') == '': value = [] else: value = fragmentize(mo.group('body'),self.child_elements,element_store, environ) return value class ExplicitListArg(ListArg): """Only finds lists where the string to be searched is fully enclosed This is used for keyword values in argument strings. """ def re_string(self): return '^' + esc_neg_look + re.escape(self.token[0]) +r'(?P.*?)' \ + esc_neg_look+ re.escape(self.token[1]) + '$' class WhiteSpace(ArgString): """Breaks up elements but doesn't add any output""" def re_string(self): return r'[ \n]+' def _build(self,mo,element_store, environ): return None def _test(): import doctest doctest.testmod() if __name__ == "__main__": _test() Creoleparser-0.7.4/creoleparser/dialects.py0000666000000000000000000005671311633074776017534 0ustar rootroot# dialects.py # -*- coding: utf-8 -*- # # Copyright © Stephen Day # # This module is part of Creoleparser and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php # import warnings import string, keyword from elements import * from core import ArgParser def creepy10_base(): """Returns a dialect object (a class) to be used by :class:`~creoleparser.core.ArgParser` **How it Works** The "Creepy" dialect uses a syntax that can look much like that of attribute definition in xml. The most important differences are that positional arguments are allowed and quoting is optional. A Creepy dialect object is normally passed to :class:`~creoleparser.core.ArgParser` to create a new parser object. When called with a single argument, this outputs a two-tuple (a list of positional arguments and a dictionary of keyword arguments): >>> from core import ArgParser >>> my_parser = ArgParser(dialect=creepy10_base(), convert_implicit_lists=False) >>> my_parser(" foo='one' ") ([], {'foo': 'one'}) >>> my_parser(" 'one' ") (['one'], {}) >>> my_parser(" 'one' foo='two' ") (['one'], {'foo': 'two'}) Positional arguments must come before keyword arguments. If they occur after a keyword argument, they will be combined with that value as a list: >>> my_parser(" foo='one' 'two' ") ([], {'foo': ['one', 'two']}) Similarly, if two or more keywords are the same, the values will be combined into a list: >>> my_parser(" foo='one' foo='two' ") ([], {'foo': ['one', 'two']}) The lists above are known as "Implicit" lists. They can automatically be converted to strings by setting ``convert_implicit_lists=True`` in the parser. Quotes can be single or double: >>> my_parser(''' foo="it's okay" ''') ([], {'foo': "it's okay"}) Tildes can be used for escaping: >>> my_parser(''' foo='it~'s okay' ''') ([], {'foo': "it's okay"}) Quotes are optional if an argument value doesn't contain spaces or unescaped special characters: >>> my_parser(" one foo = two ") (['one'], {'foo': 'two'}) Keyword arguments lacking a value will be interpreted as an empty string: >>> my_parser(" '' foo= boo= '' ") ([''], {'foo': '', 'boo': ''}) """ class Base(ArgDialect): kw_arg = KeywordArg(token='=') quoted_arg = QuotedArg(token='\'"') spaces = WhiteSpace() def __init__(self): self.kw_arg.child_elements = [self.spaces] self.quoted_arg.child_elements = [] self.spaces.child_elements = [] @property def top_elements(self): return [self.quoted_arg, self.kw_arg, self.spaces] return Base def creepy20_base(): """Extends creepy10_base to support an explicit list argument syntax. 
>>> from core import ArgParser >>> my_parser = ArgParser(dialect=creepy20_base(),convert_implicit_lists=False) >>> my_parser(" one [two three] foo=['four' 'five'] ") (['one', ['two', 'three']], {'foo': ['four', 'five']}) You can test if a list is explicit by testing its class: >>> from core import ImplicitList >>> pos, kw = my_parser(" foo=['one' 'two'] boo = 'three' 'four'") >>> print kw {'foo': ['one', 'two'], 'boo': ['three', 'four']} >>> isinstance(kw['foo'], ImplicitList) False >>> isinstance(kw['boo'], ImplicitList) True Lists of length zero or one are **never** of type ImplicitList. """ Creepy10Base = creepy10_base() class Base(Creepy10Base): list_arg = ListArg(token=['[',']']) explicit_list_arg = ExplicitListArg(token=['[',']']) def __init__(self): super(Base,self).__init__() self.kw_arg.child_elements = [self.explicit_list_arg,self.spaces] self.list_arg.child_elements = [self.spaces] self.explicit_list_arg.child_elements = [self.spaces] @property def top_elements(self): return [self.quoted_arg, self.kw_arg, self.list_arg,self.spaces] return Base class ArgDialect(object): """Base class for argument string dialect objects.""" pass parse_args = ArgParser(dialect=creepy10_base(),key_func=string.lower, illegal_keys=keyword.kwlist + ['macro_name', 'arg_string', 'body', 'isblock', 'environ', 'macro']) """Function for parsing macro arg_strings using a relaxed xml style""" def create_dialect(dialect_base, **kw_args): """Factory function for dialect objects (for parameter defaults, see :func:`~creoleparser.dialects.creole10_base` and/or :func:`~creoleparser.dialects.creole11_base`) :parameters: add_heading_ids If `True`, user friendly, lowercase, unique, id attributes will be automatically added to headings. To prevent clashes with other page ids, all will be prefixed with a "!". This prefix may be changed by passing a string rather than a boolean. *``environ`` needs to be a dicionary-like object for this to function* (see :meth:`creoleparser.core.Parser.parse`) and a key named `ids` will be added. argument_parser Parser used for automatic parsing of macro arg strings. Must take a single string argument and return a two-tuple with the first element a list (for positional arguments) and the second a dictionary (for keyword arguments). A default is supplied. Individual macros may override this parser by providing their own as a function attribute named `arg_parser`. blog_style_endings If `True`, each newline character in a paragraph will be converted to a
. Note that the escaping mechanism (tilde) does not work for newlines. bodied_macros Dictionary of macros (functions). If a bodied macro is found that is not in this dictionary, ``macro_func`` (below) will be called instead. Each function must accept the following positional arguments: 1. macro object. This dictionary-like object has attributes ``macro_name``, ``body``, ``isblock``, and ``arg_string`` (see ``macro_func`` (below) for more information). Additionally, the macro object has a ``parsed_body()`` method, that will return the parsed ``macro.body`` as a genshi.Fragment. ``parsed_body()`` takes an optional ``context`` argument, that defaults to `auto`, see :meth:`creoleparser.core.Parser.parse` for other possible values. 2. the `environ` object (see :meth:`creoleparser.core.Parser.parse`) If the found macro includes arguments, they will be included in the function call. Creoleparser will handle exceptions by returning an error message in place of the macro (possibly including a traceback). Python's syntax for accepting arbitrary arguments is often used for macros (e.g.,def mymacro(macro, env, \\*pos, \\**kw)). For information on return values, see macro_func (below). non_bodied_macros Same as ``bodied_macros`` but used for non-bodied macros. custom_markup List of tuples that can each define arbitrary custom wiki markup such as WikiWords and emoticons. Each tuple must have two elements, as follows: 1. Compiled regular expression or string (*not* an re pattern) to match. 2. Function that takes two postional arguments, as follows: 1. the match object 2. the `environ` object (see :meth:`creoleparser.core.Parser.parse`) The function must return a Genshi object (Stream, Markup, builder.Fragment, or builder.Element). Returning a string will raise an error. As a shortcut for simple cases, the second tuple element may be a string rather than a function. The string will be wrapped in a Markup object (to allow pass-through of HTML) and a Fragment object (to prevent Creoleparser from creating a new paragraph). dialect_base The class factory to use for creating the dialect object. ``creoleparser.dialects.creole10_base`` and ``creoleparser.dialects.creole11_base`` are possible values. disable_external_content If True, an error message will be inserted when an externally hosted image is found. external_links_class Class attribute to add to external links (i.e., not wiki or interwiki links). indent_class Class attribute to add to indented regions. indent_style Style attribute to add to indented regions. interwiki_links_base_urls Dictionary of urls for interwiki links. Works like ``wiki_links_base_url``. interwiki_links_class_funcs Dictionary of functions that will be called for interwiki link names and return class attributes. Works like ``wiki_links_class_func``. interwiki_links_path_funcs Dictionary of functions that will be called for interwiki link names and return url paths. Works like ``wiki_links_path_func``. interwiki_links_space_chars Dictionary of characters that that will be used to replace spaces that occur in interwiki_links. Works like ``wiki_links_space_char``. If no key is present for an interwiki name, the ``wiki_links_space_char`` will be used. macro_func If supplied, this fuction will be called when macro markup is found, unless the macro is in one of macro dictionaries above. The function must accept the following postional arguments: 1. macro name (string) 2. the argument, including any delimter (string) 3. the macro body (string or None for a macro without a body) 4. 
macro type (boolean, True for block macros, False for normal macros) 5. the `environ` object (see :meth:`creoleparser.core.Parser.parse`) The function may return a string (which will be subject to further wiki processing) or a Genshi object (Stream, Markup, builder.Fragment, or builder.Element). If None is returned, the markup will be rendered unchanged. no_wiki_monospace If `False`, inline no_wiki will be rendered as ``<span>`` not ``<code>`` simple_markup List of tuples that each define markup such as `strong` and `em` that can nest. Each tuple must have two elements, as follows: 1. String to match start and end of text to be enclosed. 2. HTML tag wiki_links_base_url The page name found in wiki links will be smartly appended to this to form the href. To use a different base url for images, supply a two element list; the second element will be used. wiki_links_class_func If supplied, this function will be called when a wiki link is found and the return value (should be a string) will be added as a class attribute of the corresponding link. The function must accept the page name (any spaces will have been replaced) as its only argument. If no class attribute is to be added, return `None`. wiki_links_path_func If supplied, this function will be called when a wiki link is found and the return value (should be a string) will be joined to the base_url to form the url for href. The function must accept the page name (any spaces will have been replaced) as its only argument. Special characters should be url encoded. To use a different function for images, supply a two element list; the second element will be used. wiki_links_space_char When wiki_links have spaces, this character replaces those spaces in the url. To use a different character for images, supply a two element list; the second element will be used. """ if 'interwiki_links_funcs' in kw_args: warnings.warn(""" The "interwiki_links_funcs" parameter has been renamed to "interwiki_links_path_funcs" """) kw_args.setdefault('interwiki_links_path_funcs',kw_args.pop('interwiki_links_funcs')) return dialect_base(**kw_args) def creole10_base(wiki_links_base_url='',wiki_links_space_char='_', interwiki_links_base_urls={}, no_wiki_monospace=True, wiki_links_class_func=None, external_links_class=None, wiki_links_path_func=None, interwiki_links_path_funcs={}, interwiki_links_class_funcs={},interwiki_links_space_chars={}, blog_style_endings=False, disable_external_content=False, custom_markup=[], simple_markup=[('**','strong'),('//','em')], add_heading_ids=False ): """Returns a base class for extending (for parameter descriptions, see :func:`~creoleparser.dialects.create_dialect`) The returned class does not implement any of the proposed additions to the Creole 1.0 specification.
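A minimal usage sketch (the sample markup and the output shown are illustrative only; Genshi must be installed)::

    >>> from core import Parser
    >>> parser = Parser(create_dialect(creole10_base))
    >>> print parser.render("//Hello// **World**"),
    <p><em>Hello</em> <strong>World</strong></p>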
""" if isinstance(wiki_links_base_url,(list, tuple)): wiki_links_base_url, embed_base_url = wiki_links_base_url else: embed_base_url = wiki_links_base_url if isinstance(wiki_links_path_func,(list, tuple)): wiki_links_path_func, embed_path_func = wiki_links_path_func else: embed_path_func = wiki_links_path_func if isinstance(wiki_links_space_char,(list, tuple)): wiki_links_space_char, embed_space_char = wiki_links_space_char else: embed_space_char = wiki_links_space_char embed_interwiki_base_urls = {} for k,v in interwiki_links_base_urls.items(): if isinstance(v,(list, tuple)): interwiki_links_base_urls[k], embed_interwiki_base_urls[k] = v else: embed_interwiki_base_urls[k] = v embed_interwiki_path_funcs = {} for k,v in interwiki_links_path_funcs.items(): if isinstance(v,(list, tuple)): interwiki_links_path_funcs[k], embed_interwiki_path_funcs[k] = v else: embed_interwiki_path_funcs[k] = v embed_interwiki_space_chars = {} for k,v in interwiki_links_space_chars.items(): if isinstance(v,(list, tuple)): interwiki_links_space_chars[k], embed_interwiki_space_chars[k] = v else: embed_interwiki_space_chars[k] = v id_prefix=add_heading_ids is True and '!' or add_heading_ids if id_prefix is False: fragment_pattern = None else: fragment_pattern = '#' + re.escape(id_prefix) + r'[a-z0-9-_]+' class Base(Dialect): br = LineBreak('br', r'\\',blog_style=blog_style_endings) headings = Heading(['h1','h2','h3','h4','h5','h6'],'=', id_prefix=id_prefix) no_wiki = NoWikiElement(no_wiki_monospace and 'code' or 'span',['{{{','}}}']) simple_element = SimpleElement(token_dict=dict(simple_markup)) hr = LoneElement('hr','----') blank_line = BlankLine() p = Paragraph('p') pre = PreBlock('pre',['{{{','}}}']) raw_link = RawLink('a') link = AnchorElement('a',('[[',']]'),delimiter = '|',interwiki_delimiter=':', base_urls=interwiki_links_base_urls, links_funcs=interwiki_links_path_funcs, interwiki_class_funcs=interwiki_links_class_funcs, default_space_char=wiki_links_space_char, space_chars=interwiki_links_space_chars, base_url=wiki_links_base_url, space_char=wiki_links_space_char,class_func=wiki_links_class_func, path_func=wiki_links_path_func,fragment_pattern=fragment_pattern, external_links_class=external_links_class) img = ImageElement('img',('{{','}}'),delimiter = '|',interwiki_delimiter=':', base_urls=embed_interwiki_base_urls, links_funcs=embed_interwiki_path_funcs, interwiki_class_funcs=interwiki_links_class_funcs, default_space_char=embed_space_char, space_chars=embed_interwiki_space_chars, base_url=embed_base_url, space_char=embed_space_char,class_func=wiki_links_class_func, path_func=embed_path_func,fragment_pattern=fragment_pattern, disable_external=disable_external_content) td = TableCell('td','|') th = TableCell('th','|=') tr = TableRow('tr','|') table = Table('table','|') li = ListItem('li',list_tokens='*#') ol = List('ol','#',stop_tokens='*') ul = List('ul','*',stop_tokens='#') nested_ol = NestedList('ol','#') nested_ul = NestedList('ul','*') custom_elements = [CustomElement(reg_exp, func) for reg_exp, func in custom_markup] def __init__(self): self.link.child_elements = [self.simple_element] self.simple_element.child_elements = [self.simple_element] self.headings.child_elements = self.inline_elements self.p.child_elements = self.inline_elements self.td.child_elements = [self.br, self.raw_link, self.simple_element] self.th.child_elements = [self.br, self.raw_link, self.simple_element] self.tr.child_elements = [self.no_wiki,self.img,self.link,self.custom_elements,self.th,self.td] self.table.child_elements = 
[self.tr] self.ol.child_elements = [self.li] self.ul.child_elements = [self.li] self.nested_ol.child_elements = [self.li] self.nested_ul.child_elements = [self.li] self.li.child_elements = [(self.nested_ol,self.nested_ul)] + self.inline_elements @property def inline_elements(self): return [self.no_wiki, self.img, self.link] + self.custom_elements \ + [self.br, self.raw_link, self.simple_element] @property def block_elements(self): return [self.pre,self.blank_line,self.table,self.headings, self.hr,self.ul,self.ol,self.p] """self.block_elements are the wiki elements that are searched at the top level of text to be processed. The order matters because elements later in the list need not have any knowledge of those before (as those were parsed out already). This makes the regular expression patterns for later elements very simple. """ return Base def creole11_base(macro_func=None, indent_class=None, indent_style='margin-left:2em', simple_markup=[('**','strong'),('//','em'),(',,','sub'), ('^^','sup'),('__','u'),('##','code')], non_bodied_macros={}, bodied_macros={}, argument_parser=parse_args, **kwargs): """Returns a base class for extending (for parameter descriptions, see :func:`~creoleparser.dialects.create_dialect`) The returned class implements most of the *officially* proposed additions to the Creole 1.0 specification: * superscript, subscript, underline, and monospace * definition lists * indentation * macros (see http://purl.oclc.org/creoleparser/cheatsheet) **A Basic Extending Example** Extending Creoleparser through subclassing is usually only needed when custom WikiElement objects are incorporated. However, it is also needed for other special jobs, like entirely disabling certain markup. Here we create a dialect that removes image support:: >>> Base = creole11_base() >>> class MyDialect(Base): ... @property ... def inline_elements(self): ... l = super(MyDialect,self).inline_elements ... l.remove(self.img) ... return l >>> from core import Parser >>> parser = Parser(MyDialect) >>> print parser.render("{{this}} is not an image!"),
<p>{{this}} is not an image!</p>
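Macros are another common reason to use this base class. Below is a minimal sketch of a non-bodied macro (the macro name ``name`` and the expected output are illustrative, not part of the library)::

    >>> from core import Parser
    >>> def name(macro, environ):
    ...     return u'World'
    >>> parser = Parser(creole11_base(non_bodied_macros={'name': name}))
    >>> print parser.render("Hello <<name>>!"),
    <p>Hello World!</p>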
For a more complex example, see the `source code `_ of this function. It extends the class created from creole10_base(). .. note:: It is generally safest to create only one dialect class per base class. This is because WikiElement objects are bound as class attributes and would therefore be shared between multiple instances, which could lead to unexpected behaviour. """ Creole10Base = creole10_base(simple_markup=simple_markup, **kwargs) class Base(Creole10Base): indented = IndentedBlock('div','>', class_=indent_class, style=indent_style) dd = DefinitionDef('dd',':') dt = DefinitionTerm('dt',';',stop_token=':') dl = List('dl',';',stop_tokens='*#') macro = Macro('',('<<','>>'),func=macro_func, macros=non_bodied_macros, arg_parser= argument_parser) bodiedmacro = BodiedMacro('',('<<','>>'),func=macro_func, macros=bodied_macros, arg_parser= argument_parser) bodied_block_macro = BodiedBlockMacro('',('<<','>>'),func=macro_func, macros=bodied_macros,arg_parser= argument_parser) def __init__(self): super(Base,self).__init__() self.tr.child_elements[0] = (self.no_wiki,self.bodiedmacro,self.macro) self.dd.child_elements = self.custom_elements + [self.br, self.raw_link, self.simple_element] self.dt.child_elements = self.custom_elements + [self.br, self.raw_link, self.simple_element] self.dl.child_elements = [(self.no_wiki,self.bodiedmacro,self.macro),self.img,self.link,self.dt,self.dd] self.indented.child_elements = self.block_elements self.bodiedmacro.dialect = self self.bodied_block_macro.dialect = self @property def inline_elements(self): return [(self.no_wiki,self.bodiedmacro,self.macro), self.img, self.link] + self.custom_elements + [self.br, self.raw_link, self.simple_element] @property def block_elements(self): return [(self.bodied_block_macro,self.pre),self.blank_line,self.table,self.headings, self.hr,self.indented,self.dl,self.ul,self.ol,self.p] return Base class Dialect(object): """Base class for dialect objects.""" pass def _test(): import doctest doctest.testmod() if __name__ == "__main__": _test() Creoleparser-0.7.4/creoleparser/core.py0000666000000000000000000002741311633074776016663 0ustar rootroot# core.py # -*- coding: utf-8 -*- # # Copyright © Stephen Day # # This module is part of Creoleparser and is released under # the MIT License: http://www.opensource.org/licenses/mit-license.php # import re import warnings import genshi.builder as bldr __docformat__ = 'restructuredtext en' escape_char = '~' esc_neg_look = '(?<!' + re.escape(escape_char) + ')' esc_to_remove = re.compile(esc_neg_look + re.escape(escape_char) + r'(?=[^\s])') place_holder_re = re.compile(r'<<<(-?\d+?)>>>') class Parser(object): def __init__(self,dialect, method='xhtml', strip_whitespace=False, encoding='utf-8'): """Constructor for Parser objects :parameters: dialect Usually created using :func:`creoleparser.dialects.create_dialect` method This value is passed to Genshi's Stream.render(). Possible values include ``xhtml``, ``html``, ``xml``, and ``text``. strip_whitespace This value is passed to Genshi's Stream.render(). encoding This value is passed to Genshi's Stream.render(). If ``None``, the output will be a unicode object. """ if isinstance(dialect,type): self.dialect = dialect() else: warnings.warn(""" 'dialect' should be a type object. """ ) self.dialect = dialect self.method = method self.strip_whitespace = strip_whitespace self.encoding=encoding def parse(self,text,element_store=None,context='block', environ=None, preprocess=True): """Returns a Genshi Fragment (basically a list of Elements and text nodes). :parameters: text The text to be parsed. context This is useful for macro development where (for example) suppression of paragraph tags is desired.
Can be 'inline', 'block', or a list of WikiElement objects (use with caution). element_store Internal dictionary that's passed around a lot ;) environ This can be any type of object. It will be passed to ``macro_func`` unchanged (for a description of ``macro_func``, see :func:`~creoleparser.dialects.create_dialect`). preprocess Passes text through the preprocess method, which replaces Windows-style line breaks. """ if element_store is None: element_store = {} if environ is None: environ = {} if not isinstance(context,list): if context == 'block': top_level_elements = self.dialect.block_elements elif context == 'inline': top_level_elements = self.dialect.inline_elements else: top_level_elements = context if preprocess: text = self.preprocess(text) return bldr.tag(fragmentize(text,top_level_elements,element_store, environ)) def generate(self,text,element_store=None,context='block', environ=None, preprocess=True): """Returns a Genshi Stream. See :meth:`~creoleparser.core.Parser.parse` for named parameter descriptions. """ return self.parse(text, element_store, context, environ, preprocess).generate() def render(self, text, element_store=None, context='block', environ=None, preprocess=True, **kwargs): """Returns the final output string (e.g., xhtml). See :meth:`~creoleparser.core.Parser.parse` for named parameter descriptions. Leftover keyword arguments (``kwargs``) will be passed to Genshi's Stream.render() method, overriding the corresponding attributes of the Parser object. For more information on Streams, see the `Genshi documentation `_. """ kwargs.setdefault('method',self.method) kwargs.setdefault('encoding',self.encoding) if kwargs['method'] != "text": kwargs.setdefault('strip_whitespace',self.strip_whitespace) stream = self.generate(text, element_store, context, environ, preprocess) return stream.render(**kwargs) def __call__(self,text, **kwargs): """Wrapper for the render method. Returns final output string. """ return self.render(text, **kwargs) def preprocess(self,text): """This should generally be called before fragmentize(). :parameter text: text to be processed. """ text = text.replace("\r\n", "\n") text = text.replace("\r", "\n") return text class ArgParser(object): """Creates a callable object for parsing macro argument strings >>> from dialects import creepy20_base >>> my_parser = ArgParser(dialect=creepy20_base()) >>> my_parser(" one two foo='three' boo='four' ") (['one', 'two'], {'foo': 'three', 'boo': 'four'}) A parser returns a two-tuple, the first item being a list of positional arguments and the second a dictionary of keyword arguments. Argument values are either strings or lists. """ def __init__(self,dialect, convert_implicit_lists=True, key_func=None, illegal_keys=(), convert_unicode_keys=True): """Constructor for ArgParser objects :parameters: convert_unicode_keys If *True*, keys will be converted using ``str(key)`` before being added to the output dictionary. This allows the dictionary to be safely passed to functions using the special ``**`` form (i.e., ``func(**kwargs)``). dialect Usually created using :func:`~creoleparser.dialects.creepy10_base` or :func:`~creoleparser.dialects.creepy20_base` convert_implicit_lists If *True*, all implicit lists will be converted to strings using ``' '.join(list)``. "Implicit" lists are created when positional arguments follow keyword arguments (see :func:`~creoleparser.dialects.creepy10_base`). illegal_keys A tuple of keys that will be post-fixed with an underscore if found during parsing.
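For example (a sketch; ``class`` is used here only because it is a Python keyword)::

    >>> from dialects import creepy20_base
    >>> my_parser = ArgParser(dialect=creepy20_base(), illegal_keys=('class',))
    >>> my_parser(" class='foo' ")
    ([], {'class_': 'foo'})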
key_func If supplied, this function will be used to transform the names of keyword arguments. It must accept a single positional argument. For example, this can be used to make keywords case-insensitive: >>> from string import lower >>> from dialects import creepy20_base >>> my_parser = ArgParser(dialect=creepy20_base(),key_func=lower) >>> my_parser(" Foo='one' ") ([], {'foo': 'one'}) """ self.dialect = dialect() self.convert_implicit_lists = convert_implicit_lists self.key_func = key_func self.illegal_keys = illegal_keys self.convert_unicode_keys = convert_unicode_keys def __call__(self, arg_string, **kwargs): """Parses the ``arg_string`` returning a two-tuple Keyword arguments (``kwargs``) can be used to override the corresponding attributes of the ArgParser object (see above). However, the ``dialect`` attribute **cannot** be overridden. """ kwargs.setdefault('convert_implicit_lists',self.convert_implicit_lists) kwargs.setdefault('key_func',self.key_func) kwargs.setdefault('illegal_keys',self.illegal_keys) kwargs.setdefault('convert_unicode_keys',self.convert_unicode_keys) return self._parse(arg_string,**kwargs) def _parse(self,arg_string, convert_implicit_lists, key_func, illegal_keys, convert_unicode_keys): frags = fragmentize(arg_string,self.dialect.top_elements,{},{}) positional_args = [] kw_args = {} for arg in frags: if isinstance(arg,tuple): k, v = arg if convert_unicode_keys: k = str(k) if key_func: k = key_func(k) if k in illegal_keys: k = k + '_' if k in kw_args: if isinstance(v,list): try: kw_args[k].extend(v) except AttributeError: v.insert(0,kw_args[k]) kw_args[k] = v elif isinstance(kw_args[k],list): kw_args[k].append(v) else: kw_args[k] = [kw_args[k], v] kw_args[k] = ImplicitList(kw_args[k]) else: kw_args[k] = v if isinstance(kw_args[k],ImplicitList) and convert_implicit_lists: kw_args[k] = ' '.join(kw_args[k]) else: positional_args.append(arg) return (positional_args, kw_args) def fragmentize(text,wiki_elements, element_store, environ, remove_escapes=True): """Takes a string of wiki markup and outputs a list of genshi Fragments (Elements and strings). This recursive function, with help from the WikiElement objects, does almost all the parsing. When no WikiElement objects are supplied, escapes are removed from ``text`` (unless ``remove_escapes`` is False) and it is returned as-is. This is the only way for recursion to stop. :parameters: text the text to be parsed wiki_elements list of WikiElement objects to be searched for environ object that may be used by macros remove_escapes If False, escapes will not be removed """ while wiki_elements: # If the first supplied wiki_element is actually a list of elements, \ # search for all of them and match the closest one only.
if isinstance(wiki_elements[0],(list,tuple)): x = None mos = None for element in wiki_elements[0]: mo = element.regexp.search(text) if mo: if x is None or mo.start() < x: x,wiki_element,mos = mo.start(),element,[mo] else: wiki_element = wiki_elements[0] mos = [mo for mo in wiki_element.regexp.finditer(text)] if mos: frags = wiki_element._process(mos, text, wiki_elements, element_store, environ) break else: wiki_elements = wiki_elements[1:] # remove escape characters else: if remove_escapes: text = esc_to_remove.sub('',text) frags = fill_from_store(text,element_store) return frags def fill_from_store(text,element_store): frags = [] mos = place_holder_re.finditer(text) start = 0 for mo in mos: if mo.start() > start: frags.append(text[start:mo.start()]) frags.append(element_store.get(mo.group(1), mo.group(1).join(['<<<','>>>']))) start = mo.end() if start < len(text): frags.append(text[start:]) return frags class ImplicitList(list): """This class marks argument lists as implicit""" pass class AttrDict(dict): def __getattr__(self, attr): return self[attr] class MacroError(Exception): def __init__(self, value): self.value = value def __str__(self): return repr(self.value) def _test(): import doctest doctest.testmod() if __name__ == "__main__": _test() Creoleparser-0.7.4/setup.py0000666000000000000000000000424311633074776014405 0ustar rootroot#!/usr/bin/env python try: from setuptools import setup except ImportError: print 'setuptools not installed, using distutils.core' print 'please ignore error message about "install_requires"' from distutils.core import setup version = '0.7.4' try: ## setup.py must work if Genshi isn't installed! from creoleparser import __version__ assert version == __version__ except ImportError: pass setup(name='Creoleparser', version=version, install_requires=['Genshi>=0.4'], description='Parser for the Creole common wiki markup language', author='Stephen Day', author_email='stephen.h.day@gm**l.com', url='http://code.google.com/p/creoleparser/', download_url='http://pypi.python.org/pypi/Creoleparser', packages=['creoleparser'], license = 'MIT', #zip_safe = False, classifiers = [ 'Development Status :: 4 - Beta', 'Environment :: Web Environment', 'Intended Audience :: Developers', 'License :: OSI Approved :: MIT License', 'Operating System :: OS Independent', 'Programming Language :: Python', 'Topic :: Internet :: WWW/HTTP :: Dynamic Content', 'Topic :: Software Development :: Libraries :: Python Modules', 'Topic :: Text Processing :: Markup', 'Topic :: Text Processing :: Markup :: HTML', 'Topic :: Text Processing :: Markup :: XML' ], long_description = """\ What is Creoleparser? --------------------- Creoleparser is a Python library for converting Creole wiki markup for output on the web. It is a full implementation of the Creole 1.0 specification and aims to follow the spec exactly. Find out more about Creoleparser at What is Creole? --------------- From wikicreole.org: Creole is a common wiki markup language to be used across different wikis. It's not replacing existing markup but instead enabling wiki users to transfer content seamlessly across wikis, and for novice users to contribute more easily. 
Find out more about Creole at """ ) Creoleparser-0.7.4/setup.cfg0000666000000000000000000000010011633076442014470 0ustar rootroot[egg_info] tag_build = tag_date = 0 tag_svn_revision = 0 Creoleparser-0.7.4/README.txt0000666000000000000000000000032111633074776014362 0ustar rootrootAbout Creoleparser ================== Creoleparser is a Python implementation of a parser for the Creole wiki markup language. For more information please visit: http://purl.oclc.org/creoleparser Creoleparser-0.7.4/PKG-INFO0000666000000000000000000000332011633076440013751 0ustar rootrootMetadata-Version: 1.0 Name: Creoleparser Version: 0.7.4 Summary: Parser for the Creole common wiki markup language Home-page: http://code.google.com/p/creoleparser/ Author: Stephen Day Author-email: stephen.h.day@gm**l.com License: MIT Download-URL: http://pypi.python.org/pypi/Creoleparser Description: What is Creoleparser? --------------------- Creoleparser is a Python library for converting Creole wiki markup for output on the web. It is a full implementation of the Creole 1.0 specification and aims to follow the spec exactly. Find out more about Creoleparser at What is Creole? --------------- From wikicreole.org: Creole is a common wiki markup language to be used across different wikis. It's not replacing existing markup but instead enabling wiki users to transfer content seamlessly across wikis, and for novice users to contribute more easily. Find out more about Creole at Platform: UNKNOWN Classifier: Development Status :: 4 - Beta Classifier: Environment :: Web Environment Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: MIT License Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content Classifier: Topic :: Software Development :: Libraries :: Python Modules Classifier: Topic :: Text Processing :: Markup Classifier: Topic :: Text Processing :: Markup :: HTML Classifier: Topic :: Text Processing :: Markup :: XML Creoleparser-0.7.4/MANIFEST.in0000666000000000000000000000007111633074776014424 0ustar rootrootinclude *.txt prune ./creoleparser/test* prune ./docs Creoleparser-0.7.4/LICENSE.txt0000666000000000000000000000211311633074776014510 0ustar rootrootCopyright © Stephen Day Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Creoleparser-0.7.4/INSTALL.txt0000666000000000000000000000141611633074776014541 0ustar rootrootInstallation ============ You need `Python 2.4 `_ or higher. 
*Option 1 - easy_install* If you have `setuptools `_, install Creoleparser using easy_install:: easy_install Creoleparser This will automatically download and install Genshi and Creoleparser to your site-packages directory. *Option 2 - manual installation* 1. download and install Genshi from that package's `site `_. 2. download Creoleparser from the `Python Package Index `_, unzip to a temporary folder, and run:: setup.py install This will install Creoleparser to your site-packages directory. Creoleparser-0.7.4/CHANGES.txt0000666000000000000000000001515511633074776014500 0ustar rootrootVersion 0.7.4 (Sept 11 2011) ---------------------------- * optimized macro regular expression (thanks to Jim Garrison) * added MacroError class * added support for fragment identifiers in wiki links * added option for automatic creation of id attributes in headings (see the ``add_heading_ids`` parameter of the ``create_dialect`` function) Version 0.7.3 (Oct 19 2010) --------------------------- * added higher level support for macros for those that don't need a specialized ``macro_func`` (see the ``bodied_macros`` and ``non_bodied_macros`` parameters of the ``create_dialect`` function) * renamed ``interwiki_links_funcs`` to ``interwiki_links_path_funcs``, added deprecation warning * added a bunch of other new parameters to ``create_dialect``: * ``disable_external_content`` * ``external_links_class`` * ``interwiki_links_class_funcs`` * ``simple_markup``, markup for inline HTML tags like `strong` and `em` now lives here * ``custom_markup``, provides easy support for markup like emoticons and WikiWords * ``argument_parser``, the default parser for macro argument strings * deprecated unused classes * added support for custom base urls and functions for images * fixed bug with ``blog_style_endings`` option Version 0.7.2 (May 8 2010) -------------------------- * fixed bad import in setup.py * images now support wiki links and interwiki links Version 0.7.1 (March 29 2010) ----------------------------- * added ``indent_class`` and ``indent_style`` parameters to the ``create_dialect`` function Version 0.7.0 (March 20 2010) ----------------------------- * added syntax for indented regions * added Parser.parse() method to expose the parse tree (a Genshi Fragment object) * improved output of passed-through macros to better support post-processing * removed deprecated Creole10 class support Version 0.6.1 (March 31 2009) ----------------------------- * fixed recursion issue with long lists and tables (>400 rows) * added ``parse_args`` function for macro arg_string parsing Version 0.6 (March 1 2009) -------------------------- * refactored dialects.py to make extending easier * added ``creole10_base`` class factory function * added ``creole11_base`` class factory function * added ``create_dialect`` factory function * The old ``Creole10`` class is now a factory function and it is deprecated.
* ``macro_func`` now takes a fifth positional argument, ``environ`` * creole2html and text2html convenience functions now return html, not xhtml * ``no_wiki_monospace`` is now True for text2html * creole_to_xhtml convenience function removed * explicit url links (e.g., [[http://example.com/]]) can be multiline * added support for easy inline markup customization (SimpleElement) * changed <tt> to <code> * added support for blog-style line endings (blog_style_endings=True) * macros of the same name can nest (previously this was possible only for macros with unique names) * macros can now return Markup objects directly (Stream, Fragment, Element, and string (preferably unicode) objects were always okay) * made creoleparser smarter about adding <p> tags * improved rawlink detection regex Version 0.5.1 (Dec 8 2008) -------------------------- * converted docs to Sphinx * improved rendering of unknown macros * added title attribute to images * fixed bug with markup in raw links * fixed bug with unicode characters in wiki links * fixed bug preventing plain-text rendering * tests.py was refactored to use unittest (thanks duncan.mcgreggor!) * special characters in wiki links get quoted for urls * links refactored to support mailto: * wiki links can no longer have adjacent spaces * creoleparser now works on Google App Engine Version 0.5.0 (May 31 2008) --------------------------- * special syntax introduced for macros spanning block level markup * macros returning Creole can contain other macros (issue 6) * macros refactored, pre-processing no longer used, more robust * fixed bug with macros that output unicode strings (issue 4) * macro names can now include capitals and periods (issue 5) * added support for custom space characters for interwiki links (interwiki_links_space_chars) * api changed for default interwiki links space character, the wiki_links_space_char is used instead of an underscore. (issue 3) Version 0.4.0 (March 24 2008) ----------------------------- * added support for custom urls for interwiki links (interwiki_links_funcs) * user-supplied uri sanitation added * a macro on a line by itself will no longer be <p>aragraphed Version 0.3.3 (Dec 16 2007) --------------------------- * changed api slightly for wiki_links_class_func (see source) * added support for custom urls for wiki links (wiki_links_path_func) * added support for macros with bodies (<<macro>>the body<</macro>>) * removed use of global element_store and thread.local() Version 0.3.2 (Nov 28 2007) --------------------------- * line breaks (\\) can now be escaped with a tilde * added macro support (<<macro>>) * added support for adding a class attribute to wiki links (e.g., if the page doesn't exist). Version 0.3.1 (Nov 16 2007) --------------------------- * super (^^), subscript (,,), and underline (__) support added * definition lists supported (;) * ftp://xxx no longer causes italics * links refactored * removed store_id_seq in favor of id() * made element_store thread.local() Version 0.3 (Nov 10 2007) ------------------------- * text2xhtml has no_wiki_monospace=True and use_additions=True * creole_to_xhtml is now pure Creole 1.0 (use_additions=False) * removed need for any pre_escaping, much more robust now * big refactoring of inline wiki markup processing Version 0.2.1 (Nov 6 2007) -------------------------- * links can include mark-up * creole_to_xhtml has no_wiki_monospace=True and use_additions=True * fixed bug with bad links * removed lone line break as an element (it wasn't valid xhtml) * stopped inserting \n's after <br>'s * wiki_links_space_char option added to dialects.Creole10 * added inline_no_wiki_monospace option to dialects.Creole10 * refactored core.pre_escape() * CHANGES.txt, LICENSE.txt, and INSTALL.txt added to distribution * added use_additions option to dialects.Creole10 * implemented monospace (##) (for use_additions option only) Version 0.2 (Oct 26 2007) ------------------------- * module split into separate files to create the creoleparser package * other refactoring Version 0.1, 0.1.1, 0.1.2 (Oct 2007) ------------------------------------ * First public releases