pandocfilters-1.2.2/0000755000175000017500000000000012401456225013726 5ustar jgmjgm00000000000000pandocfilters-1.2.2/examples/0000755000175000017500000000000012401456225015544 5ustar jgmjgm00000000000000pandocfilters-1.2.2/examples/abc.py0000755000175000017500000000255112247411326016652 0ustar jgmjgm00000000000000#!/usr/bin/env python """ Pandoc filter to process code blocks with class "abc" containing ABC notation into images. Assumes that abcm2ps and ImageMagick's convert are in the path. Images are put in the abc-images directory. """ import hashlib import os import sys from pandocfilters import toJSONFilter, Para, Image from subprocess import Popen, PIPE, call imagedir = "abc-images" def sha1(x): return hashlib.sha1(x).hexdigest() def abc2eps(abc, filetype, outfile): p = Popen(["abcm2ps", "-O", outfile + '.eps', "-"],stdin=PIPE) p.stdin.write(abc) p.communicate() p.stdin.close() call(["convert", outfile + '.eps', outfile + '.' + filetype]) def abc(key, value, format, meta): if key == 'CodeBlock': [[ident,classes,keyvals], code] = value if "abc" in classes: outfile = imagedir + '/' + sha1(code) if format == "html": filetype = "png" elif format == "latex": filetype = "pdf" else: filetype = "png" src = outfile + '.' + filetype if not os.path.isfile(src): try: os.mkdir(imagedir) sys.stderr.write('Created directory ' + imagedir + '\n') except OSError: pass abc2eps(code, filetype, outfile) sys.stderr.write('Created image ' + src + '\n') return Para([Image([], [src,""])]) if __name__ == "__main__": toJSONFilter(abc) pandocfilters-1.2.2/examples/caps.py0000755000175000017500000000046312225041664017053 0ustar jgmjgm00000000000000#!/usr/bin/env python """ Pandoc filter to convert all regular text to uppercase. Code, link URLs, etc. are not affected. """ from pandocfilters import toJSONFilter, Str def caps(key, value, format, meta): if key == 'Str': return Str(value.upper()) if __name__ == "__main__": toJSONFilter(caps) pandocfilters-1.2.2/examples/comments.py0000755000175000017500000000130412214401717017742 0ustar jgmjgm00000000000000#!/usr/bin/env python from pandocfilters import toJSONFilter import re """ Pandoc filter that causes everything between '' and '' to be ignored. The comment lines must appear on lines by themselves, with blank lines surrounding them. """ incomment = False def comment(k,v,fmt,meta): global incomment if k == 'RawBlock': fmt, s = v if fmt == "html": if re.search("", s): incomment = True return [] elif re.search("", s): incomment = False return [] if incomment: return [] # suppress anything in a comment if __name__ == "__main__": toJSONFilter(comment) pandocfilters-1.2.2/examples/deemph.py0000755000175000017500000000047012225041664017365 0ustar jgmjgm00000000000000#!/usr/bin/env python from pandocfilters import walk, toJSONFilter from caps import caps """ Pandoc filter that causes emphasized text to be displayed in ALL CAPS. """ def deemph(key, val, fmt, meta): if key == 'Emph': return walk(val, caps, fmt, meta) if __name__ == "__main__": toJSONFilter(deemph) pandocfilters-1.2.2/examples/deflists.py0000755000175000017500000000100312225041664017731 0ustar jgmjgm00000000000000#!/usr/bin/env python """ Pandoc filter to convert definition lists to bullet lists with the defined terms in strong emphasis (for compatibility with standard markdown). """ from pandocfilters import toJSONFilter, BulletList, Para, Strong def deflists(key, value, format, meta): if key == 'DefinitionList': return BulletList([tobullet(t,d) for [t,d] in value]) def tobullet(term, defs): return([Para([Strong(term)])] + [b for d in defs for b in d]) if __name__ == "__main__": toJSONFilter(deflists) pandocfilters-1.2.2/examples/graphviz.py0000755000175000017500000000221212350126645017753 0ustar jgmjgm00000000000000#!/usr/bin/env python """ Pandoc filter to process code blocks with class "graphviz" into graphviz-generated images. """ import pygraphviz import hashlib import os import sys from pandocfilters import toJSONFilter, Str, Para, Image def sha1(x): return hashlib.sha1(x).hexdigest() imagedir = "graphviz-images" def graphviz(key, value, format, meta): if key == 'CodeBlock': [[ident,classes,keyvals], code] = value caption = "caption" if "graphviz" in classes: G = pygraphviz.AGraph(string = code) G.layout() filename = sha1(code) if format == "html": filetype = "png" elif format == "latex": filetype = "pdf" else: filetype = "png" alt = Str(caption) src = imagedir + '/' + filename + '.' + filetype if not os.path.isfile(src): try: os.mkdir(imagedir) sys.stderr.write('Created directory ' + imagedir + '\n') except OSError: pass G.draw(src) sys.stderr.write('Created image ' + src + '\n') tit = "" return Para([Image([alt], [src,tit])]) if __name__ == "__main__": toJSONFilter(graphviz) pandocfilters-1.2.2/examples/metavars.py0000755000175000017500000000143212225041664017744 0ustar jgmjgm00000000000000#!/usr/bin/env python """ Pandoc filter to allow interpolation of metadata fields into a document. %{fields} will be replaced by the field's value, assuming it is of the type MetaInlines or MetaString. """ from pandocfilters import toJSONFilter, attributes, Span, Str import re pattern = re.compile('%\{(.*)\}$') def metavars(key, value, format, meta): if key == 'Str': m = pattern.match(value) if m: field = m.group(1) result = meta.get(field, {}) if 'MetaInlines' in result: return Span(attributes({'class': 'interpolated', 'field': field}), result['MetaInlines']) elif 'MetaString' in result: return Str(result['MetaString']) if __name__ == "__main__": toJSONFilter(metavars) pandocfilters-1.2.2/examples/myemph.py0000755000175000017500000000070512225041664017423 0ustar jgmjgm00000000000000#!/usr/bin/env python from pandocfilters import toJSONFilter, RawInline """ Pandoc filter that causes emphasis to be rendered using the custom macro '\myemph{...}' rather than '\emph{...}' in latex. Other output formats are unaffected. """ def latex(s): return RawInline('latex', s) def myemph(k, v, f, meta): if k == 'Emph' and f == 'latex': return [latex('\\myemph{')] + v + [latex('}')] if __name__ == "__main__": toJSONFilter(myemph) pandocfilters-1.2.2/examples/theorem.py0000755000175000017500000000205712225041664017571 0ustar jgmjgm00000000000000#!/usr/bin/env python """ Pandoc filter to convert divs with class="theorem" to LaTeX theorem environments in LaTeX output, and to numbered theorems in HTML output. """ from pandocfilters import toJSONFilter, RawBlock, Div theoremcount = 0 def latex(x): return RawBlock('latex',x) def html(x): return RawBlock('html', x) def theorems(key, value, format, meta): if key == 'Div': [[ident,classes,kvs], contents] = value if "theorem" in classes: if format == "latex": if ident == "": label = "" else: label = '\\label{' + ident + '}' return([latex('\\begin{theorem}' + label)] + contents + [latex('\\end{theorem}')]) elif format == "html" or format == "html5": global theoremcount theoremcount = theoremcount + 1 newcontents = [html('
Theorem ' + str(theoremcount) + '
'), html('
')] + contents + [html('
\n')] return Div([ident,classes,kvs], newcontents) if __name__ == "__main__": toJSONFilter(theorems) pandocfilters-1.2.2/examples/tikz.py0000755000175000017500000000351612225041664017110 0ustar jgmjgm00000000000000#!/usr/bin/env python """ Pandoc filter to process raw latex tikz environments into images. Assumes that pdflatex is in the path, and that the standalone package is available. Also assumes that ImageMagick's convert is in the path. Images are put in the tikz-images directory. """ import hashlib import re import os import sys import shutil from pandocfilters import toJSONFilter, Para, Image from subprocess import Popen, PIPE, call from tempfile import mkdtemp imagedir = "tikz-images" def sha1(x): return hashlib.sha1(x).hexdigest() def tikz2image(tikz, filetype, outfile): tmpdir = mkdtemp() olddir = os.getcwd() os.chdir(tmpdir) f = open('tikz.tex', 'w') f.write("""\\documentclass{standalone} \\usepackage{tikz} \\begin{document} """) f.write(tikz) f.write("\n\\end{document}\n") f.close() p = call(["pdflatex", 'tikz.tex'], stdout=sys.stderr) os.chdir(olddir) if filetype == 'pdf': shutil.copyfile(tmpdir + '/tikz.pdf', outfile + '.pdf') else: call(["convert", tmpdir + '/tikz.pdf', outfile + '.' + filetype]) shutil.rmtree(tmpdir) def tikz(key, value, format, meta): if key == 'RawBlock': [fmt, code] = value if fmt == "latex" and re.match("\\\\begin{tikzpicture}", code): outfile = imagedir + '/' + sha1(code) if format == "html": filetype = "png" elif format == "latex": filetype = "pdf" else: filetype = "png" src = outfile + '.' + filetype if not os.path.isfile(src): try: os.mkdir(imagedir) sys.stderr.write('Created directory ' + imagedir + '\n') except OSError: pass tikz2image(code, filetype, outfile) sys.stderr.write('Created image ' + src + '\n') return Para([Image([], [src,""])]) if __name__ == "__main__": toJSONFilter(tikz) pandocfilters-1.2.2/LICENSE0000644000175000017500000000273012214401717014733 0ustar jgmjgm00000000000000Copyright (c) 2013, John MacFarlane All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of John Macfarlane nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pandocfilters-1.2.2/README0000644000175000017500000001052312350131100014570 0ustar jgmjgm00000000000000pandocfilters ============= A python module for writing pandoc filters. Pandoc filters are pipes that read a JSON serialization of the Pandoc AST from stdin, transform it in some way, and write it to stdout. They can be used with pandoc (>= 1.12) either using pipes::: pandoc -t json -s | ./caps.py | pandoc -f json or using the ``--filter`` (or ``-F``) command-line option::: pandoc --filter ./caps.py -s For more on pandoc filters, see the pandoc documentation under ``--filter`` and `the tutorial on writing filters`__. __ http://johnmacfarlane.net/pandoc/scripting.html To install:: python setup.py install The ``pandocfilters`` module exports the following functions: ``walk(x, action, format, meta)`` Walk a tree, applying an action to every object. Returns a modified tree. ``toJSONFilter(action)`` Converts an action into a filter that reads a JSON-formatted pandoc document from stdin, transforms it by walking the tree with the action, and returns a new JSON-formatted pandoc document to stdout. The argument is a function action(key, value, format, meta), where key is the type of the pandoc object (e.g. 'Str', 'Para'), value is the contents of the object (e.g. a string for 'Str', a list of inline elements for 'Para'), format is the target output format (which will be taken for the first command line argument if present), and meta is the document's metadata. If the function returns None, the object to which it applies will remain unchanged. If it returns an object, the object will be replaced. If it returns a list, the list will be spliced in to the list to which the target object belongs. (So, returning an empty list deletes the object.) ``stringify(x)`` Walks the tree ``x`` and returns concatenated string content, leaving out all formatting. ``attributes(attrs)`` Returns an attribute list, constructed from the dictionary ``attrs``. Most users will only need ``toJSONFilter``. Here is a simple example of its use::: #!/usr/bin/env python """ Pandoc filter to convert all regular text to uppercase. Code, link URLs, etc. are not affected. """ from pandocfilters import toJSONFilter, Str def caps(key, value, format, meta): if key == 'Str': return Str(value.upper()) if __name__ == "__main__": toJSONFilter(caps) Examples -------- The examples subdirectory in the source repository contains the following filters. These filters should provide a useful starting point for developing your own pandocfilters. - ``abc.py`` Pandoc filter to process code blocks with class ``abc`` containing ABC notation into images. Assumes that abcm2ps and ImageMagick's convert are in the path. Images are put in the abc-images directory. - ``caps.py`` Pandoc filter to convert all regular text to uppercase. Code, link URLs, etc. are not affected. - ``comments.py`` Pandoc filter that causes everything between ```` and ```` to be ignored. The comment lines must appear on lines by themselves, with blank lines surrounding - ``deemph.py`` Pandoc filter that causes emphasized text to be displayed in ALL CAPS. - ``deflists.py`` Pandoc filter to convert definition lists to bullet lists with the defined terms in strong emphasis (for compatibility with standard markdown). - ``graphviz.py`` Pandoc filter to process code blocks with class ``graphviz`` into graphviz-generated images. - ``metavars.py`` Pandoc filter to allow interpolation of metadata fields into a document. ``%{fields}`` will be replaced by the field's value, assuming it is of the type ``MetaInlines`` or ``MetaString``. - ``myemph.py`` Pandoc filter that causes emphasis to be rendered using the custom macro ``\myemph{...}`` rather than ``\emph{...}`` in latex. Other output formats are unaffected. - ``theorem.py`` Pandoc filter to convert divs with ``class="theorem"`` to LaTeX theorem environments in LaTeX output, and to numbered theorems in HTML output. - ``tikz.py`` Pandoc filter to process raw latex tikz environments into images. Assumes that pdflatex is in the path, and that the standalone package is available. Also assumes that ImageMagick's convert is in the path. Images are put in the ``tikz-images`` directory. pandocfilters-1.2.2/README.rst0000777000175000017500000000000012214401717016270 2READMEustar jgmjgm00000000000000pandocfilters-1.2.2/pandocfilters.py0000755000175000017500000001003112401456173017135 0ustar jgmjgm00000000000000# Author: John MacFarlane # Copyright: (C) 2013 John MacFarlane # License: BSD3 """ Functions to aid writing python scripts that process the pandoc AST serialized as JSON. """ import sys import json def walk(x, action, format, meta): """Walk a tree, applying an action to every object. Returns a modified tree. """ if isinstance(x, list): array = [] for item in x: if isinstance(item, dict) and 't' in item: res = action(item['t'], item['c'], format, meta) if res is None: array.append(walk(item, action, format, meta)) elif isinstance(res, list): for z in res: array.append(walk(z, action, format, meta)) else: array.append(walk(res, action, format, meta)) else: array.append(walk(item, action, format, meta)) return array elif isinstance(x, dict): obj = {} for k in x: obj[k] = walk(x[k], action, format, meta) return obj else: return x def toJSONFilter(action): """Converts an action into a filter that reads a JSON-formatted pandoc document from stdin, transforms it by walking the tree with the action, and returns a new JSON-formatted pandoc document to stdout. The argument is a function action(key, value, format, meta), where key is the type of the pandoc object (e.g. 'Str', 'Para'), value is the contents of the object (e.g. a string for 'Str', a list of inline elements for 'Para'), format is the target output format (which will be taken for the first command line argument if present), and meta is the document's metadata. If the function returns None, the object to which it applies will remain unchanged. If it returns an object, the object will be replaced. If it returns a list, the list will be spliced in to the list to which the target object belongs. (So, returning an empty list deletes the object.) """ doc = json.loads(sys.stdin.read()) if len(sys.argv) > 1: format = sys.argv[1] else: format = "" altered = walk(doc, action, format, doc[0]['unMeta']) json.dump(altered, sys.stdout) def stringify(x): """Walks the tree x and returns concatenated string content, leaving out all formatting. """ result = [] def go(key, val, format, meta): if key == 'Str': result.append(val) elif key == 'Code': result.append(val[1]) elif key == 'Math': result.append(val[1]) elif key == 'LineBreak': result.append(" ") elif key == 'Space': result.append(" ") walk(x, go, "", {}) return ''.join(result) def attributes(attrs): """Returns an attribute list, constructed from the dictionary attrs. """ attrs = attrs or {} ident = attrs.get("id","") classes = attrs.get("classes",[]) keyvals = [[x,attrs[x]] for x in attrs if (x != "classes" and x != "id")] return [ident, classes, keyvals] def elt(eltType, numargs): def fun(*args): lenargs = len(args) if lenargs != numargs: raise ValueError(eltType + ' expects ' + str(numargs) + ' arguments, but given ' + str(lenargs)) if len(args) == 1: xs = args[0] else: xs = args return {'t': eltType, 'c': xs} return fun # Constructors for block elements Plain = elt('Plain',1) Para = elt('Para',1) CodeBlock = elt('CodeBlock',2) RawBlock = elt('RawBlock',2) BlockQuote = elt('BlockQuote',1) OrderedList = elt('OrderedList',2) BulletList = elt('BulletList',1) DefinitionList = elt('DefinitionList',1) Header = elt('Header',3) HorizontalRule = elt('HorizontalRule',0) Table = elt('Table',5) Div = elt('Div',2) Null = elt('Null',0) # Constructors for inline elements Str = elt('Str',1) Emph = elt('Emph',1) Strong = elt('Strong',1) Strikeout = elt('Strikeout',1) Superscript = elt('Superscript',1) Subscript = elt('Subscript',1) SmallCaps = elt('SmallCaps',1) Quoted = elt('Quoted',2) Cite = elt('Cite',2) Code = elt('Code',2) Space = elt('Space',0) LineBreak = elt('LineBreak',0) Math = elt('Math',2) RawInline = elt('RawInline',2) Link = elt('Link',2) Image = elt('Image',2) Note = elt('Note',1) Span = elt('Span',2) pandocfilters-1.2.2/setup.py0000644000175000017500000000155512401456173015450 0ustar jgmjgm00000000000000from distutils.core import setup import os def read(fname): return open(os.path.join(os.path.dirname(__file__), fname)).read() setup(name='pandocfilters', version='1.2.2', description='Utilities for writing pandoc filters in python', long_description=read('README.rst'), author='John MacFarlane', author_email='fiddlosopher@gmail.com', url='http://github.com/jgm/pandocfilters', py_modules=['pandocfilters'], keywords=['pandoc'], classifiers=[ 'Development Status :: 3 - Alpha', 'Environment :: Console', 'Intended Audience :: End Users/Desktop', 'Intended Audience :: Developers', 'License :: OSI Approved :: BSD License', 'Operating System :: OS Independent', 'Programming Language :: Python', 'Topic :: Text Processing :: Filters' ], ) pandocfilters-1.2.2/PKG-INFO0000644000175000017500000001405512401456225015030 0ustar jgmjgm00000000000000Metadata-Version: 1.1 Name: pandocfilters Version: 1.2.2 Summary: Utilities for writing pandoc filters in python Home-page: http://github.com/jgm/pandocfilters Author: John MacFarlane Author-email: fiddlosopher@gmail.com License: UNKNOWN Description: pandocfilters ============= A python module for writing pandoc filters. Pandoc filters are pipes that read a JSON serialization of the Pandoc AST from stdin, transform it in some way, and write it to stdout. They can be used with pandoc (>= 1.12) either using pipes::: pandoc -t json -s | ./caps.py | pandoc -f json or using the ``--filter`` (or ``-F``) command-line option::: pandoc --filter ./caps.py -s For more on pandoc filters, see the pandoc documentation under ``--filter`` and `the tutorial on writing filters`__. __ http://johnmacfarlane.net/pandoc/scripting.html To install:: python setup.py install The ``pandocfilters`` module exports the following functions: ``walk(x, action, format, meta)`` Walk a tree, applying an action to every object. Returns a modified tree. ``toJSONFilter(action)`` Converts an action into a filter that reads a JSON-formatted pandoc document from stdin, transforms it by walking the tree with the action, and returns a new JSON-formatted pandoc document to stdout. The argument is a function action(key, value, format, meta), where key is the type of the pandoc object (e.g. 'Str', 'Para'), value is the contents of the object (e.g. a string for 'Str', a list of inline elements for 'Para'), format is the target output format (which will be taken for the first command line argument if present), and meta is the document's metadata. If the function returns None, the object to which it applies will remain unchanged. If it returns an object, the object will be replaced. If it returns a list, the list will be spliced in to the list to which the target object belongs. (So, returning an empty list deletes the object.) ``stringify(x)`` Walks the tree ``x`` and returns concatenated string content, leaving out all formatting. ``attributes(attrs)`` Returns an attribute list, constructed from the dictionary ``attrs``. Most users will only need ``toJSONFilter``. Here is a simple example of its use::: #!/usr/bin/env python """ Pandoc filter to convert all regular text to uppercase. Code, link URLs, etc. are not affected. """ from pandocfilters import toJSONFilter, Str def caps(key, value, format, meta): if key == 'Str': return Str(value.upper()) if __name__ == "__main__": toJSONFilter(caps) Examples -------- The examples subdirectory in the source repository contains the following filters. These filters should provide a useful starting point for developing your own pandocfilters. - ``abc.py`` Pandoc filter to process code blocks with class ``abc`` containing ABC notation into images. Assumes that abcm2ps and ImageMagick's convert are in the path. Images are put in the abc-images directory. - ``caps.py`` Pandoc filter to convert all regular text to uppercase. Code, link URLs, etc. are not affected. - ``comments.py`` Pandoc filter that causes everything between ```` and ```` to be ignored. The comment lines must appear on lines by themselves, with blank lines surrounding - ``deemph.py`` Pandoc filter that causes emphasized text to be displayed in ALL CAPS. - ``deflists.py`` Pandoc filter to convert definition lists to bullet lists with the defined terms in strong emphasis (for compatibility with standard markdown). - ``graphviz.py`` Pandoc filter to process code blocks with class ``graphviz`` into graphviz-generated images. - ``metavars.py`` Pandoc filter to allow interpolation of metadata fields into a document. ``%{fields}`` will be replaced by the field's value, assuming it is of the type ``MetaInlines`` or ``MetaString``. - ``myemph.py`` Pandoc filter that causes emphasis to be rendered using the custom macro ``\myemph{...}`` rather than ``\emph{...}`` in latex. Other output formats are unaffected. - ``theorem.py`` Pandoc filter to convert divs with ``class="theorem"`` to LaTeX theorem environments in LaTeX output, and to numbered theorems in HTML output. - ``tikz.py`` Pandoc filter to process raw latex tikz environments into images. Assumes that pdflatex is in the path, and that the standalone package is available. Also assumes that ImageMagick's convert is in the path. Images are put in the ``tikz-images`` directory. Keywords: pandoc Platform: UNKNOWN Classifier: Development Status :: 3 - Alpha Classifier: Environment :: Console Classifier: Intended Audience :: End Users/Desktop Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: BSD License Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python Classifier: Topic :: Text Processing :: Filters