Pygments-1.6/MANIFEST.in
include pygmentize
include external/*
include Makefile CHANGES LICENSE AUTHORS TODO ez_setup.py
recursive-include tests *
recursive-include docs *
recursive-include scripts *

Pygments-1.6/setup.cfg
[egg_info]
tag_build =
tag_date = 0
tag_svn_revision = 0

[aliases]
release = egg_info -RDb ''

Pygments-1.6/docs/pygmentize.1
.TH PYGMENTIZE 1 "February 15, 2007"
.SH NAME
pygmentize \- highlights the input file
.SH SYNOPSIS
.B \fBpygmentize\fP
.RI [-l\ \fI<lexer>\fP]\ [-F\ \fI<filter>\fP[:\fI<options>\fP]]\ [-f\ \fI<formatter>\fP]
.RI [-O\ \fI<options>\fP]\ [-P\ \fI<option=value>\fP]\ [-o\ \fI<outfile>\fP]\ [\fI<infile>\fP]
.br
.B \fBpygmentize\fP
.RI -S\ \fI<style>\fP

[remainder of the man page and the top of docs/generate.py lost in extraction]

Pygments-1.6/docs/generate.py
  </head>
  <body>
    <div id="content">
      <h1 class="heading">Pygments</h1>
      <h2 class="subheading">{{ title }}</h2>
      {% if file_id != "index" %}
        <div id="backlink"><a href="index.html">&laquo; Back To Index</a></div>
      {% endif %}
      {% if toc %}
        <div class="toc">
          <h2>Contents</h2>
          [toc list markup lost in extraction]
        </div>
      {% endif %}
      {{ body }}
    </div>
  </body>
</html>
\ ''' STYLESHEET = '''\ body { background-color: #f2f2f2; margin: 0; padding: 0; font-family: 'Georgia', serif; color: #111; } #content { background-color: white; padding: 20px; margin: 20px auto 20px auto; max-width: 800px; border: 4px solid #ddd; } h1 { font-weight: normal; font-size: 40px; color: #09839A; } h2 { font-weight: normal; font-size: 30px; color: #C73F00; } h1.heading { margin: 0 0 30px 0; } h2.subheading { margin: -30px 0 0 45px; } h3 { margin-top: 30px; } table.docutils { border-collapse: collapse; border: 2px solid #aaa; margin: 0.5em 1.5em 0.5em 1.5em; } table.docutils td { padding: 2px; border: 1px solid #ddd; } p, li, dd, dt, blockquote { font-size: 15px; color: #333; } p { line-height: 150%; margin-bottom: 0; margin-top: 10px; } hr { border-top: 1px solid #ccc; border-bottom: 0; border-right: 0; border-left: 0; margin-bottom: 10px; margin-top: 20px; } dl { margin-left: 10px; } li, dt { margin-top: 5px; } dt { font-weight: bold; } th { text-align: left; } a { color: #990000; } a:hover { color: #c73f00; } pre { background-color: #f9f9f9; border-top: 1px solid #ccc; border-bottom: 1px solid #ccc; padding: 5px; font-size: 13px; font-family: Bitstream Vera Sans Mono,monospace; } tt { font-size: 13px; font-family: Bitstream Vera Sans Mono,monospace; color: black; padding: 1px 2px 1px 2px; background-color: #f0f0f0; } cite { /* abusing , it's generated by ReST for `x` */ font-size: 13px; font-family: Bitstream Vera Sans Mono,monospace; font-weight: bold; font-style: normal; } #backlink { float: right; font-size: 11px; color: #888; } div.toc { margin: 0 0 10px 0; } div.toc h2 { font-size: 20px; } ''' #' def pygments_directive(name, arguments, options, content, lineno, content_offset, block_text, state, state_machine): try: lexer = get_lexer_by_name(arguments[0]) except ValueError: # no lexer found lexer = get_lexer_by_name('text') parsed = highlight(u'\n'.join(content), lexer, PYGMENTS_FORMATTER) return [nodes.raw('', parsed, format="html")] pygments_directive.arguments = (1, 0, 1) pygments_directive.content = 1 directives.register_directive('sourcecode', pygments_directive) def create_translator(link_style): class Translator(html4css1.HTMLTranslator): def visit_reference(self, node): refuri = node.get('refuri') if refuri is not None and '/' not in refuri and refuri.endswith('.txt'): node['refuri'] = link_style(refuri[:-4]) html4css1.HTMLTranslator.visit_reference(self, node) return Translator class DocumentationWriter(html4css1.Writer): def __init__(self, link_style): html4css1.Writer.__init__(self) self.translator_class = create_translator(link_style) def translate(self): html4css1.Writer.translate(self) # generate table of contents contents = self.build_contents(self.document) contents_doc = self.document.copy() contents_doc.children = contents contents_visitor = self.translator_class(contents_doc) contents_doc.walkabout(contents_visitor) self.parts['toc'] = self._generated_toc def build_contents(self, node, level=0): sections = [] i = len(node) - 1 while i >= 0 and isinstance(node[i], nodes.section): sections.append(node[i]) i -= 1 sections.reverse() toc = [] for section in sections: try: reference = nodes.reference('', '', refid=section['ids'][0], *section[0]) except IndexError: continue ref_id = reference['refid'] text = escape(reference.astext()) toc.append((ref_id, text)) self._generated_toc = [('#%s' % href, caption) for href, caption in toc] # no further processing return [] def generate_documentation(data, link_style): writer = DocumentationWriter(link_style) data = 
data.replace('[builtin_lexer_docs]', LEXERDOCS).\ replace('[builtin_formatter_docs]', FORMATTERDOCS).\ replace('[builtin_filter_docs]', FILTERDOCS).\ replace('[changelog]', CHANGELOG).\ replace('[authors]', AUTHORS) parts = publish_parts( data, writer=writer, settings_overrides={ 'initial_header_level': 3, 'field_name_limit': 50, } ) return { 'title': parts['title'], 'body': parts['body'], 'toc': parts['toc'] } def handle_python(filename, fp, dst): now = datetime.now() title = os.path.basename(filename)[:-4] content = fp.read() def urlize(href): # create links for the pygments webpage if href == 'index.txt': return '/docs/' else: return '/docs/%s/' % href parts = generate_documentation(content, urlize) result = file(os.path.join(dst, title + '.py'), 'w') result.write('# -*- coding: utf-8 -*-\n') result.write('"""\n Pygments Documentation - %s\n' % title) result.write(' %s\n\n' % ('~' * (24 + len(title)))) result.write(' Generated on: %s\n"""\n\n' % now) result.write('import datetime\n') result.write('DATE = %r\n' % now) result.write('TITLE = %r\n' % parts['title']) result.write('TOC = %r\n' % parts['toc']) result.write('BODY = %r\n' % parts['body']) result.close() def handle_html(filename, fp, dst): now = datetime.now() title = os.path.basename(filename)[:-4] content = fp.read().decode('utf-8') c = generate_documentation(content, (lambda x: './%s.html' % x)) result = file(os.path.join(dst, title + '.html'), 'w') c['style'] = STYLESHEET + PYGMENTS_FORMATTER.get_style_defs('.syntax') c['generation_date'] = now c['file_id'] = title t = Template(TEMPLATE) result.write(t.render(c).encode('utf-8')) result.close() def run(handle_file, dst, sources=()): path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'src')) if not sources: sources = [os.path.join(path, fn) for fn in os.listdir(path)] if not os.path.isdir(dst): os.makedirs(dst) print 'Making docs for Pygments %s in %s' % (__version__, dst) for fn in sources: if not os.path.isfile(fn): continue print 'Processing %s' % fn f = open(fn) try: handle_file(fn, f, dst) finally: f.close() def main(mode, dst='build/', *sources): try: handler = { 'html': handle_html, 'python': handle_python }[mode] except KeyError: print 'Error: unknown mode "%s"' % mode sys.exit(1) run(handler, os.path.realpath(dst), sources) if __name__ == '__main__': if len(sys.argv) == 1: print USAGE else: main(*sys.argv[1:]) Pygments-1.6/docs/src/0000755000175000017500000000000012103430105013746 5ustar piotrpiotrPygments-1.6/docs/src/plugins.txt0000644000175000017500000000502511726741461016216 0ustar piotrpiotr================ Register Plugins ================ If you want to extend Pygments without hacking the sources, but want to use the lexer/formatter/style/filter lookup functions (`lexers.get_lexer_by_name` et al.), you can use `setuptools`_ entrypoints to add new lexers, formatters or styles as if they were in the Pygments core. .. _setuptools: http://peak.telecommunity.com/DevCenter/setuptools That means you can use your highlighter modules with the `pygmentize` script, which relies on the mentioned functions. Entrypoints =========== Here is a list of setuptools entrypoints that Pygments understands: `pygments.lexers` This entrypoint is used for adding new lexers to the Pygments core. The name of the entrypoint values doesn't really matter, Pygments extracts required metadata from the class definition: .. 
sourcecode:: ini [pygments.lexers] yourlexer = yourmodule:YourLexer Note that you have to define ``name``, ``aliases`` and ``filename`` attributes so that you can use the highlighter from the command line: .. sourcecode:: python class YourLexer(...): name = 'Name Of Your Lexer' aliases = ['alias'] filenames = ['*.ext'] `pygments.formatters` You can use this entrypoint to add new formatters to Pygments. The name of an entrypoint item is the name of the formatter. If you prefix the name with a slash it's used as a filename pattern: .. sourcecode:: ini [pygments.formatters] yourformatter = yourmodule:YourFormatter /.ext = yourmodule:YourFormatter `pygments.styles` To add a new style you can use this entrypoint. The name of the entrypoint is the name of the style: .. sourcecode:: ini [pygments.styles] yourstyle = yourmodule:YourStyle `pygments.filters` Use this entrypoint to register a new filter. The name of the entrypoint is the name of the filter: .. sourcecode:: ini [pygments.filters] yourfilter = yourmodule:YourFilter How To Use Entrypoints ====================== This documentation doesn't explain how to use those entrypoints because this is covered in the `setuptools documentation`_. That page should cover everything you need to write a plugin. .. _setuptools documentation: http://peak.telecommunity.com/DevCenter/setuptools Extending The Core ================== If you have written a Pygments plugin that is open source, please inform us about that. There is a high chance that we'll add it to the Pygments distribution. Pygments-1.6/docs/src/styles.txt0000644000175000017500000000770011726741461016062 0ustar piotrpiotr.. -*- mode: rst -*- ====== Styles ====== Pygments comes with some builtin styles that work for both the HTML and LaTeX formatter. The builtin styles can be looked up with the `get_style_by_name` function: .. sourcecode:: pycon >>> from pygments.styles import get_style_by_name >>> get_style_by_name('colorful') You can pass a instance of a `Style` class to a formatter as the `style` option in form of a string: .. sourcecode:: pycon >>> from pygments.styles import get_style_by_name >>> HtmlFormatter(style='colorful').style Or you can also import your own style (which must be a subclass of `pygments.style.Style`) and pass it to the formatter: .. sourcecode:: pycon >>> from yourapp.yourmodule import YourStyle >>> HtmlFormatter(style=YourStyle).style Creating Own Styles =================== So, how to create a style? All you have to do is to subclass `Style` and define some styles: .. sourcecode:: python from pygments.style import Style from pygments.token import Keyword, Name, Comment, String, Error, \ Number, Operator, Generic class YourStyle(Style): default_style = "" styles = { Comment: 'italic #888', Keyword: 'bold #005', Name: '#f00', Name.Function: '#0f0', Name.Class: 'bold #0f0', String: 'bg:#eee #111' } That's it. There are just a few rules. When you define a style for `Name` the style automatically also affects `Name.Function` and so on. If you defined ``'bold'`` and you don't want boldface for a subtoken use ``'nobold'``. (Philosophy: the styles aren't written in CSS syntax since this way they can be used for a variety of formatters.) `default_style` is the style inherited by all token types. To make the style usable for Pygments, you must * either register it as a plugin (see `the plugin docs `_) * or drop it into the `styles` subpackage of your Pygments distribution one style class per style, where the file name is the style name and the class name is `StylenameClass`. 
For example, if your style should be called ``"mondrian"``, name the class `MondrianStyle`, put it into the file ``mondrian.py`` and this file into the ``pygments.styles`` subpackage directory. Style Rules =========== Here a small overview of all allowed styles: ``bold`` render text as bold ``nobold`` don't render text as bold (to prevent subtokens being highlighted bold) ``italic`` render text italic ``noitalic`` don't render text as italic ``underline`` render text underlined ``nounderline`` don't render text underlined ``bg:`` transparent background ``bg:#000000`` background color (black) ``border:`` no border ``border:#ffffff`` border color (white) ``#ff0000`` text color (red) ``noinherit`` don't inherit styles from supertoken Note that there may not be a space between ``bg:`` and the color value since the style definition string is split at whitespace. Also, using named colors is not allowed since the supported color names vary for different formatters. Furthermore, not all lexers might support every style. Builtin Styles ============== Pygments ships some builtin styles which are maintained by the Pygments team. To get a list of known styles you can use this snippet: .. sourcecode:: pycon >>> from pygments.styles import STYLE_MAP >>> STYLE_MAP.keys() ['default', 'emacs', 'friendly', 'colorful'] Getting a list of available styles ================================== *New in Pygments 0.6.* Because it could be that a plugin registered a style, there is a way to iterate over all styles: .. sourcecode:: pycon >>> from pygments.styles import get_all_styles >>> styles = list(get_all_styles()) Pygments-1.6/docs/src/java.txt0000644000175000017500000000444212103426531015444 0ustar piotrpiotr===================== Use Pygments in Java ===================== Thanks to `Jython `__ it is possible to use Pygments in Java. This page is a simple tutorial to get an idea of how this is working. You can then look at the `Jython documentation `__ for more advanced use. Since version 1.5, Pygments is deployed on `Maven Central `__ as a JAR so is Jython which makes it a lot easier to create the Java project. Here is an example of a `Maven `__ ``pom.xml`` file for a project running Pygments: .. sourcecode:: xml 4.0.0 example example 1.0-SNAPSHOT org.python jython-standalone 2.5.3 org.pygments pygments 1.5 runtime The following Java example: .. sourcecode:: java PythonInterpreter interpreter = new PythonInterpreter(); // Set a variable with the content you want to work with interpreter.set("code", code); // Simple use Pygments as you would in Python interpreter.exec("from pygments import highlight\n" + "from pygments.lexers import PythonLexer\n" + "from pygments.formatters import HtmlFormatter\n" + "\nresult = highlight(code, PythonLexer(), HtmlFormatter())"); // Get the result that has been set in a variable System.out.println(interpreter.get("result", String.class)); will print something like: .. sourcecode:: html
print "Hello World"
Pygments-1.6/docs/src/authors.txt0000644000175000017500000000004311726741461016215 0ustar piotrpiotr======= Authors ======= [authors] Pygments-1.6/docs/src/changelog.txt0000644000175000017500000000005311726741461016460 0ustar piotrpiotr========= Changelog ========= [changelog] Pygments-1.6/docs/src/formatters.txt0000644000175000017500000000311211726741461016716 0ustar piotrpiotr.. -*- mode: rst -*- ==================== Available formatters ==================== This page lists all builtin formatters. Common options ============== All formatters support these options: `encoding` *New in Pygments 0.6.* If given, must be an encoding name (such as ``"utf-8"``). This will be used to convert the token strings (which are Unicode strings) to byte strings in the output (default: ``None``). It will also be written in an encoding declaration suitable for the document format if the `full` option is given (e.g. a ``meta content-type`` directive in HTML or an invocation of the `inputenc` package in LaTeX). If this is ``""`` or ``None``, Unicode strings will be written to the output file, which most file-like objects do not support. For example, `pygments.highlight()` will return a Unicode string if called with no `outfile` argument and a formatter that has `encoding` set to ``None`` because it uses a `StringIO.StringIO` object that supports Unicode arguments to `write()`. Using a regular file object wouldn't work. `outencoding` *New in Pygments 0.7.* When using Pygments from the command line, any `encoding` option given is passed to the lexer and the formatter. This is sometimes not desirable, for example if you want to set the input encoding to ``"guess"``. Therefore, `outencoding` has been introduced which overrides `encoding` for the formatter if given. Formatter classes ================= All these classes are importable from `pygments.formatters`. [builtin_formatter_docs] Pygments-1.6/docs/src/index.txt0000644000175000017500000000247312103426531015634 0ustar piotrpiotr.. -*- mode: rst -*- ======== Overview ======== Welcome to the Pygments documentation. - Starting with Pygments - `Installation `_ - `Introduction and Quickstart `_ - `Command line interface `_ - Builtin components - `Lexers `_ - `Formatters `_ - `Filters `_ - `Styles `_ - Reference - `Unicode and encodings `_ - `Builtin tokens `_ - `API documentation `_ - Hacking for Pygments - `Write your own lexer `_ - `Write your own formatter `_ - `Write your own filter `_ - `Register plugins `_ - Hints and Tricks - `Using Pygments in ReST documents `_ - `Using Pygments with MoinMoin `_ - `Using Pygments in other contexts `_ - About Pygments - `Changelog `_ - `Authors `_ -------------- If you find bugs or have suggestions for the documentation, please look `here`_ for info on how to contact the team. You can download an offline version of this documentation from the `download page`_. .. _here: http://pygments.org/contribute/ .. _download page: http://pygments.org/download/ Pygments-1.6/docs/src/filterdevelopment.txt0000644000175000017500000000435311726741461020270 0ustar piotrpiotr.. -*- mode: rst -*- ===================== Write your own filter ===================== *New in Pygments 0.7.* Writing own filters is very easy. All you have to do is to subclass the `Filter` class and override the `filter` method. Additionally a filter is instanciated with some keyword arguments you can use to adjust the behavior of your filter. 
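Before writing a filter, it may help to see where one plugs in: filters are
attached to a lexer with `add_filter()`, either as an instance or by its
registered name, and keyword arguments become the filter options. A minimal
sketch using the builtin `keywordcase` filter:

.. sourcecode:: python

    from pygments import highlight
    from pygments.lexers import PythonLexer
    from pygments.formatters import HtmlFormatter

    lexer = PythonLexer()
    # pass the registered filter name; options follow as keyword arguments
    lexer.add_filter('keywordcase', case='upper')
    print(highlight('print "Hello World"', lexer, HtmlFormatter()))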
Subclassing Filters =================== As an example, we write a filter that converts all `Name.Function` tokens to normal `Name` tokens to make the output less colorful. .. sourcecode:: python from pygments.util import get_bool_opt from pygments.token import Name from pygments.filter import Filter class UncolorFilter(Filter): def __init__(self, **options): Filter.__init__(self, **options) self.class_too = get_bool_opt(options, 'classtoo') def filter(self, lexer, stream): for ttype, value in stream: if ttype is Name.Function or (self.class_too and ttype is Name.Class): ttype = Name yield ttype, value Some notes on the `lexer` argument: that can be quite confusing since it doesn't need to be a lexer instance. If a filter was added by using the `add_filter()` function of lexers, that lexer is registered for the filter. In that case `lexer` will refer to the lexer that has registered the filter. It *can* be used to access options passed to a lexer. Because it could be `None` you always have to check for that case if you access it. Using a decorator ================= You can also use the `simplefilter` decorator from the `pygments.filter` module: .. sourcecode:: python from pygments.util import get_bool_opt from pygments.token import Name from pygments.filter import simplefilter @simplefilter def uncolor(lexer, stream, options): class_too = get_bool_opt(options, 'classtoo') for ttype, value in stream: if ttype is Name.Function or (class_too and ttype is Name.Class): ttype = Name yield ttype, value The decorator automatically subclasses an internal filter class and uses the decorated function for filtering. Pygments-1.6/docs/src/formatterdevelopment.txt0000644000175000017500000001404711726741461021007 0ustar piotrpiotr.. -*- mode: rst -*- ======================== Write your own formatter ======================== As well as creating `your own lexer `_, writing a new formatter for Pygments is easy and straightforward. A formatter is a class that is initialized with some keyword arguments (the formatter options) and that must provides a `format()` method. Additionally a formatter should provide a `get_style_defs()` method that returns the style definitions from the style in a form usable for the formatter's output format. Quickstart ========== The most basic formatter shipped with Pygments is the `NullFormatter`. It just sends the value of a token to the output stream: .. sourcecode:: python from pygments.formatter import Formatter class NullFormatter(Formatter): def format(self, tokensource, outfile): for ttype, value in tokensource: outfile.write(value) As you can see, the `format()` method is passed two parameters: `tokensource` and `outfile`. The first is an iterable of ``(token_type, value)`` tuples, the latter a file like object with a `write()` method. Because the formatter is that basic it doesn't overwrite the `get_style_defs()` method. Styles ====== Styles aren't instantiated but their metaclass provides some class functions so that you can access the style definitions easily. Styles are iterable and yield tuples in the form ``(ttype, d)`` where `ttype` is a token and `d` is a dict with the following keys: ``'color'`` Hexadecimal color value (eg: ``'ff0000'`` for red) or `None` if not defined. ``'bold'`` `True` if the value should be bold ``'italic'`` `True` if the value should be italic ``'underline'`` `True` if the value should be underlined ``'bgcolor'`` Hexadecimal color value for the background (eg: ``'eeeeeee'`` for light gray) or `None` if not defined. 
``'border'`` Hexadecimal color value for the border (eg: ``'0000aa'`` for a dark blue) or `None` for no border. Additional keys might appear in the future, formatters should ignore all keys they don't support. HTML 3.2 Formatter ================== For an more complex example, let's implement a HTML 3.2 Formatter. We don't use CSS but inline markup (````, ````, etc). Because this isn't good style this formatter isn't in the standard library ;-) .. sourcecode:: python from pygments.formatter import Formatter class OldHtmlFormatter(Formatter): def __init__(self, **options): Formatter.__init__(self, **options) # create a dict of (start, end) tuples that wrap the # value of a token so that we can use it in the format # method later self.styles = {} # we iterate over the `_styles` attribute of a style item # that contains the parsed style values. for token, style in self.style: start = end = '' # a style item is a tuple in the following form: # colors are readily specified in hex: 'RRGGBB' if style['color']: start += '' % style['color'] end = '' + end if style['bold']: start += '' end = '' + end if style['italic']: start += '' end = '' + end if style['underline']: start += '' end = '' + end self.styles[token] = (start, end) def format(self, tokensource, outfile): # lastval is a string we use for caching # because it's possible that an lexer yields a number # of consecutive tokens with the same token type. # to minimize the size of the generated html markup we # try to join the values of same-type tokens here lastval = '' lasttype = None # wrap the whole output with
            outfile.write('<pre>')

            for ttype, value in tokensource:
                # if the token type doesn't exist in the stylemap
                # we try it with the parent of the token type
                # e.g. parent of Token.Literal.String.Double is
                # Token.Literal.String
                while ttype not in self.styles:
                    ttype = ttype.parent
                if ttype == lasttype:
                    # the current token type is the same as in the last
                    # iteration. cache it
                    lastval += value
                else:
                    # not the same token as last iteration, but we
                    # have some data in the buffer. wrap it with the
                    # defined style and write it to the output file
                    if lastval:
                        stylebegin, styleend = self.styles[lasttype]
                        outfile.write(stylebegin + lastval + styleend)
                    # set lastval/lasttype to current values
                    lastval = value
                    lasttype = ttype

            # if something is left in the buffer, write it to the
            # output file, then close the opened <pre> tag
            if lastval:
                stylebegin, styleend = self.styles[lasttype]
                outfile.write(stylebegin + lastval + styleend)
            outfile.write('</pre>
\n') The comments should explain it. Again, this formatter doesn't override the `get_style_defs()` method. If we would have used CSS classes instead of inline HTML markup, we would need to generate the CSS first. For that purpose the `get_style_defs()` method exists: Generating Style Definitions ============================ Some formatters like the `LatexFormatter` and the `HtmlFormatter` don't output inline markup but reference either macros or css classes. Because the definitions of those are not part of the output, the `get_style_defs()` method exists. It is passed one parameter (if it's used and how it's used is up to the formatter) and has to return a string or ``None``. Pygments-1.6/docs/src/unicode.txt0000644000175000017500000000405211726741461016162 0ustar piotrpiotr===================== Unicode and Encodings ===================== Since Pygments 0.6, all lexers use unicode strings internally. Because of that you might encounter the occasional `UnicodeDecodeError` if you pass strings with the wrong encoding. Per default all lexers have their input encoding set to `latin1`. If you pass a lexer a string object (not unicode), it tries to decode the data using this encoding. You can override the encoding using the `encoding` lexer option. If you have the `chardet`_ library installed and set the encoding to ``chardet`` if will ananlyse the text and use the encoding it thinks is the right one automatically: .. sourcecode:: python from pygments.lexers import PythonLexer lexer = PythonLexer(encoding='chardet') The best way is to pass Pygments unicode objects. In that case you can't get unexpected output. The formatters now send Unicode objects to the stream if you don't set the output encoding. You can do so by passing the formatters an `encoding` option: .. sourcecode:: python from pygments.formatters import HtmlFormatter f = HtmlFormatter(encoding='utf-8') **You will have to set this option if you have non-ASCII characters in the source and the output stream does not accept Unicode written to it!** This is the case for all regular files and for terminals. Note: The Terminal formatter tries to be smart: if its output stream has an `encoding` attribute, and you haven't set the option, it will encode any Unicode string with this encoding before writing it. This is the case for `sys.stdout`, for example. The other formatters don't have that behavior. Another note: If you call Pygments via the command line (`pygmentize`), encoding is handled differently, see `the command line docs `_. *New in Pygments 0.7*: the formatters now also accept an `outencoding` option which will override the `encoding` option if given. This makes it possible to use a single options dict with lexers and formatters, and still have different input and output encodings. .. _chardet: http://chardet.feedparser.org/ Pygments-1.6/docs/src/api.txt0000644000175000017500000002233312103426531015273 0ustar piotrpiotr.. -*- mode: rst -*- ===================== The full Pygments API ===================== This page describes the Pygments API. High-level API ============== Functions from the `pygments` module: def `lex(code, lexer):` Lex `code` with the `lexer` (must be a `Lexer` instance) and return an iterable of tokens. Currently, this only calls `lexer.get_tokens()`. def `format(tokens, formatter, outfile=None):` Format a token stream (iterable of tokens) `tokens` with the `formatter` (must be a `Formatter` instance). The result is written to `outfile`, or if that is ``None``, returned as a string. 
def `highlight(code, lexer, formatter, outfile=None):` This is the most high-level highlighting function. It combines `lex` and `format` in one function. Functions from `pygments.lexers`: def `get_lexer_by_name(alias, **options):` Return an instance of a `Lexer` subclass that has `alias` in its aliases list. The lexer is given the `options` at its instantiation. Will raise `pygments.util.ClassNotFound` if no lexer with that alias is found. def `get_lexer_for_filename(fn, **options):` Return a `Lexer` subclass instance that has a filename pattern matching `fn`. The lexer is given the `options` at its instantiation. Will raise `pygments.util.ClassNotFound` if no lexer for that filename is found. def `get_lexer_for_mimetype(mime, **options):` Return a `Lexer` subclass instance that has `mime` in its mimetype list. The lexer is given the `options` at its instantiation. Will raise `pygments.util.ClassNotFound` if not lexer for that mimetype is found. def `guess_lexer(text, **options):` Return a `Lexer` subclass instance that's guessed from the text in `text`. For that, the `analyse_text()` method of every known lexer class is called with the text as argument, and the lexer which returned the highest value will be instantiated and returned. `pygments.util.ClassNotFound` is raised if no lexer thinks it can handle the content. def `guess_lexer_for_filename(filename, text, **options):` As `guess_lexer()`, but only lexers which have a pattern in `filenames` or `alias_filenames` that matches `filename` are taken into consideration. `pygments.util.ClassNotFound` is raised if no lexer thinks it can handle the content. def `get_all_lexers():` Return an iterable over all registered lexers, yielding tuples in the format:: (longname, tuple of aliases, tuple of filename patterns, tuple of mimetypes) *New in Pygments 0.6.* Functions from `pygments.formatters`: def `get_formatter_by_name(alias, **options):` Return an instance of a `Formatter` subclass that has `alias` in its aliases list. The formatter is given the `options` at its instantiation. Will raise `pygments.util.ClassNotFound` if no formatter with that alias is found. def `get_formatter_for_filename(fn, **options):` Return a `Formatter` subclass instance that has a filename pattern matching `fn`. The formatter is given the `options` at its instantiation. Will raise `pygments.util.ClassNotFound` if no formatter for that filename is found. Functions from `pygments.styles`: def `get_style_by_name(name):` Return a style class by its short name. The names of the builtin styles are listed in `pygments.styles.STYLE_MAP`. Will raise `pygments.util.ClassNotFound` if no style of that name is found. def `get_all_styles():` Return an iterable over all registered styles, yielding their names. *New in Pygments 0.6.* Lexers ====== A lexer (derived from `pygments.lexer.Lexer`) has the following functions: def `__init__(self, **options):` The constructor. Takes a \*\*keywords dictionary of options. Every subclass must first process its own options and then call the `Lexer` constructor, since it processes the `stripnl`, `stripall` and `tabsize` options. An example looks like this: .. sourcecode:: python def __init__(self, **options): self.compress = options.get('compress', '') Lexer.__init__(self, **options) As these options must all be specifiable as strings (due to the command line usage), there are various utility functions available to help with that, see `Option processing`_. def `get_tokens(self, text):` This method is the basic interface of a lexer. 
It is called by the `highlight()` function. It must process the text and return an iterable of ``(tokentype, value)`` pairs from `text`. Normally, you don't need to override this method. The default implementation processes the `stripnl`, `stripall` and `tabsize` options and then yields all tokens from `get_tokens_unprocessed()`, with the ``index`` dropped. def `get_tokens_unprocessed(self, text):` This method should process the text and return an iterable of ``(index, tokentype, value)`` tuples where ``index`` is the starting position of the token within the input text. This method must be overridden by subclasses. def `analyse_text(text):` A static method which is called for lexer guessing. It should analyse the text and return a float in the range from ``0.0`` to ``1.0``. If it returns ``0.0``, the lexer will not be selected as the most probable one, if it returns ``1.0``, it will be selected immediately. For a list of known tokens have a look at the `Tokens`_ page. A lexer also can have the following attributes (in fact, they are mandatory except `alias_filenames`) that are used by the builtin lookup mechanism. `name` Full name for the lexer, in human-readable form. `aliases` A list of short, unique identifiers that can be used to lookup the lexer from a list, e.g. using `get_lexer_by_name()`. `filenames` A list of `fnmatch` patterns that match filenames which contain content for this lexer. The patterns in this list should be unique among all lexers. `alias_filenames` A list of `fnmatch` patterns that match filenames which may or may not contain content for this lexer. This list is used by the `guess_lexer_for_filename()` function, to determine which lexers are then included in guessing the correct one. That means that e.g. every lexer for HTML and a template language should include ``\*.html`` in this list. `mimetypes` A list of MIME types for content that can be lexed with this lexer. .. _Tokens: tokens.txt Formatters ========== A formatter (derived from `pygments.formatter.Formatter`) has the following functions: def `__init__(self, **options):` As with lexers, this constructor processes options and then must call the base class `__init__`. The `Formatter` class recognizes the options `style`, `full` and `title`. It is up to the formatter class whether it uses them. def `get_style_defs(self, arg=''):` This method must return statements or declarations suitable to define the current style for subsequent highlighted text (e.g. CSS classes in the `HTMLFormatter`). The optional argument `arg` can be used to modify the generation and is formatter dependent (it is standardized because it can be given on the command line). This method is called by the ``-S`` `command-line option`_, the `arg` is then given by the ``-a`` option. def `format(self, tokensource, outfile):` This method must format the tokens from the `tokensource` iterable and write the formatted version to the file object `outfile`. Formatter options can control how exactly the tokens are converted. .. _command-line option: cmdline.txt A formatter must have the following attributes that are used by the builtin lookup mechanism. (*New in Pygments 0.7.*) `name` Full name for the formatter, in human-readable form. `aliases` A list of short, unique identifiers that can be used to lookup the formatter from a list, e.g. using `get_formatter_by_name()`. `filenames` A list of `fnmatch` patterns that match filenames for which this formatter can produce output. The patterns in this list should be unique among all formatters. 
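Putting the required pieces together, a lookup-ready formatter is only a few
lines. The following is a hedged sketch; the class name, alias and filename
pattern are invented for illustration:

.. sourcecode:: python

    from pygments.formatter import Formatter

    class PlainFormatter(Formatter):
        # attributes used by the builtin lookup mechanism
        name = 'Plain text'      # human-readable name
        aliases = ['plainly']    # for get_formatter_by_name('plainly')
        filenames = ['*.plain']  # for get_formatter_for_filename()

        def format(self, tokensource, outfile):
            # discard the token types and emit the bare text
            for ttype, value in tokensource:
                outfile.write(value)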
Option processing ================= The `pygments.util` module has some utility functions usable for option processing: class `OptionError` This exception will be raised by all option processing functions if the type or value of the argument is not correct. def `get_bool_opt(options, optname, default=None):` Interpret the key `optname` from the dictionary `options` as a boolean and return it. Return `default` if `optname` is not in `options`. The valid string values for ``True`` are ``1``, ``yes``, ``true`` and ``on``, the ones for ``False`` are ``0``, ``no``, ``false`` and ``off`` (matched case-insensitively). def `get_int_opt(options, optname, default=None):` As `get_bool_opt`, but interpret the value as an integer. def `get_list_opt(options, optname, default=None):` If the key `optname` from the dictionary `options` is a string, split it at whitespace and return it. If it is already a list or a tuple, it is returned as a list. def `get_choice_opt(options, optname, allowed, default=None):` If the key `optname` from the dictionary is not in the sequence `allowed`, raise an error, otherwise return it. *New in Pygments 0.8.* Pygments-1.6/docs/src/integrate.txt0000644000175000017500000000237412103426531016507 0ustar piotrpiotr.. -*- mode: rst -*- =================================== Using Pygments in various scenarios =================================== PyGtk ----- Armin has written a piece of sample code that shows how to create a Gtk `TextBuffer` object containing Pygments-highlighted text. See the article here: http://lucumr.pocoo.org/cogitations/2007/05/30/pygments-gtk-rendering/ Wordpress --------- He also has a snippet that shows how to use Pygments in WordPress: http://lucumr.pocoo.org/cogitations/2007/05/30/pygments-in-wordpress/ Markdown -------- Since Pygments 0.9, the distribution ships Markdown_ preprocessor sample code that uses Pygments to render source code in `external/markdown-processor.py`. You can copy and adapt it to your liking. .. _Markdown: http://www.freewisdom.org/projects/python-markdown/ TextMate -------- Antonio Cangiano has created a Pygments bundle for TextMate that allows to colorize code via a simple menu option. It can be found here_. .. _here: http://antoniocangiano.com/2008/10/28/pygments-textmate-bundle/ Bash completion --------------- The source distribution contains a file ``external/pygments.bashcomp`` that sets up completion for the ``pygmentize`` command in bash. Java ---- See the `Java quickstart `_ document. Pygments-1.6/docs/src/cmdline.txt0000644000175000017500000001141611726741461016151 0ustar piotrpiotr.. -*- mode: rst -*- ====================== Command Line Interface ====================== You can use Pygments from the shell, provided you installed the `pygmentize` script:: $ pygmentize test.py print "Hello World" will print the file test.py to standard output, using the Python lexer (inferred from the file name extension) and the terminal formatter (because you didn't give an explicit formatter name). If you want HTML output:: $ pygmentize -f html -l python -o test.html test.py As you can see, the -l option explicitly selects a lexer. As seen above, if you give an input file name and it has an extension that Pygments recognizes, you can omit this option. The ``-o`` option gives an output file name. If it is not given, output is written to stdout. The ``-f`` option selects a formatter (as with ``-l``, it can also be omitted if an output file name is given and has a supported extension). 
If no output file name is given and ``-f`` is omitted, the `TerminalFormatter` is used. The above command could therefore also be given as:: $ pygmentize -o test.html test.py To create a full HTML document, including line numbers and stylesheet (using the "emacs" style), highlighting the Python file ``test.py`` to ``test.html``:: $ pygmentize -O full,style=emacs -o test.html test.py Options and filters ------------------- Lexer and formatter options can be given using the ``-O`` option:: $ pygmentize -f html -O style=colorful,linenos=1 -l python test.py Be sure to enclose the option string in quotes if it contains any special shell characters, such as spaces or expansion wildcards like ``*``. If an option expects a list value, separate the list entries with spaces (you'll have to quote the option value in this case too, so that the shell doesn't split it). Since the ``-O`` option argument is split at commas and expects the split values to be of the form ``name=value``, you can't give an option value that contains commas or equals signs. Therefore, an option ``-P`` is provided (as of Pygments 0.9) that works like ``-O`` but can only pass one option per ``-P``. Its value can then contain all characters:: $ pygmentize -P "heading=Pygments, the Python highlighter" ... Filters are added to the token stream using the ``-F`` option:: $ pygmentize -f html -l pascal -F keywordcase:case=upper main.pas As you see, options for the filter are given after a colon. As for ``-O``, the filter name and options must be one shell word, so there may not be any spaces around the colon. Generating styles ----------------- Formatters normally don't output full style information. For example, the HTML formatter by default only outputs ```` tags with ``class`` attributes. Therefore, there's a special ``-S`` option for generating style definitions. Usage is as follows:: $ pygmentize -f html -S colorful -a .syntax generates a CSS style sheet (because you selected the HTML formatter) for the "colorful" style prepending a ".syntax" selector to all style rules. For an explanation what ``-a`` means for `a particular formatter`_, look for the `arg` argument for the formatter's `get_style_defs()` method. Getting lexer names ------------------- *New in Pygments 1.0.* The ``-N`` option guesses a lexer name for a given filename, so that :: $ pygmentize -N setup.py will print out ``python``. It won't highlight anything yet. If no specific lexer is known for that filename, ``text`` is printed. Getting help ------------ The ``-L`` option lists lexers, formatters, along with their short names and supported file name extensions, styles and filters. If you want to see only one category, give it as an argument:: $ pygmentize -L filters will list only all installed filters. The ``-H`` option will give you detailed information (the same that can be found in this documentation) about a lexer, formatter or filter. Usage is as follows:: $ pygmentize -H formatter html will print the help for the HTML formatter, while :: $ pygmentize -H lexer python will print the help for the Python lexer, etc. A note on encodings ------------------- *New in Pygments 0.9.* Pygments tries to be smart regarding encodings in the formatting process: * If you give an ``encoding`` option, it will be used as the input and output encoding. * If you give an ``outencoding`` option, it will override ``encoding`` as the output encoding. 
* If you don't give an encoding and have given an output file, the default encoding for lexer and formatter is ``latin1`` (which will pass through all non-ASCII characters). * If you don't give an encoding and haven't given an output file (that means output is written to the console), the default encoding for lexer and formatter is the terminal encoding (`sys.stdout.encoding`). .. _a particular formatter: formatters.txt Pygments-1.6/docs/src/installation.txt0000644000175000017500000000407211726741461017237 0ustar piotrpiotr.. -*- mode: rst -*- ============ Installation ============ Pygments requires at least Python 2.4 to work correctly. Just to clarify: there *won't* ever be support for Python versions below 2.4. However, there are no other dependencies. Installing a released version ============================= As a Python egg (via easy_install) ---------------------------------- You can install the most recent Pygments version using `easy_install`_:: sudo easy_install Pygments This will install a Pygments egg in your Python installation's site-packages directory. From the tarball release ------------------------- 1. Download the most recent tarball from the `download page`_ 2. Unpack the tarball 3. ``sudo python setup.py install`` Note that the last command will automatically download and install `setuptools`_ if you don't already have it installed. This requires a working internet connection. This will install Pygments into your Python installation's site-packages directory. Installing the development version ================================== If you want to play around with the code ---------------------------------------- 1. Install `Mercurial`_ 2. ``hg clone http://bitbucket.org/birkenfeld/pygments-main pygments`` 3. ``cd pygments`` 4. ``ln -s pygments /usr/lib/python2.X/site-packages`` 5. ``ln -s pygmentize /usr/local/bin`` As an alternative to steps 4 and 5 you can also do ``python setup.py develop`` which will install the package via setuptools in development mode. .. If you just want the latest features and use them ------------------------------------------------- :: sudo easy_install Pygments==dev This will install a Pygments egg containing the latest Subversion trunk code in your Python installation's site-packages directory. Every time the command is run, the sources are updated from Subversion. .. _download page: http://pygments.org/download/ .. _setuptools: http://peak.telecommunity.com/DevCenter/setuptools .. _easy_install: http://peak.telecommunity.com/DevCenter/EasyInstall .. _Mercurial: http://selenic.com/mercurial/ Pygments-1.6/docs/src/rstdirective.txt0000644000175000017500000000155111726741461017244 0ustar piotrpiotr.. -*- mode: rst -*- ================================ Using Pygments in ReST documents ================================ Many Python people use `ReST`_ for documentation their sourcecode, programs, scripts et cetera. This also means that documentation often includes sourcecode samples or snippets. You can easily enable Pygments support for your ReST texts using a custom directive -- this is also how this documentation displays source code. From Pygments 0.9, the directive is shipped in the distribution as `external/rst-directive.py`. You can copy and adapt this code to your liking. .. removed -- too confusing *Loosely related note:* The ReST lexer now recognizes ``.. sourcecode::`` and ``.. code::`` directives and highlights the contents in the specified language if the `handlecodeblocks` option is true. .. 
_ReST: http://docutils.sf.net/rst.html Pygments-1.6/docs/src/moinmoin.txt0000644000175000017500000000267011726741461016365 0ustar piotrpiotr.. -*- mode: rst -*- ============================ Using Pygments with MoinMoin ============================ From Pygments 0.7, the source distribution ships a `Moin`_ parser plugin that can be used to get Pygments highlighting in Moin wiki pages. To use it, copy the file `external/moin-parser.py` from the Pygments distribution to the `data/plugin/parser` subdirectory of your Moin instance. Edit the options at the top of the file (currently ``ATTACHMENTS`` and ``INLINESTYLES``) and rename the file to the name that the parser directive should have. For example, if you name the file ``code.py``, you can get a highlighted Python code sample with this Wiki markup:: {{{ #!code python [...] }}} where ``python`` is the Pygments name of the lexer to use. Additionally, if you set the ``ATTACHMENTS`` option to True, Pygments will also be called for all attachments for whose filenames there is no other parser registered. You are responsible for including CSS rules that will map the Pygments CSS classes to colors. You can output a stylesheet file with `pygmentize`, put it into the `htdocs` directory of your Moin instance and then include it in the `stylesheets` configuration option in the Moin config, e.g.:: stylesheets = [('screen', '/htdocs/pygments.css')] If you do not want to do that and are willing to accept larger HTML output, you can set the ``INLINESTYLES`` option to True. .. _Moin: http://moinmoin.wikiwikiweb.de/ Pygments-1.6/docs/src/quickstart.txt0000644000175000017500000001466111726741461016735 0ustar piotrpiotr.. -*- mode: rst -*- =========================== Introduction and Quickstart =========================== Welcome to Pygments! This document explains the basic concepts and terms and gives a few examples of how to use the library. Architecture ============ There are four types of components that work together highlighting a piece of code: * A **lexer** splits the source into tokens, fragments of the source that have a token type that determines what the text represents semantically (e.g., keyword, string, or comment). There is a lexer for every language or markup format that Pygments supports. * The token stream can be piped through **filters**, which usually modify the token types or text fragments, e.g. uppercasing all keywords. * A **formatter** then takes the token stream and writes it to an output file, in a format such as HTML, LaTeX or RTF. * While writing the output, a **style** determines how to highlight all the different token types. It maps them to attributes like "red and bold". Example ======= Here is a small example for highlighting Python code: .. sourcecode:: python from pygments import highlight from pygments.lexers import PythonLexer from pygments.formatters import HtmlFormatter code = 'print "Hello World"' print highlight(code, PythonLexer(), HtmlFormatter()) which prints something like this: .. sourcecode:: html
print "Hello World"
As you can see, Pygments uses CSS classes (by default, but you can change
that) instead of inline styles in order to avoid outputting redundant style
information over and over. A CSS stylesheet that contains all CSS classes
possibly used in the output can be produced by:

.. sourcecode:: python

    print HtmlFormatter().get_style_defs('.highlight')

The argument to `get_style_defs` is used as an additional CSS selector:
the output may look like this:

.. sourcecode:: css

    .highlight .k { color: #AA22FF; font-weight: bold }
    .highlight .s { color: #BB4444 }
    ...

Options
=======

The `highlight()` function supports a fourth argument called `outfile`; it
must be a file object if given. The formatted output will then be written to
this file instead of being returned as a string.

Lexers and formatters both support options. They are given to them as keyword
arguments either to the class or to the lookup method:

.. sourcecode:: python

    from pygments import highlight
    from pygments.lexers import get_lexer_by_name
    from pygments.formatters import HtmlFormatter

    lexer = get_lexer_by_name("python", stripall=True)
    formatter = HtmlFormatter(linenos=True, cssclass="source")
    result = highlight(code, lexer, formatter)

This makes the lexer strip all leading and trailing whitespace from the input
(`stripall` option), lets the formatter output line numbers (`linenos`
option), and sets the wrapping ``<div>
``'s class to ``source`` (instead of ``highlight``). Important options include: `encoding` : for lexers and formatters Since Pygments uses Unicode strings internally, this determines which encoding will be used to convert to or from byte strings. `style` : for formatters The name of the style to use when writing the output. For an overview of builtin lexers and formatters and their options, visit the `lexer `_ and `formatters `_ lists. For a documentation on filters, see `this page `_. Lexer and formatter lookup ========================== If you want to lookup a built-in lexer by its alias or a filename, you can use one of the following methods: .. sourcecode:: pycon >>> from pygments.lexers import (get_lexer_by_name, ... get_lexer_for_filename, get_lexer_for_mimetype) >>> get_lexer_by_name('python') >>> get_lexer_for_filename('spam.rb') >>> get_lexer_for_mimetype('text/x-perl') All these functions accept keyword arguments; they will be passed to the lexer as options. A similar API is available for formatters: use `get_formatter_by_name()` and `get_formatter_for_filename()` from the `pygments.formatters` module for this purpose. Guessing lexers =============== If you don't know the content of the file, or you want to highlight a file whose extension is ambiguous, such as ``.html`` (which could contain plain HTML or some template tags), use these functions: .. sourcecode:: pycon >>> from pygments.lexers import guess_lexer, guess_lexer_for_filename >>> guess_lexer('#!/usr/bin/python\nprint "Hello World!"') >>> guess_lexer_for_filename('test.py', 'print "Hello World!"') `guess_lexer()` passes the given content to the lexer classes' `analyse_text()` method and returns the one for which it returns the highest number. All lexers have two different filename pattern lists: the primary and the secondary one. The `get_lexer_for_filename()` function only uses the primary list, whose entries are supposed to be unique among all lexers. `guess_lexer_for_filename()`, however, will first loop through all lexers and look at the primary and secondary filename patterns if the filename matches. If only one lexer matches, it is returned, else the guessing mechanism of `guess_lexer()` is used with the matching lexers. As usual, keyword arguments to these functions are given to the created lexer as options. Command line usage ================== You can use Pygments from the command line, using the `pygmentize` script:: $ pygmentize test.py will highlight the Python file test.py using ANSI escape sequences (a.k.a. terminal colors) and print the result to standard output. To output HTML, use the ``-f`` option:: $ pygmentize -f html -o test.html test.py to write an HTML-highlighted version of test.py to the file test.html. Note that it will only be a snippet of HTML, if you want a full HTML document, use the "full" option:: $ pygmentize -f html -O full -o test.html test.py This will produce a full HTML document with included stylesheet. A style can be selected with ``-O style=``. If you need a stylesheet for an existing HTML file using Pygments CSS classes, it can be created with:: $ pygmentize -S default -f html > style.css where ``default`` is the style name. More options and tricks and be found in the `command line reference `_. Pygments-1.6/docs/src/tokens.txt0000644000175000017500000002310011726741461016032 0ustar piotrpiotr.. -*- mode: rst -*- ============== Builtin Tokens ============== Inside the `pygments.token` module, there is a special object called `Token` that is used to create token types. 
You can create a new token type by accessing an attribute of `Token`: .. sourcecode:: pycon >>> from pygments.token import Token >>> Token.String Token.String >>> Token.String is Token.String True Note that tokens are singletons so you can use the ``is`` operator for comparing token types. As of Pygments 0.7 you can also use the ``in`` operator to perform set tests: .. sourcecode:: pycon >>> from pygments.token import Comment >>> Comment.Single in Comment True >>> Comment in Comment.Multi False This can be useful in `filters`_ and if you write lexers on your own without using the base lexers. You can also split a token type into a hierarchy, and get the parent of it: .. sourcecode:: pycon >>> String.split() [Token, Token.Literal, Token.Literal.String] >>> String.parent Token.Literal In principle, you can create an unlimited number of token types but nobody can guarantee that a style would define style rules for a token type. Because of that, Pygments proposes some global token types defined in the `pygments.token.STANDARD_TYPES` dict. For some tokens aliases are already defined: .. sourcecode:: pycon >>> from pygments.token import String >>> String Token.Literal.String Inside the `pygments.token` module the following aliases are defined: ============= ============================ ==================================== `Text` `Token.Text` for any type of text data `Whitespace` `Token.Text.Whitespace` for specially highlighted whitespace `Error` `Token.Error` represents lexer errors `Other` `Token.Other` special token for data not matched by a parser (e.g. HTML markup in PHP code) `Keyword` `Token.Keyword` any kind of keywords `Name` `Token.Name` variable/function names `Literal` `Token.Literal` Any literals `String` `Token.Literal.String` string literals `Number` `Token.Literal.Number` number literals `Operator` `Token.Operator` operators (``+``, ``not``...) `Punctuation` `Token.Punctuation` punctuation (``[``, ``(``...) `Comment` `Token.Comment` any kind of comments `Generic` `Token.Generic` generic tokens (have a look at the explanation below) ============= ============================ ==================================== The `Whitespace` token type is new in Pygments 0.8. It is used only by the `VisibleWhitespaceFilter` currently. Normally you just create token types using the already defined aliases. For each of those token aliases, a number of subtypes exists (excluding the special tokens `Token.Text`, `Token.Error` and `Token.Other`) The `is_token_subtype()` function in the `pygments.token` module can be used to test if a token type is a subtype of another (such as `Name.Tag` and `Name`). (This is the same as ``Name.Tag in Name``. The overloaded `in` operator was newly introduced in Pygments 0.7, the function still exists for backwards compatiblity.) With Pygments 0.7, it's also possible to convert strings to token types (for example if you want to supply a token from the command line): .. sourcecode:: pycon >>> from pygments.token import String, string_to_tokentype >>> string_to_tokentype("String") Token.Literal.String >>> string_to_tokentype("Token.Literal.String") Token.Literal.String >>> string_to_tokentype(String) Token.Literal.String Keyword Tokens ============== `Keyword` For any kind of keyword (especially if it doesn't match any of the subtypes of course). `Keyword.Constant` For keywords that are constants (e.g. ``None`` in future Python versions). `Keyword.Declaration` For keywords used for variable declaration (e.g. ``var`` in some programming languages like JavaScript). 
`Keyword.Namespace`
    For keywords used for namespace declarations (e.g. ``import`` in Python
    and Java and ``package`` in Java).

`Keyword.Pseudo`
    For keywords that aren't really keywords (e.g. ``None`` in old Python
    versions).

`Keyword.Reserved`
    For reserved keywords.

`Keyword.Type`
    For builtin types that can't be used as identifiers (e.g. ``int``,
    ``char`` etc. in C).


Name Tokens
===========

`Name`
    For any name (variable names, function names, classes).

`Name.Attribute`
    For all attributes (e.g. in HTML tags).

`Name.Builtin`
    Builtin names; names that are available in the global namespace.

`Name.Builtin.Pseudo`
    Builtin names that are implicit (e.g. ``self`` in Ruby, ``this`` in Java).

`Name.Class`
    Class names. Because no lexer can know if a name is a class or a function
    or something else, this token is meant for class declarations.

`Name.Constant`
    Token type for constants. In some languages you can recognise a token by
    the way it's defined (the value after a ``const`` keyword for example). In
    other languages constants are uppercase by definition (Ruby).

`Name.Decorator`
    Token type for decorators. Decorators are syntactic elements in the
    Python language. Similar syntax elements exist in C# and Java.

`Name.Entity`
    Token type for special entities (e.g. ``&nbsp;`` in HTML).

`Name.Exception`
    Token type for exception names (e.g. ``RuntimeError`` in Python). Some
    languages define exceptions in the function signature (Java). You can
    highlight the name of that exception using this token then.

`Name.Function`
    Token type for function names.

`Name.Label`
    Token type for label names (e.g. in languages that support ``goto``).

`Name.Namespace`
    Token type for namespaces (e.g. import paths in Java/Python), names
    following the ``module``/``namespace`` keyword in other languages.

`Name.Other`
    Other names. Normally unused.

`Name.Tag`
    Tag names (in HTML/XML markup or configuration files).

`Name.Variable`
    Token type for variables. Some languages have prefixes for variable names
    (PHP, Ruby, Perl). You can highlight them using this token.

`Name.Variable.Class`
    Same as `Name.Variable` but for class variables (also static variables).

`Name.Variable.Global`
    Same as `Name.Variable` but for global variables (used in Ruby, for
    example).

`Name.Variable.Instance`
    Same as `Name.Variable` but for instance variables.


Literals
========

`Literal`
    For any literal (if not further defined).

`Literal.Date`
    For date literals (e.g. ``42d`` in Boo).

`String`
    For any string literal.

`String.Backtick`
    Token type for strings enclosed in backticks.

`String.Char`
    Token type for single characters (e.g. Java, C).

`String.Doc`
    Token type for documentation strings (for example Python).

`String.Double`
    Double quoted strings.

`String.Escape`
    Token type for escape sequences in strings.

`String.Heredoc`
    Token type for "heredoc" strings (e.g. in Ruby or Perl).

`String.Interpol`
    Token type for interpolated parts in strings (e.g. ``#{foo}`` in Ruby).

`String.Other`
    Token type for any other strings (for example ``%q{foo}`` string
    constructs in Ruby).

`String.Regex`
    Token type for regular expression literals (e.g. ``/foo/`` in JavaScript).

`String.Single`
    Token type for single quoted strings.

`String.Symbol`
    Token type for symbols (e.g. ``:foo`` in LISP or Ruby).

`Number`
    Token type for any number literal.

`Number.Float`
    Token type for float literals (e.g. ``42.0``).

`Number.Hex`
    Token type for hexadecimal number literals (e.g. ``0xdeadbeef``).

`Number.Integer`
    Token type for integer literals (e.g. ``42``).
`Number.Integer.Long`
    Token type for long integer literals (e.g. ``42L`` in Python).

`Number.Oct`
    Token type for octal literals.


Operators
=========

`Operator`
    For any punctuation operator (e.g. ``+``, ``-``).

`Operator.Word`
    For any operator that is a word (e.g. ``not``).


Punctuation
===========

*New in Pygments 0.7.*

`Punctuation`
    For any punctuation which is not an operator (e.g. ``[``, ``(``...)


Comments
========

`Comment`
    Token type for any comment.

`Comment.Multiline`
    Token type for multiline comments.

`Comment.Preproc`
    Token type for preprocessor comments (also ``<?php``/``<%`` constructs).

If the ``encoding`` option of a lexer is set to ``"chardet"``, the `chardet
library <http://chardet.feedparser.org/>`__ is used to guess the encoding of
the input.

The "Short Names" field lists the identifiers that can be used with the
`get_lexer_by_name()` function.

These lexers are builtin and can be imported from `pygments.lexers`:

[builtin_lexer_docs]


Iterating over all lexers
-------------------------

*New in Pygments 0.6.*

To get all lexers (both the builtin and the plugin ones), you can use the
`get_all_lexers()` function from the `pygments.lexers` module:

.. sourcecode:: pycon

    >>> from pygments.lexers import get_all_lexers
    >>> i = get_all_lexers()
    >>> i.next()
    ('Diff', ('diff',), ('*.diff', '*.patch'), ('text/x-diff', 'text/x-patch'))
    >>> i.next()
    ('Delphi', ('delphi', 'objectpascal', 'pas', 'pascal'), ('*.pas',), ('text/x-pascal',))
    >>> i.next()
    ('XML+Ruby', ('xml+erb', 'xml+ruby'), (), ())

As you can see, the return value is an iterator which yields tuples in the
form ``(name, aliases, filetypes, mimetypes)``.
Pygments-1.6/docs/src/lexerdevelopment.txt0000644000175000017500000005153211726741461020113 0ustar piotrpiotr.. -*- mode: rst -*-

====================
Write your own lexer
====================

If a lexer for your favorite language is missing in the Pygments package, you
can easily write your own and extend Pygments.

All you need can be found inside the `pygments.lexer` module. As you can read
in the `API documentation <api.txt>`_, a lexer is a class that is initialized
with some keyword arguments (the lexer options) and that provides a
`get_tokens_unprocessed()` method which is given a string or unicode object
with the data to parse.

The `get_tokens_unprocessed()` method must return an iterator or iterable
containing tuples in the form ``(index, token, value)``. Normally you don't
need to do this since there are numerous base lexers you can subclass.


RegexLexer
==========

A very powerful (but quite easy to use) lexer is the `RegexLexer`. This lexer
base class allows you to define lexing rules in terms of *regular expressions*
for different *states*.

States are groups of regular expressions that are matched against the input
string at the *current position*. If one of these expressions matches, a
corresponding action is performed (normally yielding a token with a specific
type), the current position is set to where the last match ended and the
matching process continues with the first regex of the current state.

Lexer states are kept in a state stack: each time a new state is entered, the
new state is pushed onto the stack. The most basic lexers (like the
`DiffLexer`) just need one state.

Each state is defined as a list of tuples in the form (`regex`, `action`,
`new_state`) where the last item is optional. In the most basic form, `action`
is a token type (like `Name.Builtin`). That means: When `regex` matches, emit
a token with the match text and type `tokentype` and push `new_state` on the
state stack. If the new state is ``'#pop'``, the topmost state is popped from
the stack instead.
(To pop more than one state, use ``'#pop:2'`` and so on.) ``'#push'`` is a
synonym for pushing the current state on the stack.

The following example shows the `DiffLexer` from the builtin lexers. Note that
it contains some additional attributes `name`, `aliases` and `filenames` which
aren't required for a lexer. They are used by the builtin lexer lookup
functions.

.. sourcecode:: python

    from pygments.lexer import RegexLexer
    from pygments.token import *

    class DiffLexer(RegexLexer):
        name = 'Diff'
        aliases = ['diff']
        filenames = ['*.diff']

        tokens = {
            'root': [
                (r' .*\n', Text),
                (r'\+.*\n', Generic.Inserted),
                (r'-.*\n', Generic.Deleted),
                (r'@.*\n', Generic.Subheading),
                (r'Index.*\n', Generic.Heading),
                (r'=.*\n', Generic.Heading),
                (r'.*\n', Text),
            ]
        }

As you can see this lexer only uses one state. When the lexer starts scanning
the text, it first checks if the current character is a space. If this is true
it scans everything until newline and returns the parsed data as a `Text`
token.

If this rule doesn't match, it checks if the current char is a plus sign. And
so on.

If no rule matches at the current position, the current char is emitted as an
`Error` token that indicates a parsing error, and the position is increased
by 1.


Regex Flags
===========

You can either define regex flags in the regex (``r'(?x)foo bar'``) or by
adding a `flags` attribute to your lexer class. If no attribute is defined,
it defaults to `re.MULTILINE`. For more information about regular expression
flags see the `regular expressions`_ help page in the Python documentation.

.. _regular expressions: http://docs.python.org/lib/re-syntax.html


Scanning multiple tokens at once
================================

Here is a more complex lexer that highlights INI files. INI files consist of
sections, comments and key = value pairs:

.. sourcecode:: python

    from pygments.lexer import RegexLexer, bygroups
    from pygments.token import *

    class IniLexer(RegexLexer):
        name = 'INI'
        aliases = ['ini', 'cfg']
        filenames = ['*.ini', '*.cfg']

        tokens = {
            'root': [
                (r'\s+', Text),
                (r';.*?$', Comment),
                (r'\[.*?\]$', Keyword),
                (r'(.*?)(\s*)(=)(\s*)(.*?)$',
                 bygroups(Name.Attribute, Text, Operator, Text, String))
            ]
        }

The lexer first looks for whitespace, comments and section names. Later it
looks for a line that looks like a key, value pair, separated by an ``'='``
sign, and optional whitespace.

The `bygroups` helper makes sure that each group is yielded with a different
token type. First the `Name.Attribute` token, then a `Text` token for the
optional whitespace, after that an `Operator` token for the equals sign. Then
a `Text` token for the whitespace again. The rest of the line is returned as
`String`.

Note that for this to work, every part of the match must be inside a capturing
group (a ``(...)``), and there must not be any nested capturing groups. If you
nevertheless need a group, use a non-capturing group defined using this
syntax: ``r'(?:some|words|here)'`` (note the ``?:`` after the beginning
parenthesis).

If you find yourself needing a capturing group inside the regex which
shouldn't be part of the output but is used in the regular expressions for
backreferencing (eg: ``r'(<(foo|bar)>)(.*?)(</\2>)'``), you can pass `None`
to the `bygroups` function and that group will be skipped in the output.


Changing states
===============

Many lexers need multiple states to work as expected. For example, some
languages allow multiline comments to be nested. Since this is a recursive
pattern it's impossible to lex just using regular expressions.
Here is the solution:

.. sourcecode:: python

    from pygments.lexer import RegexLexer
    from pygments.token import *

    class ExampleLexer(RegexLexer):
        name = 'Example Lexer with states'

        tokens = {
            'root': [
                (r'[^/]+', Text),
                (r'/\*', Comment.Multiline, 'comment'),
                (r'//.*?$', Comment.Singleline),
                (r'/', Text)
            ],
            'comment': [
                (r'[^*/]', Comment.Multiline),
                (r'/\*', Comment.Multiline, '#push'),
                (r'\*/', Comment.Multiline, '#pop'),
                (r'[*/]', Comment.Multiline)
            ]
        }

This lexer starts lexing in the ``'root'`` state. It tries to match as much as
possible until it finds a slash (``'/'``). If the next character after the
slash is a star (``'*'``) the `RegexLexer` sends those two characters to the
output stream marked as `Comment.Multiline` and continues parsing with the
rules defined in the ``'comment'`` state.

If there wasn't a star after the slash, the `RegexLexer` checks if it's a
singleline comment (eg: followed by a second slash). If this also wasn't the
case it must be a single slash (the separate regex for a single slash must
also be given, else the slash would be marked as an error token).

Inside the ``'comment'`` state, we do the same thing again. Scan until the
lexer finds a star or slash. If it's the opening of a multiline comment, push
the ``'comment'`` state on the stack and continue scanning, again in the
``'comment'`` state. Else, check if it's the end of the multiline comment. If
yes, pop one state from the stack.

Note: If you pop from an empty stack you'll get an `IndexError`. (There is an
easy way to prevent this from happening: don't ``'#pop'`` in the root state.)

If the `RegexLexer` encounters a newline that is flagged as an error token,
the stack is emptied and the lexer continues scanning in the ``'root'`` state.
This helps produce error-tolerant highlighting for erroneous input, e.g. when
a single-line string is not closed.


Advanced state tricks
=====================

There are a few more things you can do with states:

- You can push multiple states onto the stack if you give a tuple instead of
  a simple string as the third item in a rule tuple. For example, if you want
  to match a comment containing a directive, something like::

      /* <processing directive>    rest of comment */

  you can use this rule:

  .. sourcecode:: python

      tokens = {
          'root': [
              (r'/\* <', Comment, ('comment', 'directive')),
              ...
          ],
          'directive': [
              (r'[^>]*', Comment.Directive),
              (r'>', Comment, '#pop'),
          ],
          'comment': [
              (r'[^*]+', Comment),
              (r'\*/', Comment, '#pop'),
              (r'\*', Comment),
          ]
      }

  When this encounters the above sample, first ``'comment'`` and
  ``'directive'`` are pushed onto the stack, then the lexer continues in the
  directive state until it finds the closing ``>``, then it continues in the
  comment state until the closing ``*/``. Then, both states are popped from
  the stack again and lexing continues in the root state.

  *New in Pygments 0.9:* The tuple can contain the special ``'#push'`` and
  ``'#pop'`` (but not ``'#pop:n'``) directives.

- You can include the rules of a state in the definition of another. This is
  done by using `include` from `pygments.lexer`:

  .. sourcecode:: python
      from pygments.lexer import RegexLexer, bygroups, include
      from pygments.token import *

      class ExampleLexer(RegexLexer):
          tokens = {
              'comments': [
                  (r'/\*.*?\*/', Comment),
                  (r'//.*?\n', Comment),
              ],
              'root': [
                  include('comments'),
                  (r'(function )(\w+)( {)',
                   bygroups(Keyword, Name, Keyword), 'function'),
                  (r'.', Text),
              ],
              'function': [
                  (r'[^}/]+', Text),
                  include('comments'),
                  (r'/', Text),
                  (r'}', Keyword, '#pop'),
              ]
          }

  This is a hypothetical lexer for a language that consists of functions and
  comments. Because comments can occur at toplevel and in functions, we need
  rules for comments in both states. As you can see, the `include` helper
  saves repeating rules that occur more than once (in this example, the state
  ``'comments'`` will never be entered by the lexer, as it's only there to be
  included in ``'root'`` and ``'function'``).

- Sometimes, you may want to "combine" a state from existing ones. This is
  possible with the `combined` helper from `pygments.lexer`.

  If you, instead of a new state, write ``combined('state1', 'state2')`` as
  the third item of a rule tuple, a new anonymous state will be formed from
  state1 and state2 and if the rule matches, the lexer will enter this state.

  This is not used very often, but can be helpful in some cases, such as the
  `PythonLexer`'s string literal processing.

- If you want your lexer to start lexing in a different state you can modify
  the stack by overloading the `get_tokens_unprocessed()` method:

  .. sourcecode:: python

      from pygments.lexer import RegexLexer

      class MyLexer(RegexLexer):
          tokens = {...}

          def get_tokens_unprocessed(self, text):
              stack = ['root', 'otherstate']
              for item in RegexLexer.get_tokens_unprocessed(self, text, stack):
                  yield item

  Some lexers like the `PhpLexer` use this to make the leading ``<?php``
  preprocessor comment markup optional.


Using multiple lexers
=====================

Sometimes you want parts of the input to be highlighted by a different lexer.
For this, you can replace the token type in a rule tuple with another lexer,
wrapped in the `using()` helper from `pygments.lexer`; the text matched by
that rule (or by one of its groups, together with `bygroups`) is then lexed
with the given lexer:

.. sourcecode:: python

        'root': [
            ...,
            (r'<\s*script\s*', Name.Tag, ('script-content', 'tag')),
            ...,
        ],
        'script-content': [
            (r'(.+?)(<\s*/\s*script\s*>)',
             bygroups(using(JavascriptLexer), Name.Tag),
             '#pop'),
        ]
    }

Here the content up to the ``</script>`` end tag is processed by the
`JavascriptLexer`, while the end tag is yielded as a normal token with the
`Name.Tag` type.

As an additional goodie, if the lexer class is replaced by `this` (imported
from `pygments.lexer`), the "other" lexer will be the current one (because you
cannot refer to the current class within the code that runs at class
definition time).

Also note the ``(r'<\s*script\s*', Name.Tag, ('script-content', 'tag'))``
rule. Here, two states are pushed onto the state stack, ``'script-content'``
and ``'tag'``. That means that first ``'tag'`` is processed, which will parse
attributes and the closing ``>``, then the ``'tag'`` state is popped and the
next state on top of the stack will be ``'script-content'``.

The `using()` helper has a special keyword argument, `state`, which works as
follows: if given, the lexer to use initially is not in the ``"root"`` state,
but in the state given by this argument. This *only* works with a
`RegexLexer`.

Any other keyword arguments passed to `using()` are added to the keyword
arguments used to create the lexer.


Delegating Lexer
================

Another approach for nested lexers is the `DelegatingLexer` which is for
example used for the template engine lexers. It takes two lexers as arguments
on initialisation: a `root_lexer` and a `language_lexer`.

The input is processed as follows: First, the whole text is lexed with the
`language_lexer`. All tokens yielded with a type of ``Other`` are then
concatenated and given to the `root_lexer`.
The language tokens of the `language_lexer` are then inserted into the
`root_lexer`'s token stream at the appropriate positions.

.. sourcecode:: python

    from pygments.lexer import DelegatingLexer
    from pygments.lexers.web import HtmlLexer, PhpLexer

    class HtmlPhpLexer(DelegatingLexer):
        def __init__(self, **options):
            super(HtmlPhpLexer, self).__init__(HtmlLexer, PhpLexer, **options)

This procedure ensures that e.g. HTML with template tags in it is highlighted
correctly even if the template tags are put into HTML tags or attributes.

If you want to change the needle token ``Other`` to something else, you can
give the lexer another token type as the third parameter:

.. sourcecode:: python

    DelegatingLexer.__init__(MyLexer, OtherLexer, Text, **options)


Callbacks
=========

Sometimes the grammar of a language is so complex that a lexer would be unable
to parse it just by using regular expressions and stacks.

For this, the `RegexLexer` allows callbacks to be given in rule tuples,
instead of token types (`bygroups` and `using` are nothing else but
preimplemented callbacks). The callback must be a function taking two
arguments:

* the lexer itself
* the match object for the last matched rule

The callback must then return an iterable of (or simply yield) ``(index,
tokentype, value)`` tuples, which are then just passed through by
`get_tokens_unprocessed()`. The ``index`` here is the position of the token in
the input string, ``tokentype`` is the normal token type (like
`Name.Builtin`), and ``value`` the associated part of the input string.

You can see an example here:

.. sourcecode:: python

    from pygments.lexer import RegexLexer
    from pygments.token import Generic

    class HypotheticLexer(RegexLexer):

        def headline_callback(lexer, match):
            equal_signs = match.group(1)
            text = match.group(2)
            yield match.start(), Generic.Headline, equal_signs + text + equal_signs

        tokens = {
            'root': [
                (r'(=+)(.*?)(\1)', headline_callback)
            ]
        }

If the regex for the `headline_callback` matches, the function is called with
the match object. Note that after the callback is done, processing continues
normally, that is, after the end of the previous match. The callback has no
possibility to influence the position.

There are not really any simple examples for lexer callbacks, but you can see
them in action e.g. in the `compiled.py`_ source code in the `CLexer` and
`JavaLexer` classes.

.. _compiled.py: http://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/compiled.py


The ExtendedRegexLexer class
============================

The `RegexLexer`, even with callbacks, unfortunately isn't powerful enough for
the funky syntax rules of some languages that will go unnamed, such as Ruby.

But fear not; even then you don't have to abandon the regular expression
approach. For Pygments has a subclass of `RegexLexer`, the
`ExtendedRegexLexer`. All features known from RegexLexers are available here
too, and the tokens are specified in exactly the same way, *except* for one
detail:

The `get_tokens_unprocessed()` method holds its internal state data not as
local variables, but in an instance of the `pygments.lexer.LexerContext`
class, and that instance is passed to callbacks as a third argument. This
means that you can modify the lexer state in callbacks.
The `LexerContext` class has the following members:

* `text` -- the input text
* `pos` -- the current starting position that is used for matching regexes
* `stack` -- a list containing the state stack
* `end` -- the maximum position to which regexes are matched, this defaults
  to the length of `text`

Additionally, the `get_tokens_unprocessed()` method can be given a
`LexerContext` instead of a string and will then process this context instead
of creating a new one for the string argument.

Note that because you can set the current position to anything in the
callback, it won't automatically be set by the caller after the callback is
finished. For example, this is how the hypothetical lexer above would be
written with the `ExtendedRegexLexer`:

.. sourcecode:: python

    from pygments.lexer import ExtendedRegexLexer
    from pygments.token import Generic

    class ExHypotheticLexer(ExtendedRegexLexer):

        def headline_callback(lexer, match, ctx):
            equal_signs = match.group(1)
            text = match.group(2)
            yield match.start(), Generic.Headline, equal_signs + text + equal_signs
            ctx.pos = match.end()

        tokens = {
            'root': [
                (r'(=+)(.*?)(\1)', headline_callback)
            ]
        }

This might sound confusing (and it can really be). But it is needed, and for
an example look at the Ruby lexer in `agile.py`_.

.. _agile.py: https://bitbucket.org/birkenfeld/pygments-main/src/tip/pygments/lexers/agile.py


Filtering Token Streams
=======================

Some languages ship a lot of builtin functions (for example PHP). The total
amount of those functions differs from system to system because not everybody
has every extension installed. In the case of PHP there are over 3000 builtin
functions. That's an incredibly huge number of functions, far more than you
can put into a regular expression.

But because only `Name` tokens can be function names it's solvable by
overriding the ``get_tokens_unprocessed()`` method. The following lexer
subclasses the `PythonLexer` so that it highlights some additional names as
pseudo keywords:

.. sourcecode:: python

    from pygments.lexers.agile import PythonLexer
    from pygments.token import Name, Keyword

    class MyPythonLexer(PythonLexer):
        EXTRA_KEYWORDS = ['foo', 'bar', 'foobar', 'barfoo', 'spam', 'eggs']

        def get_tokens_unprocessed(self, text):
            for index, token, value in PythonLexer.get_tokens_unprocessed(self, text):
                if token is Name and value in self.EXTRA_KEYWORDS:
                    yield index, Keyword.Pseudo, value
                else:
                    yield index, token, value

The `PhpLexer` and `LuaLexer` use this method to resolve builtin functions.

**Note** Do not confuse this with the `filter`_ system.

.. _filter: filters.txt
Pygments-1.6/docs/src/filters.txt0000644000175000017500000000232611726741461016176 0ustar piotrpiotr.. -*- mode: rst -*-

=======
Filters
=======

*New in Pygments 0.7.*

You can filter token streams coming from lexers to improve or annotate the
output. For example, you can highlight special words in comments, convert
keywords to upper or lowercase to enforce a style guide etc.

To apply a filter, you can use the `add_filter()` method of a lexer:

.. sourcecode:: pycon

    >>> from pygments.lexers import PythonLexer
    >>> l = PythonLexer()
    >>> # add a filter given by a string and options
    >>> l.add_filter('codetagify', case='lower')
    >>> l.filters
    [<pygments.filters.CodeTagFilter object at 0xb785decc>]
    >>> from pygments.filters import KeywordCaseFilter
    >>> # or give an instance
    >>> l.add_filter(KeywordCaseFilter(case='lower'))

The `add_filter()` method takes keyword arguments which are forwarded to the
constructor of the filter.
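
For instance, using the builtin ``whitespace`` filter (described in the
builtin filters list below), the options given here end up in that filter's
constructor:

.. sourcecode:: pycon

    >>> l.add_filter('whitespace', spaces=True, tabs=True)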
To get a list of all registered filters by name, you can use the
`get_all_filters()` function from the `pygments.filters` module that returns
an iterable for all known filters.

If you want to write your own filter, have a look at `Write your own
filter`_.

.. _Write your own filter: filterdevelopment.txt


Builtin Filters
===============

[builtin_filter_docs]
Pygments-1.6/docs/build/0000755000175000017500000000000012103430105014256 5ustar piotrpiotrPygments-1.6/docs/build/authors.html0000644000175000017500000003073412073317553016655 0ustar piotrpiotr Authors — Pygments

Pygments

Authors

« Back To Index

Pygments is written and maintained by Georg Brandl <georg@python.org>.

Major developers are Tim Hatch <tim@timhatch.com> and Armin Ronacher <armin.ronacher@active-4.com>.

Other contributors, listed alphabetically, are:

  • Sam Aaron -- Ioke lexer
  • Kumar Appaiah -- Debian control lexer
  • Ali Afshar -- image formatter
  • Andreas Amann -- AppleScript lexer
  • Jeffrey Arnold -- R/S, Rd, BUGS, Jags, and Stan lexers
  • Jeremy Ashkenas -- CoffeeScript lexer
  • Stefan Matthias Aust -- Smalltalk lexer
  • Ben Bangert -- Mako lexers
  • Max Battcher -- Darcs patch lexer
  • Paul Baumgart, 280 North, Inc. -- Objective-J lexer
  • Michael Bayer -- Myghty lexers
  • John Benediktsson -- Factor lexer
  • Christopher Bertels -- Fancy lexer
  • Jarrett Billingsley -- MiniD lexer
  • Adam Blinkinsop -- Haskell, Redcode lexers
  • Frits van Bommel -- assembler lexers
  • Pierre Bourdon -- bugfixes
  • Hiram Chirino -- Scaml and Jade lexers
  • Ian Cooper -- VGL lexer
  • Leaf Corcoran -- MoonScript lexer
  • Christopher Creutzig -- MuPAD lexer
  • Pete Curry -- bugfixes
  • Owen Durni -- haXe lexer
  • Nick Efford -- Python 3 lexer
  • Sven Efftinge -- Xtend lexer
  • Artem Egorkine -- terminal256 formatter
  • James H. Fisher -- PostScript lexer
  • Carlos Galdino -- Elixir and Elixir Console lexers
  • Michael Galloy -- IDL lexer
  • Naveen Garg -- Autohotkey lexer
  • Laurent Gautier -- R/S lexer
  • Alex Gaynor -- PyPy log lexer
  • Alain Gilbert -- TypeScript lexer
  • Bertrand Goetzmann -- Groovy lexer
  • Krzysiek Goj -- Scala lexer
  • Matt Good -- Genshi, Cheetah lexers
  • Patrick Gotthardt -- PHP namespaces support
  • Olivier Guibe -- Asymptote lexer
  • Jordi Gutiérrez Hermoso -- Octave lexer
  • Martin Harriman -- SNOBOL lexer
  • Matthew Harrison -- SVG formatter
  • Steven Hazel -- Tcl lexer
  • Aslak Hellesøy -- Gherkin lexer
  • Greg Hendershott -- Racket lexer
  • David Hess, Fish Software, Inc. -- Objective-J lexer
  • Varun Hiremath -- Debian control lexer
  • Doug Hogan -- Mscgen lexer
  • Ben Hollis -- Mason lexer
  • Alastair Houghton -- Lexer inheritance facility
  • Tim Howard -- BlitzMax lexer
  • Ivan Inozemtsev -- Fantom lexer
  • Brian R. Jackson -- Tea lexer
  • Dennis Kaarsemaker -- sources.list lexer
  • Igor Kalnitsky -- vhdl lexer
  • Pekka Klärck -- Robot Framework lexer
  • Eric Knibbe -- Lasso lexer
  • Adam Koprowski -- Opa lexer
  • Benjamin Kowarsch -- Modula-2 lexer
  • Alexander Kriegisch -- Kconfig and AspectJ lexers
  • Marek Kubica -- Scheme lexer
  • Jochen Kupperschmidt -- Markdown processor
  • Gerd Kurzbach -- Modelica lexer
  • Jon Larimer, Google Inc. -- Smali lexer
  • Olov Lassus -- Dart lexer
  • Sylvestre Ledru -- Scilab lexer
  • Mark Lee -- Vala lexer
  • Ben Mabey -- Gherkin lexer
  • Angus MacArthur -- QML lexer
  • Simone Margaritelli -- Hybris lexer
  • Kirk McDonald -- D lexer
  • Gordon McGregor -- SystemVerilog lexer
  • Stephen McKamey -- Duel/JBST lexer
  • Brian McKenna -- F# lexer
  • Charles McLaughlin -- Puppet lexer
  • Lukas Meuser -- BBCode formatter, Lua lexer
  • Paul Miller -- LiveScript lexer
  • Hong Minhee -- HTTP lexer
  • Michael Mior -- Awk lexer
  • Bruce Mitchener -- Dylan lexer rewrite
  • Reuben Morais -- SourcePawn lexer
  • Jon Morton -- Rust lexer
  • Paulo Moura -- Logtalk lexer
  • Mher Movsisyan -- DTD lexer
  • Ana Nelson -- Ragel, ANTLR, R console lexers
  • Nam T. Nguyen -- Monokai style
  • Jesper Noehr -- HTML formatter "anchorlinenos"
  • Mike Nolta -- Julia lexer
  • Jonas Obrist -- BBCode lexer
  • David Oliva -- Rebol lexer
  • Jon Parise -- Protocol buffers lexer
  • Ronny Pfannschmidt -- BBCode lexer
  • Benjamin Peterson -- Test suite refactoring
  • Dominik Picheta -- Nimrod lexer
  • Clément Prévost -- UrbiScript lexer
  • Kashif Rasul -- CUDA lexer
  • Justin Reidy -- MXML lexer
  • Norman Richards -- JSON lexer
  • Lubomir Rintel -- GoodData MAQL and CL lexers
  • Andre Roberge -- Tango style
  • Konrad Rudolph -- LaTeX formatter enhancements
  • Mario Ruggier -- Evoque lexers
  • Stou Sandalski -- NumPy, FORTRAN, tcsh and XSLT lexers
  • Matteo Sasso -- Common Lisp lexer
  • Joe Schafer -- Ada lexer
  • Ken Schutte -- Matlab lexers
  • Tassilo Schweyer -- Io, MOOCode lexers
  • Ted Shaw -- AutoIt lexer
  • Joerg Sieker -- ABAP lexer
  • Robert Simmons -- Standard ML lexer
  • Kirill Simonov -- YAML lexer
  • Alexander Smishlajev -- Visual FoxPro lexer
  • Steve Spigarelli -- XQuery lexer
  • Jerome St-Louis -- eC lexer
  • James Strachan -- Kotlin lexer
  • Tom Stuart -- Treetop lexer
  • Tiberius Teng -- default style overhaul
  • Jeremy Thurgood -- Erlang, Squid config lexers
  • Brian Tiffin -- OpenCOBOL lexer
  • Erick Tryzelaar -- Felix lexer
  • Daniele Varrazzo -- PostgreSQL lexers
  • Abe Voelker -- OpenEdge ABL lexer
  • Pepijn de Vos -- HTML formatter CTags support
  • Whitney Young -- ObjectiveC lexer
  • Matthias Vallentin -- Bro lexer
  • Nathan Weizenbaum -- Haml and Sass lexers
  • Dietmar Winkler -- Modelica lexer
  • Nils Winter -- Smalltalk lexer
  • Davy Wybiral -- Clojure lexer
  • Diego Zamboni -- CFengine3 lexer
  • Enrique Zamudio -- Ceylon lexer
  • Alex Zimin -- Nemerle lexer

Many thanks for all contributions!

Pygments-1.6/docs/build/plugins.html0000644000175000017500000002477412073317553016664 0ustar piotrpiotr Register Plugins — Pygments

Pygments

Register Plugins

« Back To Index

If you want to extend Pygments without hacking the sources, but want to use the lexer/formatter/style/filter lookup functions (lexers.get_lexer_by_name et al.), you can use setuptools entrypoints to add new lexers, formatters or styles as if they were in the Pygments core.

That means you can use your highlighter modules with the pygmentize script, which relies on the mentioned functions.

Entrypoints

Here is a list of setuptools entrypoints that Pygments understands:

pygments.lexers

This entrypoint is used for adding new lexers to the Pygments core. The names of the entrypoint values don't really matter; Pygments extracts required metadata from the class definition:

[pygments.lexers]
yourlexer = yourmodule:YourLexer

Note that you have to define name, aliases and filename attributes so that you can use the highlighter from the command line:

class YourLexer(...):
    name = 'Name Of Your Lexer'
    aliases = ['alias']
    filenames = ['*.ext']

pygments.formatters

You can use this entrypoint to add new formatters to Pygments. The name of an entrypoint item is the name of the formatter. If you prefix the name with a slash it's used as a filename pattern:

[pygments.formatters]
yourformatter = yourmodule:YourFormatter
/.ext = yourmodule:YourFormatter

pygments.styles

To add a new style you can use this entrypoint. The name of the entrypoint is the name of the style:

[pygments.styles]
yourstyle = yourmodule:YourStyle

pygments.filters

Use this entrypoint to register a new filter. The name of the entrypoint is the name of the filter:

[pygments.filters]
yourfilter = yourmodule:YourFilter

How To Use Entrypoints

This documentation doesn't explain how to use those entrypoints because this is covered in the setuptools documentation. That page should cover everything you need to write a plugin.
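
For orientation, a plugin's setup.py might look roughly like this (a sketch; the project name "yourpkg" and the module "yourmodule" are placeholders):

from setuptools import setup

setup(
    name='yourpkg',
    version='1.0',
    py_modules=['yourmodule'],
    entry_points="""
    [pygments.lexers]
    yourlexer = yourmodule:YourLexer
    """,
)

After installing such a package, get_lexer_by_name('alias') finds YourLexer just like any builtin lexer.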

Extending The Core

If you have written a Pygments plugin that is open source, please inform us about that. There is a high chance that we'll add it to the Pygments distribution.

Pygments-1.6/docs/build/quickstart.html0000644000175000017500000004546512073317553017375 0ustar piotrpiotr Introduction and Quickstart — Pygments

Pygments

Introduction and Quickstart

« Back To Index

Welcome to Pygments! This document explains the basic concepts and terms and gives a few examples of how to use the library.

Architecture

There are four types of components that work together to highlight a piece of code:

  • A lexer splits the source into tokens, fragments of the source that have a token type that determines what the text represents semantically (e.g., keyword, string, or comment). There is a lexer for every language or markup format that Pygments supports.
  • The token stream can be piped through filters, which usually modify the token types or text fragments, e.g. uppercasing all keywords.
  • A formatter then takes the token stream and writes it to an output file, in a format such as HTML, LaTeX or RTF.
  • While writing the output, a style determines how to highlight all the different token types. It maps them to attributes like "red and bold".

Example

Here is a small example for highlighting Python code:

from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import HtmlFormatter

code = 'print "Hello World"'
print highlight(code, PythonLexer(), HtmlFormatter())

which prints something like this:

<div class="highlight">
<pre><span class="k">print</span> <span class="s">&quot;Hello World&quot;</span></pre>
</div>

As you can see, Pygments uses CSS classes (by default, but you can change that) instead of inline styles in order to avoid outputting redundant style information over and over. A CSS stylesheet that contains all CSS classes possibly used in the output can be produced by:

print HtmlFormatter().get_style_defs('.highlight')

The argument to get_style_defs is used as an additional CSS selector: the output may look like this:

.highlight .k { color: #AA22FF; font-weight: bold }
.highlight .s { color: #BB4444 }
...

Options

The highlight() function supports a fourth argument called outfile; it must be a file object if given. The formatted output will then be written to this file instead of being returned as a string.
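
For example, this minimal sketch writes the HTML snippet from above to a file (out.html is just a placeholder name):

from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import HtmlFormatter

code = 'print "Hello World"'
with open('out.html', 'w') as outfile:
    highlight(code, PythonLexer(), HtmlFormatter(), outfile)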

Lexers and formatters both support options. They are given to them as keyword arguments either to the class or to the lookup method:

from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter

lexer = get_lexer_by_name("python", stripall=True)
formatter = HtmlFormatter(linenos=True, cssclass="source")
result = highlight(code, lexer, formatter)

This makes the lexer strip all leading and trailing whitespace from the input (stripall option), lets the formatter output line numbers (linenos option), and sets the wrapping <div>'s class to source (instead of highlight).

Important options include:

encoding : for lexers and formatters
Since Pygments uses Unicode strings internally, this determines which encoding will be used to convert to or from byte strings.
style : for formatters
The name of the style to use when writing the output.

For an overview of builtin lexers and formatters and their options, visit the lexer and formatters lists.

For documentation on filters, see this page.

Lexer and formatter lookup

If you want to look up a built-in lexer by its alias or a filename, you can use one of the following methods:

>>> from pygments.lexers import (get_lexer_by_name,
...     get_lexer_for_filename, get_lexer_for_mimetype)

>>> get_lexer_by_name('python')
<pygments.lexers.PythonLexer>

>>> get_lexer_for_filename('spam.rb')
<pygments.lexers.RubyLexer>

>>> get_lexer_for_mimetype('text/x-perl')
<pygments.lexers.PerlLexer>

All these functions accept keyword arguments; they will be passed to the lexer as options.

A similar API is available for formatters: use get_formatter_by_name() and get_formatter_for_filename() from the pygments.formatters module for this purpose.
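
For instance (a sketch analogous to the lexer lookups above; the exact reprs may differ):

>>> from pygments.formatters import (get_formatter_by_name,
...     get_formatter_for_filename)

>>> get_formatter_by_name('html')
<pygments.formatters.html.HtmlFormatter object at ...>

>>> get_formatter_for_filename('out.tex')
<pygments.formatters.latex.LatexFormatter object at ...>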

Guessing lexers

If you don't know the content of the file, or you want to highlight a file whose extension is ambiguous, such as .html (which could contain plain HTML or some template tags), use these functions:

>>> from pygments.lexers import guess_lexer, guess_lexer_for_filename

>>> guess_lexer('#!/usr/bin/python\nprint "Hello World!"')
<pygments.lexers.PythonLexer>

>>> guess_lexer_for_filename('test.py', 'print "Hello World!"')
<pygments.lexers.PythonLexer>

guess_lexer() passes the given content to the lexer classes' analyse_text() method and returns the one for which it returns the highest number.

All lexers have two different filename pattern lists: the primary and the secondary one. The get_lexer_for_filename() function only uses the primary list, whose entries are supposed to be unique among all lexers. guess_lexer_for_filename(), however, will first loop through all lexers and look at the primary and secondary filename patterns if the filename matches. If only one lexer matches, it is returned, else the guessing mechanism of guess_lexer() is used with the matching lexers.

As usual, keyword arguments to these functions are given to the created lexer as options.

Command line usage

You can use Pygments from the command line, using the pygmentize script:

$ pygmentize test.py

will highlight the Python file test.py using ANSI escape sequences (a.k.a. terminal colors) and print the result to standard output.

To output HTML, use the -f option:

$ pygmentize -f html -o test.html test.py

to write an HTML-highlighted version of test.py to the file test.html. Note that it will only be a snippet of HTML, if you want a full HTML document, use the "full" option:

$ pygmentize -f html -O full -o test.html test.py

This will produce a full HTML document with included stylesheet.

A style can be selected with -O style=<name>.

If you need a stylesheet for an existing HTML file using Pygments CSS classes, it can be created with:

$ pygmentize -S default -f html > style.css

where default is the style name.

More options and tricks can be found in the command line reference.

Pygments-1.6/docs/build/styles.html0000644000175000017500000003605612073317553016522 0ustar piotrpiotr Styles — Pygments

Pygments

Styles

« Back To Index

Pygments comes with some builtin styles that work for both the HTML and LaTeX formatter.

The builtin styles can be looked up with the get_style_by_name function:

>>> from pygments.styles import get_style_by_name
>>> get_style_by_name('colorful')
<class 'pygments.styles.colorful.ColorfulStyle'>

You can pass an instance of a Style class to a formatter as the style option in form of a string:

>>> from pygments.styles import get_style_by_name
>>> HtmlFormatter(style='colorful').style
<class 'pygments.styles.colorful.ColorfulStyle'>

Or you can also import your own style (which must be a subclass of pygments.style.Style) and pass it to the formatter:

>>> from yourapp.yourmodule import YourStyle
>>> HtmlFormatter(style=YourStyle).style
<class 'yourapp.yourmodule.YourStyle'>

Creating Own Styles

So, how to create a style? All you have to do is to subclass Style and define some styles:

from pygments.style import Style
from pygments.token import Keyword, Name, Comment, String, Error, \
     Number, Operator, Generic

class YourStyle(Style):
    default_style = ""
    styles = {
        Comment:                'italic #888',
        Keyword:                'bold #005',
        Name:                   '#f00',
        Name.Function:          '#0f0',
        Name.Class:             'bold #0f0',
        String:                 'bg:#eee #111'
    }

That's it. There are just a few rules. When you define a style for Name the style automatically also affects Name.Function and so on. If you defined 'bold' and you don't want boldface for a subtoken, use 'nobold'.
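
For instance, building on the class above (the colors are placeholders), Name.Function can opt out of the bold it would inherit from Name:

styles = {
    Name:          'bold #f00',
    Name.Function: 'nobold #0f0'
}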

(Philosophy: the styles aren't written in CSS syntax since this way they can be used for a variety of formatters.)

default_style is the style inherited by all token types.

To make the style usable for Pygments, you must

  • either register it as a plugin (see the plugin docs)
  • or drop it into the styles subpackage of your Pygments distribution, one style class per style, where the file name is the style name and the class name is StylenameClass. For example, if your style should be called "mondrian", name the class MondrianStyle, put it into the file mondrian.py and this file into the pygments.styles subpackage directory, as in the sketch below.
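
Following that naming rule, a minimal mondrian.py could look like this (a sketch; the single style rule is a placeholder):

from pygments.style import Style
from pygments.token import Keyword

class MondrianStyle(Style):
    default_style = ''
    styles = {
        Keyword: 'bold #005'
    }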

Style Rules

Here a small overview of all allowed styles:

bold
render text as bold
nobold
don't render text as bold (to prevent subtokens being highlighted bold)
italic
render text italic
noitalic
don't render text as italic
underline
render text underlined
nounderline
don't render text underlined
bg:
transparent background
bg:#000000
background color (black)
border:
no border
border:#ffffff
border color (white)
#ff0000
text color (red)
noinherit
don't inherit styles from supertoken

Note that there may not be a space between bg: and the color value since the style definition string is split at whitespace. Also, using named colors is not allowed since the supported color names vary for different formatters.

Furthermore, not all formatters might support every style.

Builtin Styles

Pygments ships some builtin styles which are maintained by the Pygments team.

To get a list of known styles you can use this snippet:

>>> from pygments.styles import STYLE_MAP
>>> STYLE_MAP.keys()
['default', 'emacs', 'friendly', 'colorful']

Getting a list of available styles

New in Pygments 0.6.

Because it could be that a plugin registered a style, there is a way to iterate over all styles:

>>> from pygments.styles import get_all_styles
>>> styles = list(get_all_styles())
Pygments-1.6/docs/build/rstdirective.html0000644000175000017500000001642712073317553017706 0ustar piotrpiotr Using Pygments in ReST documents — Pygments

Pygments

Using Pygments in ReST documents

« Back To Index

Many Python people use ReST for documenting their sourcecode, programs, scripts et cetera. This also means that documentation often includes sourcecode samples or snippets.

You can easily enable Pygments support for your ReST texts using a custom directive -- this is also how this documentation displays source code.

From Pygments 0.9, the directive is shipped in the distribution as external/rst-directive.py. You can copy and adapt this code to your liking.

Pygments-1.6/docs/build/filterdevelopment.html0000644000175000017500000003057012073317553020722 0ustar piotrpiotr Write your own filter — Pygments

Pygments

Write your own filter

« Back To Index

New in Pygments 0.7.

Writing your own filters is very easy. All you have to do is subclass the Filter class and override the filter method. Additionally, a filter is instantiated with some keyword arguments you can use to adjust the behavior of your filter.

Subclassing Filters

As an example, we write a filter that converts all Name.Function tokens to normal Name tokens to make the output less colorful.

from pygments.util import get_bool_opt
from pygments.token import Name
from pygments.filter import Filter

class UncolorFilter(Filter):

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.class_too = get_bool_opt(options, 'classtoo')

    def filter(self, lexer, stream):
        for ttype, value in stream:
            if ttype is Name.Function or (self.class_too and
                                          ttype is Name.Class):
                ttype = Name
            yield ttype, value

Some notes on the lexer argument: that can be quite confusing since it doesn't need to be a lexer instance. If a filter was added by using the add_filter() function of lexers, that lexer is registered for the filter. In that case lexer will refer to the lexer that has registered the filter. It can be used to access options passed to a lexer. Because it could be None you always have to check for that case if you access it.
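
A defensive sketch of that check (reading the standard tabsize lexer option, with 8 as an assumed fallback):

def filter(self, lexer, stream):
    # lexer is None unless the filter was registered with add_filter()
    tabsize = lexer.options.get('tabsize', 8) if lexer is not None else 8
    for ttype, value in stream:
        yield ttype, value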

Using a decorator

You can also use the simplefilter decorator from the pygments.filter module:

from pygments.util import get_bool_opt
from pygments.token import Name
from pygments.filter import simplefilter


@simplefilter
def uncolor(lexer, stream, options):
    class_too = get_bool_opt(options, 'classtoo')
    for ttype, value in stream:
        if ttype is Name.Function or (class_too and
                                      ttype is Name.Class):
            ttype = Name
        yield ttype, value

The decorator automatically subclasses an internal filter class and uses the decorated function for filtering.

Pygments-1.6/docs/build/moinmoin.html0000644000175000017500000002021012073317553017005 0ustar piotrpiotr Using Pygments with MoinMoin — Pygments

Pygments

Using Pygments with MoinMoin

« Back To Index

From Pygments 0.7, the source distribution ships a Moin parser plugin that can be used to get Pygments highlighting in Moin wiki pages.

To use it, copy the file external/moin-parser.py from the Pygments distribution to the data/plugin/parser subdirectory of your Moin instance. Edit the options at the top of the file (currently ATTACHMENTS and INLINESTYLES) and rename the file to the name that the parser directive should have. For example, if you name the file code.py, you can get a highlighted Python code sample with this Wiki markup:

{{{
#!code python
[...]
}}}

where python is the Pygments name of the lexer to use.

Additionally, if you set the ATTACHMENTS option to True, Pygments will also be called for all attachments for whose filenames there is no other parser registered.

You are responsible for including CSS rules that will map the Pygments CSS classes to colors. You can output a stylesheet file with pygmentize, put it into the htdocs directory of your Moin instance and then include it in the stylesheets configuration option in the Moin config, e.g.:

stylesheets = [('screen', '/htdocs/pygments.css')]

If you do not want to do that and are willing to accept larger HTML output, you can set the INLINESTYLES option to True.

Pygments-1.6/docs/build/formatterdevelopment.html0000644000175000017500000005032512073317553021440 0ustar piotrpiotr Write your own formatter — Pygments

Pygments

Write your own formatter

« Back To Index

As well as creating your own lexer, writing a new formatter for Pygments is easy and straightforward.

A formatter is a class that is initialized with some keyword arguments (the formatter options) and that must provide a format() method. Additionally, a formatter should provide a get_style_defs() method that returns the style definitions from the style in a form usable for the formatter's output format.

Quickstart

The most basic formatter shipped with Pygments is the NullFormatter. It just sends the value of a token to the output stream:

from pygments.formatter import Formatter

class NullFormatter(Formatter):
    def format(self, tokensource, outfile):
        for ttype, value in tokensource:
            outfile.write(value)

As you can see, the format() method is passed two parameters: tokensource and outfile. The first is an iterable of (token_type, value) tuples, the latter a file like object with a write() method.

Because this formatter is so basic, it doesn't override the get_style_defs() method.

Styles

Styles aren't instantiated but their metaclass provides some class functions so that you can access the style definitions easily.

Styles are iterable and yield tuples in the form (ttype, d) where ttype is a token and d is a dict with the following keys:

'color'
Hexadecimal color value (eg: 'ff0000' for red) or None if not defined.
'bold'
True if the value should be bold
'italic'
True if the value should be italic
'underline'
True if the value should be underlined
'bgcolor'
Hexadecimal color value for the background (eg: 'eeeeee' for light gray) or None if not defined.
'border'
Hexadecimal color value for the border (eg: '0000aa' for a dark blue) or None for no border.

Additional keys might appear in the future; formatters should ignore all keys they don't support.
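
For example, this sketch iterates over the builtin default style and prints the color of every token type that defines one:

from pygments.styles import get_style_by_name

style = get_style_by_name('default')
for ttype, d in style:
    if d['color']:
        print ttype, '#' + d['color']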

HTML 3.2 Formatter

For a more complex example, let's implement an HTML 3.2 formatter. We don't use CSS but inline markup (<u>, <font>, etc.). Because this isn't good style, this formatter isn't in the standard library ;-)

from pygments.formatter import Formatter

class OldHtmlFormatter(Formatter):

    def __init__(self, **options):
        Formatter.__init__(self, **options)

        # create a dict of (start, end) tuples that wrap the
        # value of a token so that we can use it in the format
        # method later
        self.styles = {}

        # we iterate over the `_styles` attribute of a style item
        # that contains the parsed style values.
        for token, style in self.style:
            start = end = ''
            # a style item is a tuple in the following form:
            # colors are readily specified in hex: 'RRGGBB'
            if style['color']:
                start += '<font color="#%s">' % style['color']
                end = '</font>' + end
            if style['bold']:
                start += '<b>'
                end = '</b>' + end
            if style['italic']:
                start += '<i>'
                end = '</i>' + end
            if style['underline']:
                start += '<u>'
                end = '</u>' + end
            self.styles[token] = (start, end)

    def format(self, tokensource, outfile):
        # lastval is a string we use for caching
        # because it's possible that a lexer yields a number
        # of consecutive tokens with the same token type.
        # to minimize the size of the generated html markup we
        # try to join the values of same-type tokens here
        lastval = ''
        lasttype = None

        # wrap the whole output with <pre>
        outfile.write('<pre>')

        for ttype, value in tokensource:
            # if the token type doesn't exist in the stylemap
            # we try it with the parent of the token type
            # eg: parent of Token.Literal.String.Double is
            # Token.Literal.String
            while ttype not in self.styles:
                ttype = ttype.parent
            if ttype == lasttype:
                # the current token type is the same of the last
                # iteration. cache it
                lastval += value
            else:
                # not the same token as last iteration, but we
                # have some data in the buffer. wrap it with the
                # defined style and write it to the output file
                if lastval:
                    stylebegin, styleend = self.styles[lasttype]
                    outfile.write(stylebegin + lastval + styleend)
                # set lastval/lasttype to current values
                lastval = value
                lasttype = ttype

        # if something is left in the buffer, write it to the
        # output file, then close the opened <pre> tag
        if lastval:
            stylebegin, styleend = self.styles[lasttype]
            outfile.write(stylebegin + lastval + styleend)
        outfile.write('</pre>\n')

The comments should explain it. Again, this formatter doesn't override the get_style_defs() method. If we had used CSS classes instead of inline HTML markup, we would need to generate the CSS first. For that purpose the get_style_defs() method exists:

Generating Style Definitions

Some formatters like the LatexFormatter and the HtmlFormatter don't output inline markup but reference either macros or css classes. Because the definitions of those are not part of the output, the get_style_defs() method exists. It is passed one parameter (if it's used and how it's used is up to the formatter) and has to return a string or None.
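
As an illustration, a CSS-emitting variant for the formatter above could look like this sketch (the scheme for deriving class names from token types is made up here, not the one the real HtmlFormatter uses):

def get_style_defs(self, arg=''):
    lines = []
    for token, style in self.style:
        if style['color']:
            cls = str(token).replace('.', '-').lower()
            lines.append('%s .%s { color: #%s }' % (arg, cls, style['color']))
    return '\n'.join(lines)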

Pygments-1.6/docs/build/filters.html0000644000175000017500000004043512073317553016643 0ustar piotrpiotr Filters — Pygments

Pygments

Filters

« Back To Index

Contents

New in Pygments 0.7.

You can filter token streams coming from lexers to improve or annotate the output. For example, you can highlight special words in comments, convert keywords to upper or lowercase to enforce a style guide etc.

To apply a filter, you can use the add_filter() method of a lexer:

>>> from pygments.lexers import PythonLexer
>>> l = PythonLexer()
>>> # add a filter given by a string and options
>>> l.add_filter('codetagify', case='lower')
>>> l.filters
[<pygments.filters.CodeTagFilter object at 0xb785decc>]
>>> from pygments.filters import KeywordCaseFilter
>>> # or give an instance
>>> l.add_filter(KeywordCaseFilter(case='lower'))

The add_filter() method takes keyword arguments which are forwarded to the constructor of the filter.

To get a list of all registered filters by name, you can use the get_all_filters() function from the pygments.filters module that returns an iterable for all known filters.

If you want to write your own filter, have a look at Write your own filter.

Builtin Filters

RaiseOnErrorTokenFilter

Raise an exception when the lexer generates an error token.

Options accepted:

excclass : Exception class
The exception class to raise. The default is pygments.filters.ErrorToken.

New in Pygments 0.8.

Name:raiseonerror

VisibleWhitespaceFilter

Convert tabs, newlines and/or spaces to visible characters.

Options accepted:

spaces : string or bool
If this is a one-character string, spaces will be replaced by this string. If it is another true value, spaces will be replaced by · (unicode MIDDLE DOT). If it is a false value, spaces will not be replaced. The default is False.
tabs : string or bool
The same as for spaces, but the default replacement character is » (unicode RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK). The default value is False. Note: this will not work if the tabsize option for the lexer is nonzero, as tabs will already have been expanded then.
tabsize : int
If tabs are to be replaced by this filter (see the tabs option), this is the total number of characters that a tab should be expanded to. The default is 8.
newlines : string or bool
The same as for spaces, but the default replacement character is ¶ (unicode PILCROW SIGN). The default value is False.
wstokentype : bool
If true, give whitespace the special Whitespace token type. This allows styling the visible whitespace differently (e.g. greyed out), but it can disrupt background colors. The default is True.

New in Pygments 0.8.

Name:whitespace

TokenMergeFilter

Merges consecutive tokens with the same token type in the output stream of a lexer.

New in Pygments 1.2.

Name:tokenmerge

NameHighlightFilter

Highlight a normal Name token with a different token type.

Example:

filter = NameHighlightFilter(
    names=['foo', 'bar', 'baz'],
    tokentype=Name.Function,
)

This would highlight the names "foo", "bar" and "baz" as functions. Name.Function is the default token type.

Options accepted:

names : list of strings
A list of names that should be given the different token type. There is no default.
tokentype : TokenType or string
A token type or a string containing a token type name that is used for highlighting the strings in names. The default is Name.Function.
Name:highlight

GobbleFilter

Gobbles source code lines (eats initial characters).

This filter drops the first n characters off every line of code. This may be useful when the source code fed to the lexer is indented by a fixed amount of space that isn't desired in the output.

Options accepted:

n : int
The number of characters to gobble.

New in Pygments 1.2.

Name:gobble

CodeTagFilter

Highlight special code tags in comments and docstrings.

Options accepted:

codetags : list of strings
A list of strings that are flagged as code tags. The default is to highlight XXX, TODO, BUG and NOTE.
Name:codetagify

KeywordCaseFilter

Convert keywords to lowercase or uppercase or capitalize them, which means first letter uppercase, rest lowercase.

This can be useful e.g. if you highlight Pascal code and want to adapt the code to your styleguide.

Options accepted:

case : string
The casing to convert keywords to. Must be one of 'lower', 'upper' or 'capitalize'. The default is 'lower'.
Name:keywordcase
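
Filters can also be applied on the command line with pygmentize's -F option, with filter options given after a colon (a sketch; test.pas is a placeholder file):

$ pygmentize -F keywordcase:case=upper test.pas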
Pygments-1.6/docs/build/lexerdevelopment.html0000644000175000017500000014450012073317552020552 0ustar piotrpiotr Write your own lexer — Pygments

Pygments

Write your own lexer

« Back To Index

If a lexer for your favorite language is missing in the Pygments package, you can easily write your own and extend Pygments.

All you need can be found inside the pygments.lexer module. As you can read in the API documentation, a lexer is a class that is initialized with some keyword arguments (the lexer options) and that provides a get_tokens_unprocessed() method which is given a string or unicode object with the data to parse.

The get_tokens_unprocessed() method must return an iterator or iterable containing tuples in the form (index, token, value). Normally you don't need to do this since there are numerous base lexers you can subclass.

RegexLexer

A very powerful (but quite easy to use) lexer is the RegexLexer. This lexer base class allows you to define lexing rules in terms of regular expressions for different states.

States are groups of regular expressions that are matched against the input string at the current position. If one of these expressions matches, a corresponding action is performed (normally yielding a token with a specific type), the current position is set to where the last match ended and the matching process continues with the first regex of the current state.

Lexer states are kept in a state stack: each time a new state is entered, the new state is pushed onto the stack. The most basic lexers (like the DiffLexer) just need one state.

Each state is defined as a list of tuples in the form (regex, action, new_state) where the last item is optional. In the most basic form, action is a token type (like Name.Builtin). That means: When regex matches, emit a token with the match text and type tokentype and push new_state on the state stack. If the new state is '#pop', the topmost state is popped from the stack instead. (To pop more than one state, use '#pop:2' and so on.) '#push' is a synonym for pushing the current state on the stack.

The following example shows the DiffLexer from the builtin lexers. Note that it contains some additional attributes name, aliases and filenames which aren't required for a lexer. They are used by the builtin lexer lookup functions.

from pygments.lexer import RegexLexer
from pygments.token import *

class DiffLexer(RegexLexer):
    name = 'Diff'
    aliases = ['diff']
    filenames = ['*.diff']

    tokens = {
        'root': [
            (r' .*\n', Text),
            (r'\+.*\n', Generic.Inserted),
            (r'-.*\n', Generic.Deleted),
            (r'@.*\n', Generic.Subheading),
            (r'Index.*\n', Generic.Heading),
            (r'=.*\n', Generic.Heading),
            (r'.*\n', Text),
        ]
    }

As you can see, this lexer only uses one state. When the lexer starts scanning the text, it first checks whether the current character is a space. If so, it scans everything up to the next newline and yields it as a Text token.

If this rule doesn't match, it checks if the current char is a plus sign. And so on.

If no rule matches at the current position, the current char is emitted as an Error token that indicates a parsing error, and the position is increased by 1.

Regex Flags

You can either define regex flags in the regex (r'(?x)foo bar') or by adding a flags attribute to your lexer class. If no attribute is defined, it defaults to re.MULTILINE. For more information about regular expression flags, see the regular expressions help page in the Python documentation.
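
A sketch of the class-attribute variant (the lexer itself is hypothetical):

import re

from pygments.lexer import RegexLexer
from pygments.token import Comment, Text

class CaseInsensitiveLexer(RegexLexer):
    flags = re.MULTILINE | re.IGNORECASE

    tokens = {
        'root': [
            (r'rem .*?$', Comment),  # also matches "REM" and "Rem"
            (r'.', Text),
        ]
    }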

Scanning multiple tokens at once

Here is a more complex lexer that highlights INI files. INI files consist of sections, comments and key = value pairs:

from pygments.lexer import RegexLexer, bygroups
from pygments.token import *

class IniLexer(RegexLexer):
    name = 'INI'
    aliases = ['ini', 'cfg']
    filenames = ['*.ini', '*.cfg']

    tokens = {
        'root': [
            (r'\s+', Text),
            (r';.*?$', Comment),
            (r'\[.*?\]$', Keyword),
            (r'(.*?)(\s*)(=)(\s*)(.*?)$',
             bygroups(Name.Attribute, Text, Operator, Text, String))
        ]
    }

The lexer first looks for whitespace, comments and section names. Later it looks for a line that looks like a key, value pair separated by an '=' sign, with optional whitespace.

The bygroups helper makes sure that each group is yielded with a different token type. First the Name.Attribute token, then a Text token for the optional whitespace, after that an Operator token for the equals sign. Then a Text token for the whitespace again. The rest of the line is returned as String.

Note that for this to work, every part of the match must be inside a capturing group (a (...)), and there must not be any nested capturing groups. If you nevertheless need a group, use a non-capturing group defined using this syntax: r'(?:some|words|here)' (note the ?: after the beginning parenthesis).

If you find yourself needing a capturing group inside the regex which shouldn't be part of the output but is used in the regular expression for backreferencing (eg: r'(<(foo|bar)>)(.*?)(</\2>)'), you can pass None to the bygroups function and that group will be skipped in the output.
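
A sketch using the example regex from above; the second group exists only for the \2 backreference, so it is skipped by passing None:

from pygments.lexer import RegexLexer, bygroups
from pygments.token import Name, Text

class TagLexer(RegexLexer):
    # hypothetical sketch: highlight <foo>...</foo> and <bar>...</bar>
    name = 'Tag'

    tokens = {
        'root': [
            (r'(<(foo|bar)>)(.*?)(</\2>)',
             bygroups(Name.Tag, None, Text, Name.Tag)),
            (r'.', Text),
        ]
    }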

Changing states

Many lexers need multiple states to work as expected. For example, some languages allow multiline comments to be nested. Since this is a recursive pattern it's impossible to lex just using regular expressions.

Here is the solution:

from pygments.lexer import RegexLexer
from pygments.token import *

class ExampleLexer(RegexLexer):
    name = 'Example Lexer with states'

    tokens = {
        'root': [
            (r'[^/]+', Text),
            (r'/\*', Comment.Multiline, 'comment'),
            (r'//.*?$', Comment.Single),
            (r'/', Text)
        ],
        'comment': [
            (r'[^*/]', Comment.Multiline),
            (r'/\*', Comment.Multiline, '#push'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[*/]', Comment.Multiline)
        ]
    }

This lexer starts lexing in the 'root' state. It tries to match as much as possible until it finds a slash ('/'). If the next character after the slash is a star ('*') the RegexLexer sends those two characters to the output stream marked as Comment.Multiline and continues parsing with the rules defined in the 'comment' state.

If there wasn't a star after the slash, the RegexLexer checks if it's a single-line comment (i.e. followed by a second slash). If this also wasn't the case, it must be a single slash (the separate regex for a single slash must also be given, else the slash would be marked as an error token).

Inside the 'comment' state, we do the same thing again. Scan until the lexer finds a star or slash. If it's the opening of a multiline comment, push the 'comment' state on the stack and continue scanning, again in the 'comment' state. Else, check if it's the end of the multiline comment. If yes, pop one state from the stack.

Note: If you pop from an empty stack you'll get an IndexError. (There is an easy way to prevent this from happening: don't '#pop' in the root state).

If the RegexLexer encounters a newline that is flagged as an error token, the stack is emptied and the lexer continues scanning in the 'root' state. This helps produce error-tolerant highlighting for erroneous input, e.g. when a single-line string is not closed.

Advanced state tricks

There are a few more things you can do with states:

  • You can push multiple states onto the stack if you give a tuple instead of a simple string as the third item in a rule tuple. For example, if you want to match a comment containing a directive, something like:

    /* <processing directive>    rest of comment */
    

    you can use this rule:

    tokens = {
        'root': [
            (r'/\* <', Comment, ('comment', 'directive')),
            ...
        ],
        'directive': [
            (r'[^>]*', Comment.Directive),
            (r'>', Comment, '#pop'),
        ],
        'comment': [
            (r'[^*]+', Comment),
            (r'\*/', Comment, '#pop'),
            (r'\*', Comment),
        ]
    }
    

    When this encounters the above sample, first 'comment' and 'directive' are pushed onto the stack, then the lexer continues in the directive state until it finds the closing >, then it continues in the comment state until the closing */. Then, both states are popped from the stack again and lexing continues in the root state.

    New in Pygments 0.9: The tuple can contain the special '#push' and '#pop' (but not '#pop:n') directives.

  • You can include the rules of a state in the definition of another. This is done by using include from pygments.lexer:

    from pygments.lexer import RegexLexer, bygroups, include
    from pygments.token import *
    
    class ExampleLexer(RegexLexer):
        tokens = {
            'comments': [
                (r'/\*.*?\*/', Comment),
                (r'//.*?\n', Comment),
            ],
            'root': [
                include('comments'),
                (r'(function )(\w+)( {)',
                 bygroups(Keyword, Name, Keyword), 'function'),
                (r'.', Text),
            ],
            'function': [
                (r'[^}/]+', Text),
                include('comments'),
                (r'/', Text),
                (r'}', Keyword, '#pop'),
            ]
        }
    

    This is a hypothetical lexer for a language that consists of functions and comments. Because comments can occur at toplevel and in functions, we need rules for comments in both states. As you can see, the include helper saves repeating rules that occur more than once (in this example, the state 'comments' will never be entered by the lexer, as it's only there to be included in 'root' and 'function').

  • Sometimes, you may want to "combine" a state from existing ones. This is possible with the combined helper from pygments.lexer.

    If you, instead of a new state, write combined('state1', 'state2') as the third item of a rule tuple, a new anonymous state will be formed from state1 and state2 and if the rule matches, the lexer will enter this state.

    This is not used very often, but can be helpful in some cases, such as the PythonLexer's string literal processing.

  • If you want your lexer to start lexing in a different state you can modify the stack by overloading the get_tokens_unprocessed() method:

    from pygments.lexer import RegexLexer
    
    class MyLexer(RegexLexer):
        tokens = {...}
    
        def get_tokens_unprocessed(self, text):
            stack = ['root', 'otherstate']
        for item in RegexLexer.get_tokens_unprocessed(self, text, stack):
                yield item
    

    Some lexers like the PhpLexer use this to make the leading <?php preprocessor comments optional. Note that you can crash the lexer easily by putting values into the stack that don't exist in the token map. Also removing 'root' from the stack can result in strange errors!

  • An empty regex at the end of a state list, combined with '#pop', can act as a return point from a state that doesn't have a clear end marker.
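
    A sketch of this trick, for a hypothetical grammar where optional bracketed arguments follow a command and there is no explicit end marker:

    from pygments.lexer import RegexLexer
    from pygments.token import Keyword, Name, Text

    class ReturnPointLexer(RegexLexer):
        name = 'ReturnPoint'

        tokens = {
            'root': [
                (r'\\\w+', Keyword, 'args'),
                (r'.', Text),
            ],
            'args': [
                (r'\[[^\]]*\]', Name.Attribute),
                # nothing matched above: the empty regex always matches
                # and returns to 'root'
                (r'', Text, '#pop'),
            ],
        }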

Using multiple lexers

Using multiple lexers for the same input can be tricky. One of the easiest combination techniques is shown here: You can replace the token type entry in a rule tuple (the second item) with a lexer class. The matched text will then be lexed with that lexer, and the resulting tokens will be yielded.

For example, look at this stripped-down HTML lexer:

import re

from pygments.lexer import RegexLexer, bygroups, using
from pygments.token import *
from pygments.lexers.web import JavascriptLexer

class HtmlLexer(RegexLexer):
    name = 'HTML'
    aliases = ['html']
    filenames = ['*.html', '*.htm']

    flags = re.IGNORECASE | re.DOTALL
    tokens = {
        'root': [
            ('[^<&]+', Text),
            ('&.*?;', Name.Entity),
            (r'<\s*script\s*', Name.Tag, ('script-content', 'tag')),
            (r'<\s*[a-zA-Z0-9:]+', Name.Tag, 'tag'),
            (r'<\s*/\s*[a-zA-Z0-9:]+\s*>', Name.Tag),
        ],
        'script-content': [
            (r'(.+?)(<\s*/\s*script\s*>)',
             bygroups(using(JavascriptLexer), Name.Tag),
             '#pop'),
        ]
    }

Here the content of a <script> tag is passed to a newly created instance of a JavascriptLexer and not processed by the HtmlLexer. This is done using the using helper that takes the other lexer class as its parameter.

Note the combination of bygroups and using. This makes sure that the content up to the </script> end tag is processed by the JavascriptLexer, while the end tag is yielded as a normal token with the Name.Tag type.

As an additional goodie, if the lexer class is replaced by the special object this (imported from pygments.lexer), the "other" lexer will be the current one (because you cannot refer to the current class within the code that runs at class definition time).

Also note the (r'<\s*script\s*', Name.Tag, ('script-content', 'tag')) rule. Here, two states are pushed onto the state stack, 'script-content' and 'tag'. That means that first 'tag' is processed, which will parse attributes and the closing >, then the 'tag' state is popped and the next state on top of the stack will be 'script-content'.

The using() helper has a special keyword argument, state, which works as follows: if given, the lexer to use initially is not in the "root" state, but in the state given by this argument. This only works with a RegexLexer.

Any other keyword arguments passed to using() are added to the keyword arguments used to create the lexer.
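
A hypothetical sketch of the state keyword: '#{...}' spans are re-lexed with the current lexer (via this), but starting in the 'interp' state instead of 'root':

from pygments.lexer import RegexLexer, using, this
from pygments.token import Name, Punctuation, String, Text

class InterpStringLexer(RegexLexer):
    name = 'InterpString'

    tokens = {
        'root': [
            (r'#\{[^}]*\}', using(this, state='interp')),
            (r'[^#]+', String),
            (r'#', String),
        ],
        'interp': [
            (r'[#{}]', Punctuation),
            (r'\w+', Name.Variable),
            (r'\s+', Text),
        ],
    }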

Delegating Lexer

Another approach for nested lexers is the DelegatingLexer which is for example used for the template engine lexers. It takes two lexers as arguments on initialisation: a root_lexer and a language_lexer.

The input is processed as follows: First, the whole text is lexed with the language_lexer. All tokens yielded with a type of Other are then concatenated and given to the root_lexer. The language tokens of the language_lexer are then inserted into the root_lexer's token stream at the appropriate positions.

from pygments.lexer import DelegatingLexer
from pygments.lexers.web import HtmlLexer, PhpLexer

class HtmlPhpLexer(DelegatingLexer):
    def __init__(self, **options):
        super(HtmlPhpLexer, self).__init__(HtmlLexer, PhpLexer, **options)

This procedure ensures that e.g. HTML with template tags in it is highlighted correctly even if the template tags are put into HTML tags or attributes.

If you want to change the needle token Other to something else, you can give the lexer another token type as the third parameter:

DelegatingLexer.__init__(MyLexer, OtherLexer, Text, **options)

Callbacks

Sometimes the grammar of a language is so complex that a lexer would be unable to parse it just by using regular expressions and stacks.

For this, the RegexLexer allows callbacks to be given in rule tuples, instead of token types (bygroups and using are nothing else but preimplemented callbacks). The callback must be a function taking two arguments:

  • the lexer itself
  • the match object for the last matched rule

The callback must then return an iterable of (or simply yield) (index, tokentype, value) tuples, which are then just passed through by get_tokens_unprocessed(). The index here is the position of the token in the input string, tokentype is the normal token type (like Name.Builtin), and value the associated part of the input string.

You can see an example here:

from pygments.lexer import RegexLexer
from pygments.token import Generic

class HypotheticLexer(RegexLexer):

    def headline_callback(lexer, match):
        equal_signs = match.group(1)
        text = match.group(2)
        yield match.start(), Generic.Headline, equal_signs + text + equal_signs

    tokens = {
        'root': [
            (r'(=+)(.*?)(\1)', headline_callback)
        ]
    }

If the regex for the headline_callback matches, the function is called with the match object. Note that after the callback is done, processing continues normally, that is, after the end of the previous match. The callback cannot influence the lexer's position.

There are not really any simple examples for lexer callbacks, but you can see them in action e.g. in the compiled.py source code in the CLexer and JavaLexer classes.

The ExtendedRegexLexer class

The RegexLexer, even with callbacks, unfortunately isn't powerful enough for the funky syntax rules of some languages that will go unnamed, such as Ruby.

But fear not; even then you don't have to abandon the regular expression approach: Pygments has a subclass of RegexLexer, the ExtendedRegexLexer. All features known from RegexLexers are available here too, and the tokens are specified in exactly the same way, except for one detail:

The get_tokens_unprocessed() method holds its internal state data not as local variables, but in an instance of the pygments.lexer.LexerContext class, and that instance is passed to callbacks as a third argument. This means that you can modify the lexer state in callbacks.

The LexerContext class has the following members:

  • text -- the input text
  • pos -- the current starting position that is used for matching regexes
  • stack -- a list containing the state stack
  • end -- the maximum position to which regexes are matched; this defaults to the length of text

Additionally, the get_tokens_unprocessed() method can be given a LexerContext instead of a string and will then process this context instead of creating a new one for the string argument.

Note that because you can set the current position to anything in the callback, it won't automatically be set by the caller after the callback is finished. For example, this is how the hypothetical lexer above would be written with the ExtendedRegexLexer:

from pygments.lexer import ExtendedRegexLexer
from pygments.token import Generic

class ExHypotheticLexer(ExtendedRegexLexer):

    def headline_callback(lexer, match, ctx):
        equal_signs = match.group(1)
        text = match.group(2)
        yield match.start(), Generic.Headline, equal_signs + text + equal_signs
        ctx.pos = match.end()

    tokens = {
        'root': [
            (r'(=+)(.*?)(\1)', headline_callback)
        ]
    }

This might sound confusing (and it really can be). But it is needed, and for an example look at the Ruby lexer in agile.py.

Filtering Token Streams

Some languages ship a lot of builtin functions (for example PHP). The total amount of those functions differs from system to system because not everybody has every extension installed. In the case of PHP there are over 3000 builtin functions. That's an incredibly huge number of functions, far more than you can put into a regular expression.

But because only Name tokens can be function names, this is solvable by overriding the get_tokens_unprocessed() method. The following lexer subclasses the PythonLexer so that it highlights some additional names as pseudo keywords:

from pygments.lexers.agile import PythonLexer
from pygments.token import Name, Keyword

class MyPythonLexer(PythonLexer):
    EXTRA_KEYWORDS = ['foo', 'bar', 'foobar', 'barfoo', 'spam', 'eggs']

    def get_tokens_unprocessed(self, text):
        for index, token, value in PythonLexer.get_tokens_unprocessed(self, text):
            if token is Name and value in self.EXTRA_KEYWORDS:
                yield index, Keyword.Pseudo, value
            else:
                yield index, token, value

The PhpLexer and LuaLexer use this method to resolve builtin functions.

Note: Do not confuse this with the filter system.

Pygments-1.6/docs/build/integrate.html

Using Pygments in various scenarios

PyGtk

Armin has written a piece of sample code that shows how to create a Gtk TextBuffer object containing Pygments-highlighted text.

See the article here: http://lucumr.pocoo.org/cogitations/2007/05/30/pygments-gtk-rendering/

Wordpress

He also has a snippet that shows how to use Pygments in WordPress:

http://lucumr.pocoo.org/cogitations/2007/05/30/pygments-in-wordpress/

Markdown

Since Pygments 0.9, the distribution ships Markdown preprocessor sample code that uses Pygments to render source code in external/markdown-processor.py. You can copy and adapt it to your liking.

TextMate

Antonio Cangiano has created a Pygments bundle for TextMate that allows you to colorize code via a simple menu option. It can be found here.

Bash completion

The source distribution contains a file external/pygments.bashcomp that sets up completion for the pygmentize command in bash.

Java

See the Java quickstart document.

Pygments-1.6/docs/build/cmdline.html

Command Line Interface

You can use Pygments from the shell, provided you installed the pygmentize script:

$ pygmentize test.py
print "Hello World"

will print the file test.py to standard output, using the Python lexer (inferred from the file name extension) and the terminal formatter (because you didn't give an explicit formatter name).

If you want HTML output:

$ pygmentize -f html -l python -o test.html test.py

As you can see, the -l option explicitly selects a lexer. As seen above, if you give an input file name and it has an extension that Pygments recognizes, you can omit this option.

The -o option gives an output file name. If it is not given, output is written to stdout.

The -f option selects a formatter (as with -l, it can also be omitted if an output file name is given and has a supported extension). If no output file name is given and -f is omitted, the TerminalFormatter is used.

The above command could therefore also be given as:

$ pygmentize -o test.html test.py

To create a full HTML document, including line numbers and stylesheet (using the "emacs" style), highlighting the Python file test.py to test.html:

$ pygmentize -O full,style=emacs -o test.html test.py

Options and filters

Lexer and formatter options can be given using the -O option:

$ pygmentize -f html -O style=colorful,linenos=1 -l python test.py

Be sure to enclose the option string in quotes if it contains any special shell characters, such as spaces or expansion wildcards like *. If an option expects a list value, separate the list entries with spaces (you'll have to quote the option value in this case too, so that the shell doesn't split it).

Since the -O option argument is split at commas and expects the split values to be of the form name=value, you can't give an option value that contains commas or equals signs. Therefore, an option -P is provided (as of Pygments 0.9) that works like -O but can only pass one option per -P. Its value can then contain all characters:

$ pygmentize -P "heading=Pygments, the Python highlighter" ...

Filters are added to the token stream using the -F option:

$ pygmentize -f html -l pascal -F keywordcase:case=upper main.pas

As you see, options for the filter are given after a colon. As for -O, the filter name and options must be one shell word, so there may not be any spaces around the colon.

Generating styles

Formatters normally don't output full style information. For example, the HTML formatter by default only outputs <span> tags with class attributes. Therefore, there's a special -S option for generating style definitions. Usage is as follows:

$ pygmentize -f html -S colorful -a .syntax

generates a CSS style sheet (because you selected the HTML formatter) for the "colorful" style, prepending a ".syntax" selector to all style rules.

For an explanation of what -a means for a particular formatter, look for the arg argument of the formatter's get_style_defs() method.

Getting lexer names

New in Pygments 1.0.

The -N option guesses a lexer name for a given filename, so that

$ pygmentize -N setup.py

will print out python. It won't highlight anything yet. If no specific lexer is known for that filename, text is printed.

Getting help

The -L option lists lexers, formatters, styles and filters, along with their short names and supported file name extensions. If you want to see only one category, give it as an argument:

$ pygmentize -L filters

will list only the installed filters.

The -H option will give you detailed information (the same that can be found in this documentation) about a lexer, formatter or filter. Usage is as follows:

$ pygmentize -H formatter html

will print the help for the HTML formatter, while

$ pygmentize -H lexer python

will print the help for the Python lexer, etc.

A note on encodings

New in Pygments 0.9.

Pygments tries to be smart regarding encodings in the formatting process:

  • If you give an encoding option, it will be used as the input and output encoding.
  • If you give an outencoding option, it will override encoding as the output encoding.
  • If you don't give an encoding and have given an output file, the default encoding for lexer and formatter is latin1 (which will pass through all non-ASCII characters).
  • If you don't give an encoding and haven't given an output file (that means output is written to the console), the default encoding for lexer and formatter is the terminal encoding (sys.stdout.encoding).
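
For example, to read latin1 input and write UTF-8 output (a sketch combining the encoding and outencoding options described above):

$ pygmentize -O encoding=latin1,outencoding=utf-8 -o test.html test.py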
Pygments-1.6/docs/build/java.html

Use Pygments in Java

Thanks to Jython it is possible to use Pygments in Java.

This page is a simple tutorial to get an idea of how this is working. You can then look at the Jython documentation for more advanced use.

Since version 1.5, Pygments is deployed on Maven Central as a JAR, as is Jython, which makes it a lot easier to create the Java project.

Here is an example of a Maven pom.xml file for a project running Pygments:

<?xml version="1.0" encoding="UTF-8"?>

<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
                             http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>example</groupId>
  <artifactId>example</artifactId>
  <version>1.0-SNAPSHOT</version>
  <dependencies>
    <dependency>
      <groupId>org.python</groupId>
      <artifactId>jython-standalone</artifactId>
      <version>2.5.3</version>
    </dependency>
    <dependency>
      <groupId>org.pygments</groupId>
      <artifactId>pygments</artifactId>
      <version>1.5</version>
      <scope>runtime</scope>
    </dependency>
  </dependencies>
</project>

The following Java example:

import org.python.util.PythonInterpreter;

// the Python source to highlight -- any string will do
String code = "print \"Hello World\"";

PythonInterpreter interpreter = new PythonInterpreter();

// Set a variable with the content you want to work with
interpreter.set("code", code);

// Simple use Pygments as you would in Python
interpreter.exec("from pygments import highlight\n"
    + "from pygments.lexers import PythonLexer\n"
    + "from pygments.formatters import HtmlFormatter\n"
    + "\nresult = highlight(code, PythonLexer(), HtmlFormatter())");

// Get the result that has been set in a variable
System.out.println(interpreter.get("result", String.class));

will print something like:

<div class="highlight">
<pre><span class="k">print</span> <span class="s">&quot;Hello World&quot;</span></pre>
</div>
Pygments-1.6/docs/build/formatters.html

Available formatters

This page lists all builtin formatters.

Common options

All formatters support these options:

encoding

New in Pygments 0.6.

If given, must be an encoding name (such as "utf-8"). This will be used to convert the token strings (which are Unicode strings) to byte strings in the output (default: None). It will also be written in an encoding declaration suitable for the document format if the full option is given (e.g. a meta content-type directive in HTML or an invocation of the inputenc package in LaTeX).

If this is "" or None, Unicode strings will be written to the output file, which most file-like objects do not support. For example, pygments.highlight() will return a Unicode string if called with no outfile argument and a formatter that has encoding set to None because it uses a StringIO.StringIO object that supports Unicode arguments to write(). Using a regular file object wouldn't work.

outencoding

New in Pygments 0.7.

When using Pygments from the command line, any encoding option given is passed to the lexer and the formatter. This is sometimes not desirable, for example if you want to set the input encoding to "guess". Therefore, outencoding has been introduced which overrides encoding for the formatter if given.

Formatter classes

All these classes are importable from pygments.formatters.

BBCodeFormatter

Format tokens with BBcodes. These formatting codes are used by many bulletin boards, so you can highlight your sourcecode with pygments before posting it there.

This formatter has no support for background colors and borders, as there are no common BBcode tags for that.

Some board systems (e.g. phpBB) don't support colors in their [code] tag, so you can't use the highlighting together with that tag. Text in a [code] tag usually is shown with a monospace font (which this formatter can mimic with the monofont option), and spaces (which you need for indentation) are not removed.

Additional options accepted:

style
The style to use, can be a string or a Style subclass (default: 'default').
codetag
If set to true, put the output into [code] tags (default: false)
monofont
If set to true, add a tag to show the code with a monospace font (default: false).
Short names: bbcode, bb
Filename patterns: None
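
A minimal usage sketch:

from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.formatters import BBCodeFormatter

# wrap the output in [code] tags and request a monospace font tag
print(highlight('print "Hello World"', PythonLexer(),
                BBCodeFormatter(codetag=True, monofont=True)))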

BmpImageFormatter

Create a bitmap image from source code. This uses the Python Imaging Library to generate a pixmap from the source code.

New in Pygments 1.0. (You could create bitmap images before by passing a suitable image_format option to the ImageFormatter.)

Short names: bmp, bitmap
Filename patterns: *.bmp

GifImageFormatter

Create a GIF image from source code. This uses the Python Imaging Library to generate a pixmap from the source code.

New in Pygments 1.0. (You could create GIF images before by passing a suitable image_format option to the ImageFormatter.)

Short names: gif
Filename patterns: *.gif

HtmlFormatter

Format tokens as HTML 4 <span> tags within a <pre> tag, wrapped in a <div> tag. The <div>'s CSS class can be set by the cssclass option.

If the linenos option is set to "table", the <pre> is additionally wrapped inside a <table> which has one row and two cells: one containing the line numbers and one containing the code. Example:

<div class="highlight" >
<table><tr>
  <td class="linenos" title="click to toggle"
    onclick="with (this.firstChild.style)
             { display = (display == '') ? 'none' : '' }">
    <pre>1
    2</pre>
  </td>
  <td class="code">
    <pre><span class="Ke">def </span><span class="NaFu">foo</span>(bar):
      <span class="Ke">pass</span>
    </pre>
  </td>
</tr></table></div>

(whitespace added to improve clarity).

Wrapping can be disabled using the nowrap option.

A list of lines can be specified using the hl_lines option to make these lines highlighted (as of Pygments 0.11).

With the full option, a complete HTML 4 document is output, including the style definitions inside a <style> tag, or in a separate file if the cssfile option is given.

When tagsfile is set to the path of a ctags index file, it is used to generate hyperlinks from names to their definition. You must enable lineanchors and run ctags with the -n option for this to work. The python-ctags module from PyPI must be installed to use this feature; otherwise a RuntimeError will be raised.

The get_style_defs(arg='') method of a HtmlFormatter returns a string containing CSS rules for the CSS classes used by the formatter. The argument arg can be used to specify additional CSS selectors that are prepended to the classes. A call fmter.get_style_defs('td .code') would result in the following CSS classes:

td .code .kw { font-weight: bold; color: #00FF00 }
td .code .cm { color: #999999 }
...

If you have Pygments 0.6 or higher, you can also pass a list or tuple to the get_style_defs() method to request multiple prefixes for the tokens:

formatter.get_style_defs(['div.syntax pre', 'pre.syntax'])

The output would then look like this:

div.syntax pre .kw,
pre.syntax .kw { font-weight: bold; color: #00FF00 }
div.syntax pre .cm,
pre.syntax .cm { color: #999999 }
...

Additional options accepted:

nowrap
If set to True, don't wrap the tokens at all, not even inside a <pre> tag. This disables most other options (default: False).
full
Tells the formatter to output a "full" document, i.e. a complete self-contained document (default: False).
title
If full is true, the title that should be used to caption the document (default: '').
style
The style to use, can be a string or a Style subclass (default: 'default'). This option has no effect if the cssfile and noclobber_cssfile option are given and the file specified in cssfile exists.
noclasses
If set to true, token <span> tags will not use CSS classes, but inline styles. This is not recommended for larger pieces of code since it increases output size by quite a bit (default: False).
classprefix
Since the token types use relatively short class names, they may clash with some of your own class names. In this case you can use the classprefix option to give a string to prepend to all Pygments-generated CSS class names for token types. Note that this option also affects the output of get_style_defs().
cssclass

CSS class for the wrapping <div> tag (default: 'highlight'). If you set this option, the default selector for get_style_defs() will be this class.

New in Pygments 0.9: If you select the 'table' line numbers, the wrapping table will have a CSS class of this string plus 'table'; the default is accordingly 'highlighttable'.

cssstyles
Inline CSS styles for the wrapping <div> tag (default: '').
prestyles
Inline CSS styles for the <pre> tag (default: ''). New in Pygments 0.11.
cssfile
If the full option is true and this option is given, it must be the name of an external file. If the filename does not include an absolute path, the file's path will be assumed to be relative to the main output file's path, if the latter can be found. The stylesheet is then written to this file instead of the HTML file. New in Pygments 0.6.
noclobber_cssfile
If cssfile is given and the specified file exists, the css file will not be overwritten. This allows the use of the full option in combination with a user specified css file. Default is False. New in Pygments 1.1.
linenos

If set to 'table', output line numbers as a table with two cells, one containing the line numbers, the other the whole code. This is copy-and-paste-friendly, but may cause alignment problems with some browsers or fonts. If set to 'inline', the line numbers will be integrated in the <pre> tag that contains the code (that setting is new in Pygments 0.8).

For compatibility with Pygments 0.7 and earlier, every true value except 'inline' means the same as 'table' (in particular, that means also True).

The default value is False, which means no line numbers at all.

Note: with the default ("table") line number mechanism, the line numbers and code can have different line heights in Internet Explorer unless you give the enclosing <pre> tags an explicit line-height CSS property (you get the default line spacing with line-height: 125%).

hl_lines
Specify a list of lines to be highlighted. New in Pygments 0.11.
linenostart
The line number for the first line (default: 1).
linenostep
If set to a number n > 1, only every nth line number is printed.
linenospecial
If set to a number n > 0, every nth line number is given the CSS class "special" (default: 0).
nobackground
If set to True, the formatter won't output the background color for the wrapping element (this automatically defaults to False when there is no wrapping element, e.g. no argument given for the get_style_defs method) (default: False). New in Pygments 0.6.
lineseparator
This string is output between lines of code. It defaults to "\n", which is enough to break a line inside <pre> tags, but you can e.g. set it to "<br>" to get HTML line breaks. New in Pygments 0.7.
lineanchors
If set to a nonempty string, e.g. foo, the formatter will wrap each output line in an anchor tag with a name of foo-linenumber. This allows easy linking to certain lines. New in Pygments 0.9.
linespans
If set to a nonempty string, e.g. foo, the formatter will wrap each output line in a span tag with an id of foo-linenumber. This allows easy access to lines via javascript. New in Pygments 1.6.
anchorlinenos
If set to True, will wrap line numbers in <a> tags. Used in combination with linenos and lineanchors.
tagsfile
If set to the path of a ctags file, wrap names in anchor tags that link to their definitions. lineanchors should be used, and the tags file should specify line numbers (see the -n option to ctags). New in Pygments 1.6.
tagurlformat
A string formatting pattern used to generate links to ctags definitions. Available variables are %(path)s, %(fname)s and %(fext)s. Defaults to an empty string, resulting in just #prefix-number links. New in Pygments 1.6.

Subclassing the HTML formatter

New in Pygments 0.7.

The HTML formatter is now built in a way that allows easy subclassing, thus customizing the output HTML code. The format() method calls self._format_lines() which returns a generator that yields tuples of (1, line), where the 1 indicates that the line is a line of the formatted source code.

If the nowrap option is set, the generator is iterated over and the resulting HTML is output.

Otherwise, format() calls self.wrap(), which wraps the generator with other generators. These may add some HTML code to the one generated by _format_lines(), either by modifying the lines generated by the latter, then yielding them again with (1, line), and/or by yielding other HTML code before or after the lines, with (0, html). The distinction between source lines and other code makes it possible to wrap the generator multiple times.

The default wrap() implementation adds a <div> and a <pre> tag.

A custom HtmlFormatter subclass could look like this:

class CodeHtmlFormatter(HtmlFormatter):

    def wrap(self, source, outfile):
        return self._wrap_code(source)

    def _wrap_code(self, source):
        yield 0, '<code>'
        for i, t in source:
            if i == 1:
                # it's a line of formatted code
                t += '<br>'
            yield i, t
        yield 0, '</code>'

This results in wrapping the formatted lines with a <code> tag, where the source lines are broken using <br> tags.

After calling wrap(), the format() method also adds the "line numbers" and/or "full document" wrappers if the respective options are set. Then, all HTML yielded by the wrapped generator is output.

Short names: html
Filename patterns: *.html, *.htm

ImageFormatter

Create a PNG image from source code. This uses the Python Imaging Library to generate a pixmap from the source code.

New in Pygments 0.10.

Additional options accepted:

image_format

An image format to output that is recognised by PIL; these include:

  • "PNG" (default)
  • "JPEG"
  • "BMP"
  • "GIF"
line_pad

The extra spacing (in pixels) between each line of text.

Default: 2

font_name

The font name to be used as the base font, from which others, such as bold and italic fonts, will be generated. This really should be a monospace font to look sane.

Default: "Bitstream Vera Sans Mono"

font_size

The font size in points to be used.

Default: 14

image_pad

The padding, in pixels to be used at each edge of the resulting image.

Default: 10

line_numbers

Whether line numbers should be shown: True/False

Default: True

line_number_start

The line number of the first line.

Default: 1

line_number_step

The step used when printing line numbers.

Default: 1

line_number_bg

The background colour (in "#123456" format) of the line number bar, or None to use the style background color.

Default: "#eed"

line_number_fg

The text color of the line numbers (in "#123456"-like format).

Default: "#886"

line_number_chars

The number of columns of line numbers allowable in the line number margin.

Default: 2

line_number_bold

Whether line numbers will be bold: True/False

Default: False

line_number_italic

Whether line numbers will be italicized: True/False

Default: False

line_number_separator

Whether a line will be drawn between the line number area and the source code area: True/False

Default: True

line_number_pad

The horizontal padding (in pixels) between the line number margin, and the source code area.

Default: 6

hl_lines

Specify a list of lines to be highlighted. New in Pygments 1.2.

Default: empty list

hl_color

Specify the color for highlighting lines. New in Pygments 1.2.

Default: highlight color of the selected style

Short names: img, IMG, png
Filename patterns: *.png

JpgImageFormatter

Create a JPEG image from source code. This uses the Python Imaging Library to generate a pixmap from the source code.

New in Pygments 1.0. (You could create JPEG images before by passing a suitable image_format option to the ImageFormatter.)

Short names: jpg, jpeg
Filename patterns: *.jpg

LatexFormatter

Format tokens as LaTeX code. This needs the fancyvrb and color standard packages.

Without the full option, code is formatted as one Verbatim environment, like this:

\begin{Verbatim}[commandchars=\\{\}]
\PY{k}{def }\PY{n+nf}{foo}(\PY{n}{bar}):
    \PY{k}{pass}
\end{Verbatim}

The special command used here (\PY) and all the other macros it needs are output by the get_style_defs method.

With the full option, a complete LaTeX document is output, including the command definitions in the preamble.

The get_style_defs() method of a LatexFormatter returns a string containing \def commands defining the macros needed inside the Verbatim environments.

Additional options accepted:

style
The style to use, can be a string or a Style subclass (default: 'default').
full
Tells the formatter to output a "full" document, i.e. a complete self-contained document (default: False).
title
If full is true, the title that should be used to caption the document (default: '').
docclass
If the full option is enabled, this is the document class to use (default: 'article').
preamble
If the full option is enabled, this can be further preamble commands, e.g. \usepackage (default: '').
linenos
If set to True, output line numbers (default: False).
linenostart
The line number for the first line (default: 1).
linenostep
If set to a number n > 1, only every nth line number is printed.
verboptions
Additional options given to the Verbatim environment (see the fancyvrb docs for possible values) (default: '').
commandprefix

The LaTeX commands used to produce colored output are constructed using this prefix and some letters (default: 'PY'). New in Pygments 0.7.

New in Pygments 0.10: the default is now 'PY' instead of 'C'.

texcomments
If set to True, enables LaTeX comment lines. That is, LaTeX markup in comment tokens is not escaped so that LaTeX can render it (default: False). New in Pygments 1.2.
mathescape
If set to True, enables LaTeX math mode escape in comments. That is, '$...$' inside a comment will trigger math mode (default: False). New in Pygments 1.2.
Short names: latex, tex
Filename patterns: *.tex

NullFormatter

Output the text unchanged without any formatting.

Short names: text, null
Filename patterns: *.txt

RawTokenFormatter

Format tokens as a raw representation for storing token streams.

The format is tokentype<TAB>repr(tokenstring)\n. The output can later be converted to a token stream with the RawTokenLexer, described in the lexer list.
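A minimal round-trip sketch (written for the Python 2 of these docs; RawTokenLexer is the lexer mentioned above):

from pygments import highlight
from pygments.lexers import PythonLexer, RawTokenLexer
from pygments.formatters import RawTokenFormatter, TerminalFormatter

# serialize the token stream: one "tokentype<TAB>repr(value)" line per token
raw = highlight('x = 1\n', PythonLexer(), RawTokenFormatter())

# convert the raw representation back into highlighted terminal output
print(highlight(raw, RawTokenLexer(), TerminalFormatter()))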

Only two options are accepted:

compress
If set to 'gz' or 'bz2', compress the output with the given compression algorithm after encoding (default: '').
error_color
If set to a color name, highlight error tokens using that color. If set but with no value, defaults to 'red'. New in Pygments 0.11.
Short names: raw, tokens
Filename patterns: *.raw

RtfFormatter

Format tokens as RTF markup. This formatter automatically outputs full RTF documents with color information and other useful stuff. Perfect for Copy and Paste into Microsoft® Word® documents.

New in Pygments 0.6.

Additional options accepted:

style
The style to use, can be a string or a Style subclass (default: 'default').
fontface
The font family to use, for example Bitstream Vera Sans. Defaults to some generic font which is supposed to have fixed width.
Short names: rtf
Filename patterns: *.rtf

SvgFormatter

Format tokens as an SVG graphics file. This formatter is still experimental. Each line of code is a <text> element with explicit x and y coordinates containing <tspan> elements with the individual token styles.

By default, this formatter outputs a full SVG document including doctype declaration and the <svg> root element.

New in Pygments 0.9.

Additional options accepted:

nowrap
Don't wrap the SVG <text> elements in <svg><g> elements and don't add an XML declaration and a doctype. If true, the fontfamily and fontsize options are ignored. Defaults to False.
fontfamily
The value to give the wrapping <g> element's font-family attribute, defaults to "monospace".
fontsize
The value to give the wrapping <g> element's font-size attribute, defaults to "14px".
xoffset
Starting offset in X direction, defaults to 0.
yoffset
Starting offset in Y direction, defaults to the font size if it is given in pixels, or 20 otherwise. (This is necessary since text coordinates refer to the text baseline, not the top edge.)
ystep
Offset to add to the Y coordinate for each subsequent line. This should roughly be the text size plus 5. It defaults to that value if the text size is given in pixels, or 25 otherwise.
spacehack
Convert spaces in the source to &#160;, which are non-breaking spaces. SVG provides the xml:space attribute to control how whitespace inside tags is handled; in theory, the preserve value could be used to keep all whitespace as-is. However, many current SVG viewers don't obey that rule, so this option is provided as a workaround and defaults to True.
Short names: svg
Filename patterns: *.svg

Terminal256Formatter

Format tokens with ANSI color sequences, for output in a 256-color terminal or console. As in the TerminalFormatter, color sequences are terminated at newlines, so that paging the output works correctly.

The formatter takes colors from a style defined by the style option and converts them to the nearest ANSI 256-color escape sequences. Bold and underline attributes from the style are preserved (and displayed).

New in Pygments 0.9.

Options accepted:

style
The style to use, can be a string or a Style subclass (default: 'default').
Short names: terminal256, console256, 256
Filename patterns: None

TerminalFormatter

Format tokens with ANSI color sequences, for output in a text console. Color sequences are terminated at newlines, so that paging the output works correctly.

The get_style_defs() method doesn't do anything special since there is no support for common styles.

Options accepted:

bg
Set to "light" or "dark" depending on the terminal's background (default: "light").
colorscheme
A dictionary mapping token types to (lightbg, darkbg) color names or None (default: None = use builtin colorscheme).
Short names: terminal, console
Filename patterns: None
Pygments-1.6/docs/build/changelog.html

Changelog

Issue numbers refer to the tracker at <http://bitbucket.org/birkenfeld/pygments-main/issues>, pull request numbers to the requests at <http://bitbucket.org/birkenfeld/pygments-main/pull-requests/merged>.

Version 1.6rc1

(released Jan 9, 2013)

  • Lexers added:
    • AspectJ (PR#90)
    • AutoIt (PR#122)
    • BUGS-like languages (PR#89)
    • Ceylon (PR#86)
    • Croc (new name for MiniD)
    • CUDA (PR#75)
    • Dg (PR#116)
    • IDL (PR#115)
    • Jags (PR#89)
    • Julia (PR#61)
    • Kconfig (#711)
    • Lasso (PR#95, PR#113)
    • LiveScript (PR#84)
    • Monkey (PR#117)
    • Mscgen (PR#80)
    • NSIS scripts (PR#136)
    • OpenCOBOL (PR#72)
    • QML (PR#123)
    • Puppet (PR#133)
    • Racket (PR#94)
    • Rdoc (PR#99)
    • Robot Framework (PR#137)
    • RPM spec files (PR#124)
    • Rust (PR#67)
    • Smali (Dalvik assembly)
    • SourcePawn (PR#39)
    • Stan (PR#89)
    • Treetop (PR#125)
    • TypeScript (PR#114)
    • VGL (PR#12)
    • Visual FoxPro (#762)
    • Windows Registry (#819)
    • Xtend (PR#68)
  • The HTML formatter now supports linking to tags using CTags files, when the python-ctags package is installed (PR#87).
  • The HTML formatter now has a "linespans" option that wraps every line in a <span> tag with a specific id (PR#82).
  • When deriving a lexer from another lexer with token definitions, definitions for states not in the child lexer are now inherited. If you override a state in the child lexer, an "inherit" keyword has been added to insert the base state at that position (PR#141).
  • The C family lexers now inherit token definitions from a common base class, removing code duplication (PR#141).
  • Use "colorama" on Windows for console color output (PR#142).
  • Fix Template Haskell highlighting (PR#63).
  • Fix some S/R lexer errors (PR#91).
  • Fix a bug in the Prolog lexer with names that start with 'is' (#810).
  • Rewrite Dylan lexer, add Dylan LID lexer (PR#147).
  • Add a Java quickstart document (PR#146).
  • Add a "external/autopygmentize" file that can be used as .lessfilter (#802).

Version 1.5

(codename Zeitdilatation, released Mar 10, 2012)

  • Lexers added:
    • Awk (#630)
    • Fancy (#633)
    • PyPy Log
    • eC
    • Nimrod
    • Nemerle (#667)
    • F# (#353)
    • Groovy (#501)
    • PostgreSQL (#660)
    • DTD
    • Gosu (#634)
    • Octave (PR#22)
    • Standard ML (PR#14)
    • CFengine3 (#601)
    • Opa (PR#37)
    • HTTP sessions (PR#42)
    • JSON (PR#31)
    • SNOBOL (PR#30)
    • MoonScript (PR#43)
    • ECL (PR#29)
    • Urbiscript (PR#17)
    • OpenEdge ABL (PR#27)
    • SystemVerilog (PR#35)
    • Coq (#734)
    • PowerShell (#654)
    • Dart (#715)
    • Fantom (PR#36)
    • Bro (PR#5)
    • NewLISP (PR#26)
    • VHDL (PR#45)
    • Scilab (#740)
    • Elixir (PR#57)
    • Tea (PR#56)
    • Kotlin (PR#58)
  • Fix Python 3 terminal highlighting with pygmentize (#691).
  • In the LaTeX formatter, escape special &, < and > chars (#648).
  • In the LaTeX formatter, fix display problems for styles with token background colors (#670).
  • Enhancements to the Squid conf lexer (#664).
  • Several fixes to the reStructuredText lexer (#636).
  • Recognize methods in the ObjC lexer (#638).
  • Fix Lua "class" highlighting: it does not have classes (#665).
  • Fix degenerate regex in Scala lexer (#671) and highlighting bugs (#713, #708).
  • Fix number pattern order in Ocaml lexer (#647).
  • Fix generic type highlighting in ActionScript 3 (#666).
  • Fixes to the Clojure lexer (PR#9).
  • Fix degenerate regex in Nemerle lexer (#706).
  • Fix infinite looping in CoffeeScript lexer (#729).
  • Fix crashes and analysis with ObjectiveC lexer (#693, #696).
  • Add some Fortran 2003 keywords.
  • Fix Boo string regexes (#679).
  • Add "rrt" style (#727).
  • Fix infinite looping in Darcs Patch lexer.
  • Lots of misc fixes to character-eating bugs and ordering problems in many different lexers.

Version 1.4

(codename Unschärfe, released Jan 03, 2011)

  • Lexers added:
    • Factor (#520)
    • PostScript (#486)
    • Verilog (#491)
    • BlitzMax Basic (#478)
    • Ioke (#465)
    • Java properties, split out of the INI lexer (#445)
    • Scss (#509)
    • Duel/JBST
    • XQuery (#617)
    • Mason (#615)
    • GoodData (#609)
    • SSP (#473)
    • Autohotkey (#417)
    • Google Protocol Buffers
    • Hybris (#506)
  • Do not fail in analyse_text methods (#618).
  • Performance improvements in the HTML formatter (#523).
  • With the noclasses option in the HTML formatter, some styles present in the stylesheet were not added as inline styles.
  • Four fixes to the Lua lexer (#480, #481, #482, #497).
  • More context-sensitive Gherkin lexer with support for more i18n translations.
  • Support new OO keywords in Matlab lexer (#521).
  • Small fix in the CoffeeScript lexer (#519).
  • A bugfix for backslashes in ocaml strings (#499).
  • Fix unicode/raw docstrings in the Python lexer (#489).
  • Allow PIL to work without PIL.pth (#502).
  • Allow seconds as a unit in CSS (#496).
  • Support application/javascript as a JavaScript mime type (#504).
  • Support Offload C++ Extensions as keywords in the C++ lexer (#484).
  • Escape more characters in LaTeX output (#505).
  • Update Haml/Sass lexers to version 3 (#509).
  • Small PHP lexer string escaping fix (#515).
  • Support comments before preprocessor directives, and unsigned/ long long literals in C/C++ (#613, #616).
  • Support line continuations in the INI lexer (#494).
  • Fix lexing of Dylan string and char literals (#628).
  • Fix class/procedure name highlighting in VB.NET lexer (#624).

Version 1.3.1

(bugfix release, released Mar 05, 2010)

  • The pygmentize script was missing from the distribution.

Version 1.3

(codename Schneeglöckchen, released Mar 01, 2010)

  • Added the ensurenl lexer option, which can be used to suppress the automatic addition of a newline to the lexer input.
  • Lexers added:
    • Ada
    • Coldfusion
    • Modula-2
    • haXe
    • R console
    • Objective-J
    • Haml and Sass
    • CoffeeScript
  • Enhanced reStructuredText highlighting.
  • Added support for PHP 5.3 namespaces in the PHP lexer.
  • Added a bash completion script for pygmentize, to the external/ directory (#466).
  • Fixed a bug in do_insertions() used for multi-lexer languages.
  • Fixed a Ruby regex highlighting bug (#476).
  • Fixed regex highlighting bugs in Perl lexer (#258).
  • Add small enhancements to the C lexer (#467) and Bash lexer (#469).
  • Small fixes for the Tcl, Debian control file, Nginx config, Smalltalk, Objective-C, Clojure, Lua lexers.
  • Gherkin lexer: Fixed single apostrophe bug and added new i18n keywords.

Version 1.2.2

(bugfix release, released Jan 02, 2010)

  • Removed a backwards incompatibility in the LaTeX formatter that caused Sphinx to produce invalid commands when writing LaTeX output (#463).
  • Fixed a forever-backtracking regex in the BashLexer (#462).

Version 1.2.1

(bugfix release, released Jan 02, 2010)

  • Fixed mishandling of an ellipsis in place of the frames in a Python console traceback, resulting in clobbered output.

Version 1.2

(codename Neujahr, released Jan 01, 2010)

  • Dropped Python 2.3 compatibility.
  • Lexers added:
    • Asymptote
    • Go
    • Gherkin (Cucumber)
    • CMake
    • Ooc
    • Coldfusion
    • haXe
    • R console
  • Added options for rendering LaTeX in source code comments in the LaTeX formatter (#461).
  • Updated the Logtalk lexer.
  • Added line_number_start option to image formatter (#456).
  • Added hl_lines and hl_color options to image formatter (#457).
  • Fixed the HtmlFormatter's handling of noclasses=True to not output any classes (#427).
  • Added the Monokai style (#453).
  • Fixed LLVM lexer identifier syntax and added new keywords (#442).
  • Fixed the PythonTracebackLexer to handle non-traceback data in header or trailer, and support more partial tracebacks that start on line 2 (#437).
  • Fixed the CLexer to not highlight ternary statements as labels.
  • Fixed lexing of some Ruby quoting peculiarities (#460).
  • A few ASM lexer fixes (#450).

Version 1.1.1

(bugfix release, released Sep 15, 2009)

  • Fixed the BBCode lexer (#435).
  • Added support for new Jinja2 keywords.
  • Fixed test suite failures.
  • Added Gentoo-specific suffixes to Bash lexer.

Version 1.1

(codename Brillouin, released Sep 11, 2009)

  • Ported Pygments to Python 3. This needed a few changes in the way encodings are handled; they may affect corner cases when used with Python 2 as well.
  • Lexers added:
    • Antlr/Ragel, thanks to Ana Nelson
    • (Ba)sh shell
    • Erlang shell
    • GLSL
    • Prolog
    • Evoque
    • Modelica
    • Rebol
    • MXML
    • Cython
    • ABAP
    • ASP.net (VB/C#)
    • Vala
    • Newspeak
  • Fixed the LaTeX formatter's output so that output generated for one style can be used with the style definitions of another (#384).
  • Added "anchorlinenos" and "noclobber_cssfile" (#396) options to HTML formatter.
  • Support multiline strings in Lua lexer.
  • Rewrite of the JavaScript lexer by Pumbaa80 to better support regular expression literals (#403).
  • When pygmentize is asked to highlight a file for which multiple lexers match the filename, use the analyse_text guessing engine to determine the winner (#355).
  • Fixed minor bugs in the JavaScript lexer (#383), the Matlab lexer (#378), the Scala lexer (#392), the INI lexer (#391), the Clojure lexer (#387) and the AS3 lexer (#389).
  • Fixed three Perl heredoc lexing bugs (#379, #400, #422).
  • Fixed a bug in the image formatter which misdetected lines (#380).
  • Fixed bugs lexing extended Ruby strings and regexes.
  • Fixed a bug when lexing git diffs.
  • Fixed a bug lexing the empty commit in the PHP lexer (#405).
  • Fixed a bug causing Python numbers to be mishighlighted as floats (#397).
  • Fixed a bug when backslashes are used in odd locations in Python (#395).
  • Fixed various bugs in Matlab and S-Plus lexers, thanks to Winston Chang (#410, #411, #413, #414) and fmarc (#419).
  • Fixed a bug in Haskell single-line comment detection (#426).
  • Added new-style reStructuredText directive for docutils 0.5+ (#428).

Version 1.0

(codename Dreiundzwanzig, released Nov 23, 2008)

  • Don't use join(splitlines()) when converting newlines to \n, because that doesn't keep all newlines at the end when the stripnl lexer option is False.

  • Added -N option to command-line interface to get a lexer name for a given filename.

  • Added Tango style, written by Andre Roberge for the Crunchy project.

  • Added Python3TracebackLexer and python3 option to PythonConsoleLexer.

  • Fixed a few bugs in the Haskell lexer.

  • Fixed PythonTracebackLexer to be able to recognize SyntaxError and KeyboardInterrupt (#360).

  • Provide one formatter class per image format, so that surprises like:

    pygmentize -f gif -o foo.gif foo.py
    

    creating a PNG file are avoided.

  • Actually use the font_size option of the image formatter.

  • Fixed the numpy lexer so that it doesn't listen for *.py any longer.

  • Fixed HTML formatter so that text options can be Unicode strings (#371).

  • Unified Diff lexer supports the "udiff" alias now.

  • Fixed a few issues in Scala lexer (#367).

  • RubyConsoleLexer now supports simple prompt mode (#363).

  • JavascriptLexer is smarter about what constitutes a regex (#356).

  • Add Applescript lexer, thanks to Andreas Amann (#330).

  • Make the codetags more strict about matching words (#368).

  • NginxConfLexer is a little more accurate on mimetypes and variables (#370).

Version 0.11.1

(released Aug 24, 2008)

  • Fixed a Jython compatibility issue in pygments.unistring (#358).

Version 0.11

(codename Straußenei, released Aug 23, 2008)

Many thanks go to Tim Hatch for writing or integrating most of the bug fixes and new features.

  • Lexers added:
    • Nasm-style assembly language, thanks to delroth
    • YAML, thanks to Kirill Simonov
    • ActionScript 3, thanks to Pierre Bourdon
    • Cheetah/Spitfire templates, thanks to Matt Good
    • Lighttpd config files
    • Nginx config files
    • Gnuplot plotting scripts
    • Clojure
    • POV-Ray scene files
    • Sqlite3 interactive console sessions
    • Scala source files, thanks to Krzysiek Goj
  • Lexers improved:
    • C lexer highlights standard library functions now and supports C99 types.
    • Bash lexer now correctly highlights heredocs without preceding whitespace.
    • Vim lexer now highlights hex colors properly and knows a couple more keywords.
    • Irc logs lexer now handles xchat's default time format (#340) and correctly highlights lines ending in >.
    • Support more delimiters for perl regular expressions (#258).
    • ObjectiveC lexer now supports 2.0 features.
  • Added "Visual Studio" style.
  • Updated markdown processor to Markdown 1.7.
  • Support roman/sans/mono style defs and use them in the LaTeX formatter.
  • The RawTokenFormatter is no longer registered to *.raw and it's documented that tokenization with this lexer may raise exceptions.
  • New option hl_lines to HTML formatter, to highlight certain lines.
  • New option prestyles to HTML formatter.
  • New option -g to pygmentize, to allow lexer guessing based on file text (can be slowish, so file extensions are still checked first).
  • guess_lexer() now makes its decision much faster due to a cache of whether data is xml-like (a check which is used in several versions of analyse_text()). Several lexers also have more accurate analyse_text() now.

Version 0.10

(codename Malzeug, released May 06, 2008)

  • Lexers added:
    • Io
    • Smalltalk
    • Darcs patches
    • Tcl
    • Matlab
    • Matlab sessions
    • FORTRAN
    • XSLT
    • tcsh
    • NumPy
    • Python 3
    • S, S-plus, R statistics languages
    • Logtalk
  • In the LatexFormatter, the commandprefix option is now by default 'PY' instead of 'C', since the latter resulted in several collisions with other packages. Also, the special meaning of the arg argument to get_style_defs() was removed.
  • Added ImageFormatter, to format code as PNG, JPG, GIF or BMP. (Needs the Python Imaging Library.)
  • Support doc comments in the PHP lexer.
  • Handle format specifications in the Perl lexer.
  • Fix comment handling in the Batch lexer.
  • Add more file name extensions for the C++, INI and XML lexers.
  • Fixes in the IRC and MuPad lexers.
  • Fix function and interface name highlighting in the Java lexer.
  • Fix at-rule handling in the CSS lexer.
  • Handle KeyboardInterrupts gracefully in pygmentize.
  • Added BlackWhiteStyle.
  • Bash lexer now correctly highlights math, does not require whitespace after semicolons, and correctly highlights boolean operators.
  • Makefile lexer is now capable of handling BSD and GNU make syntax.

Version 0.9

(codename Herbstzeitlose, released Oct 14, 2007)

  • Lexers added:
    • Erlang
    • ActionScript
    • Literate Haskell
    • Common Lisp
    • Various assembly languages
    • Gettext catalogs
    • Squid configuration
    • Debian control files
    • MySQL-style SQL
    • MOOCode
  • Lexers improved:
    • Greatly improved the Haskell and OCaml lexers.
    • Improved the Bash lexer's handling of nested constructs.
    • The C# and Java lexers exhibited abysmal performance with some input code; this should now be fixed.
    • The IRC logs lexer is now able to colorize weechat logs too.
    • The Lua lexer now recognizes multi-line comments.
    • Fixed bugs in the D and MiniD lexer.
  • The encoding handling of the command line mode (pygmentize) was enhanced. You shouldn't get UnicodeErrors from it anymore if you don't give an encoding option.
  • Added a -P option to the command line mode which can be used to give options whose values contain commas or equals signs.
  • Added 256-color terminal formatter.
  • Added an experimental SVG formatter.
  • Added the lineanchors option to the HTML formatter, thanks to Ian Charnas for the idea.
  • Gave the line numbers table a CSS class in the HTML formatter.
  • Added a Vim 7-like style.

Version 0.8.1

(released Jun 27, 2007)

  • Fixed POD highlighting in the Ruby lexer.
  • Fixed Unicode class and namespace name highlighting in the C# lexer.
  • Fixed Unicode string prefix highlighting in the Python lexer.
  • Fixed a bug in the D and MiniD lexers.
  • Fixed the included MoinMoin parser.

Version 0.8

(codename Maikäfer, released May 30, 2007)

  • Lexers added:
    • Haskell, thanks to Adam Blinkinsop
    • Redcode, thanks to Adam Blinkinsop
    • D, thanks to Kirk McDonald
    • MuPad, thanks to Christopher Creutzig
    • MiniD, thanks to Jarrett Billingsley
    • Vim Script, by Tim Hatch
  • The HTML formatter now has a second line-numbers mode in which it will just integrate the numbers in the same <pre> tag as the code.
  • The CSharpLexer is now Unicode-aware, which means that it has an option that can be set so that it correctly lexes Unicode identifiers allowed by the C# specs.
  • Added a RaiseOnErrorTokenFilter that raises an exception when the lexer generates an error token, and a VisibleWhitespaceFilter that converts whitespace (spaces, tabs, newlines) into visible characters.
  • Fixed the do_insertions() helper function to yield correct indices.
  • The ReST lexer now automatically highlights source code blocks in ".. sourcecode:: language" and ".. code:: language" directive blocks.
  • Improved the default style (thanks to Tiberius Teng). The old default is still available as the "emacs" style (which was an alias before).
  • The get_style_defs method of HTML formatters now uses the cssclass option as the default selector if it was given.
  • Improved the ReST and Bash lexers a bit.
  • Fixed a few bugs in the Makefile and Bash lexers, thanks to Tim Hatch.
  • Fixed a bug in the command line code that disallowed -O options when using the -S option.
  • Fixed a bug in the RawTokenFormatter.

Version 0.7.1

(released Feb 15, 2007)

  • Fixed little highlighting bugs in the Python, Java, Scheme and Apache Config lexers.
  • Updated the included manpage.
  • Included a built version of the documentation in the source tarball.

Version 0.7

(codename Faschingskrapfn, released Feb 14, 2007)

  • Added a MoinMoin parser that uses Pygments. With it, you get Pygments highlighting in Moin Wiki pages.
  • Changed the exception raised if no suitable lexer, formatter etc. is found in one of the get_*_by_* functions to a custom exception, pygments.util.ClassNotFound. It is, however, a subclass of ValueError in order to retain backwards compatibility.
  • Added a -H command line option which can be used to get the docstring of a lexer, formatter or filter.
  • Made the handling of lexers and formatters more consistent. The aliases and filename patterns of formatters are now attributes on them.
  • Added an OCaml lexer, thanks to Adam Blinkinsop.
  • Made the HTML formatter more flexible, and easily subclassable in order to make it easy to implement custom wrappers, e.g. alternate line number markup. See the documentation.
  • Added an outencoding option to all formatters, making it possible to override the encoding (which is used by lexers and formatters) when using the command line interface. Also, if using the terminal formatter and the output file is a terminal and has an encoding attribute, use it if no encoding is given.
  • Made it possible to just drop style modules into the styles subpackage of the Pygments installation.
  • Added a "state" keyword argument to the using helper.
  • Added a commandprefix option to the LatexFormatter which allows controlling how the command names are constructed.
  • Added quite a few new lexers, thanks to Tim Hatch:
    • Java Server Pages
    • Windows batch files
    • Trac Wiki markup
    • Python tracebacks
    • ReStructuredText
    • Dylan
    • and the Befunge esoteric programming language (yay!)
  • Added Mako lexers by Ben Bangert.
  • Added "fruity" style, another dark background originally vim-based theme.
  • Added sources.list lexer by Dennis Kaarsemaker.
  • Added token stream filters, and a pygmentize option to use them.
  • Changed the behavior of the in operator for tokens.
  • Added mimetypes for all lexers.
  • Fixed some problems lexing Python strings.
  • Fixed tickets: #167, #178, #179, #180, #185, #201.

Version 0.6

(codename Zimtstern, released Dec 20, 2006)

  • Added option for the HTML formatter to write the CSS to an external file in "full document" mode.
  • Added RTF formatter.
  • Added Bash and Apache configuration lexers (thanks to Tim Hatch).
  • Improved guessing methods for various lexers.
  • Added @media support to CSS lexer (thanks to Tim Hatch).
  • Added a Groff lexer (thanks to Tim Hatch).
  • License change to BSD.
  • Added lexers for the Myghty template language.
  • Added a Scheme lexer (thanks to Marek Kubica).
  • Added some functions to iterate over existing lexers, formatters and filters.
  • The HtmlFormatter's get_style_defs() can now take a list as an argument to generate CSS with multiple prefixes.
  • Support for guessing input encoding added.
  • Encoding support added: all processing is now done with Unicode strings, input and output are converted from and optionally to byte strings (see the encoding option of lexers and formatters).
  • Some improvements in the C(++) lexers handling comments and line continuations.

Version 0.5.1

(released Oct 30, 2006)

  • Fixed traceback in pygmentize -L (thanks to Piotr Ozarowski).

Version 0.5

(codename PyKleur, released Oct 30, 2006)

  • Initial public release.
Pygments-1.6/docs/build/lexers.html0000644000175000017500000063700112073317552016475 0ustar piotrpiotr


Available lexers


This page lists all available builtin lexers and the options they take.

Currently, all lexers support these options:

stripnl
Strip leading and trailing newlines from the input (default: True).
stripall
Strip all leading and trailing whitespace from the input (default: False).
ensurenl
Make sure that the input ends with a newline (default: True). This is required for some lexers that consume input linewise. New in Pygments 1.3.
tabsize
If given and greater than 0, expand tabs in the input (default: 0).
encoding

New in Pygments 0.6.

If given, must be an encoding name (such as "utf-8"). This encoding will be used to convert the input string to Unicode (if it is not already a Unicode string). The default is "latin1".

If this option is set to "guess", a simple UTF-8 vs. Latin-1 detection is used; if it is set to "chardet", the chardet library is used to guess the encoding of the input.

The "Short Names" field lists the identifiers that can be used with the get_lexer_by_name() function.

These lexers are builtin and can be imported from pygments.lexers:

Lexers for agile languages

CrocLexer

For Croc source.

Short names:croc
Filename patterns:*.croc
Mimetypes:text/x-crocsrc

DgLexer

Lexer for dg, a functional and object-oriented programming language running on the CPython 3 VM.

New in Pygments 1.6.

Short names:dg
Filename patterns:*.dg
Mimetypes:text/x-dg

FactorLexer

Lexer for the Factor language.

New in Pygments 1.4.

Short names:factor
Filename patterns:*.factor
Mimetypes:text/x-factor

FancyLexer

Pygments Lexer For Fancy.

Fancy is a self-hosted, pure object-oriented, dynamic, class-based, concurrent general-purpose programming language running on Rubinius, the Ruby VM.

New in Pygments 1.5.

Short names:fancy, fy
Filename patterns:*.fy, *.fancypack
Mimetypes:text/x-fancysrc

IoLexer

For Io (a small, prototype-based programming language) source.

New in Pygments 0.10.

Short names:io
Filename patterns:*.io
Mimetypes:text/x-iosrc

LuaLexer

For Lua source code.

Additional options accepted:

func_name_highlighting
If given and True, highlight builtin function names (default: True).
disabled_modules

If given, must be a list of module names whose function names should not be highlighted. By default all modules are highlighted.

To get a list of allowed modules have a look into the _luabuiltins module:

>>> from pygments.lexers._luabuiltins import MODULES
>>> MODULES.keys()
['string', 'coroutine', 'modules', 'io', 'basic', ...]
Short names:lua
Filename patterns:*.lua, *.wlua
Mimetypes:text/x-lua, application/x-lua
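
A minimal sketch of the disabled_modules option (the module choice here is arbitrary):

>>> from pygments.lexers import LuaLexer
>>> lexer = LuaLexer(disabled_modules=['io', 'coroutine'])
>>> # functions from the io and coroutine modules are now lexed as plain names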

MiniDLexer

For MiniD source. MiniD is now known as Croc.

Short names:minid
Filename patterns:*.md
Mimetypes:text/x-minidsrc

MoonScriptLexer

For MoonScript source code.

New in Pygments 1.5.

Short names:moon, moonscript
Filename patterns:*.moon
Mimetypes:text/x-moonscript, application/x-moonscript

PerlLexer

For Perl source code.

Short names:perl, pl
Filename patterns:*.pl, *.pm
Mimetypes:text/x-perl, application/x-perl

Python3Lexer

For Python source code (version 3.0).

New in Pygments 0.10.

Short names:python3, py3
Filename patterns:None
Mimetypes:text/x-python3, application/x-python3

Python3TracebackLexer

For Python 3.0 tracebacks, with support for chained exceptions.

New in Pygments 1.0.

Short names:py3tb
Filename patterns:*.py3tb
Mimetypes:text/x-python3-traceback

PythonConsoleLexer

For Python console output or doctests, such as:

>>> a = 'foo'
>>> print a
foo
>>> 1 / 0
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ZeroDivisionError: integer division or modulo by zero

Additional options:

python3
Use Python 3 lexer for code. Default is False. New in Pygments 1.0.
Short names:pycon
Filename patterns:None
Mimetypes:text/x-python-doctest
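
For example, to highlight a console session that uses Python 3 syntax, a small sketch (the formatter choice is arbitrary):

>>> from pygments import highlight
>>> from pygments.lexers import PythonConsoleLexer
>>> from pygments.formatters import HtmlFormatter
>>> session = ">>> print('hello')\nhello\n"
>>> html = highlight(session, PythonConsoleLexer(python3=True), HtmlFormatter())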

PythonLexer

For Python source code.

Short names:python, py, sage
Filename patterns:*.py, *.pyw, *.sc, SConstruct, SConscript, *.tac, *.sage
Mimetypes:text/x-python, application/x-python

PythonTracebackLexer

For Python tracebacks.

New in Pygments 0.7.

Short names:pytb
Filename patterns:*.pytb
Mimetypes:text/x-python-traceback

RubyConsoleLexer

For Ruby interactive console (irb) output like:

irb(main):001:0> a = 1
=> 1
irb(main):002:0> puts a
1
=> nil
Short names:rbcon, irb
Filename patterns:None
Mimetypes:text/x-ruby-shellsession

RubyLexer

For Ruby source code.

Short names:rb, ruby, duby
Filename patterns:*.rb, *.rbw, Rakefile, *.rake, *.gemspec, *.rbx, *.duby
Mimetypes:text/x-ruby, application/x-ruby

TclLexer

For Tcl source code.

New in Pygments 0.10.

Short names:tcl
Filename patterns:*.tcl
Mimetypes:text/x-tcl, text/x-script.tcl, application/x-tcl

Lexers for assembly languages

CObjdumpLexer

For the output of 'objdump -Sr' on compiled C files.

Short names:c-objdump
Filename patterns:*.c-objdump
Mimetypes:text/x-c-objdump

Ca65Lexer

For ca65 assembler sources.

New in Pygments 1.6.

Short names:ca65
Filename patterns:*.s
Mimetypes:None

CppObjdumpLexer

For the output of 'objdump -Sr' on compiled C++ files.

Short names:cpp-objdump, c++-objdumb, cxx-objdump
Filename patterns:*.cpp-objdump, *.c++-objdump, *.cxx-objdump
Mimetypes:text/x-cpp-objdump

DObjdumpLexer

For the output of 'objdump -Sr' on compiled D files.

Short names:d-objdump
Filename patterns:*.d-objdump
Mimetypes:text/x-d-objdump

GasLexer

For Gas (AT&T) assembly code.

Short names:gas
Filename patterns:*.s, *.S
Mimetypes:text/x-gas

LlvmLexer

For LLVM assembly code.

Short names:llvm
Filename patterns:*.ll
Mimetypes:text/x-llvm

NasmLexer

For Nasm (Intel) assembly code.

Short names:nasm
Filename patterns:*.asm, *.ASM
Mimetypes:text/x-nasm

ObjdumpLexer

For the output of 'objdump -dr'.

Short names:objdump
Filename patterns:*.objdump
Mimetypes:text/x-objdump

Lexers for compiled languages

AdaLexer

For Ada source code.

New in Pygments 1.3.

Short names:ada, ada95, ada2005
Filename patterns:*.adb, *.ads, *.ada
Mimetypes:text/x-ada

BlitzMaxLexer

For BlitzMax source code.

New in Pygments 1.4.

Short names:blitzmax, bmax
Filename patterns:*.bmx
Mimetypes:text/x-bmx

CLexer

For C source code with preprocessor directives.

Short names:c
Filename patterns:*.c, *.h, *.idc
Mimetypes:text/x-chdr, text/x-csrc

CobolFreeformatLexer

Lexer for Free format OpenCOBOL code.

New in Pygments 1.6.

Short names:cobolfree
Filename patterns:*.cbl, *.CBL
Mimetypes:None

CobolLexer

Lexer for OpenCOBOL code.

New in Pygments 1.6.

Short names:cobol
Filename patterns:*.cob, *.COB, *.cpy, *.CPY
Mimetypes:text/x-cobol

CppLexer

For C++ source code with preprocessor directives.

Short names:cpp, c++
Filename patterns:*.cpp, *.hpp, *.c++, *.h++, *.cc, *.hh, *.cxx, *.hxx, *.C, *.H, *.cp, *.CPP
Mimetypes:text/x-c++hdr, text/x-c++src

CudaLexer

For NVIDIA CUDA™ source.

New in Pygments 1.6.

Short names:cuda, cu
Filename patterns:*.cu, *.cuh
Mimetypes:text/x-cuda

CythonLexer

For Pyrex and Cython source code.

New in Pygments 1.1.

Short names:cython, pyx
Filename patterns:*.pyx, *.pxd, *.pxi
Mimetypes:text/x-cython, application/x-cython

DLexer

For D source.

New in Pygments 1.2.

Short names:d
Filename patterns:*.d, *.di
Mimetypes:text/x-dsrc

DelphiLexer

For Delphi (Borland Object Pascal), Turbo Pascal and Free Pascal source code.

Additional options accepted:

turbopascal
Highlight Turbo Pascal specific keywords (default: True).
delphi
Highlight Borland Delphi specific keywords (default: True).
freepascal
Highlight Free Pascal specific keywords (default: True).
units
A list of units that should be considered builtin, supported are System, SysUtils, Classes and Math. Default is to consider all of them builtin.
Short names:delphi, pas, pascal, objectpascal
Filename patterns:*.pas
Mimetypes:text/x-pascal
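
A minimal sketch of these options (the unit list is illustrative):

>>> from pygments.lexers import DelphiLexer
>>> lexer = DelphiLexer(turbopascal=False, units=['System', 'SysUtils'])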

DylanLexer

For the Dylan language.

New in Pygments 0.7.

Short names:dylan
Filename patterns:*.dylan, *.dyl, *.intr
Mimetypes:text/x-dylan

DylanLidLexer

For Dylan LID (Library Interchange Definition) files.

New in Pygments 1.6.

Short names:dylan-lid, lid
Filename patterns:*.lid, *.hdp
Mimetypes:text/x-dylan-lid

ECLexer

For eC source code with preprocessor directives.

New in Pygments 1.5.

Short names:ec
Filename patterns:*.ec, *.eh
Mimetypes:text/x-echdr, text/x-ecsrc

FantomLexer

For Fantom source code.

New in Pygments 1.5.

Short names:fan
Filename patterns:*.fan
Mimetypes:application/x-fantom

FelixLexer

For Felix source code.

New in Pygments 1.2.

Short names:felix, flx
Filename patterns:*.flx, *.flxh
Mimetypes:text/x-felix

FortranLexer

Lexer for FORTRAN 90 code.

New in Pygments 0.10.

Short names:fortran
Filename patterns:*.f, *.f90, *.F, *.F90
Mimetypes:text/x-fortran

GLShaderLexer

GLSL (OpenGL Shader) lexer.

New in Pygments 1.1.

Short names:glsl
Filename patterns:*.vert, *.frag, *.geo
Mimetypes:text/x-glslsrc

GoLexer

For Go source.

Short names:go
Filename patterns:*.go
Mimetypes:text/x-gosrc

Modula2Lexer

For Modula-2 source code.

Additional options that determine which keywords are highlighted:

pim
Select PIM Modula-2 dialect (default: True).
iso
Select ISO Modula-2 dialect (default: False).
objm2
Select Objective Modula-2 dialect (default: False).
gm2ext
Also highlight GNU extensions (default: False).

New in Pygments 1.3.

Short names:modula2, m2
Filename patterns:*.def, *.mod
Mimetypes:text/x-modula2
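
For example, to select the ISO dialect with GNU extensions, a small sketch:

>>> from pygments.lexers import Modula2Lexer
>>> lexer = Modula2Lexer(iso=True, gm2ext=True)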

MonkeyLexer

For Monkey source code.

New in Pygments 1.6.

Short names:monkey
Filename patterns:*.monkey
Mimetypes:text/x-monkey

NimrodLexer

For Nimrod source code.

New in Pygments 1.5.

Short names:nimrod, nim
Filename patterns:*.nim, *.nimrod
Mimetypes:text/x-nimrod

ObjectiveCLexer

For Objective-C source code with preprocessor directives.

Short names:objective-c, objectivec, obj-c, objc
Filename patterns:*.m, *.h
Mimetypes:text/x-objective-c

ObjectiveCppLexer

For Objective-C++ source code with preprocessor directives.

Short names:objective-c++, objectivec++, obj-c++, objc++
Filename patterns:*.mm, *.hh
Mimetypes:text/x-objective-c++

OocLexer

For Ooc source code.

New in Pygments 1.2.

Short names:ooc
Filename patterns:*.ooc
Mimetypes:text/x-ooc

PrologLexer

Lexer for Prolog files.

Short names:prolog
Filename patterns:*.prolog, *.pro, *.pl
Mimetypes:text/x-prolog

RustLexer

Lexer for Mozilla's Rust programming language.

New in Pygments 1.6.

Short names:rust
Filename patterns:*.rs, *.rc
Mimetypes:text/x-rustsrc

ValaLexer

For Vala source code with preprocessor directives.

New in Pygments 1.1.

Short names:vala, vapi
Filename patterns:*.vala, *.vapi
Mimetypes:text/x-vala

Lexers for .net languages

BooLexer

For Boo source code.

Short names:boo
Filename patterns:*.boo
Mimetypes:text/x-boo

CSharpAspxLexer

Lexer for highlighting C# within ASP.NET pages.

Short names:aspx-cs
Filename patterns:*.aspx, *.asax, *.ascx, *.ashx, *.asmx, *.axd
Mimetypes:None

CSharpLexer

For C# source code.

Additional options accepted:

unicodelevel

Determines which Unicode characters this lexer allows for identifiers. The possible values are:

  • none -- only the ASCII letters and numbers are allowed. This is the fastest selection.
  • basic -- all Unicode characters from the specification except category Lo are allowed.
  • full -- all Unicode characters as specified in the C# specs are allowed. Note that this means a considerable slowdown since the Lo category has more than 40,000 characters in it!

The default value is basic.

New in Pygments 0.8.

Short names:csharp, c#
Filename patterns:*.cs
Mimetypes:text/x-csharp
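
A minimal sketch of the unicodelevel option (the value is chosen for illustration):

>>> from pygments.lexers import CSharpLexer
>>> lexer = CSharpLexer(unicodelevel='full')  # slower, but accepts all spec-allowed identifiers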

FSharpLexer

For the F# language.

New in Pygments 1.5.

Short names:fsharp
Filename patterns:*.fs, *.fsi
Mimetypes:text/x-fsharp

NemerleLexer

For Nemerle source code.

Additional options accepted:

unicodelevel

Determines which Unicode characters this lexer allows for identifiers. The possible values are:

  • none -- only the ASCII letters and numbers are allowed. This is the fastest selection.
  • basic -- all Unicode characters from the specification except category Lo are allowed.
  • full -- all Unicode characters as specified in the C# specs are allowed. Note that this means a considerable slowdown since the Lo category has more than 40,000 characters in it!

The default value is basic.

New in Pygments 1.5.

Short names:nemerle
Filename patterns:*.n
Mimetypes:text/x-nemerle

VbNetAspxLexer

Lexer for highlighting Visual Basic.net within ASP.NET pages.

Short names:aspx-vb
Filename patterns:*.aspx, *.asax, *.ascx, *.ashx, *.asmx, *.axd
Mimetypes:None

VbNetLexer

For Visual Basic.NET source code.

Short names:vb.net, vbnet
Filename patterns:*.vb, *.bas
Mimetypes:text/x-vbnet, text/x-vba

Simple lexer for Microsoft Visual FoxPro source code

FoxProLexer

Lexer for Microsoft Visual FoxPro language.

FoxPro syntax allows shortening all keywords and function names to 4 characters. Shortened forms are not recognized by this lexer.

New in Pygments 1.6.

Short names:Clipper, XBase
Filename patterns:*.PRG, *.prg
Mimetypes:None

Lexers for functional languages

CommonLispLexer

A Common Lisp lexer.

New in Pygments 0.9.

Short names:common-lisp, cl
Filename patterns:*.cl, *.lisp, *.el
Mimetypes:text/x-common-lisp

CoqLexer

For the Coq theorem prover.

New in Pygments 1.5.

Short names:coq
Filename patterns:*.v
Mimetypes:text/x-coq

ElixirConsoleLexer

For Elixir interactive console (iex) output like:

iex> [head | tail] = [1,2,3]
[1,2,3]
iex> head
1
iex> tail
[2,3]
iex> [head | tail]
[1,2,3]
iex> length [head | tail]
3

New in Pygments 1.5.

Short names:iex
Filename patterns:None
Mimetypes:text/x-elixir-shellsession

ElixirLexer

For the Elixir language.

New in Pygments 1.5.

Short names:elixir, ex, exs
Filename patterns:*.ex, *.exs
Mimetypes:text/x-elixir

ErlangLexer

For the Erlang functional programming language.

Blame Jeremy Thurgood (http://jerith.za.net/).

New in Pygments 0.9.

Short names:erlang
Filename patterns:*.erl, *.hrl, *.es, *.escript
Mimetypes:text/x-erlang

ErlangShellLexer

Shell sessions in erl (for Erlang code).

New in Pygments 1.1.

Short names:erl
Filename patterns:*.erl-sh
Mimetypes:text/x-erl-shellsession

HaskellLexer

A Haskell lexer based on the lexemes defined in the Haskell 98 Report.

New in Pygments 0.8.

Short names:haskell, hs
Filename patterns:*.hs
Mimetypes:text/x-haskell

KokaLexer

Lexer for the Koka language.

New in Pygments 1.6.

Short names:koka
Filename patterns:*.kk, *.kki
Mimetypes:text/x-koka

LiterateHaskellLexer

For Literate Haskell (Bird-style or LaTeX) source.

Additional options accepted:

litstyle
If given, must be "bird" or "latex". If not given, the style is autodetected: if the first non-whitespace character in the source is a backslash or percent character, LaTeX is assumed, else Bird.

New in Pygments 0.9.

Short names:lhs, literate-haskell
Filename patterns:*.lhs
Mimetypes:text/x-literate-haskell
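
For example, to force Bird-style interpretation instead of autodetection, a small sketch:

>>> from pygments.lexers import LiterateHaskellLexer
>>> lexer = LiterateHaskellLexer(litstyle='bird')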

NewLispLexer

For newLISP source code (version 10.3.0).

New in Pygments 1.5.

Short names:newlisp
Filename patterns:*.lsp, *.nl
Mimetypes:text/x-newlisp, application/x-newlisp

OcamlLexer

For the OCaml language.

New in Pygments 0.7.

Short names:ocaml
Filename patterns:*.ml, *.mli, *.mll, *.mly
Mimetypes:text/x-ocaml

OpaLexer

Lexer for the Opa language (http://opalang.org).

New in Pygments 1.5.

Short names:opa
Filename patterns:*.opa
Mimetypes:text/x-opa

RacketLexer

Lexer for Racket source code (formerly known as PLT Scheme).

New in Pygments 1.6.

Short names:racket, rkt
Filename patterns:*.rkt, *.rktl
Mimetypes:text/x-racket, application/x-racket

SMLLexer

For the Standard ML language.

New in Pygments 1.5.

Short names:sml
Filename patterns:*.sml, *.sig, *.fun
Mimetypes:text/x-standardml, application/x-standardml

SchemeLexer

A Scheme lexer, parsing a stream and outputting the tokens needed to highlight Scheme code. This lexer could most probably easily be subclassed to parse other LISP dialects like Common Lisp, Emacs Lisp or AutoLisp.

This parser is checked with pastes from the LISP pastebin at http://paste.lisp.org/ to cover as much syntax as possible.

It supports the full Scheme syntax as defined in R5RS.

New in Pygments 0.6.

Short names:scheme, scm
Filename patterns:*.scm, *.ss
Mimetypes:text/x-scheme, application/x-scheme

Lexers for hardware descriptor languages

SystemVerilogLexer

Extends the Verilog lexer to recognise all SystemVerilog keywords from the IEEE 1800-2009 standard.

New in Pygments 1.5.

Short names:sv
Filename patterns:*.sv, *.svh
Mimetypes:text/x-systemverilog

VerilogLexer

For verilog source code with preprocessor directives.

New in Pygments 1.4.

Short names:v
Filename patterns:*.v
Mimetypes:text/x-verilog

VhdlLexer

For VHDL source code.

New in Pygments 1.5.

Short names:vhdl
Filename patterns:*.vhdl, *.vhd
Mimetypes:text/x-vhdl

Pygments lexers for JVM languages

AspectJLexer

For AspectJ source code.

New in Pygments 1.6.

Short names:aspectj
Filename patterns:*.aj
Mimetypes:text/x-aspectj

CeylonLexer

For Ceylon source code.

New in Pygments 1.6.

Short names:ceylon
Filename patterns:*.ceylon
Mimetypes:text/x-ceylon

ClojureLexer

Lexer for Clojure source code.

New in Pygments 0.11.

Short names:clojure, clj
Filename patterns:*.clj
Mimetypes:text/x-clojure, application/x-clojure

GosuLexer

For Gosu source code.

New in Pygments 1.5.

Short names:gosu
Filename patterns:*.gs, *.gsx, *.gsp, *.vark
Mimetypes:text/x-gosu

GosuTemplateLexer

For Gosu templates.

New in Pygments 1.5.

Short names:gst
Filename patterns:*.gst
Mimetypes:text/x-gosu-template

GroovyLexer

For Groovy source code.

New in Pygments 1.5.

Short names:groovy
Filename patterns:*.groovy
Mimetypes:text/x-groovy

IokeLexer

For Ioke (a strongly typed, dynamic, prototype-based programming language) source.

New in Pygments 1.4.

Short names:ioke, ik
Filename patterns:*.ik
Mimetypes:text/x-iokesrc

JavaLexer

For Java source code.

Short names:java
Filename patterns:*.java
Mimetypes:text/x-java

KotlinLexer

For Kotlin source code.

Additional options accepted:

unicodelevel

Determines which Unicode characters this lexer allows for identifiers. The possible values are:

  • none -- only the ASCII letters and numbers are allowed. This is the fastest selection.
  • basic -- all Unicode characters from the specification except category Lo are allowed.
  • full -- all Unicode characters as specified in the C# specs are allowed. Note that this means a considerable slowdown since the Lo category has more than 40,000 characters in it!

The default value is basic.

New in Pygments 1.5.

Short names:kotlin
Filename patterns:*.kt
Mimetypes:text/x-kotlin

ScalaLexer

For Scala source code.

Short names:scala
Filename patterns:*.scala
Mimetypes:text/x-scala

XtendLexer

For Xtend source code.

New in Pygments 1.6.

Short names:xtend
Filename patterns:*.xtend
Mimetypes:text/x-xtend

Lexers for math languages

BugsLexer

Pygments Lexer for OpenBugs and WinBugs models.

New in Pygments 1.6.

Short names:bugs, winbugs, openbugs
Filename patterns:*.bug
Mimetypes:None

IDLLexer

Pygments Lexer for IDL (Interactive Data Language).

New in Pygments 1.6.

Short names:idl
Filename patterns:*.pro
Mimetypes:text/idl

JagsLexer

Pygments Lexer for JAGS.

New in Pygments 1.6.

Short names:jags
Filename patterns:*.jag, *.bug
Mimetypes:None

JuliaConsoleLexer

For Julia console sessions. Modeled after MatlabSessionLexer.

New in Pygments 1.6.

Short names:jlcon
Filename patterns:None
Mimetypes:None

JuliaLexer

For Julia source code.

New in Pygments 1.6.

Short names:julia, jl
Filename patterns:*.jl
Mimetypes:text/x-julia, application/x-julia

MatlabLexer

For Matlab source code.

New in Pygments 0.10.

Short names:matlab
Filename patterns:*.m
Mimetypes:text/matlab

MatlabSessionLexer

For Matlab sessions. Modeled after PythonConsoleLexer. Contributed by Ken Schutte <kschutte@csail.mit.edu>.

New in Pygments 0.10.

Short names:matlabsession
Filename patterns:None
Mimetypes:None

MuPADLexer

A MuPAD lexer. Contributed by Christopher Creutzig <christopher@creutzig.de>.

New in Pygments 0.8.

Short names:mupad
Filename patterns:*.mu
Mimetypes:None

NumPyLexer

A Python lexer recognizing Numerical Python builtins.

New in Pygments 0.10.

Short names:numpy
Filename patterns:None
Mimetypes:None

OctaveLexer

For GNU Octave source code.

New in Pygments 1.5.

Short names:octave
Filename patterns:*.m
Mimetypes:text/octave

RConsoleLexer

For R console transcripts or R CMD BATCH output files.

Short names:rconsole, rout
Filename patterns:*.Rout
Mimetypes:None

RdLexer

Pygments Lexer for R documentation (Rd) files.

This is a very minimal implementation, highlighting little more than the macros. A description of Rd syntax is found in Writing R Extensions and Parsing Rd files.

New in Pygments 1.6.

Short names:rd
Filename patterns:*.Rd
Mimetypes:text/x-r-doc

SLexer

For S, S-plus, and R source code.

New in Pygments 0.10.

Short names:splus, s, r
Filename patterns:*.S, *.R, .Rhistory, .Rprofile
Mimetypes:text/S-plus, text/S, text/x-r-source, text/x-r, text/x-R, text/x-r-history, text/x-r-profile

ScilabLexer

For Scilab source code.

New in Pygments 1.5.

Short names:scilab
Filename patterns:*.sci, *.sce, *.tst
Mimetypes:text/scilab

StanLexer

Pygments Lexer for Stan models.

New in Pygments 1.6.

Short names:stan
Filename patterns:*.stan
Mimetypes:None

Lexers for other languages

ABAPLexer

Lexer for ABAP, SAP's integrated language.

New in Pygments 1.1.

Short names:abap
Filename patterns:*.abap
Mimetypes:text/x-abap

AppleScriptLexer

For AppleScript source code, including AppleScript Studio. Contributed by Andreas Amann <aamann@mac.com>.

Short names:applescript
Filename patterns:*.applescript
Mimetypes:None

AsymptoteLexer

For Asymptote source code.

New in Pygments 1.2.

Short names:asy, asymptote
Filename patterns:*.asy
Mimetypes:text/x-asymptote

AutoItLexer

For AutoIt files.

AutoIt is a freeware BASIC-like scripting language designed for automating the Windows GUI and general scripting.

New in Pygments 1.6.

Short names:autoit, Autoit
Filename patterns:*.au3
Mimetypes:text/x-autoit

AutohotkeyLexer

For autohotkey source code.

New in Pygments 1.4.

Short names:ahk
Filename patterns:*.ahk, *.ahkl
Mimetypes:text/x-autohotkey

AwkLexer

For Awk scripts.

New in Pygments 1.5.

Short names:awk, gawk, mawk, nawk
Filename patterns:*.awk
Mimetypes:application/x-awk

BefungeLexer

Lexer for the esoteric Befunge language.

New in Pygments 0.7.

Short names:befunge
Filename patterns:*.befunge
Mimetypes:application/x-befunge

BrainfuckLexer

Lexer for the esoteric BrainFuck language.

Short names:brainfuck, bf
Filename patterns:*.bf, *.b
Mimetypes:application/x-brainfuck

BroLexer

For Bro scripts.

New in Pygments 1.5.

Short names:bro
Filename patterns:*.bro
Mimetypes:None

CbmBasicV2Lexer

For CBM BASIC V2 sources.

New in Pygments 1.6.

Short names:cbmbas
Filename patterns:*.bas
Mimetypes:None

Cfengine3Lexer

Lexer for CFEngine3 policy files.

New in Pygments 1.5.

Short names:cfengine3, cf3
Filename patterns:*.cf
Mimetypes:None

ECLLexer

Lexer for the declarative big-data ECL language.

New in Pygments 1.5.

Short names:ecl
Filename patterns:*.ecl
Mimetypes:application/x-ecl

GherkinLexer

For Gherkin <http://github.com/aslakhellesoy/gherkin/> syntax.

New in Pygments 1.2.

Short names:Cucumber, cucumber, Gherkin, gherkin
Filename patterns:*.feature
Mimetypes:text/x-gherkin

GnuplotLexer

For Gnuplot plotting scripts.

New in Pygments 0.11.

Short names:gnuplot
Filename patterns:*.plot, *.plt
Mimetypes:text/x-gnuplot

GoodDataCLLexer

Lexer for GoodData-CL script files.

New in Pygments 1.4.

Short names:gooddata-cl
Filename patterns:*.gdc
Mimetypes:text/x-gooddata-cl

HybrisLexer

For Hybris source code.

New in Pygments 1.4.

Short names:hybris, hy
Filename patterns:*.hy, *.hyb
Mimetypes:text/x-hybris, application/x-hybris

KconfigLexer

For Linux-style Kconfig files.

New in Pygments 1.6.

Short names:kconfig, menuconfig, linux-config, kernel-config
Filename patterns:Kconfig, *Config.in*, external.in*, standard-modules.in
Mimetypes:text/x-kconfig

LogtalkLexer

For Logtalk source code.

New in Pygments 0.10.

Short names:logtalk
Filename patterns:*.lgt
Mimetypes:text/x-logtalk

MOOCodeLexer

For MOOCode (the MOO scripting language).

New in Pygments 0.9.

Short names:moocode
Filename patterns:*.moo
Mimetypes:text/x-moocode

MaqlLexer

Lexer for GoodData MAQL scripts.

New in Pygments 1.4.

Short names:maql
Filename patterns:*.maql
Mimetypes:text/x-gooddata-maql, application/x-gooddata-maql

ModelicaLexer

For Modelica source code.

New in Pygments 1.1.

Short names:modelica
Filename patterns:*.mo
Mimetypes:text/x-modelica

MscgenLexer

For Mscgen files.

New in Pygments 1.6.

Short names:mscgen, msc
Filename patterns:*.msc
Mimetypes:None

NSISLexer

For NSIS scripts.

New in Pygments 1.6.

Short names:nsis, nsi, nsh
Filename patterns:*.nsi, *.nsh
Mimetypes:text/x-nsis

NewspeakLexer

For Newspeak <http://newspeaklanguage.org/> syntax.

Short names:newspeak
Filename patterns:*.ns2
Mimetypes:text/x-newspeak

OpenEdgeLexer

Lexer for OpenEdge ABL (formerly Progress) source code.

New in Pygments 1.5.

Short names:openedge, abl, progress
Filename patterns:*.p, *.cls
Mimetypes:text/x-openedge, application/x-openedge

PostScriptLexer

Lexer for PostScript files.

The PostScript Language Reference published by Adobe at <http://partners.adobe.com/public/developer/en/ps/PLRM.pdf> is the authority for this.

New in Pygments 1.4.

Short names:postscript
Filename patterns:*.ps, *.eps
Mimetypes:application/postscript

PovrayLexer

For Persistence of Vision Raytracer files.

New in Pygments 0.11.

Short names:pov
Filename patterns:*.pov, *.inc
Mimetypes:text/x-povray

ProtoBufLexer

Lexer for Protocol Buffer definition files.

New in Pygments 1.4.

Short names:protobuf
Filename patterns:*.proto
Mimetypes:None

PuppetLexer

For Puppet configuration DSL.

New in Pygments 1.6.

Short names:puppet
Filename patterns:*.pp
Mimetypes:None

RPMSpecLexer

For RPM *.spec files.

New in Pygments 1.6.

Short names:spec
Filename patterns:*.spec
Mimetypes:text/x-rpm-spec

RebolLexer

A REBOL lexer.

New in Pygments 1.1.

Short names:rebol
Filename patterns:*.r, *.r3
Mimetypes:text/x-rebol

RedcodeLexer

A simple Redcode lexer based on ICWS'94. Contributed by Adam Blinkinsop <blinks@acm.org>.

New in Pygments 0.8.

Short names:redcode
Filename patterns:*.cw
Mimetypes:None

RobotFrameworkLexer

For Robot Framework test data.

Supports both space and pipe separated plain text formats.

New in Pygments 1.6.

Short names:RobotFramework, robotframework
Filename patterns:*.txt
Mimetypes:text/x-robotframework

SmalltalkLexer

For Smalltalk syntax. Contributed by Stefan Matthias Aust. Rewritten by Nils Winter.

New in Pygments 0.10.

Short names:smalltalk, squeak
Filename patterns:*.st
Mimetypes:text/x-smalltalk

SnobolLexer

Lexer for the SNOBOL4 programming language.

Recognizes the common ASCII equivalents of the original SNOBOL4 operators. Does not require spaces around binary operators.

New in Pygments 1.5.

Short names:snobol
Filename patterns:*.snobol
Mimetypes:text/x-snobol

SourcePawnLexer

For SourcePawn source code with preprocessor directives.

New in Pygments 1.6.

Short names:sp
Filename patterns:*.sp
Mimetypes:text/x-sourcepawn

UrbiscriptLexer

For UrbiScript source code.

New in Pygments 1.5.

Short names:urbiscript
Filename patterns:*.u
Mimetypes:application/x-urbiscript

VGLLexer

For SampleManager VGL source code.

New in Pygments 1.6.

Short names:vgl
Filename patterns:*.rpf
Mimetypes:None

Lexers for parser generators

AntlrActionScriptLexer

ANTLR with ActionScript Target

New in Pygments 1.1.

Short names:antlr-as, antlr-actionscript
Filename patterns:*.G, *.g
Mimetypes:None

AntlrCSharpLexer

ANTLR with C# Target

New in Pygments 1.1.

Short names:antlr-csharp, antlr-c#
Filename patterns:*.G, *.g
Mimetypes:None

AntlrCppLexer

ANTLR with CPP Target

New in Pygments 1.1.

Short names:antlr-cpp
Filename patterns:*.G, *.g
Mimetypes:None

AntlrJavaLexer

ANTLR with Java Target

New in Pygments 1.1.

Short names:antlr-java
Filename patterns:*.G, *.g
Mimetypes:None

AntlrLexer

Generic ANTLR Lexer. Should not be called directly; instead, use DelegatingLexer for your target language.

New in Pygments 1.1.

Short names:antlr
Filename patterns:None
Mimetypes:None

AntlrObjectiveCLexer

ANTLR with Objective-C Target

New in Pygments 1.1.

Short names:antlr-objc
Filename patterns:*.G, *.g
Mimetypes:None

AntlrPerlLexer

ANTLR with Perl Target

New in Pygments 1.1.

Short names:antlr-perl
Filename patterns:*.G, *.g
Mimetypes:None

AntlrPythonLexer

ANTLR with Python Target

New in Pygments 1.1.

Short names:antlr-python
Filename patterns:*.G, *.g
Mimetypes:None

AntlrRubyLexer

ANTLR with Ruby Target

New in Pygments 1.1.

Short names:antlr-ruby, antlr-rb
Filename patterns:*.G, *.g
Mimetypes:None

RagelCLexer

A lexer for Ragel in a C host file.

New in Pygments 1.1.

Short names:ragel-c
Filename patterns:*.rl
Mimetypes:None

RagelCppLexer

A lexer for Ragel in a CPP host file.

New in Pygments 1.1.

Short names:ragel-cpp
Filename patterns:*.rl
Mimetypes:None

RagelDLexer

A lexer for Ragel in a D host file.

New in Pygments 1.1.

Short names:ragel-d
Filename patterns:*.rl
Mimetypes:None

RagelEmbeddedLexer

A lexer for Ragel embedded in a host language file.

This will only highlight Ragel statements. If you want host language highlighting then call the language-specific Ragel lexer.

New in Pygments 1.1.

Short names:ragel-em
Filename patterns:*.rl
Mimetypes:None

RagelJavaLexer

A lexer for Ragel in a Java host file.

New in Pygments 1.1.

Short names:ragel-java
Filename patterns:*.rl
Mimetypes:None

RagelLexer

A pure Ragel lexer. Use this for fragments of Ragel. For .rl files, use RagelEmbeddedLexer instead (or one of the language-specific subclasses).

New in Pygments 1.1.

Short names:ragel
Filename patterns:None
Mimetypes:None

RagelObjectiveCLexer

A lexer for Ragel in an Objective C host file.

New in Pygments 1.1.

Short names:ragel-objc
Filename patterns:*.rl
Mimetypes:None

RagelRubyLexer

A lexer for Ragel in a Ruby host file.

New in Pygments 1.1.

Short names:ragel-ruby, ragel-rb
Filename patterns:*.rl
Mimetypes:None

TreetopLexer

A lexer for Treetop grammars.

New in Pygments 1.6.

Short names:treetop
Filename patterns:*.treetop, *.tt
Mimetypes:None

Lexers for various shells

BashLexer

Lexer for (ba|k|)sh shell scripts.

New in Pygments 0.6.

Short names:bash, sh, ksh
Filename patterns:*.sh, *.ksh, *.bash, *.ebuild, *.eclass, .bashrc, bashrc, .bash_*, bash_*
Mimetypes:application/x-sh, application/x-shellscript

BashSessionLexer

Lexer for simplistic shell sessions.

New in Pygments 1.1.

Short names:console
Filename patterns:*.sh-session
Mimetypes:application/x-shell-session

BatchLexer

Lexer for the DOS/Windows Batch file format.

New in Pygments 0.7.

Short names:bat
Filename patterns:*.bat, *.cmd
Mimetypes:application/x-dos-batch

PowerShellLexer

For Windows PowerShell code.

New in Pygments 1.5.

Short names:powershell, posh, ps1
Filename patterns:*.ps1
Mimetypes:text/x-powershell

TcshLexer

Lexer for tcsh scripts.

New in Pygments 0.10.

Short names:tcsh, csh
Filename patterns:*.tcsh, *.csh
Mimetypes:application/x-csh

Special lexers

RawTokenLexer

Recreate a token stream formatted with the RawTokenFormatter. This lexer raises exceptions during parsing if the token stream in the file is malformed.

Additional options accepted:

compress
If set to "gz" or "bz2", decompress the token stream with the given compression algorithm before lexing (default: "").
Short names:raw
Filename patterns:None
Mimetypes:application/x-pygments-tokens
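
A sketch of a round trip through the raw token format using the compress option (gzip is chosen arbitrarily):

>>> from pygments import highlight
>>> from pygments.lexers import PythonLexer, RawTokenLexer
>>> from pygments.formatters import RawTokenFormatter
>>> raw = highlight('print 1', PythonLexer(), RawTokenFormatter(compress='gz'))
>>> tokens = list(RawTokenLexer(compress='gz').get_tokens(raw))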

TextLexer

"Null" lexer, doesn't highlight anything.

Short names:text
Filename patterns:*.txt
Mimetypes:text/plain

Lexers for various template engines' markup

CheetahHtmlLexer

Subclass of the CheetahLexer that highlights unlexed data with the HtmlLexer.

Short names:html+cheetah, html+spitfire
Filename patterns:None
Mimetypes:text/html+cheetah, text/html+spitfire

CheetahJavascriptLexer

Subclass of the CheetahLexer that highlights unlexed data with the JavascriptLexer.

Short names:js+cheetah, javascript+cheetah, js+spitfire, javascript+spitfire
Filename patterns:None
Mimetypes:application/x-javascript+cheetah, text/x-javascript+cheetah, text/javascript+cheetah, application/x-javascript+spitfire, text/x-javascript+spitfire, text/javascript+spitfire

CheetahLexer

Generic cheetah templates lexer. Code that isn't Cheetah markup is yielded as Token.Other. This also works for spitfire templates which use the same syntax.

Short names:cheetah, spitfire
Filename patterns:*.tmpl, *.spt
Mimetypes:application/x-cheetah, application/x-spitfire

CheetahXmlLexer

Subclass of the CheetahLexer that highlights unlexed data with the XmlLexer.

Short names:xml+cheetah, xml+spitfire
Filename patterns:None
Mimetypes:application/xml+cheetah, application/xml+spitfire

ColdfusionHtmlLexer

Coldfusion markup in html

Short names:cfm
Filename patterns:*.cfm, *.cfml, *.cfc
Mimetypes:application/x-coldfusion

ColdfusionLexer

Coldfusion statements

Short names:cfs
Filename patterns:None
Mimetypes:None

CssDjangoLexer

Subclass of the DjangoLexer that highlights unlexed data with the CssLexer.

Short names:css+django, css+jinja
Filename patterns:None
Mimetypes:text/css+django, text/css+jinja

CssErbLexer

Subclass of ErbLexer which highlights unlexed data with the CssLexer.

Short names:css+erb, css+ruby
Filename patterns:None
Mimetypes:text/css+ruby

CssGenshiLexer

A lexer that highlights CSS definitions in genshi text templates.

Short names:css+genshitext, css+genshi
Filename patterns:None
Mimetypes:text/css+genshi

CssPhpLexer

Subclass of PhpLexer which highlights unmatched data with the CssLexer.

Short names:css+php
Filename patterns:None
Mimetypes:text/css+php

CssSmartyLexer

Subclass of the SmartyLexer that highlights unlexed data with the CssLexer.

Short names:css+smarty
Filename patterns:None
Mimetypes:text/css+smarty

DjangoLexer

Generic django and jinja template lexer.

It just highlights django/jinja code between the preprocessor directives, other data is left untouched by the lexer.

Short names:django, jinja
Filename patterns:None
Mimetypes:application/x-django-templating, application/x-jinja

ErbLexer

Generic ERB (Ruby Templating) lexer.

Just highlights ruby code between the preprocessor directives, other data is left untouched by the lexer.

All options are also forwarded to the RubyLexer.

Short names:erb
Filename patterns:None
Mimetypes:application/x-ruby-templating

EvoqueHtmlLexer

Subclass of the EvoqueLexer that highlights unlexed data with the HtmlLexer.

New in Pygments 1.1.

Short names:html+evoque
Filename patterns:*.html
Mimetypes:text/html+evoque

EvoqueLexer

For files using the Evoque templating system.

New in Pygments 1.1.

Short names:evoque
Filename patterns:*.evoque
Mimetypes:application/x-evoque

EvoqueXmlLexer

Subclass of the EvoqueLexer that highlights unlexed data with the XmlLexer.

New in Pygments 1.1.

Short names:xml+evoque
Filename patterns:*.xml
Mimetypes:application/xml+evoque

GenshiLexer

A lexer that highlights genshi and kid XML templates.

Short names:genshi, kid, xml+genshi, xml+kid
Filename patterns:*.kid
Mimetypes:application/x-genshi, application/x-kid

GenshiTextLexer

A lexer that highlights genshi text templates.

Short names:genshitext
Filename patterns:None
Mimetypes:application/x-genshi-text, text/x-genshi

HtmlDjangoLexer

Subclass of the DjangoLexer that highlights unlexed data with the HtmlLexer.

Nested Javascript and CSS is highlighted too.

Short names:html+django, html+jinja
Filename patterns:None
Mimetypes:text/html+django, text/html+jinja

HtmlGenshiLexer

A lexer that highlights genshi and kid HTML templates.

Short names:html+genshi, html+kid
Filename patterns:None
Mimetypes:text/html+genshi

HtmlPhpLexer

Subclass of PhpLexer that highlights unhandled data with the HtmlLexer.

Nested Javascript and CSS is highlighted too.

Short names:html+php
Filename patterns:*.phtml
Mimetypes:application/x-php, application/x-httpd-php, application/x-httpd-php3, application/x-httpd-php4, application/x-httpd-php5

HtmlSmartyLexer

Subclass of the SmartyLexer that highlights unlexed data with the HtmlLexer.

Nested Javascript and CSS is highlighted too.

Short names:html+smarty
Filename patterns:None
Mimetypes:text/html+smarty

JavascriptDjangoLexer

Subclass of the DjangoLexer that highlights unlexed data with the JavascriptLexer.

Short names:js+django, javascript+django, js+jinja, javascript+jinja
Filename patterns:None
Mimetypes:application/x-javascript+django, application/x-javascript+jinja, text/x-javascript+django, text/x-javascript+jinja, text/javascript+django, text/javascript+jinja

JavascriptErbLexer

Subclass of ErbLexer which highlights unlexed data with the JavascriptLexer.

Short names:js+erb, javascript+erb, js+ruby, javascript+ruby
Filename patterns:None
Mimetypes:application/x-javascript+ruby, text/x-javascript+ruby, text/javascript+ruby

JavascriptGenshiLexer

A lexer that highlights javascript code in genshi text templates.

Short names:js+genshitext, js+genshi, javascript+genshitext, javascript+genshi
Filename patterns:None
Mimetypes:application/x-javascript+genshi, text/x-javascript+genshi, text/javascript+genshi

JavascriptPhpLexer

Subclass of PhpLexer which highlights unmatched data with the JavascriptLexer.

Short names:js+php, javascript+php
Filename patterns:None
Mimetypes:application/x-javascript+php, text/x-javascript+php, text/javascript+php

JavascriptSmartyLexer

Subclass of the SmartyLexer that highlights unlexed data with the JavascriptLexer.

Short names:js+smarty, javascript+smarty
Filename patterns:None
Mimetypes:application/x-javascript+smarty, text/x-javascript+smarty, text/javascript+smarty

JspLexer

Lexer for Java Server Pages.

New in Pygments 0.7.

Short names:jsp
Filename patterns:*.jsp
Mimetypes:application/x-jsp

LassoCssLexer

Subclass of the LassoLexer which highlights unhandled data with the CssLexer.

New in Pygments 1.6.

Short names:css+lasso
Filename patterns:None
Mimetypes:text/css+lasso

LassoHtmlLexer

Subclass of the LassoLexer which highlights unhandled data with the HtmlLexer.

Nested JavaScript and CSS is also highlighted.

New in Pygments 1.6.

Short names:html+lasso
Filename patterns:None
Mimetypes:text/html+lasso, application/x-httpd-lasso, application/x-httpd-lasso[89]

LassoJavascriptLexer

Subclass of the LassoLexer which highlights unhandled data with the JavascriptLexer.

New in Pygments 1.6.

Short names:js+lasso, javascript+lasso
Filename patterns:None
Mimetypes:application/x-javascript+lasso, text/x-javascript+lasso, text/javascript+lasso

LassoXmlLexer

Subclass of the LassoLexer which highlights unhandled data with the XmlLexer.

New in Pygments 1.6.

Short names:xml+lasso
Filename patterns:None
Mimetypes:application/xml+lasso

MakoCssLexer

Subclass of the MakoLexer that highlights unlexed data with the CssLexer.

New in Pygments 0.7.

Short names:css+mako
Filename patterns:None
Mimetypes:text/css+mako

MakoHtmlLexer

Subclass of the MakoLexer that highlights unlexed data with the HtmlLexer.

New in Pygments 0.7.

Short names:html+mako
Filename patterns:None
Mimetypes:text/html+mako

MakoJavascriptLexer

Subclass of the MakoLexer that highlights unlexed data with the JavascriptLexer.

New in Pygments 0.7.

Short names:js+mako, javascript+mako
Filename patterns:None
Mimetypes:application/x-javascript+mako, text/x-javascript+mako, text/javascript+mako

MakoLexer

Generic mako templates lexer. Code that isn't Mako markup is yielded as Token.Other.

New in Pygments 0.7.

Short names:mako
Filename patterns:*.mao
Mimetypes:application/x-mako

MakoXmlLexer

Subclass of the MakoLexer that highlights unlexed data with the XmlLexer.

New in Pygments 0.7.

Short names:xml+mako
Filename patterns:None
Mimetypes:application/xml+mako

MasonLexer

Generic mason templates lexer. Stolen from Myghty lexer. Code that isn't Mason markup is HTML.

New in Pygments 1.4.

Short names:mason
Filename patterns:*.m, *.mhtml, *.mc, *.mi, autohandler, dhandler
Mimetypes:application/x-mason

MyghtyCssLexer

Subclass of the MyghtyLexer that highlights unlexed data with the CssLexer.

New in Pygments 0.6.

Short names:css+myghty
Filename patterns:None
Mimetypes:text/css+myghty

MyghtyHtmlLexer

Subclass of the MyghtyLexer that highlights unlexed data with the HtmlLexer.

New in Pygments 0.6.

Short names:html+myghty
Filename patterns:None
Mimetypes:text/html+myghty

MyghtyJavascriptLexer

Subclass of the MyghtyLexer that highlights unlexed data with the JavascriptLexer.

New in Pygments 0.6.

Short names:js+myghty, javascript+myghty
Filename patterns:None
Mimetypes:application/x-javascript+myghty, text/x-javascript+myghty, text/javascript+mygthy

MyghtyLexer

Generic myghty templates lexer. Code that isn't Myghty markup is yielded as Token.Other.

New in Pygments 0.6.

Short names:myghty
Filename patterns:*.myt, autodelegate
Mimetypes:application/x-myghty

MyghtyXmlLexer

Subclass of the MyghtyLexer that highlights unlexed data with the XmlLexer.

New in Pygments 0.6.

Short names:xml+myghty
Filename patterns:None
Mimetypes:application/xml+myghty

RhtmlLexer

Subclass of the ERB lexer that highlights the unlexed data with the html lexer.

Nested Javascript and CSS is highlighted too.

Short names:rhtml, html+erb, html+ruby
Filename patterns:*.rhtml
Mimetypes:text/html+ruby

SmartyLexer

Generic Smarty template lexer.

Just highlights smarty code between the preprocessor directives, other data is left untouched by the lexer.

Short names:smarty
Filename patterns:*.tpl
Mimetypes:application/x-smarty

SspLexer

Lexer for Scalate Server Pages.

New in Pygments 1.4.

Short names:ssp
Filename patterns:*.ssp
Mimetypes:application/x-ssp

TeaTemplateLexer

Lexer for Tea Templates.

New in Pygments 1.5.

Short names:tea
Filename patterns:*.tea
Mimetypes:text/x-tea

VelocityHtmlLexer

Subclass of the VelocityLexer that highlights unlexed data with the HtmlLexer.

Short names:html+velocity
Filename patterns:None
Mimetypes:text/html+velocity

VelocityLexer

Generic Velocity template lexer.

Just highlights velocity directives and variable references, other data is left untouched by the lexer.

Short names:velocity
Filename patterns:*.vm, *.fhtml
Mimetypes:None

VelocityXmlLexer

Subclass of the VelocityLexer that highlights unlexed data with the XmlLexer.

Short names:xml+velocity
Filename patterns:None
Mimetypes:application/xml+velocity

XmlDjangoLexer

Subclass of the DjangoLexer that highlights unlexed data with the XmlLexer.

Short names:xml+django, xml+jinja
Filename patterns:None
Mimetypes:application/xml+django, application/xml+jinja

XmlErbLexer

Subclass of ErbLexer which highlights data outside preprocessor directives with the XmlLexer.

Short names:xml+erb, xml+ruby
Filename patterns:None
Mimetypes:application/xml+ruby

XmlPhpLexer

Subclass of PhpLexer that highlights unhandled data with the XmlLexer.

Short names:xml+php
Filename patterns:None
Mimetypes:application/xml+php

XmlSmartyLexer

Subclass of the SmartyLexer that highlights unlexed data with the XmlLexer.

Short names:xml+smarty
Filename patterns:None
Mimetypes:application/xml+smarty

Lexers for non-source code file types

ApacheConfLexer

Lexer for configuration files following the Apache config file format.

New in Pygments 0.6.

Short names:apacheconf, aconf, apache
Filename patterns:.htaccess, apache.conf, apache2.conf
Mimetypes:text/x-apacheconf

BBCodeLexer

A lexer that highlights BBCode(-like) syntax.

New in Pygments 0.6.

Short names:bbcode
Filename patterns:None
Mimetypes:text/x-bbcode

BaseMakefileLexer

Lexer for simple Makefiles (no preprocessing).

New in Pygments 0.10.

Short names:basemake
Filename patterns:None
Mimetypes:None

CMakeLexer

Lexer for CMake files.

New in Pygments 1.2.

Short names:cmake
Filename patterns:*.cmake, CMakeLists.txt
Mimetypes:text/x-cmake

DarcsPatchLexer

DarcsPatchLexer is a lexer for the various versions of the darcs patch format. Examples of this format are derived by commands such as darcs annotate --patch and darcs send.

New in Pygments 0.10.

Short names:dpatch
Filename patterns:*.dpatch, *.darcspatch
Mimetypes:None

DebianControlLexer

Lexer for Debian control files and apt-cache show <pkg> outputs.

New in Pygments 0.9.

Short names:control
Filename patterns:control
Mimetypes:None

DiffLexer

Lexer for unified or context-style diffs or patches.

Short names:diff, udiff
Filename patterns:*.diff, *.patch
Mimetypes:text/x-diff, text/x-patch

GettextLexer

Lexer for Gettext catalog files.

New in Pygments 0.9.

Short names:pot, po
Filename patterns:*.pot, *.po
Mimetypes:application/x-gettext, text/x-gettext, text/gettext

GroffLexer

Lexer for the (g)roff typesetting language, supporting groff extensions. Mainly useful for highlighting manpage sources.

New in Pygments 0.6.

Short names:groff, nroff, man
Filename patterns:*.[1234567], *.man
Mimetypes:application/x-troff, text/troff

HttpLexer

Lexer for HTTP sessions.

New in Pygments 1.5.

Short names:http
Filename patterns:None
Mimetypes:None

HxmlLexer

Lexer for haXe build files.

New in Pygments 1.6.

Short names:haxeml, hxml
Filename patterns:*.hxml
Mimetypes:None

IniLexer

Lexer for configuration files in INI style.

Short names:ini, cfg
Filename patterns:*.ini, *.cfg
Mimetypes:text/x-ini

IrcLogsLexer

Lexer for IRC logs in irssi, xchat or weechat style.

Short names:irc
Filename patterns:*.weechatlog
Mimetypes:text/x-irclog

LighttpdConfLexer

Lexer for Lighttpd configuration files.

New in Pygments 0.11.

Short names:lighty, lighttpd
Filename patterns:None
Mimetypes:text/x-lighttpd-conf

MakefileLexer

Lexer for BSD and GNU make extensions (lenient enough to handle both in the same file even).

Rewritten in Pygments 0.10.

Short names:make, makefile, mf, bsdmake
Filename patterns:*.mak, Makefile, makefile, Makefile.*, GNUmakefile
Mimetypes:text/x-makefile

MoinWikiLexer

For MoinMoin (and Trac) Wiki markup.

New in Pygments 0.7.

Short names:trac-wiki, moin
Filename patterns:None
Mimetypes:text/x-trac-wiki

NginxConfLexer

Lexer for Nginx configuration files.

New in Pygments 0.11.

Short names:nginx
Filename patterns:None
Mimetypes:text/x-nginx-conf

PropertiesLexer

Lexer for configuration files in Java's properties format.

New in Pygments 1.4.

Short names:properties
Filename patterns:*.properties
Mimetypes:text/x-java-properties

PyPyLogLexer

Lexer for PyPy log files.

New in Pygments 1.5.

Short names:pypylog, pypy
Filename patterns:*.pypylog
Mimetypes:application/x-pypylog

RegeditLexer

Lexer for Windows Registry files produced by regedit.

New in Pygments 1.6.

Short names:None
Filename patterns:*.reg
Mimetypes:text/x-windows-registry

RstLexer

For reStructuredText markup.

New in Pygments 0.7.

Additional options accepted:

handlecodeblocks
Highlight the contents of .. sourcecode:: language and .. code:: language directives with a lexer for the given language (default: True). New in Pygments 0.8.
Short names:rst, rest, restructuredtext
Filename patterns:*.rst, *.rest
Mimetypes:text/x-rst, text/prs.fallenstein.rst

SourcesListLexer

Lexer that highlights Debian sources.list files.

New in Pygments 0.7.

Short names:sourceslist, sources.list
Filename patterns:sources.list
Mimetypes:None

SquidConfLexer

Lexer for Squid configuration files.

New in Pygments 0.9.

Short names:squidconf, squid.conf, squid
Filename patterns:squid.conf
Mimetypes:text/x-squidconf

TexLexer

Lexer for the TeX and LaTeX typesetting languages.

Short names:tex, latex
Filename patterns:*.tex, *.aux, *.toc
Mimetypes:text/x-tex, text/x-latex

VimLexer

Lexer for VimL script files.

New in Pygments 0.8.

Short names:vim
Filename patterns:*.vim, .vimrc, .exrc, .gvimrc, vimrc, exrc, gvimrc
Mimetypes:text/x-vim

YamlLexer

Lexer for YAML, a human-friendly data serialization language.

New in Pygments 0.11.

Short names:yaml
Filename patterns:*.yaml, *.yml
Mimetypes:text/x-yaml

Iterating over all lexers

New in Pygments 0.6.

To get all lexers (both the builtin and the plugin ones), you can use the get_all_lexers() function from the pygments.lexers module:

>>> from pygments.lexers import get_all_lexers
>>> i = get_all_lexers()
>>> i.next()
('Diff', ('diff',), ('*.diff', '*.patch'), ('text/x-diff', 'text/x-patch'))
>>> i.next()
('Delphi', ('delphi', 'objectpascal', 'pas', 'pascal'), ('*.pas',), ('text/x-pascal',))
>>> i.next()
('XML+Ruby', ('xml+erb', 'xml+ruby'), (), ())

As you can see, the return value is an iterator which yields tuples in the form (name, aliases, filetypes, mimetypes).

Pygments-1.6/docs/build/api.html0000644000175000017500000004776512103430066015746 0ustar piotrpiotr The full Pygments API — Pygments

Pygments

The full Pygments API

« Back To Index

This page describes the Pygments API.

High-level API

Functions from the pygments module:

def lex(code, lexer):
Lex code with the lexer (must be a Lexer instance) and return an iterable of tokens. Currently, this only calls lexer.get_tokens().
def format(tokens, formatter, outfile=None):
Format a token stream (iterable of tokens) tokens with the formatter (must be a Formatter instance). The result is written to outfile, or if that is None, returned as a string.
def highlight(code, lexer, formatter, outfile=None):
This is the most high-level highlighting function. It combines lex and format in one function.
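
As an illustrative sketch of how these three functions relate (the code snippet and the lexer/formatter choice are made up for this example):

from pygments import lex, format, highlight
from pygments.lexers import PythonLexer
from pygments.formatters import HtmlFormatter

code = "print 'Hello, world!'"
tokens = lex(code, PythonLexer())        # iterable of (tokentype, value) pairs
html = format(tokens, HtmlFormatter())   # outfile is None, so a string is returned
# highlight() combines both steps:
assert html == highlight(code, PythonLexer(), HtmlFormatter())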

Functions from pygments.lexers:

def get_lexer_by_name(alias, **options):

Return an instance of a Lexer subclass that has alias in its aliases list. The lexer is given the options at its instantiation.

Will raise pygments.util.ClassNotFound if no lexer with that alias is found.

def get_lexer_for_filename(fn, **options):

Return a Lexer subclass instance that has a filename pattern matching fn. The lexer is given the options at its instantiation.

Will raise pygments.util.ClassNotFound if no lexer for that filename is found.

def get_lexer_for_mimetype(mime, **options):

Return a Lexer subclass instance that has mime in its mimetype list. The lexer is given the options at its instantiation.

Will raise pygments.util.ClassNotFound if no lexer for that mimetype is found.

def guess_lexer(text, **options):

Return a Lexer subclass instance that's guessed from the text in text. For that, the analyse_text() method of every known lexer class is called with the text as argument, and the lexer which returned the highest value will be instantiated and returned.

pygments.util.ClassNotFound is raised if no lexer thinks it can handle the content.

def guess_lexer_for_filename(filename, text, **options):

As guess_lexer(), but only lexers which have a pattern in filenames or alias_filenames that matches filename are taken into consideration.

pygments.util.ClassNotFound is raised if no lexer thinks it can handle the content.

def get_all_lexers():

Return an iterable over all registered lexers, yielding tuples in the format:

(longname, tuple of aliases, tuple of filename patterns, tuple of mimetypes)

New in Pygments 0.6.
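
A short sketch of these lookup functions in use; the file name, mimetype and code snippet are invented, and the guess_lexer() result depends on the analyse_text() heuristics:

from pygments.lexers import (get_lexer_by_name, get_lexer_for_filename,
                             get_lexer_for_mimetype, guess_lexer)
from pygments.util import ClassNotFound

lexer = get_lexer_by_name('python', stripall=True)
lexer = get_lexer_for_filename('example.rb')     # matches *.rb -> RubyLexer
lexer = get_lexer_for_mimetype('text/x-csrc')    # -> CLexer
try:
    lexer = guess_lexer('#!/usr/bin/env python\nprint 42\n')
except ClassNotFound:
    lexer = get_lexer_by_name('text')            # fall back to the null lexer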

Functions from pygments.formatters:

def get_formatter_by_name(alias, **options):

Return an instance of a Formatter subclass that has alias in its aliases list. The formatter is given the options at its instantiation.

Will raise pygments.util.ClassNotFound if no formatter with that alias is found.

def get_formatter_for_filename(fn, **options):

Return a Formatter subclass instance that has a filename pattern matching fn. The formatter is given the options at its instantiation.

Will raise pygments.util.ClassNotFound if no formatter for that filename is found.
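
For example (the output file name is invented; keyword options are passed through to the formatter):

from pygments.formatters import (get_formatter_by_name,
                                 get_formatter_for_filename)

formatter = get_formatter_by_name('html', linenos=True)
formatter = get_formatter_for_filename('out.tex')    # matches *.tex -> LatexFormatter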

Functions from pygments.styles:

def get_style_by_name(name):

Return a style class by its short name. The names of the builtin styles are listed in pygments.styles.STYLE_MAP.

Will raise pygments.util.ClassNotFound if no style of that name is found.

def get_all_styles():

Return an iterable over all registered styles, yielding their names.

New in Pygments 0.6.
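
A short example:

from pygments.styles import STYLE_MAP, get_style_by_name, get_all_styles

print(sorted(STYLE_MAP))           # short names of the builtin styles
style = get_style_by_name('default')
for name in get_all_styles():      # also includes styles registered by plugins
    print(name)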

Lexers

A lexer (derived from pygments.lexer.Lexer) has the following functions:

def __init__(self, **options):

The constructor. Takes a **keywords dictionary of options. Every subclass must first process its own options and then call the Lexer constructor, since it processes the stripnl, stripall and tabsize options.

An example looks like this:

def __init__(self, **options):
    self.compress = options.get('compress', '')
    Lexer.__init__(self, **options)

As these options must all be specifiable as strings (due to the command line usage), there are various utility functions available to help with that; see Option processing.

def get_tokens(self, text):

This method is the basic interface of a lexer. It is called by the highlight() function. It must process the text and return an iterable of (tokentype, value) pairs from text.

Normally, you don't need to override this method. The default implementation processes the stripnl, stripall and tabsize options and then yields all tokens from get_tokens_unprocessed(), with the index dropped.

def get_tokens_unprocessed(self, text):

This method should process the text and return an iterable of (index, tokentype, value) tuples where index is the starting position of the token within the input text.

This method must be overridden by subclasses.

def analyse_text(text):
A static method which is called for lexer guessing. It should analyse the text and return a float in the range from 0.0 to 1.0. If it returns 0.0, the lexer will not be selected as the most probable one; if it returns 1.0, it will be selected immediately.
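
To make this interface concrete, here is a minimal sketch of a lexer that only overrides get_tokens_unprocessed(); the class and its name/alias are hypothetical and exist only for this example:

import re

from pygments.lexer import Lexer
from pygments.token import Name, Text

class WordLexer(Lexer):
    """Hypothetical lexer: words become Name tokens, everything else Text."""
    name = 'Word'
    aliases = ['word-demo']
    filenames = []

    def get_tokens_unprocessed(self, text):
        # every character is matched either by \w+ or by \W+
        for match in re.finditer(r'(\w+)|(\W+)', text):
            tokentype = Name if match.group(1) else Text
            yield match.start(), tokentype, match.group()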

For a list of known tokens have a look at the Tokens page.

A lexer can also have the following attributes (in fact, all of them are mandatory except alias_filenames); they are used by the builtin lookup mechanism.

name
Full name for the lexer, in human-readable form.
aliases
A list of short, unique identifiers that can be used to look up the lexer from a list, e.g. using get_lexer_by_name().
filenames
A list of fnmatch patterns that match filenames which contain content for this lexer. The patterns in this list should be unique among all lexers.
alias_filenames
A list of fnmatch patterns that match filenames which may or may not contain content for this lexer. This list is used by the guess_lexer_for_filename() function, to determine which lexers are then included in guessing the correct one. That means that e.g. every lexer for HTML and a template language should include *.html in this list.
mimetypes
A list of MIME types for content that can be lexed with this lexer.

Formatters

A formatter (derived from pygments.formatter.Formatter) has the following functions:

def __init__(self, **options):

As with lexers, this constructor processes options and then must call the base class __init__.

The Formatter class recognizes the options style, full and title. It is up to the formatter class whether it uses them.

def get_style_defs(self, arg=''):

This method must return statements or declarations suitable to define the current style for subsequent highlighted text (e.g. CSS classes in the HtmlFormatter).

The optional argument arg can be used to modify the generation and is formatter dependent (it is standardized because it can be given on the command line).

This method is called by the -S command-line option; the arg is then given by the -a option.

def format(self, tokensource, outfile):

This method must format the tokens from the tokensource iterable and write the formatted version to the file object outfile.

Formatter options can control how exactly the tokens are converted.
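
A minimal sketch of this interface (the class name and alias are hypothetical, and the encoding option is ignored for brevity; the builtin NullFormatter behaves essentially like this):

from pygments.formatter import Formatter

class PassthroughFormatter(Formatter):
    """Hypothetical formatter that writes the token values unchanged."""
    name = 'Passthrough'
    aliases = ['passthrough-demo']
    filenames = []

    def format(self, tokensource, outfile):
        for tokentype, value in tokensource:
            outfile.write(value)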

A formatter must have the following attributes that are used by the builtin lookup mechanism. (New in Pygments 0.7.)

name
Full name for the formatter, in human-readable form.
aliases
A list of short, unique identifiers that can be used to look up the formatter from a list, e.g. using get_formatter_by_name().
filenames
A list of fnmatch patterns that match filenames for which this formatter can produce output. The patterns in this list should be unique among all formatters.

Option processing

The pygments.util module has some utility functions usable for option processing:

class OptionError
This exception will be raised by all option processing functions if the type or value of the argument is not correct.
def get_bool_opt(options, optname, default=None):

Interpret the key optname from the dictionary options as a boolean and return it. Return default if optname is not in options.

The valid string values for True are 1, yes, true and on, the ones for False are 0, no, false and off (matched case-insensitively).

def get_int_opt(options, optname, default=None):
As get_bool_opt, but interpret the value as an integer.
def get_list_opt(options, optname, default=None):
If the key optname from the dictionary options is a string, split it at whitespace and return the resulting list. If it is already a list or a tuple, it is returned as a list.
def get_choice_opt(options, optname, allowed, default=None):
If the key optname from the dictionary is not in the sequence allowed, raise an error; otherwise return it. New in Pygments 0.8.
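
A short sketch of how these helpers behave; the option dict is invented, as it might arrive from the command line:

from pygments.util import get_bool_opt, get_int_opt, get_list_opt

options = {'stripall': 'yes', 'tabsize': '8', 'extrakeywords': 'foo bar'}
get_bool_opt(options, 'stripall', False)      # -> True
get_int_opt(options, 'tabsize', 4)            # -> 8
get_list_opt(options, 'extrakeywords', [])    # -> ['foo', 'bar']
get_bool_opt(options, 'missing', False)       # -> False (the default)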
Pygments-1.6/docs/build/installation.html0000644000175000017500000002304712073317553017674 0ustar piotrpiotr Installation — Pygments

Pygments

Installation

« Back To Index

Pygments requires at least Python 2.4 to work correctly. Just to clarify: there won't ever be support for Python versions below 2.4. However, there are no other dependencies.

Installing a released version

As a Python egg (via easy_install)

You can install the most recent Pygments version using easy_install:

sudo easy_install Pygments

This will install a Pygments egg in your Python installation's site-packages directory.

From the tarball release

  1. Download the most recent tarball from the download page
  2. Unpack the tarball
  3. sudo python setup.py install

Note that the last command will automatically download and install setuptools if you don't already have it installed. This requires a working internet connection.

This will install Pygments into your Python installation's site-packages directory.

Installing the development version

If you want to play around with the code

  1. Install Mercurial
  2. hg clone http://bitbucket.org/birkenfeld/pygments-main pygments
  3. cd pygments
  4. ln -s pygments /usr/lib/python2.X/site-packages
  5. ln -s pygmentize /usr/local/bin

As an alternative to steps 4 and 5 you can also do python setup.py develop which will install the package via setuptools in development mode.

Pygments-1.6/docs/build/tokens.html0000644000175000017500000005454312073317553016503 0ustar piotrpiotr Builtin Tokens — Pygments

Pygments

Builtin Tokens

« Back To Index

Inside the pygments.token module, there is a special object called Token that is used to create token types.

You can create a new token type by accessing an attribute of Token:

>>> from pygments.token import Token
>>> Token.String
Token.String
>>> Token.String is Token.String
True

Note that tokens are singletons so you can use the is operator for comparing token types.

As of Pygments 0.7 you can also use the in operator to perform set tests:

>>> from pygments.token import Comment
>>> Comment.Single in Comment
True
>>> Comment in Comment.Multi
False

This can be useful in filters and if you write lexers on your own without using the base lexers.

You can also split a token type into a hierarchy, and get the parent of it:

>>> String.split()
[Token, Token.Literal, Token.Literal.String]
>>> String.parent
Token.Literal

In principle, you can create an unlimited number of token types, but a style is not guaranteed to define highlighting rules for every token type. Because of that, Pygments proposes some global token types defined in the pygments.token.STANDARD_TYPES dict.
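
For example, the keys of STANDARD_TYPES are the proposed token types and the values are the short names that e.g. the HTML formatter uses as CSS classes:

>>> from pygments.token import STANDARD_TYPES, String
>>> STANDARD_TYPES[String]
's'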

For some tokens aliases are already defined:

>>> from pygments.token import String
>>> String
Token.Literal.String

Inside the pygments.token module the following aliases are defined:

Text Token.Text for any type of text data
Whitespace Token.Text.Whitespace for specially highlighted whitespace
Error Token.Error represents lexer errors
Other Token.Other special token for data not matched by a parser (e.g. HTML markup in PHP code)
Keyword Token.Keyword any kind of keywords
Name Token.Name variable/function names
Literal Token.Literal Any literals
String Token.Literal.String string literals
Number Token.Literal.Number number literals
Operator Token.Operator operators (+, not...)
Punctuation Token.Punctuation punctuation ([, (...)
Comment Token.Comment any kind of comments
Generic Token.Generic generic tokens (have a look at the explanation below)

The Whitespace token type is new in Pygments 0.8. It is used only by the VisibleWhitespaceFilter currently.

Normally you just create token types using the already defined aliases. For each of those token aliases, a number of subtypes exists (excluding the special tokens Token.Text, Token.Error and Token.Other).

The is_token_subtype() function in the pygments.token module can be used to test if a token type is a subtype of another (such as Name.Tag and Name). (This is the same as Name.Tag in Name. The overloaded in operator was newly introduced in Pygments 0.7; the function still exists for backwards compatibility.)
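
For example:

>>> from pygments.token import Name, is_token_subtype
>>> is_token_subtype(Name.Tag, Name)
True
>>> is_token_subtype(Name, Name.Tag)
False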

With Pygments 0.7, it's also possible to convert strings to token types (for example if you want to supply a token from the command line):

>>> from pygments.token import String, string_to_tokentype
>>> string_to_tokentype("String")
Token.Literal.String
>>> string_to_tokentype("Token.Literal.String")
Token.Literal.String
>>> string_to_tokentype(String)
Token.Literal.String

Keyword Tokens

Keyword
For any kind of keyword (especially if it doesn't match any of the subtypes of course).
Keyword.Constant
For keywords that are constants (e.g. None in future Python versions).
Keyword.Declaration
For keywords used for variable declaration (e.g. var in some programming languages like JavaScript).
Keyword.Namespace
For keywords used for namespace declarations (e.g. import in Python and Java and package in Java).
Keyword.Pseudo
For keywords that aren't really keywords (e.g. None in old Python versions).
Keyword.Reserved
For reserved keywords.
Keyword.Type
For builtin types that can't be used as identifiers (e.g. int, char etc. in C).

Name Tokens

Name
For any name (variable names, function names, classes).
Name.Attribute
For all attributes (e.g. in HTML tags).
Name.Builtin
Builtin names; names that are available in the global namespace.
Name.Builtin.Pseudo
Builtin names that are implicit (e.g. self in Ruby, this in Java).
Name.Class
Class names. Because no lexer can know if a name is a class or a function or something else, this token is meant for class declarations.
Name.Constant
Token type for constants. In some languages you can recognise a token by the way it's defined (the value after a const keyword for example). In other languages constants are uppercase by definition (Ruby).
Name.Decorator
Token type for decorators. Decorators are syntactic elements in the Python language. Similar syntax elements exist in C# and Java.
Name.Entity
Token type for special entities (e.g. &nbsp; in HTML).
Name.Exception
Token type for exception names (e.g. RuntimeError in Python). Some languages define exceptions in the function signature (Java). You can highlight the name of that exception using this token then.
Name.Function
Token type for function names.
Name.Label
Token type for label names (e.g. in languages that support goto).
Name.Namespace
Token type for namespaces (e.g. import paths in Java/Python) and names following the module/namespace keyword in other languages.
Name.Other
Other names. Normally unused.
Name.Tag
Tag names (in HTML/XML markup or configuration files).
Name.Variable
Token type for variables. Some languages have prefixes for variable names (PHP, Ruby, Perl). You can highlight them using this token.
Name.Variable.Class
same as Name.Variable but for class variables (also static variables).
Name.Variable.Global
same as Name.Variable but for global variables (used in Ruby, for example).
Name.Variable.Instance
same as Name.Variable but for instance variables.

Literals

Literal
For any literal (if not further defined).
Literal.Date
for date literals (e.g. 42d in Boo).
String
For any string literal.
String.Backtick
Token type for strings enclosed in backticks.
String.Char
Token type for single characters (e.g. Java, C).
String.Doc
Token type for documentation strings (for example Python).
String.Double
Double quoted strings.
String.Escape
Token type for escape sequences in strings.
String.Heredoc
Token type for "heredoc" strings (e.g. in Ruby or Perl).
String.Interpol
Token type for interpolated parts in strings (e.g. #{foo} in Ruby).
String.Other
Token type for any other strings (for example %q{foo} string constructs in Ruby).
String.Regex
Token type for regular expression literals (e.g. /foo/ in JavaScript).
String.Single
Token type for single quoted strings.
String.Symbol
Token type for symbols (e.g. :foo in LISP or Ruby).
Number
Token type for any number literal.
Number.Float
Token type for float literals (e.g. 42.0).
Number.Hex
Token type for hexadecimal number literals (e.g. 0xdeadbeef).
Number.Integer
Token type for integer literals (e.g. 42).
Number.Integer.Long
Token type for long integer literals (e.g. 42L in Python).
Number.Oct
Token type for octal literals.

Operators

Operator
For any punctuation operator (e.g. +, -).
Operator.Word
For any operator that is a word (e.g. not).

Punctuation

New in Pygments 0.7.

Punctuation
For any punctuation which is not an operator (e.g. [, (...)

Comments

Comment
Token type for any comment.
Comment.Multiline
Token type for multiline comments.
Comment.Preproc
Token type for preprocessor comments (also <?php/<% constructs).
Comment.Single
Token type for comments that end at the end of a line (e.g. # foo).
Comment.Special
Special data in comments. For example code tags, author and license information, etc.

Generic Tokens

Generic tokens are for special lexers like the DiffLexer, which doesn't really highlight a programming language but a patch file.

Generic
A generic, unstyled token. Normally you don't use this token type.
Generic.Deleted
Marks the token value as deleted.
Generic.Emph
Marks the token value as emphasized.
Generic.Error
Marks the token value as an error message.
Generic.Heading
Marks the token value as headline.
Generic.Inserted
Marks the token value as inserted.
Generic.Output
Marks the token value as program output (e.g. for python cli lexer).
Generic.Prompt
Marks the token value as command prompt (e.g. bash lexer).
Generic.Strong
Marks the token value as bold (e.g. for rst lexer).
Generic.Subheading
Marks the token value as subheadline.
Generic.Traceback
Marks the token value as a part of an error traceback.
Pygments-1.6/docs/build/unicode.html0000644000175000017500000002236012073317554016617 0ustar piotrpiotr Unicode and Encodings — Pygments

Pygments

Unicode and Encodings

« Back To Index

Since Pygments 0.6, all lexers use unicode strings internally. Because of that you might encounter the occasional UnicodeDecodeError if you pass strings with the wrong encoding.

By default, all lexers have their input encoding set to latin1. If you pass a lexer a string object (not unicode), it tries to decode the data using this encoding. You can override the encoding using the encoding lexer option. If you have the chardet library installed and set the encoding to chardet, it will analyse the text and automatically use the encoding it thinks is the right one:

from pygments.lexers import PythonLexer
lexer = PythonLexer(encoding='chardet')

The best way is to pass Pygments unicode objects. In that case you can't get unexpected output.

The formatters now send Unicode objects to the stream if you don't set the output encoding. You can do so by passing the formatters an encoding option:

from pygments.formatters import HtmlFormatter
f = HtmlFormatter(encoding='utf-8')

You will have to set this option if you have non-ASCII characters in the source and the output stream does not accept Unicode written to it! This is the case for all regular files and for terminals.

Note: The Terminal formatter tries to be smart: if its output stream has an encoding attribute, and you haven't set the option, it will encode any Unicode string with this encoding before writing it. This is the case for sys.stdout, for example. The other formatters don't have that behavior.

Another note: If you call Pygments via the command line (pygmentize), encoding is handled differently; see the command line docs.

New in Pygments 0.7: the formatters now also accept an outencoding option which will override the encoding option if given. This makes it possible to use a single options dict with lexers and formatters, and still have different input and output encodings.

Pygments-1.6/docs/build/index.html0000644000175000017500000002116512103430066016266 0ustar piotrpiotr Overview — Pygments

Pygments

Overview

Welcome to the Pygments documentation.


If you find bugs or have suggestions for the documentation, please look here for info on how to contact the team.

You can download an offline version of this documentation from the download page.

Pygments-1.6/Pygments.egg-info/0000755000175000017500000000000012103430105015527 5ustar piotrpiotrPygments-1.6/Pygments.egg-info/dependency_links.txt0000644000175000017500000000000112103430104021574 0ustar piotrpiotr Pygments-1.6/Pygments.egg-info/top_level.txt0000644000175000017500000000001112103430104020250 0ustar piotrpiotrpygments Pygments-1.6/Pygments.egg-info/entry_points.txt0000644000175000017500000000006612103430104021026 0ustar piotrpiotr[console_scripts] pygmentize = pygments.cmdline:main Pygments-1.6/Pygments.egg-info/SOURCES.txt0000644000175000017500000002747612103430104017432 0ustar piotrpiotrAUTHORS CHANGES LICENSE MANIFEST.in Makefile TODO ez_setup.py pygmentize setup.cfg setup.py Pygments.egg-info/PKG-INFO Pygments.egg-info/SOURCES.txt Pygments.egg-info/dependency_links.txt Pygments.egg-info/entry_points.txt Pygments.egg-info/not-zip-safe Pygments.egg-info/top_level.txt docs/generate.py docs/pygmentize.1 docs/build/api.html docs/build/authors.html docs/build/changelog.html docs/build/cmdline.html docs/build/filterdevelopment.html docs/build/filters.html docs/build/formatterdevelopment.html docs/build/formatters.html docs/build/index.html docs/build/installation.html docs/build/integrate.html docs/build/java.html docs/build/lexerdevelopment.html docs/build/lexers.html docs/build/moinmoin.html docs/build/plugins.html docs/build/quickstart.html docs/build/rstdirective.html docs/build/styles.html docs/build/tokens.html docs/build/unicode.html docs/src/api.txt docs/src/authors.txt docs/src/changelog.txt docs/src/cmdline.txt docs/src/filterdevelopment.txt docs/src/filters.txt docs/src/formatterdevelopment.txt docs/src/formatters.txt docs/src/index.txt docs/src/installation.txt docs/src/integrate.txt docs/src/java.txt docs/src/lexerdevelopment.txt docs/src/lexers.txt docs/src/moinmoin.txt docs/src/plugins.txt docs/src/quickstart.txt docs/src/rstdirective.txt docs/src/styles.txt docs/src/tokens.txt docs/src/unicode.txt external/autopygmentize external/lasso-builtins-generator-9.lasso external/markdown-processor.py external/moin-parser.py external/pygments.bashcomp external/rst-directive-old.py external/rst-directive.py pygments/__init__.py pygments/cmdline.py pygments/console.py pygments/filter.py pygments/formatter.py pygments/lexer.py pygments/plugin.py pygments/scanner.py pygments/style.py pygments/token.py pygments/unistring.py pygments/util.py pygments/filters/__init__.py pygments/formatters/__init__.py pygments/formatters/_mapping.py pygments/formatters/bbcode.py pygments/formatters/html.py pygments/formatters/img.py pygments/formatters/latex.py pygments/formatters/other.py pygments/formatters/rtf.py pygments/formatters/svg.py pygments/formatters/terminal.py pygments/formatters/terminal256.py pygments/lexers/__init__.py pygments/lexers/_asybuiltins.py pygments/lexers/_clbuiltins.py pygments/lexers/_lassobuiltins.py pygments/lexers/_luabuiltins.py pygments/lexers/_mapping.py pygments/lexers/_openedgebuiltins.py pygments/lexers/_phpbuiltins.py pygments/lexers/_postgres_builtins.py pygments/lexers/_robotframeworklexer.py pygments/lexers/_scilab_builtins.py pygments/lexers/_sourcemodbuiltins.py pygments/lexers/_stan_builtins.py pygments/lexers/_vimbuiltins.py pygments/lexers/agile.py pygments/lexers/asm.py pygments/lexers/compiled.py pygments/lexers/dalvik.py pygments/lexers/dotnet.py pygments/lexers/foxpro.py pygments/lexers/functional.py pygments/lexers/hdl.py pygments/lexers/jvm.py pygments/lexers/math.py pygments/lexers/other.py pygments/lexers/parsers.py 
pygments/lexers/shell.py pygments/lexers/special.py pygments/lexers/sql.py pygments/lexers/templates.py pygments/lexers/text.py pygments/lexers/web.py pygments/styles/__init__.py pygments/styles/autumn.py pygments/styles/borland.py pygments/styles/bw.py pygments/styles/colorful.py pygments/styles/default.py pygments/styles/emacs.py pygments/styles/friendly.py pygments/styles/fruity.py pygments/styles/manni.py pygments/styles/monokai.py pygments/styles/murphy.py pygments/styles/native.py pygments/styles/pastie.py pygments/styles/perldoc.py pygments/styles/rrt.py pygments/styles/tango.py pygments/styles/trac.py pygments/styles/vim.py pygments/styles/vs.py scripts/check_sources.py scripts/detect_missing_analyse_text.py scripts/epydoc.css scripts/find_codetags.py scripts/find_error.py scripts/get_vimkw.py scripts/pylintrc scripts/reindent.py scripts/vim2pygments.py tests/old_run.py tests/run.py tests/support.py tests/test_basic_api.py tests/test_clexer.py tests/test_cmdline.py tests/test_examplefiles.py tests/test_html_formatter.py tests/test_latex_formatter.py tests/test_perllexer.py tests/test_regexlexer.py tests/test_token.py tests/test_using_api.py tests/test_util.py tests/dtds/HTML4-f.dtd tests/dtds/HTML4-s.dtd tests/dtds/HTML4.dcl tests/dtds/HTML4.dtd tests/dtds/HTML4.soc tests/dtds/HTMLlat1.ent tests/dtds/HTMLspec.ent tests/dtds/HTMLsym.ent tests/examplefiles/ANTLRv3.g tests/examplefiles/AcidStateAdvanced.hs tests/examplefiles/AlternatingGroup.mu tests/examplefiles/BOM.js tests/examplefiles/CPDictionary.j tests/examplefiles/Config.in.cache tests/examplefiles/Constants.mo tests/examplefiles/DancingSudoku.lhs tests/examplefiles/Errors.scala tests/examplefiles/File.hy tests/examplefiles/Intro.java tests/examplefiles/Makefile tests/examplefiles/Object.st tests/examplefiles/OrderedMap.hx tests/examplefiles/SmallCheck.hs tests/examplefiles/Sorting.mod tests/examplefiles/Sudoku.lhs tests/examplefiles/addressbook.proto tests/examplefiles/antlr_throws tests/examplefiles/apache2.conf tests/examplefiles/as3_test.as tests/examplefiles/as3_test2.as tests/examplefiles/as3_test3.as tests/examplefiles/aspx-cs_example tests/examplefiles/autoit_submit.au3 tests/examplefiles/badcase.java tests/examplefiles/batchfile.bat tests/examplefiles/bigtest.nsi tests/examplefiles/boot-9.scm tests/examplefiles/ca65_example tests/examplefiles/cbmbas_example tests/examplefiles/cells.ps tests/examplefiles/ceval.c tests/examplefiles/cheetah_example.html tests/examplefiles/classes.dylan tests/examplefiles/condensed_ruby.rb tests/examplefiles/coq_RelationClasses tests/examplefiles/database.pytb tests/examplefiles/de.MoinMoin.po tests/examplefiles/demo.ahk tests/examplefiles/demo.cfm tests/examplefiles/django_sample.html+django tests/examplefiles/dwarf.cw tests/examplefiles/erl_session tests/examplefiles/escape_semicolon.clj tests/examplefiles/evil_regex.js tests/examplefiles/example.Rd tests/examplefiles/example.bug tests/examplefiles/example.c tests/examplefiles/example.ceylon tests/examplefiles/example.cls tests/examplefiles/example.cob tests/examplefiles/example.cpp tests/examplefiles/example.gs tests/examplefiles/example.gst tests/examplefiles/example.jag tests/examplefiles/example.kt tests/examplefiles/example.lua tests/examplefiles/example.monkey tests/examplefiles/example.moo tests/examplefiles/example.moon tests/examplefiles/example.msc tests/examplefiles/example.nim tests/examplefiles/example.ns2 tests/examplefiles/example.p tests/examplefiles/example.pas tests/examplefiles/example.prg 
tests/examplefiles/example.rb tests/examplefiles/example.reg tests/examplefiles/example.rhtml tests/examplefiles/example.rkt tests/examplefiles/example.rpf tests/examplefiles/example.sh-session tests/examplefiles/example.shell-session tests/examplefiles/example.sml tests/examplefiles/example.snobol tests/examplefiles/example.stan tests/examplefiles/example.tea tests/examplefiles/example.ts tests/examplefiles/example.u tests/examplefiles/example.weechatlog tests/examplefiles/example.xhtml tests/examplefiles/example.xtend tests/examplefiles/example.yaml tests/examplefiles/example2.aspx tests/examplefiles/example2.msc tests/examplefiles/example_elixir.ex tests/examplefiles/example_file.fy tests/examplefiles/firefox.mak tests/examplefiles/flipflop.sv tests/examplefiles/foo.sce tests/examplefiles/format.ml tests/examplefiles/fucked_up.rb tests/examplefiles/function.mu tests/examplefiles/functional.rst tests/examplefiles/garcia-wachs.kk tests/examplefiles/genclass.clj tests/examplefiles/genshi_example.xml+genshi tests/examplefiles/genshitext_example.genshitext tests/examplefiles/glsl.frag tests/examplefiles/glsl.vert tests/examplefiles/hello.smali tests/examplefiles/hello.sp tests/examplefiles/html+php_faulty.php tests/examplefiles/http_request_example tests/examplefiles/http_response_example tests/examplefiles/import.hs tests/examplefiles/inet_pton6.dg tests/examplefiles/intro.ik tests/examplefiles/ints.php tests/examplefiles/intsyn.fun tests/examplefiles/intsyn.sig tests/examplefiles/irb_heredoc tests/examplefiles/irc.lsp tests/examplefiles/java.properties tests/examplefiles/jbst_example1.jbst tests/examplefiles/jbst_example2.jbst tests/examplefiles/jinjadesignerdoc.rst tests/examplefiles/json.lasso tests/examplefiles/json.lasso9 tests/examplefiles/lighttpd_config.conf tests/examplefiles/linecontinuation.py tests/examplefiles/livescript-demo.ls tests/examplefiles/logos_example.xm tests/examplefiles/ltmain.sh tests/examplefiles/main.cmake tests/examplefiles/markdown.lsp tests/examplefiles/matlab_noreturn tests/examplefiles/matlab_sample tests/examplefiles/matlabsession_sample.txt tests/examplefiles/metagrammar.treetop tests/examplefiles/mg_sample.pro tests/examplefiles/minehunt.qml tests/examplefiles/minimal.ns2 tests/examplefiles/moin_SyntaxReference.txt tests/examplefiles/multiline_regexes.rb tests/examplefiles/nanomsg.intr tests/examplefiles/nasm_aoutso.asm tests/examplefiles/nasm_objexe.asm tests/examplefiles/nemerle_sample.n tests/examplefiles/nginx_nginx.conf tests/examplefiles/numbers.c tests/examplefiles/objc_example.m tests/examplefiles/objc_example2.m tests/examplefiles/perl_misc tests/examplefiles/perl_perl5db tests/examplefiles/perl_regex-delims tests/examplefiles/perlfunc.1 tests/examplefiles/phpMyAdmin.spec tests/examplefiles/phpcomplete.vim tests/examplefiles/pleac.in.rb tests/examplefiles/postgresql_test.txt tests/examplefiles/pppoe.applescript tests/examplefiles/psql_session.txt tests/examplefiles/py3_test.txt tests/examplefiles/pycon_test.pycon tests/examplefiles/pytb_test2.pytb tests/examplefiles/pytb_test3.pytb tests/examplefiles/python25-bsd.mak tests/examplefiles/qsort.prolog tests/examplefiles/r-console-transcript.Rout tests/examplefiles/ragel-cpp_rlscan tests/examplefiles/ragel-cpp_snippet tests/examplefiles/regex.js tests/examplefiles/reversi.lsp tests/examplefiles/robotframework.txt tests/examplefiles/ruby_func_def.rb tests/examplefiles/rust_example.rs tests/examplefiles/scilab.sci tests/examplefiles/session.dylan-console tests/examplefiles/sibling.prolog 
tests/examplefiles/simple.md tests/examplefiles/smarty_example.html tests/examplefiles/source.lgt tests/examplefiles/sources.list tests/examplefiles/sphere.pov tests/examplefiles/sqlite3.sqlite3-console tests/examplefiles/squid.conf tests/examplefiles/string.jl tests/examplefiles/string_delimiters.d tests/examplefiles/stripheredoc.sh tests/examplefiles/test.R tests/examplefiles/test.adb tests/examplefiles/test.asy tests/examplefiles/test.awk tests/examplefiles/test.bas tests/examplefiles/test.bmx tests/examplefiles/test.boo tests/examplefiles/test.bro tests/examplefiles/test.cs tests/examplefiles/test.css tests/examplefiles/test.cu tests/examplefiles/test.d tests/examplefiles/test.dart tests/examplefiles/test.dtd tests/examplefiles/test.ec tests/examplefiles/test.ecl tests/examplefiles/test.eh tests/examplefiles/test.erl tests/examplefiles/test.evoque tests/examplefiles/test.fan tests/examplefiles/test.flx tests/examplefiles/test.gdc tests/examplefiles/test.groovy tests/examplefiles/test.html tests/examplefiles/test.ini tests/examplefiles/test.java tests/examplefiles/test.jsp tests/examplefiles/test.maql tests/examplefiles/test.mod tests/examplefiles/test.moo tests/examplefiles/test.myt tests/examplefiles/test.nim tests/examplefiles/test.opa tests/examplefiles/test.pas tests/examplefiles/test.php tests/examplefiles/test.plot tests/examplefiles/test.ps1 tests/examplefiles/test.pypylog tests/examplefiles/test.r3 tests/examplefiles/test.rb tests/examplefiles/test.rhtml tests/examplefiles/test.scaml tests/examplefiles/test.ssp tests/examplefiles/test.tcsh tests/examplefiles/test.vb tests/examplefiles/test.vhdl tests/examplefiles/test.xqy tests/examplefiles/test.xsl tests/examplefiles/test2.pypylog tests/examplefiles/truncated.pytb tests/examplefiles/type.lisp tests/examplefiles/underscore.coffee tests/examplefiles/unicode.applescript tests/examplefiles/unicodedoc.py tests/examplefiles/unix-io.lid tests/examplefiles/webkit-transition.css tests/examplefiles/while.pov tests/examplefiles/wiki.factor tests/examplefiles/xml_example tests/examplefiles/zmlrpc.f90 tests/support/tagsPygments-1.6/Pygments.egg-info/not-zip-safe0000644000175000017500000000000112073317614017774 0ustar piotrpiotr Pygments-1.6/Pygments.egg-info/PKG-INFO0000644000175000017500000000374012103430104016627 0ustar piotrpiotrMetadata-Version: 1.1 Name: Pygments Version: 1.6 Summary: Pygments is a syntax highlighting package written in Python. Home-page: http://pygments.org/ Author: Georg Brandl Author-email: georg@python.org License: BSD License Description: Pygments ~~~~~~~~ Pygments is a syntax highlighting package written in Python. It is a generic syntax highlighter for general use in all kinds of software such as forum systems, wikis or other applications that need to prettify source code. Highlights are: * a wide range of common languages and markup formats is supported * special attention is paid to details, increasing quality by a fair amount * support for new languages and formats are added easily * a number of output formats, presently HTML, LaTeX, RTF, SVG, all image formats that PIL supports and ANSI sequences * it is usable as a command-line tool and as a library * ... and it highlights even Brainfuck! The `Pygments tip`_ is installable with ``easy_install Pygments==dev``. .. _Pygments tip: http://bitbucket.org/birkenfeld/pygments-main/get/default.zip#egg=Pygments-dev :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. 
Keywords: syntax highlighting Platform: any Classifier: License :: OSI Approved :: BSD License Classifier: Intended Audience :: Developers Classifier: Intended Audience :: End Users/Desktop Classifier: Intended Audience :: System Administrators Classifier: Development Status :: 6 - Mature Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 3 Classifier: Operating System :: OS Independent Classifier: Topic :: Text Processing :: Filters Classifier: Topic :: Utilities Pygments-1.6/external/0000755000175000017500000000000012103430105014051 5ustar piotrpiotrPygments-1.6/external/lasso-builtins-generator-9.lasso0000755000175000017500000000613212103426531022233 0ustar piotrpiotr#!/usr/bin/lasso9 /* Builtins Generator for Lasso 9 This is the shell script that was used to extract Lasso 9's built-in keywords and generate most of the _lassobuiltins.py file. When run, it creates a file named "lassobuiltins-9.py" containing the types, traits, and methods of the currently-installed version of Lasso 9. A partial list of keywords in Lasso 8 can be generated with this code: insert(string_removeleading(#i, -pattern='_global_')); /iterate; #l8tags->sort; iterate(#l8tags, local('i')); string_lowercase(#i)+"
"; /iterate; */ output("This output statement is required for a complete list of methods.") local(f) = file("lassobuiltins-9.py") #f->doWithClose => { #f->openWrite #f->writeString('# -*- coding: utf-8 -*- """ pygments.lexers._lassobuiltins ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Built-in Lasso types, traits, and methods. """ ') lcapi_loadModules // Load all of the libraries from builtins and lassoserver // This forces all possible available types and methods to be registered local(srcs = tie( dir(sys_masterHomePath + 'LassoLibraries/builtins/')->eachFilePath, dir(sys_masterHomePath + 'LassoLibraries/lassoserver/')->eachFilePath ) ) with topLevelDir in #srcs where !#topLevelDir->lastComponent->beginsWith('.') do protect => { handle_error => { stdoutnl('Unable to load: ' + #topLevelDir + ' ' + error_msg) } library_thread_loader->loadLibrary(#topLevelDir) stdoutnl('Loaded: ' + #topLevelDir) } local( typesList = list(), traitsList = list(), methodsList = list() ) // unbound methods with method in sys_listUnboundMethods where !#method->methodName->asString->endsWith('=') where #method->methodName->asString->isalpha(1) where #methodsList !>> #method->methodName->asString do #methodsList->insert(#method->methodName->asString) // traits with trait in sys_listTraits where !#trait->asString->beginsWith('$') where #traitsList !>> #trait->asString do { #traitsList->insert(#trait->asString) with tmethod in tie(#trait->getType->provides, #trait->getType->requires) where !#tmethod->methodName->asString->endsWith('=') where #tmethod->methodName->asString->isalpha(1) where #methodsList !>> #tmethod->methodName->asString do #methodsList->insert(#tmethod->methodName->asString) } // types with type in sys_listTypes where #typesList !>> #type->asString do { #typesList->insert(#type->asString) with tmethod in #type->getType->listMethods where !#tmethod->methodName->asString->endsWith('=') where #tmethod->methodName->asString->isalpha(1) where #methodsList !>> #tmethod->methodName->asString do #methodsList->insert(#tmethod->methodName->asString) } #f->writeString("BUILTINS = { 'Types': [ ") with t in #typesList do #f->writeString(" '"+string_lowercase(#t)+"',\n") #f->writeString(" ], 'Traits': [ ") with t in #traitsList do #f->writeString(" '"+string_lowercase(#t)+"',\n") #f->writeString(" ], 'Methods': [ ") with t in #methodsList do #f->writeString(" '"+string_lowercase(#t)+"',\n") #f->writeString(" ], } ") } Pygments-1.6/external/autopygmentize0000755000175000017500000000406012103426531017073 0ustar piotrpiotr#!/bin/sh # Best effort auto-pygmentization with transparent decompression # (c) Reuben Thomas 2012-2013 # This program is in the public domain. # Strategy: first see if pygmentize can find a lexer; if not, ask file; if that finds nothing, fail # Set the environment variable PYGMENTIZE_OPTS to configure pygments. 
# This program can be used as a .lessfilter for the less pager to auto-color less's output lexer=`pygmentize -N "$1"` if [ "$lexer" = "text" ]; then file_common_opts="--brief --dereference --uncompress" unset lexer case `file --mime-type $file_common_opts "$1"` in application/xml|image/svg+xml) lexer=xml;; text/html) lexer=html;; text/troff) lexer=nroff;; text/x-asm) lexer=nasm;; text/x-awk) lexer=awk;; text/x-c) lexer=c;; text/x-c++) lexer=cpp;; text/x-diff) lexer=diff;; text/x-fortran) lexer=fortran;; text/x-gawk) lexer=gawk;; text/x-java) lexer=java;; text/x-lisp) lexer=common-lisp;; text/x-lua) lexer=lua;; text/x-makefile) lexer=make;; text/x-msdos-batch) lexer=bat;; text/x-nawk) lexer=nawk;; text/x-pascal) lexer=pascal;; text/x-perl) lexer=perl;; text/x-php) lexer=php;; text/x-po) lexer=po;; text/x-python) lexer=python;; text/x-ruby) lexer=ruby;; text/x-shellscript) lexer=sh;; text/x-tcl) lexer=tcl;; text/x-tex|text/x-texinfo) lexer=latex;; # FIXME: texinfo really needs its own lexer # Types that file outputs which pygmentize didn't support as of file 5.11, pygments 1.6rc1 # text/calendar # text/PGP # text/rtf # text/texmacs # text/x-bcpl # text/x-info # text/x-m4 # text/x-vcard # text/x-xmcd esac fi encoding=`file --brief --mime-encoding $file_common_opts "$1"` if [ -n "$lexer" ]; then # FIXME: Specify input encoding rather than output encoding https://bitbucket.org/birkenfeld/pygments-main/issue/800 zcat "$1" | pygmentize -O encoding=$encoding,outencoding=UTF-8 $PYGMENTIZE_OPTS -l $lexer exit 0 fi exit 1 Pygments-1.6/external/rst-directive.py0000644000175000017500000000504512103426531017223 0ustar piotrpiotr# -*- coding: utf-8 -*- """ The Pygments reStructuredText directive ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This fragment is a Docutils_ 0.5 directive that renders source code (to HTML only, currently) via Pygments. To use it, adjust the options below and copy the code into a module that you import on initialization. The code then automatically registers a ``sourcecode`` directive that you can use instead of normal code blocks like this:: .. sourcecode:: python My code goes here. If you want to have different code styles, e.g. one with line numbers and one without, add formatters with their names in the VARIANTS dict below. You can invoke them instead of the DEFAULT one by using a directive option:: .. sourcecode:: python :linenos: My code goes here. Look at the `directive documentation`_ to get all the gory details. .. _Docutils: http://docutils.sf.net/ .. _directive documentation: http://docutils.sourceforge.net/docs/howto/rst-directives.html :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ # Options # ~~~~~~~ # Set to True if you want inline CSS styles instead of classes INLINESTYLES = False from pygments.formatters import HtmlFormatter # The default formatter DEFAULT = HtmlFormatter(noclasses=INLINESTYLES) # Add name -> formatter pairs for every variant you want to use VARIANTS = { # 'linenos': HtmlFormatter(noclasses=INLINESTYLES, linenos=True), } from docutils import nodes from docutils.parsers.rst import directives, Directive from pygments import highlight from pygments.lexers import get_lexer_by_name, TextLexer class Pygments(Directive): """ Source code syntax hightlighting. 
""" required_arguments = 1 optional_arguments = 0 final_argument_whitespace = True option_spec = dict([(key, directives.flag) for key in VARIANTS]) has_content = True def run(self): self.assert_has_content() try: lexer = get_lexer_by_name(self.arguments[0]) except ValueError: # no lexer found - use the text one instead of an exception lexer = TextLexer() # take an arbitrary option if more than one is given formatter = self.options and VARIANTS[self.options.keys()[0]] or DEFAULT parsed = highlight(u'\n'.join(self.content), lexer, formatter) return [nodes.raw('', parsed, format='html')] directives.register_directive('sourcecode', Pygments) Pygments-1.6/external/markdown-processor.py0000644000175000017500000000363212103426531020276 0ustar piotrpiotr# -*- coding: utf-8 -*- """ The Pygments Markdown Preprocessor ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This fragment is a Markdown_ preprocessor that renders source code to HTML via Pygments. To use it, invoke Markdown like so:: from markdown import Markdown md = Markdown() md.textPreprocessors.insert(0, CodeBlockPreprocessor()) html = md.convert(someText) markdown is then a callable that can be passed to the context of a template and used in that template, for example. This uses CSS classes by default, so use ``pygmentize -S -f html > pygments.css`` to create a stylesheet to be added to the website. You can then highlight source code in your markdown markup:: [sourcecode:lexer] some code [/sourcecode] .. _Markdown: http://www.freewisdom.org/projects/python-markdown/ :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ # Options # ~~~~~~~ # Set to True if you want inline CSS styles instead of classes INLINESTYLES = False import re from markdown import TextPreprocessor from pygments import highlight from pygments.formatters import HtmlFormatter from pygments.lexers import get_lexer_by_name, TextLexer class CodeBlockPreprocessor(TextPreprocessor): pattern = re.compile( r'\[sourcecode:(.+?)\](.+?)\[/sourcecode\]', re.S) formatter = HtmlFormatter(noclasses=INLINESTYLES) def run(self, lines): def repl(m): try: lexer = get_lexer_by_name(m.group(1)) except ValueError: lexer = TextLexer() code = highlight(m.group(2), lexer, self.formatter) code = code.replace('\n\n', '\n \n').replace('\n', '
') return '\n\n
%s
\n\n' % code return self.pattern.sub( repl, lines) Pygments-1.6/external/rst-directive-old.py0000644000175000017500000000472612103426531020004 0ustar piotrpiotr# -*- coding: utf-8 -*- """ The Pygments reStructuredText directive ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This fragment is a Docutils_ 0.4 directive that renders source code (to HTML only, currently) via Pygments. To use it, adjust the options below and copy the code into a module that you import on initialization. The code then automatically registers a ``sourcecode`` directive that you can use instead of normal code blocks like this:: .. sourcecode:: python My code goes here. If you want to have different code styles, e.g. one with line numbers and one without, add formatters with their names in the VARIANTS dict below. You can invoke them instead of the DEFAULT one by using a directive option:: .. sourcecode:: python :linenos: My code goes here. Look at the `directive documentation`_ to get all the gory details. .. _Docutils: http://docutils.sf.net/ .. _directive documentation: http://docutils.sourceforge.net/docs/howto/rst-directives.html :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ # Options # ~~~~~~~ # Set to True if you want inline CSS styles instead of classes INLINESTYLES = False from pygments.formatters import HtmlFormatter # The default formatter DEFAULT = HtmlFormatter(noclasses=INLINESTYLES) # Add name -> formatter pairs for every variant you want to use VARIANTS = { # 'linenos': HtmlFormatter(noclasses=INLINESTYLES, linenos=True), } from docutils import nodes from docutils.parsers.rst import directives from pygments import highlight from pygments.lexers import get_lexer_by_name, TextLexer def pygments_directive(name, arguments, options, content, lineno, content_offset, block_text, state, state_machine): try: lexer = get_lexer_by_name(arguments[0]) except ValueError: # no lexer found - use the text one instead of an exception lexer = TextLexer() # take an arbitrary option if more than one is given formatter = options and VARIANTS[options.keys()[0]] or DEFAULT parsed = highlight(u'\n'.join(content), lexer, formatter) return [nodes.raw('', parsed, format='html')] pygments_directive.arguments = (1, 0, 1) pygments_directive.content = 1 pygments_directive.options = dict([(key, directives.flag) for key in VARIANTS]) directives.register_directive('sourcecode', pygments_directive) Pygments-1.6/external/pygments.bashcomp0000644000175000017500000000204711713467262017464 0ustar piotrpiotr#!bash # # Bash completion support for Pygments (the 'pygmentize' command). 
# _pygmentize() { local cur prev COMPREPLY=() cur=`_get_cword` prev=${COMP_WORDS[COMP_CWORD-1]} case "$prev" in -f) FORMATTERS=`pygmentize -L formatters | grep '* ' | cut -c3- | sed -e 's/,//g' -e 's/:$//'` COMPREPLY=( $( compgen -W '$FORMATTERS' -- "$cur" ) ) return 0 ;; -l) LEXERS=`pygmentize -L lexers | grep '* ' | cut -c3- | sed -e 's/,//g' -e 's/:$//'` COMPREPLY=( $( compgen -W '$LEXERS' -- "$cur" ) ) return 0 ;; -S) STYLES=`pygmentize -L styles | grep '* ' | cut -c3- | sed s/:$//` COMPREPLY=( $( compgen -W '$STYLES' -- "$cur" ) ) return 0 ;; esac if [[ "$cur" == -* ]]; then COMPREPLY=( $( compgen -W '-f -l -S -L -g -O -P -F \ -N -H -h -V -o' -- "$cur" ) ) return 0 fi } complete -F _pygmentize -o default pygmentize Pygments-1.6/external/moin-parser.py0000644000175000017500000000702012103426531016666 0ustar piotrpiotr# -*- coding: utf-8 -*- """ The Pygments MoinMoin Parser ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This is a MoinMoin parser plugin that renders source code to HTML via Pygments; you need Pygments 0.7 or newer for this parser to work. To use it, set the options below to match your setup and put this file in the data/plugin/parser subdirectory of your Moin instance, and give it the name that the parser directive should have. For example, if you name the file ``code.py``, you can get a highlighted Python code sample with this Wiki markup:: {{{ #!code python [...] }}} Additionally, if you set ATTACHMENTS below to True, Pygments will also be called for all attachments for whose filenames there is no other parser registered. You are responsible for including CSS rules that will map the Pygments CSS classes to colors. You can output a stylesheet file with `pygmentize`, put it into the `htdocs` directory of your Moin instance and then include it in the `stylesheets` configuration option in the Moin config, e.g.:: stylesheets = [('screen', '/htdocs/pygments.css')] If you do not want to do that and are willing to accept larger HTML output, you can set the INLINESTYLES option below to True. :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ # Options # ~~~~~~~ # Set to True if you want to highlight attachments, in addition to # {{{ }}} blocks. ATTACHMENTS = True # Set to True if you want inline CSS styles instead of classes INLINESTYLES = False import sys from pygments import highlight from pygments.lexers import get_lexer_by_name, get_lexer_for_filename, TextLexer from pygments.formatters import HtmlFormatter from pygments.util import ClassNotFound # wrap lines in s so that the Moin-generated line numbers work class MoinHtmlFormatter(HtmlFormatter): def wrap(self, source, outfile): for line in source: yield 1, '' + line[1] + '' htmlformatter = MoinHtmlFormatter(noclasses=INLINESTYLES) textlexer = TextLexer() codeid = [0] class Parser: """ MoinMoin Pygments parser. 
""" if ATTACHMENTS: extensions = '*' else: extensions = [] Dependencies = [] def __init__(self, raw, request, **kw): self.raw = raw self.req = request if "format_args" in kw: # called from a {{{ }}} block try: self.lexer = get_lexer_by_name(kw['format_args'].strip()) except ClassNotFound: self.lexer = textlexer return if "filename" in kw: # called for an attachment filename = kw['filename'] else: # called for an attachment by an older moin # HACK: find out the filename by peeking into the execution # frame which might not always work try: frame = sys._getframe(1) filename = frame.f_locals['filename'] except: filename = 'x.txt' try: self.lexer = get_lexer_for_filename(filename) except ClassNotFound: self.lexer = textlexer def format(self, formatter): codeid[0] += 1 id = "pygments_%s" % codeid[0] w = self.req.write w(formatter.code_area(1, id, start=1, step=1)) w(formatter.rawHTML(highlight(self.raw, self.lexer, htmlformatter))) w(formatter.code_area(0, id)) Pygments-1.6/LICENSE0000644000175000017500000000246312103426531013251 0ustar piotrpiotrCopyright (c) 2006-2013 by the respective authors (see AUTHORS file). All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Pygments-1.6/Makefile0000644000175000017500000000277212103426531013707 0ustar piotrpiotr# # Makefile for Pygments # ~~~~~~~~~~~~~~~~~~~~~ # # Combines scripts for common tasks. # # :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. # :license: BSD, see LICENSE for details. # PYTHON ?= python export PYTHONPATH = $(shell echo "$$PYTHONPATH"):$(shell python -c 'import os; print ":".join(os.path.abspath(line.strip()) for line in file("PYTHONPATH"))' 2>/dev/null) .PHONY: all check clean clean-pyc codetags docs mapfiles \ pylint reindent test test-coverage all: clean-pyc check test check: @$(PYTHON) scripts/detect_missing_analyse_text.py || true @$(PYTHON) scripts/check_sources.py -i build -i dist -i pygments/lexers/_mapping.py \ -i docs/build -i pygments/formatters/_mapping.py -i pygments/unistring.py \ -i pygments/lexers/_vimbuiltins.py clean: clean-pyc -rm -rf build -rm -f codetags.html clean-pyc: find . -name '*.pyc' -exec rm -f {} + find . -name '*.pyo' -exec rm -f {} + find . -name '*~' -exec rm -f {} + codetags: @$(PYTHON) scripts/find_codetags.py -i tests/examplefiles -i scripts/pylintrc \ -i scripts/find_codetags.py -o codetags.html . 
docs: docs/build

docs/build: docs/src/*.txt
	$(PYTHON) docs/generate.py html docs/build $?
	touch docs/build

mapfiles:
	(cd pygments/lexers; $(PYTHON) _mapping.py)
	(cd pygments/formatters; $(PYTHON) _mapping.py)

pylint:
	@pylint --rcfile scripts/pylintrc pygments

reindent:
	@$(PYTHON) scripts/reindent.py -r -B .

test:
	@$(PYTHON) tests/run.py $(TESTS)

test-coverage:
	@$(PYTHON) tests/run.py -C $(TESTS)
Pygments-1.6/CHANGES0000644000175000017500000005301512103430040013224 0ustar  piotrpiotrPygments changelog
==================

Issue numbers refer to the tracker at
<http://bitbucket.org/birkenfeld/pygments-main/issues>, pull request
numbers to the requests at
<http://bitbucket.org/birkenfeld/pygments-main/pull-requests>.

Version 1.6
-----------
(released Feb 3, 2013)

- Lexers added:

  * Dylan console (PR#149)
  * Logos (PR#150)
  * Shell sessions (PR#158)

- Fix guessed lexers not receiving lexer options (#838).

- Fix unquoted HTML attribute lexing in Opa (#841).

- Fixes to the Dart lexer (PR#160).


Version 1.6rc1
--------------
(released Jan 9, 2013)

- Lexers added:

  * AspectJ (PR#90)
  * AutoIt (PR#122)
  * BUGS-like languages (PR#89)
  * Ceylon (PR#86)
  * Croc (new name for MiniD)
  * CUDA (PR#75)
  * Dg (PR#116)
  * IDL (PR#115)
  * Jags (PR#89)
  * Julia (PR#61)
  * Kconfig (#711)
  * Lasso (PR#95, PR#113)
  * LiveScript (PR#84)
  * Monkey (PR#117)
  * Mscgen (PR#80)
  * NSIS scripts (PR#136)
  * OpenCOBOL (PR#72)
  * QML (PR#123)
  * Puppet (PR#133)
  * Racket (PR#94)
  * Rdoc (PR#99)
  * Robot Framework (PR#137)
  * RPM spec files (PR#124)
  * Rust (PR#67)
  * Smali (Dalvik assembly)
  * SourcePawn (PR#39)
  * Stan (PR#89)
  * Treetop (PR#125)
  * TypeScript (PR#114)
  * VGL (PR#12)
  * Visual FoxPro (#762)
  * Windows Registry (#819)
  * Xtend (PR#68)

- The HTML formatter now supports linking to tags using CTags files, when
  the python-ctags package is installed (PR#87).

- The HTML formatter now has a "linespans" option that wraps every line in
  a <span> tag with a specific id (PR#82).

- When deriving a lexer from another lexer with token definitions,
  definitions for states not in the child lexer are now inherited.  If you
  override a state in the child lexer, an "inherit" keyword has been added
  to insert the base state at that position (PR#141).

- The C family lexers now inherit token definitions from a common base
  class, removing code duplication (PR#141).

- Use "colorama" on Windows for console color output (PR#142).

- Fix Template Haskell highlighting (PR#63).

- Fix some S/R lexer errors (PR#91).

- Fix a bug in the Prolog lexer with names that start with 'is' (#810).

- Rewrite Dylan lexer, add Dylan LID lexer (PR#147).

- Add a Java quickstart document (PR#146).

- Add a "external/autopygmentize" file that can be used as .lessfilter
  (#802).


Version 1.5
-----------
(codename Zeitdilatation, released Mar 10, 2012)

- Lexers added:

  * Awk (#630)
  * Fancy (#633)
  * PyPy Log
  * eC
  * Nimrod
  * Nemerle (#667)
  * F# (#353)
  * Groovy (#501)
  * PostgreSQL (#660)
  * DTD
  * Gosu (#634)
  * Octave (PR#22)
  * Standard ML (PR#14)
  * CFengine3 (#601)
  * Opa (PR#37)
  * HTTP sessions (PR#42)
  * JSON (PR#31)
  * SNOBOL (PR#30)
  * MoonScript (PR#43)
  * ECL (PR#29)
  * Urbiscript (PR#17)
  * OpenEdge ABL (PR#27)
  * SystemVerilog (PR#35)
  * Coq (#734)
  * PowerShell (#654)
  * Dart (#715)
  * Fantom (PR#36)
  * Bro (PR#5)
  * NewLISP (PR#26)
  * VHDL (PR#45)
  * Scilab (#740)
  * Elixir (PR#57)
  * Tea (PR#56)
  * Kotlin (PR#58)

- Fix Python 3 terminal highlighting with pygmentize (#691).

- In the LaTeX formatter, escape special &, < and > chars (#648).

- In the LaTeX formatter, fix display problems for styles with token
  background colors (#670).

- Enhancements to the Squid conf lexer (#664).

- Several fixes to the reStructuredText lexer (#636).
- Recognize methods in the ObjC lexer (#638).

- Fix Lua "class" highlighting: it does not have classes (#665).

- Fix degenerate regex in Scala lexer (#671) and highlighting bugs
  (#713, #708).

- Fix number pattern order in Ocaml lexer (#647).

- Fix generic type highlighting in ActionScript 3 (#666).

- Fixes to the Clojure lexer (PR#9).

- Fix degenerate regex in Nemerle lexer (#706).

- Fix infinite looping in CoffeeScript lexer (#729).

- Fix crashes and analysis with ObjectiveC lexer (#693, #696).

- Add some Fortran 2003 keywords.

- Fix Boo string regexes (#679).

- Add "rrt" style (#727).

- Fix infinite looping in Darcs Patch lexer.

- Lots of misc fixes to character-eating bugs and ordering problems in
  many different lexers.


Version 1.4
-----------
(codename Unschärfe, released Jan 03, 2011)

- Lexers added:

  * Factor (#520)
  * PostScript (#486)
  * Verilog (#491)
  * BlitzMax Basic (#478)
  * Ioke (#465)
  * Java properties, split out of the INI lexer (#445)
  * Scss (#509)
  * Duel/JBST
  * XQuery (#617)
  * Mason (#615)
  * GoodData (#609)
  * SSP (#473)
  * Autohotkey (#417)
  * Google Protocol Buffers
  * Hybris (#506)

- Do not fail in analyse_text methods (#618).

- Performance improvements in the HTML formatter (#523).

- With the ``noclasses`` option in the HTML formatter, some styles
  present in the stylesheet were not added as inline styles.

- Four fixes to the Lua lexer (#480, #481, #482, #497).

- More context-sensitive Gherkin lexer with support for more i18n
  translations.

- Support new OO keywords in Matlab lexer (#521).

- Small fix in the CoffeeScript lexer (#519).

- A bugfix for backslashes in ocaml strings (#499).

- Fix unicode/raw docstrings in the Python lexer (#489).

- Allow PIL to work without PIL.pth (#502).

- Allow seconds as a unit in CSS (#496).

- Support ``application/javascript`` as a JavaScript mime type (#504).

- Support `Offload <http://offload.codeplay.com>`_ C++ Extensions as
  keywords in the C++ lexer (#484).

- Escape more characters in LaTeX output (#505).

- Update Haml/Sass lexers to version 3 (#509).

- Small PHP lexer string escaping fix (#515).

- Support comments before preprocessor directives, and unsigned/
  long long literals in C/C++ (#613, #616).

- Support line continuations in the INI lexer (#494).

- Fix lexing of Dylan string and char literals (#628).

- Fix class/procedure name highlighting in VB.NET lexer (#624).


Version 1.3.1
-------------
(bugfix release, released Mar 05, 2010)

- The ``pygmentize`` script was missing from the distribution.


Version 1.3
-----------
(codename Schneeglöckchen, released Mar 01, 2010)

- Added the ``ensurenl`` lexer option, which can be used to suppress the
  automatic addition of a newline to the lexer input (see the sketch after
  this section).

- Lexers added:

  * Ada
  * Coldfusion
  * Modula-2
  * haXe
  * R console
  * Objective-J
  * Haml and Sass
  * CoffeeScript

- Enhanced reStructuredText highlighting.

- Added support for PHP 5.3 namespaces in the PHP lexer.

- Added a bash completion script for `pygmentize`, to the external/
  directory (#466).

- Fixed a bug in `do_insertions()` used for multi-lexer languages.

- Fixed a Ruby regex highlighting bug (#476).

- Fixed regex highlighting bugs in Perl lexer (#258).

- Add small enhancements to the C lexer (#467) and Bash lexer (#469).

- Small fixes for the Tcl, Debian control file, Nginx config,
  Smalltalk, Objective-C, Clojure, Lua lexers.

- Gherkin lexer: Fixed single apostrophe bug and added new i18n keywords.
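
A quick sketch of the ``ensurenl`` option mentioned above (the option name
comes from the entry itself; the surrounding code is illustrative only)::

    from pygments.lexers import get_lexer_by_name

    # By default a trailing newline is added to the input before
    # tokenizing; ensurenl=False suppresses that.
    lexer = get_lexer_by_name('python', ensurenl=False)
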
Version 1.2.2
-------------
(bugfix release, released Jan 02, 2010)

* Removed a backwards incompatibility in the LaTeX formatter that caused
  Sphinx to produce invalid commands when writing LaTeX output (#463).

* Fixed a forever-backtracking regex in the BashLexer (#462).


Version 1.2.1
-------------
(bugfix release, released Jan 02, 2010)

* Fixed mishandling of an ellipsis in place of the frames in a Python
  console traceback, resulting in clobbered output.


Version 1.2
-----------
(codename Neujahr, released Jan 01, 2010)

- Dropped Python 2.3 compatibility.

- Lexers added:

  * Asymptote
  * Go
  * Gherkin (Cucumber)
  * CMake
  * Ooc
  * Coldfusion
  * haXe
  * R console

- Added options for rendering LaTeX in source code comments in the
  LaTeX formatter (#461).

- Updated the Logtalk lexer.

- Added `line_number_start` option to image formatter (#456).

- Added `hl_lines` and `hl_color` options to image formatter (#457);
  see the sketch after the 1.1 section below.

- Fixed the HtmlFormatter's handling of noclasses=True to not output any
  classes (#427).

- Added the Monokai style (#453).

- Fixed LLVM lexer identifier syntax and added new keywords (#442).

- Fixed the PythonTracebackLexer to handle non-traceback data in header or
  trailer, and support more partial tracebacks that start on line 2 (#437).

- Fixed the CLexer to not highlight ternary statements as labels.

- Fixed lexing of some Ruby quoting peculiarities (#460).

- A few ASM lexer fixes (#450).


Version 1.1.1
-------------
(bugfix release, released Sep 15, 2009)

- Fixed the BBCode lexer (#435).

- Added support for new Jinja2 keywords.

- Fixed test suite failures.

- Added Gentoo-specific suffixes to Bash lexer.


Version 1.1
-----------
(codename Brillouin, released Sep 11, 2009)

- Ported Pygments to Python 3.  This needed a few changes in the way
  encodings are handled; they may affect corner cases when used with
  Python 2 as well.

- Lexers added:

  * Antlr/Ragel, thanks to Ana Nelson
  * (Ba)sh shell
  * Erlang shell
  * GLSL
  * Prolog
  * Evoque
  * Modelica
  * Rebol
  * MXML
  * Cython
  * ABAP
  * ASP.net (VB/C#)
  * Vala
  * Newspeak

- Fixed the LaTeX formatter's output so that output generated for one style
  can be used with the style definitions of another (#384).

- Added "anchorlinenos" and "noclobber_cssfile" (#396) options to HTML
  formatter.

- Support multiline strings in Lua lexer.

- Rewrite of the JavaScript lexer by Pumbaa80 to better support regular
  expression literals (#403).

- When pygmentize is asked to highlight a file for which multiple lexers
  match the filename, use the analyse_text guessing engine to determine
  the winner (#355).

- Fixed minor bugs in the JavaScript lexer (#383), the Matlab lexer (#378),
  the Scala lexer (#392), the INI lexer (#391), the Clojure lexer (#387)
  and the AS3 lexer (#389).

- Fixed three Perl heredoc lexing bugs (#379, #400, #422).

- Fixed a bug in the image formatter which misdetected lines (#380).

- Fixed bugs lexing extended Ruby strings and regexes.

- Fixed a bug when lexing git diffs.

- Fixed a bug lexing the empty commit in the PHP lexer (#405).

- Fixed a bug causing Python numbers to be mishighlighted as floats (#397).

- Fixed a bug when backslashes are used in odd locations in Python (#395).

- Fixed various bugs in Matlab and S-Plus lexers, thanks to Winston Chang
  (#410, #411, #413, #414) and fmarc (#419).

- Fixed a bug in Haskell single-line comment detection (#426).

- Added new-style reStructuredText directive for docutils 0.5+ (#428).
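
As an illustration of the image formatter options added in 1.2 above (a
sketch only; the option names match the `ImageFormatter` docstring
elsewhere in this distribution)::

    from pygments.formatters import ImageFormatter

    formatter = ImageFormatter(line_number_start=10,  # first printed number
                               hl_lines=[3, 4],       # lines to highlight
                               hl_color='#ffffcc')    # highlight color
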
Version 1.0
-----------
(codename Dreiundzwanzig, released Nov 23, 2008)

- Don't use join(splitlines()) when converting newlines to ``\n``,
  because that doesn't keep all newlines at the end when the
  ``stripnl`` lexer option is False.

- Added ``-N`` option to command-line interface to get a lexer name
  for a given filename (see the example after the 0.11 section below).

- Added Tango style, written by Andre Roberge for the Crunchy project.

- Added Python3TracebackLexer and ``python3`` option to
  PythonConsoleLexer.

- Fixed a few bugs in the Haskell lexer.

- Fixed PythonTracebackLexer to be able to recognize SyntaxError and
  KeyboardInterrupt (#360).

- Provide one formatter class per image format, so that surprises like::

    pygmentize -f gif -o foo.gif foo.py

  creating a PNG file are avoided.

- Actually use the `font_size` option of the image formatter.

- Fixed numpy lexer that it doesn't listen for `*.py` any longer.

- Fixed HTML formatter so that text options can be Unicode
  strings (#371).

- Unified Diff lexer supports the "udiff" alias now.

- Fixed a few issues in Scala lexer (#367).

- RubyConsoleLexer now supports simple prompt mode (#363).

- JavascriptLexer is smarter about what constitutes a regex (#356).

- Add Applescript lexer, thanks to Andreas Amann (#330).

- Make the codetags more strict about matching words (#368).

- NginxConfLexer is a little more accurate on mimetypes and
  variables (#370).


Version 0.11.1
--------------
(released Aug 24, 2008)

- Fixed a Jython compatibility issue in pygments.unistring (#358).


Version 0.11
------------
(codename Straußenei, released Aug 23, 2008)

Many thanks go to Tim Hatch for writing or integrating most of the bug
fixes and new features.

- Lexers added:

  * Nasm-style assembly language, thanks to delroth
  * YAML, thanks to Kirill Simonov
  * ActionScript 3, thanks to Pierre Bourdon
  * Cheetah/Spitfire templates, thanks to Matt Good
  * Lighttpd config files
  * Nginx config files
  * Gnuplot plotting scripts
  * Clojure
  * POV-Ray scene files
  * Sqlite3 interactive console sessions
  * Scala source files, thanks to Krzysiek Goj

- Lexers improved:

  * C lexer highlights standard library functions now and supports
    C99 types.
  * Bash lexer now correctly highlights heredocs without preceding
    whitespace.
  * Vim lexer now highlights hex colors properly and knows a couple
    more keywords.
  * Irc logs lexer now handles xchat's default time format (#340) and
    correctly highlights lines ending in ``>``.
  * Support more delimiters for perl regular expressions (#258).
  * ObjectiveC lexer now supports 2.0 features.

- Added "Visual Studio" style.

- Updated markdown processor to Markdown 1.7.

- Support roman/sans/mono style defs and use them in the LaTeX
  formatter.

- The RawTokenFormatter is no longer registered to ``*.raw`` and it's
  documented that tokenization with this lexer may raise exceptions.

- New option ``hl_lines`` to HTML formatter, to highlight certain lines.

- New option ``prestyles`` to HTML formatter.

- New option *-g* to pygmentize, to allow lexer guessing based on
  filetext (can be slowish, so file extensions are still checked
  first).

- ``guess_lexer()`` now makes its decision much faster due to a cache
  of whether data is xml-like (a check which is used in several
  versions of ``analyse_text()``).  Several lexers also have more
  accurate ``analyse_text()`` now.
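
For example, the ``-N`` option mentioned in the 1.0 notes above takes a
filename and prints the matching lexer alias (the output shown is what
one would expect for a Python file, not a verbatim transcript)::

    $ pygmentize -N setup.py
    python
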
Version 0.10
------------
(codename Malzeug, released May 06, 2008)

- Lexers added:

  * Io
  * Smalltalk
  * Darcs patches
  * Tcl
  * Matlab
  * Matlab sessions
  * FORTRAN
  * XSLT
  * tcsh
  * NumPy
  * Python 3
  * S, S-plus, R statistics languages
  * Logtalk

- In the LatexFormatter, the *commandprefix* option is now by default
  'PY' instead of 'C', since the latter resulted in several collisions
  with other packages.  Also, the special meaning of the *arg*
  argument to ``get_style_defs()`` was removed.

- Added ImageFormatter, to format code as PNG, JPG, GIF or BMP.
  (Needs the Python Imaging Library.)

- Support doc comments in the PHP lexer.

- Handle format specifications in the Perl lexer.

- Fix comment handling in the Batch lexer.

- Add more file name extensions for the C++, INI and XML lexers.

- Fixes in the IRC and MuPad lexers.

- Fix function and interface name highlighting in the Java lexer.

- Fix at-rule handling in the CSS lexer.

- Handle KeyboardInterrupts gracefully in pygmentize.

- Added BlackWhiteStyle.

- Bash lexer now correctly highlights math, does not require
  whitespace after semicolons, and correctly highlights boolean
  operators.

- Makefile lexer is now capable of handling BSD and GNU make syntax.


Version 0.9
-----------
(codename Herbstzeitlose, released Oct 14, 2007)

- Lexers added:

  * Erlang
  * ActionScript
  * Literate Haskell
  * Common Lisp
  * Various assembly languages
  * Gettext catalogs
  * Squid configuration
  * Debian control files
  * MySQL-style SQL
  * MOOCode

- Lexers improved:

  * Greatly improved the Haskell and OCaml lexers.
  * Improved the Bash lexer's handling of nested constructs.
  * The C# and Java lexers exhibited abysmal performance with some
    input code; this should now be fixed.
  * The IRC logs lexer is now able to colorize weechat logs too.
  * The Lua lexer now recognizes multi-line comments.
  * Fixed bugs in the D and MiniD lexer.

- The encoding handling of the command line mode (pygmentize) was
  enhanced.  You shouldn't get UnicodeErrors from it anymore if you
  don't give an encoding option.

- Added a ``-P`` option to the command line mode which can be used to
  give options whose values contain commas or equals signs.

- Added 256-color terminal formatter.

- Added an experimental SVG formatter.

- Added the ``lineanchors`` option to the HTML formatter, thanks to
  Ian Charnas for the idea.

- Gave the line numbers table a CSS class in the HTML formatter.

- Added a Vim 7-like style.


Version 0.8.1
-------------
(released Jun 27, 2007)

- Fixed POD highlighting in the Ruby lexer.

- Fixed Unicode class and namespace name highlighting in the C# lexer.

- Fixed Unicode string prefix highlighting in the Python lexer.

- Fixed a bug in the D and MiniD lexers.

- Fixed the included MoinMoin parser.


Version 0.8
-----------
(codename Maikäfer, released May 30, 2007)

- Lexers added:

  * Haskell, thanks to Adam Blinkinsop
  * Redcode, thanks to Adam Blinkinsop
  * D, thanks to Kirk McDonald
  * MuPad, thanks to Christopher Creutzig
  * MiniD, thanks to Jarrett Billingsley
  * Vim Script, by Tim Hatch

- The HTML formatter now has a second line-numbers mode in which it
  will just integrate the numbers in the same ``<pre>`` tag as the
  code.

- The `CSharpLexer` now is Unicode-aware, which means that it has an
  option that can be set so that it correctly lexes Unicode
  identifiers allowed by the C# specs.

- Added a `RaiseOnErrorTokenFilter` that raises an exception when the
  lexer generates an error token, and a `VisibleWhitespaceFilter` that
  converts whitespace (spaces, tabs, newlines) into visible
  characters (see the sketch after this list).

- Fixed the `do_insertions()` helper function to yield correct
  indices.

- The ReST lexer now automatically highlights source code blocks in
  ".. sourcecode:: language" and ".. code:: language" directive
  blocks.

- Improved the default style (thanks to Tiberius Teng). The old
  default is still available as the "emacs" style (which was an alias
  before).

- The `get_style_defs` method of HTML formatters now uses the
  `cssclass` option as the default selector if it was given.

- Improved the ReST and Bash lexers a bit.

- Fixed a few bugs in the Makefile and Bash lexers, thanks to Tim
  Hatch.

- Fixed a bug in the command line code that disallowed ``-O`` options
  when using the ``-S`` option.

- Fixed a bug in the `RawTokenFormatter`.
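
A small sketch of the two filters mentioned above; the keyword arguments
are assumptions based on the filters' stated purpose, not a verbatim API
transcript::

    from pygments.lexers import PythonLexer
    from pygments.filters import VisibleWhitespaceFilter

    lexer = PythonLexer()
    # make spaces and tabs visible in the token stream
    lexer.add_filter(VisibleWhitespaceFilter(spaces=True, tabs=True))
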


Version 0.7.1
-------------
(released Feb 15, 2007)

- Fixed little highlighting bugs in the Python, Java, Scheme and
  Apache Config lexers.

- Updated the included manpage.

- Included a built version of the documentation in the source tarball.


Version 0.7
-----------
(codename Faschingskrapfn, released Feb 14, 2007)

- Added a MoinMoin parser that uses Pygments. With it, you get
  Pygments highlighting in Moin Wiki pages.

- Changed the exception raised if no suitable lexer, formatter etc. is
  found in one of the `get_*_by_*` functions to a custom exception,
  `pygments.util.ClassNotFound`. It is, however, a subclass of
  `ValueError` in order to retain backwards compatibility.

- Added a `-H` command line option which can be used to get the
  docstring of a lexer, formatter or filter (see the example after
  this list).

- Made the handling of lexers and formatters more consistent. The
  aliases and filename patterns of formatters are now attributes on
  them.

- Added an OCaml lexer, thanks to Adam Blinkinsop.

- Made the HTML formatter more flexible, and easily subclassable in
  order to make it easy to implement custom wrappers, e.g. alternate
  line number markup. See the documentation.

- Added an `outencoding` option to all formatters, making it possible
  to override the `encoding` (which is used by lexers and formatters)
  when using the command line interface. Also, if using the terminal
  formatter and the output file is a terminal and has an encoding
  attribute, use it if no encoding is given.

- Made it possible to just drop style modules into the `styles`
  subpackage of the Pygments installation.

- Added a "state" keyword argument to the `using` helper.

- Added a `commandprefix` option to the `LatexFormatter` which allows
  to control how the command names are constructed.

- Added quite a few new lexers, thanks to Tim Hatch:

  * Java Server Pages
  * Windows batch files
  * Trac Wiki markup
  * Python tracebacks
  * ReStructuredText
  * Dylan
  * and the Befunge esoteric programming language (yay!)

- Added Mako lexers by Ben Bangert.

- Added "fruity" style, another dark background originally vim-based
  theme.

- Added sources.list lexer by Dennis Kaarsemaker.

- Added token stream filters, and a pygmentize option to use them.

- Changed behavior of the `in` operator for tokens.

- Added mimetypes for all lexers.

- Fixed some problems lexing Python strings.

- Fixed tickets: #167, #178, #179, #180, #185, #201.
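
For instance, the ``-H`` option described above takes the kind of object
followed by its name and prints the corresponding docstring (a sketch;
the exact output will vary)::

    $ pygmentize -H lexer python
    $ pygmentize -H formatter html
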


Version 0.6
-----------
(codename Zimtstern, released Dec 20, 2006)

- Added option for the HTML formatter to write the CSS to an external
  file in "full document" mode.

- Added RTF formatter.

- Added Bash and Apache configuration lexers (thanks to Tim Hatch).

- Improved guessing methods for various lexers.

- Added `@media` support to CSS lexer (thanks to Tim Hatch).

- Added a Groff lexer (thanks to Tim Hatch).

- License change to BSD.

- Added lexers for the Myghty template language.

- Added a Scheme lexer (thanks to Marek Kubica).

- Added some functions to iterate over existing lexers, formatters and
  styles.

- The HtmlFormatter's `get_style_defs()` can now take a list as an
  argument to generate CSS with multiple prefixes.

- Support for guessing input encoding added.

- Encoding support added: all processing is now done with Unicode
  strings, input and output are converted from and optionally to byte
  strings (see the ``encoding`` option of lexers and formatters).

- Some improvements in the C(++) lexers handling comments and line
  continuations.


Version 0.5.1
-------------
(released Oct 30, 2006)

- Fixed traceback in ``pygmentize -L`` (thanks to Piotr Ozarowski).


Version 0.5
-----------
(codename PyKleur, released Oct 30, 2006)

- Initial public release.
Pygments-1.6/ez_setup.py0000755000175000017500000002275511713467262014501 0ustar  piotrpiotr#!python
"""Bootstrap setuptools installation

If you want to use setuptools in your package's setup.py, just include this
file in the same directory with it, and add this to the top of your setup.py::

    from ez_setup import use_setuptools
    use_setuptools()

If you want to require a specific version of setuptools, set a download
mirror, or use an alternate download directory, you can do so by supplying
the appropriate options to ``use_setuptools()``.

This file can also be run as a script to install or upgrade setuptools.
"""
import sys
DEFAULT_VERSION = "0.6c9"
DEFAULT_URL     = "http://pypi.python.org/packages/%s/s/setuptools/" % sys.version[:3]

md5_data = {
    'setuptools-0.6b1-py2.3.egg': '8822caf901250d848b996b7f25c6e6ca',
    'setuptools-0.6b1-py2.4.egg': 'b79a8a403e4502fbb85ee3f1941735cb',
    'setuptools-0.6b2-py2.3.egg': '5657759d8a6d8fc44070a9d07272d99b',
    'setuptools-0.6b2-py2.4.egg': '4996a8d169d2be661fa32a6e52e4f82a',
    'setuptools-0.6b3-py2.3.egg': 'bb31c0fc7399a63579975cad9f5a0618',
    'setuptools-0.6b3-py2.4.egg': '38a8c6b3d6ecd22247f179f7da669fac',
    'setuptools-0.6b4-py2.3.egg': '62045a24ed4e1ebc77fe039aa4e6f7e5',
    'setuptools-0.6b4-py2.4.egg': '4cb2a185d228dacffb2d17f103b3b1c4',
    'setuptools-0.6c1-py2.3.egg': 'b3f2b5539d65cb7f74ad79127f1a908c',
    'setuptools-0.6c1-py2.4.egg': 'b45adeda0667d2d2ffe14009364f2a4b',
    'setuptools-0.6c2-py2.3.egg': 'f0064bf6aa2b7d0f3ba0b43f20817c27',
    'setuptools-0.6c2-py2.4.egg': '616192eec35f47e8ea16cd6a122b7277',
    'setuptools-0.6c3-py2.3.egg': 'f181fa125dfe85a259c9cd6f1d7b78fa',
    'setuptools-0.6c3-py2.4.egg': 'e0ed74682c998bfb73bf803a50e7b71e',
    'setuptools-0.6c3-py2.5.egg': 'abef16fdd61955514841c7c6bd98965e',
    'setuptools-0.6c4-py2.3.egg': 'b0b9131acab32022bfac7f44c5d7971f',
    'setuptools-0.6c4-py2.4.egg': '2a1f9656d4fbf3c97bf946c0a124e6e2',
    'setuptools-0.6c4-py2.5.egg': '8f5a052e32cdb9c72bcf4b5526f28afc',
    'setuptools-0.6c5-py2.3.egg': 'ee9fd80965da04f2f3e6b3576e9d8167',
    'setuptools-0.6c5-py2.4.egg': 'afe2adf1c01701ee841761f5bcd8aa64',
    'setuptools-0.6c5-py2.5.egg': 'a8d3f61494ccaa8714dfed37bccd3d5d',
    'setuptools-0.6c6-py2.3.egg': '35686b78116a668847237b69d549ec20',
    'setuptools-0.6c6-py2.4.egg': '3c56af57be3225019260a644430065ab',
    'setuptools-0.6c6-py2.5.egg': 'b2f8a7520709a5b34f80946de5f02f53',
    'setuptools-0.6c7-py2.3.egg': '209fdf9adc3a615e5115b725658e13e2',
    'setuptools-0.6c7-py2.4.egg': '5a8f954807d46a0fb67cf1f26c55a82e',
    'setuptools-0.6c7-py2.5.egg': '45d2ad28f9750e7434111fde831e8372',
    'setuptools-0.6c8-py2.3.egg': '50759d29b349db8cfd807ba8303f1902',
    'setuptools-0.6c8-py2.4.egg': 'cba38d74f7d483c06e9daa6070cce6de',
    'setuptools-0.6c8-py2.5.egg': '1721747ee329dc150590a58b3e1ac95b',
    'setuptools-0.6c9-py2.3.egg': 'a83c4020414807b496e4cfbe08507c03',
    'setuptools-0.6c9-py2.4.egg': '260a2be2e5388d66bdaee06abec6342a',
    'setuptools-0.6c9-py2.5.egg': 'fe67c3e5a17b12c0e7c541b7ea43a8e6',
    'setuptools-0.6c9-py2.6.egg': 'ca37b1ff16fa2ede6e19383e7b59245a',
}

import sys, os
try: from hashlib import md5
except ImportError: from md5 import md5

def _validate_md5(egg_name, data):
    if egg_name in md5_data:
        digest = md5(data).hexdigest()
        if digest != md5_data[egg_name]:
            print >>sys.stderr, (
                "md5 validation of %s failed!  (Possible download problem?)"
                % egg_name
            )
            sys.exit(2)
    return data

def use_setuptools(
    version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir,
    download_delay=15
):
    """Automatically find/download setuptools and make it available on sys.path

    `version` should be a valid setuptools version number that is available
    as an egg for download under the `download_base` URL (which should end with
    a '/').  `to_dir` is the directory where setuptools will be downloaded, if
    it is not already available.  If `download_delay` is specified, it should
    be the number of seconds that will be paused before initiating a download,
    should one be required.  If an older version of setuptools is installed,
    this routine will print a message to ``sys.stderr`` and raise SystemExit in
    an attempt to abort the calling script.
    """
    was_imported = 'pkg_resources' in sys.modules or 'setuptools' in sys.modules
    def do_download():
        egg = download_setuptools(version, download_base, to_dir, download_delay)
        sys.path.insert(0, egg)
        import setuptools; setuptools.bootstrap_install_from = egg
    try:
        import pkg_resources
    except ImportError:
        return do_download()
    try:
        pkg_resources.require("setuptools>="+version); return
    except pkg_resources.VersionConflict, e:
        if was_imported:
            print >>sys.stderr, (
            "The required version of setuptools (>=%s) is not available, and\n"
            "can't be installed while this script is running. Please install\n"
            " a more recent version first, using 'easy_install -U setuptools'."
            "\n\n(Currently using %r)"
            ) % (version, e.args[0])
            sys.exit(2)
        else:
            del pkg_resources, sys.modules['pkg_resources']    # reload ok
            return do_download()
    except pkg_resources.DistributionNotFound:
        return do_download()

def download_setuptools(
    version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir,
    delay = 15
):
    """Download setuptools from a specified location and return its filename

    `version` should be a valid setuptools version number that is available
    as an egg for download under the `download_base` URL (which should end
    with a '/'). `to_dir` is the directory where the egg will be downloaded.
    `delay` is the number of seconds to pause before an actual download attempt.
    """
    import urllib2, shutil
    egg_name = "setuptools-%s-py%s.egg" % (version,sys.version[:3])
    url = download_base + egg_name
    saveto = os.path.join(to_dir, egg_name)
    src = dst = None
    if not os.path.exists(saveto):  # Avoid repeated downloads
        try:
            from distutils import log
            if delay:
                log.warn("""
---------------------------------------------------------------------------
This script requires setuptools version %s to run (even to display
help).  I will attempt to download it for you (from
%s), but
you may need to enable firewall access for this script first.
I will start the download in %d seconds.

(Note: if this machine does not have network access, please obtain the file

   %s

and place it in this directory before rerunning this script.)
---------------------------------------------------------------------------""",
                    version, download_base, delay, url
                ); from time import sleep; sleep(delay)
            log.warn("Downloading %s", url)
            src = urllib2.urlopen(url)
            # Read/write all in one block, so we don't create a corrupt file
            # if the download is interrupted.
            data = _validate_md5(egg_name, src.read())
            dst = open(saveto,"wb"); dst.write(data)
        finally:
            if src: src.close()
            if dst: dst.close()
    return os.path.realpath(saveto)


def main(argv, version=DEFAULT_VERSION):
    """Install or upgrade setuptools and EasyInstall"""
    try:
        import setuptools
    except ImportError:
        egg = None
        try:
            egg = download_setuptools(version, delay=0)
            sys.path.insert(0,egg)
            from setuptools.command.easy_install import main
            return main(list(argv)+[egg])   # we're done here
        finally:
            if egg and os.path.exists(egg):
                os.unlink(egg)
    else:
        if setuptools.__version__ == '0.0.1':
            print >>sys.stderr, (
            "You have an obsolete version of setuptools installed.  Please\n"
            "remove it from your system entirely before rerunning this script."
            )
            sys.exit(2)

    req = "setuptools>="+version
    import pkg_resources
    try:
        pkg_resources.require(req)
    except pkg_resources.VersionConflict:
        try:
            from setuptools.command.easy_install import main
        except ImportError:
            from easy_install import main
        main(list(argv)+[download_setuptools(delay=0)])
        sys.exit(0) # try to force an exit
    else:
        if argv:
            from setuptools.command.easy_install import main
            main(argv)
        else:
            print "Setuptools version",version,"or greater has been installed."
            print '(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)'

def update_md5(filenames):
    """Update our built-in md5 registry"""

    import re

    for name in filenames:
        base = os.path.basename(name)
        f = open(name,'rb')
        md5_data[base] = md5(f.read()).hexdigest()
        f.close()

    data = ["    %r: %r,\n" % it for it in md5_data.items()]
    data.sort()
    repl = "".join(data)

    import inspect
    srcfile = inspect.getsourcefile(sys.modules[__name__])
    f = open(srcfile, 'rb'); src = f.read(); f.close()

    match = re.search("\nmd5_data = {\n([^}]+)}", src)
    if not match:
        print >>sys.stderr, "Internal error!"
        sys.exit(2)

    src = src[:match.start(1)] + repl + src[match.end(1):]
    f = open(srcfile,'w')
    f.write(src)
    f.close()


if __name__=='__main__':
    if len(sys.argv)>2 and sys.argv[1]=='--md5update':
        update_md5(sys.argv[2:])
    else:
        main(sys.argv[1:])
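
# A minimal setup.py using this bootstrap module might look like the
# following (a sketch based on the module docstring above; the project
# metadata is invented):
#
#     from ez_setup import use_setuptools
#     use_setuptools()
#
#     from setuptools import setup
#     setup(name='example', version='0.1', py_modules=['example'])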






Pygments-1.6/pygments/0000755000175000017500000000000012103430105014075 5ustar  piotrpiotrPygments-1.6/pygments/formatters/0000755000175000017500000000000012103430105016263 5ustar  piotrpiotrPygments-1.6/pygments/formatters/img.py0000644000175000017500000004321312103426531017424 0ustar  piotrpiotr# -*- coding: utf-8 -*-
"""
    pygments.formatters.img
    ~~~~~~~~~~~~~~~~~~~~~~~

    Formatter for Pixmap output.

    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import sys

from pygments.formatter import Formatter
from pygments.util import get_bool_opt, get_int_opt, \
     get_list_opt, get_choice_opt

# Import this carefully
try:
    from PIL import Image, ImageDraw, ImageFont
    pil_available = True
except ImportError:
    pil_available = False

try:
    import _winreg
except ImportError:
    _winreg = None

__all__ = ['ImageFormatter', 'GifImageFormatter', 'JpgImageFormatter',
           'BmpImageFormatter']


# For some unknown reason every font calls it something different
STYLES = {
    'NORMAL':     ['', 'Roman', 'Book', 'Normal', 'Regular', 'Medium'],
    'ITALIC':     ['Oblique', 'Italic'],
    'BOLD':       ['Bold'],
    'BOLDITALIC': ['Bold Oblique', 'Bold Italic'],
}

# A sane default for modern systems
DEFAULT_FONT_NAME_NIX = 'Bitstream Vera Sans Mono'
DEFAULT_FONT_NAME_WIN = 'Courier New'


class PilNotAvailable(ImportError):
    """When Python imaging library is not available"""


class FontNotFound(Exception):
    """When there are no usable fonts specified"""


class FontManager(object):
    """
    Manages a set of fonts: normal, italic, bold, etc...
    """

    def __init__(self, font_name, font_size=14):
        self.font_name = font_name
        self.font_size = font_size
        self.fonts = {}
        self.encoding = None
        if sys.platform.startswith('win'):
            if not font_name:
                self.font_name = DEFAULT_FONT_NAME_WIN
            self._create_win()
        else:
            if not font_name:
                self.font_name = DEFAULT_FONT_NAME_NIX
            self._create_nix()

    def _get_nix_font_path(self, name, style):
        from commands import getstatusoutput
        exit, out = getstatusoutput('fc-list "%s:style=%s" file' %
                                    (name, style))
        if not exit:
            lines = out.splitlines()
            if lines:
                path = lines[0].strip().strip(':')
                return path

    def _create_nix(self):
        for name in STYLES['NORMAL']:
            path = self._get_nix_font_path(self.font_name, name)
            if path is not None:
                self.fonts['NORMAL'] = ImageFont.truetype(path, self.font_size)
                break
        else:
            raise FontNotFound('No usable fonts named: "%s"' %
                               self.font_name)
        for style in ('ITALIC', 'BOLD', 'BOLDITALIC'):
            for stylename in STYLES[style]:
                path = self._get_nix_font_path(self.font_name, stylename)
                if path is not None:
                    self.fonts[style] = ImageFont.truetype(path, self.font_size)
                    break
            else:
                if style == 'BOLDITALIC':
                    self.fonts[style] = self.fonts['BOLD']
                else:
                    self.fonts[style] = self.fonts['NORMAL']

    def _lookup_win(self, key, basename, styles, fail=False):
        for suffix in ('', ' (TrueType)'):
            for style in styles:
                try:
                    valname = '%s%s%s' % (basename, style and ' '+style, suffix)
                    val, _ = _winreg.QueryValueEx(key, valname)
                    return val
                except EnvironmentError:
                    continue
        else:
            if fail:
                raise FontNotFound('Font %s (%s) not found in registry' %
                                   (basename, styles[0]))
            return None

    def _create_win(self):
        try:
            key = _winreg.OpenKey(
                _winreg.HKEY_LOCAL_MACHINE,
                r'Software\Microsoft\Windows NT\CurrentVersion\Fonts')
        except EnvironmentError:
            try:
                key = _winreg.OpenKey(
                    _winreg.HKEY_LOCAL_MACHINE,
                    r'Software\Microsoft\Windows\CurrentVersion\Fonts')
            except EnvironmentError:
                raise FontNotFound('Can\'t open Windows font registry key')
        try:
            path = self._lookup_win(key, self.font_name, STYLES['NORMAL'], True)
            self.fonts['NORMAL'] = ImageFont.truetype(path, self.font_size)
            for style in ('ITALIC', 'BOLD', 'BOLDITALIC'):
                path = self._lookup_win(key, self.font_name, STYLES[style])
                if path:
                    self.fonts[style] = ImageFont.truetype(path, self.font_size)
                else:
                    if style == 'BOLDITALIC':
                        self.fonts[style] = self.fonts['BOLD']
                    else:
                        self.fonts[style] = self.fonts['NORMAL']
        finally:
            _winreg.CloseKey(key)

    def get_char_size(self):
        """
        Get the character size.
        """
        return self.fonts['NORMAL'].getsize('M')

    def get_font(self, bold, oblique):
        """
        Get the font based on bold and italic flags.
        """
        if bold and oblique:
            return self.fonts['BOLDITALIC']
        elif bold:
            return self.fonts['BOLD']
        elif oblique:
            return self.fonts['ITALIC']
        else:
            return self.fonts['NORMAL']
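
# A brief usage sketch (not part of the library; the font name is an
# assumption -- any installed monospace font would do):
#
#     fm = FontManager('DejaVu Sans Mono', font_size=14)
#     charw, charh = fm.get_char_size()      # pixel size of 'M'
#     bold_italic = fm.get_font(True, True)  # bold + oblique variant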


class ImageFormatter(Formatter):
    """
    Create a PNG image from source code. This uses the Python Imaging Library to
    generate a pixmap from the source code.

    *New in Pygments 0.10.*

    Additional options accepted:

    `image_format`
        An image format to output to that is recognised by PIL, these include:

        * "PNG" (default)
        * "JPEG"
        * "BMP"
        * "GIF"

    `line_pad`
        The extra spacing (in pixels) between each line of text.

        Default: 2

    `font_name`
        The font name to be used as the base font from which others, such as
        bold and italic fonts will be generated.  This really should be a
        monospace font to look sane.

        Default: "Bitstream Vera Sans Mono"

    `font_size`
        The font size in points to be used.

        Default: 14

    `image_pad`
        The padding, in pixels to be used at each edge of the resulting image.

        Default: 10

    `line_numbers`
        Whether line numbers should be shown: True/False

        Default: True

    `line_number_start`
        The line number of the first line.

        Default: 1

    `line_number_step`
        The step used when printing line numbers.

        Default: 1

    `line_number_bg`
        The background colour (in "#123456" format) of the line number bar, or
        None to use the style background color.

        Default: "#eed"

    `line_number_fg`
        The text color of the line numbers (in "#123456"-like format).

        Default: "#886"

    `line_number_chars`
        The number of columns of line numbers allowable in the line number
        margin.

        Default: 2

    `line_number_bold`
        Whether line numbers will be bold: True/False

        Default: False

    `line_number_italic`
        Whether line numbers will be italicized: True/False

        Default: False

    `line_number_separator`
        Whether a line will be drawn between the line number area and the
        source code area: True/False

        Default: True

    `line_number_pad`
        The horizontal padding (in pixels) between the line number margin, and
        the source code area.

        Default: 6

    `hl_lines`
        Specify a list of lines to be highlighted.  *New in Pygments 1.2.*

        Default: empty list

    `hl_color`
        Specify the color for highlighting lines.  *New in Pygments 1.2.*

        Default: highlight color of the selected style
    """

    # Required by the pygments mapper
    name = 'img'
    aliases = ['img', 'IMG', 'png']
    filenames = ['*.png']

    unicodeoutput = False

    default_image_format = 'png'

    def __init__(self, **options):
        """
        See the class docstring for explanation of options.
        """
        if not pil_available:
            raise PilNotAvailable(
                'Python Imaging Library is required for this formatter')
        Formatter.__init__(self, **options)
        # Read the style
        self.styles = dict(self.style)
        if self.style.background_color is None:
            self.background_color = '#fff'
        else:
            self.background_color = self.style.background_color
        # Image options
        self.image_format = get_choice_opt(
            options, 'image_format', ['png', 'jpeg', 'gif', 'bmp'],
            self.default_image_format, normcase=True)
        self.image_pad = get_int_opt(options, 'image_pad', 10)
        self.line_pad = get_int_opt(options, 'line_pad', 2)
        # The fonts
        fontsize = get_int_opt(options, 'font_size', 14)
        self.fonts = FontManager(options.get('font_name', ''), fontsize)
        self.fontw, self.fonth = self.fonts.get_char_size()
        # Line number options
        self.line_number_fg = options.get('line_number_fg', '#886')
        self.line_number_bg = options.get('line_number_bg', '#eed')
        self.line_number_chars = get_int_opt(options,
                                        'line_number_chars', 2)
        self.line_number_bold = get_bool_opt(options,
                                        'line_number_bold', False)
        self.line_number_italic = get_bool_opt(options,
                                        'line_number_italic', False)
        self.line_number_pad = get_int_opt(options, 'line_number_pad', 6)
        self.line_numbers = get_bool_opt(options, 'line_numbers', True)
        self.line_number_separator = get_bool_opt(options,
                                        'line_number_separator', True)
        self.line_number_step = get_int_opt(options, 'line_number_step', 1)
        self.line_number_start = get_int_opt(options, 'line_number_start', 1)
        if self.line_numbers:
            self.line_number_width = (self.fontw * self.line_number_chars +
                                   self.line_number_pad * 2)
        else:
            self.line_number_width = 0
        self.hl_lines = []
        hl_lines_str = get_list_opt(options, 'hl_lines', [])
        for line in hl_lines_str:
            try:
                self.hl_lines.append(int(line))
            except ValueError:
                pass
        self.hl_color = options.get('hl_color',
                                    self.style.highlight_color) or '#f90'
        self.drawables = []

    def get_style_defs(self, arg=''):
        raise NotImplementedError('The -S option is meaningless for the image '
                                  'formatter. Use -O style= instead.')

    def _get_line_height(self):
        """
        Get the height of a line.
        """
        return self.fonth + self.line_pad

    def _get_line_y(self, lineno):
        """
        Get the Y coordinate of a line number.
        """
        return lineno * self._get_line_height() + self.image_pad

    def _get_char_width(self):
        """
        Get the width of a character.
        """
        return self.fontw

    def _get_char_x(self, charno):
        """
        Get the X coordinate of a character position.
        """
        return charno * self.fontw + self.image_pad + self.line_number_width

    def _get_text_pos(self, charno, lineno):
        """
        Get the actual position for a character and line position.
        """
        return self._get_char_x(charno), self._get_line_y(lineno)

    def _get_linenumber_pos(self, lineno):
        """
        Get the actual position for the start of a line number.
        """
        return (self.image_pad, self._get_line_y(lineno))

    def _get_text_color(self, style):
        """
        Get the correct color for the token from the style.
        """
        if style['color'] is not None:
            fill = '#' + style['color']
        else:
            fill = '#000'
        return fill

    def _get_style_font(self, style):
        """
        Get the correct font for the style.
        """
        return self.fonts.get_font(style['bold'], style['italic'])

    def _get_image_size(self, maxcharno, maxlineno):
        """
        Get the required image size.
        """
        return (self._get_char_x(maxcharno) + self.image_pad,
                self._get_line_y(maxlineno + 0) + self.image_pad)

    def _draw_linenumber(self, posno, lineno):
        """
        Remember a line number drawable to paint later.
        """
        self._draw_text(
            self._get_linenumber_pos(posno),
            str(lineno).rjust(self.line_number_chars),
            font=self.fonts.get_font(self.line_number_bold,
                                     self.line_number_italic),
            fill=self.line_number_fg,
        )

    def _draw_text(self, pos, text, font, **kw):
        """
        Remember a single drawable tuple to paint later.
        """
        self.drawables.append((pos, text, font, kw))

    def _create_drawables(self, tokensource):
        """
        Create drawables for the token content.
        """
        lineno = charno = maxcharno = 0
        for ttype, value in tokensource:
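            # Fall back to the nearest ancestor type that the style
            # defines; the root Token type is always present, so the
            # loop terminates.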
            while ttype not in self.styles:
                ttype = ttype.parent
            style = self.styles[ttype]
            # TODO: make sure tab expansion happens earlier in the chain.  It
            # really ought to be done on the input, as to do it right here is
            # quite complex.
            value = value.expandtabs(4)
            lines = value.splitlines(True)
            #print lines
            for i, line in enumerate(lines):
                temp = line.rstrip('\n')
                if temp:
                    self._draw_text(
                        self._get_text_pos(charno, lineno),
                        temp,
                        font = self._get_style_font(style),
                        fill = self._get_text_color(style)
                    )
                    charno += len(temp)
                    maxcharno = max(maxcharno, charno)
                if line.endswith('\n'):
                    # add a line for each extra line in the value
                    charno = 0
                    lineno += 1
        self.maxcharno = maxcharno
        self.maxlineno = lineno

    def _draw_line_numbers(self):
        """
        Create drawables for the line numbers.
        """
        if not self.line_numbers:
            return
        for p in xrange(self.maxlineno):
            n = p + self.line_number_start
            if (n % self.line_number_step) == 0:
                self._draw_linenumber(p, n)

    def _paint_line_number_bg(self, im):
        """
        Paint the line number background on the image.
        """
        if not self.line_numbers:
            return
        if self.line_number_fg is None:
            return
        draw = ImageDraw.Draw(im)
        recth = im.size[-1]
        rectw = self.image_pad + self.line_number_width - self.line_number_pad
        draw.rectangle([(0, 0),
                        (rectw, recth)],
             fill=self.line_number_bg)
        draw.line([(rectw, 0), (rectw, recth)], fill=self.line_number_fg)
        del draw

    def format(self, tokensource, outfile):
        """
        Format ``tokensource``, an iterable of ``(tokentype, tokenstring)``
        tuples and write it into ``outfile``.

        This implementation calculates where it should draw each token on the
        pixmap, then calculates the required pixmap size and draws the items.
        """
        self._create_drawables(tokensource)
        self._draw_line_numbers()
        im = Image.new(
            'RGB',
            self._get_image_size(self.maxcharno, self.maxlineno),
            self.background_color
        )
        self._paint_line_number_bg(im)
        draw = ImageDraw.Draw(im)
        # Highlight
        if self.hl_lines:
            x = self.image_pad + self.line_number_width - self.line_number_pad + 1
            recth = self._get_line_height()
            rectw = im.size[0] - x
            for linenumber in self.hl_lines:
                y = self._get_line_y(linenumber - 1)
                draw.rectangle([(x, y), (x + rectw, y + recth)],
                               fill=self.hl_color)
        for pos, value, font, kw in self.drawables:
            draw.text(pos, value, font=font, **kw)
        im.save(outfile, self.image_format.upper())


# Add one formatter per format, so that the "-f gif" option gives the correct result
# when used in pygmentize.

class GifImageFormatter(ImageFormatter):
    """
    Create a GIF image from source code. This uses the Python Imaging Library to
    generate a pixmap from the source code.

    *New in Pygments 1.0.* (You could create GIF images before by passing a
    suitable `image_format` option to the `ImageFormatter`.)
    """

    name = 'img_gif'
    aliases = ['gif']
    filenames = ['*.gif']
    default_image_format = 'gif'


class JpgImageFormatter(ImageFormatter):
    """
    Create a JPEG image from source code. This uses the Python Imaging Library to
    generate a pixmap from the source code.

    *New in Pygments 1.0.* (You could create JPEG images before by passing a
    suitable `image_format` option to the `ImageFormatter`.)
    """

    name = 'img_jpg'
    aliases = ['jpg', 'jpeg']
    filenames = ['*.jpg']
    default_image_format = 'jpeg'


class BmpImageFormatter(ImageFormatter):
    """
    Create a bitmap image from source code. This uses the Python Imaging Library to
    generate a pixmap from the source code.

    *New in Pygments 1.0.* (You could create bitmap images before by passing a
    suitable `image_format` option to the `ImageFormatter`.)
    """

    name = 'img_bmp'
    aliases = ['bmp', 'bitmap']
    filenames = ['*.bmp']
    default_image_format = 'bmp'
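
# A minimal end-to-end sketch (not part of the library; the file name and
# sample code are invented):
#
#     from pygments import highlight
#     from pygments.lexers import PythonLexer
#     from pygments.formatters import ImageFormatter
#
#     code = 'print "hello"\n'
#     with open('hello.png', 'wb') as f:
#         highlight(code, PythonLexer(), ImageFormatter(font_size=12,
#                                                       hl_lines=[1]), f)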
Pygments-1.6/pygments/formatters/html.py0000644000175000017500000007456112103427442017630 0ustar  piotrpiotr# -*- coding: utf-8 -*-
"""
    pygments.formatters.html
    ~~~~~~~~~~~~~~~~~~~~~~~~

    Formatter for HTML output.

    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import os
import sys
import os.path
import StringIO

from pygments.formatter import Formatter
from pygments.token import Token, Text, STANDARD_TYPES
from pygments.util import get_bool_opt, get_int_opt, get_list_opt, bytes

try:
    import ctags
except ImportError:
    ctags = None

__all__ = ['HtmlFormatter']


_escape_html_table = {
    ord('&'): u'&amp;',
    ord('<'): u'&lt;',
    ord('>'): u'&gt;',
    ord('"'): u'&quot;',
    ord("'"): u'&#39;',
}

def escape_html(text, table=_escape_html_table):
    """Escape &, <, > as well as single and double quotes for HTML."""
    return text.translate(table)
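# For example (a sketch): escape_html(u'a < b & c') == u'a &lt; b &amp; c'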

def get_random_id():
    """Return a random id for javascript fields."""
    from random import random
    from time import time
    try:
        from hashlib import sha1 as sha
    except ImportError:
        import sha
        sha = sha.new
    return sha('%s|%s' % (random(), time())).hexdigest()


def _get_ttype_class(ttype):
    fname = STANDARD_TYPES.get(ttype)
    if fname:
        return fname
    aname = ''
    while fname is None:
        aname = '-' + ttype[-1] + aname
        ttype = ttype.parent
        fname = STANDARD_TYPES.get(ttype)
    return fname + aname


CSSFILE_TEMPLATE = '''\
td.linenos { background-color: #f0f0f0; padding-right: 10px; }
span.lineno { background-color: #f0f0f0; padding: 0 5px 0 5px; }
pre { line-height: 125%%; }
%(styledefs)s
'''

DOC_HEADER = '''\
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
   "http://www.w3.org/TR/html4/strict.dtd">

<html>
<head>
  <title>%(title)s</title>
  <meta http-equiv="content-type" content="text/html; charset=%(encoding)s">
  <style type="text/css">
''' + CSSFILE_TEMPLATE + '''
  </style>
</head>
<body>
<h2>%(title)s</h2>

'''

DOC_HEADER_EXTERNALCSS = '''\
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
   "http://www.w3.org/TR/html4/strict.dtd">

<html>
<head>
  <title>%(title)s</title>
  <meta http-equiv="content-type" content="text/html; charset=%(encoding)s">
  <link rel="stylesheet" href="%(cssfile)s" type="text/css">
</head>
<body>
<h2>%(title)s</h2>

'''

DOC_FOOTER = '''\
</body>
</html>
'''


class HtmlFormatter(Formatter):
    r"""
    Format tokens as HTML 4 ``<span>`` tags within a ``<pre>`` tag, wrapped
    in a ``<div>`` tag. The ``<div>``'s CSS class can be set by the
    `cssclass` option.

    If the `linenos` option is set to ``"table"``, the ``<pre>`` is
    additionally wrapped inside a ``<table>`` which has one row and two
    cells: one containing the line numbers and one containing the code.
    Example:

    .. sourcecode:: html

        <div class="highlight">
        <table><tr>
          <td class="linenos">
            <pre>1
            2</pre>
          </td>
          <td class="code">
            <pre><span class="Ke">def </span><span class="NaFu">foo</span>(bar):
              <span class="Ke">pass</span>
            </pre>
          </td>
        </tr></table></div>

    (whitespace added to improve clarity).

    Wrapping can be disabled using the `nowrap` option.

    A list of lines can be specified using the `hl_lines` option to make
    these lines highlighted (as of Pygments 0.11).

    With the `full` option, a complete HTML 4 document is output, including
    the style definitions inside a ``<style>`` tag, or in a separate file if
    the `cssfile` option is given.

Code tags report for %s

%s
LineTagWhoDescription
''' TABLE = '\nFile: %s\n' TR = ('%%(lno)d' '%%(tag)s' '%%(who)s%%(what)s') f = file(output, 'w') table = '\n'.join(TABLE % fname + '\n'.join(TR % (no % 2,) % entry for no, entry in enumerate(store[fname])) for fname in sorted(store)) f.write(HTML % (', '.join(map(abspath, args)), table)) f.close() print "Report written to %s." % output return 0 if __name__ == '__main__': sys.exit(main()) Pygments-1.6/scripts/vim2pygments.py0000644000175000017500000006325311713467263016772 0ustar piotrpiotr#!/usr/bin/env python # -*- coding: utf-8 -*- """ Vim Colorscheme Converter ~~~~~~~~~~~~~~~~~~~~~~~~~ This script converts vim colorscheme files to valid pygments style classes meant for putting into modules. :copyright 2006 by Armin Ronacher. :license: BSD, see LICENSE for details. """ import sys import re from os import path from cStringIO import StringIO split_re = re.compile(r'(? 2 and \ len(parts[0]) >= 2 and \ 'highlight'.startswith(parts[0]): token = parts[1].lower() if token not in TOKENS: continue for item in parts[2:]: p = item.split('=', 1) if not len(p) == 2: continue key, value = p if key in ('ctermfg', 'guifg'): color = get_vim_color(value) if color: set('color', color) elif key in ('ctermbg', 'guibg'): color = get_vim_color(value) if color: set('bgcolor', color) elif key in ('term', 'cterm', 'gui'): items = value.split(',') for item in items: item = item.lower() if item == 'none': set('noinherit', True) elif item == 'bold': set('bold', True) elif item == 'underline': set('underline', True) elif item == 'italic': set('italic', True) if bg_color is not None and not colors['Normal'].get('bgcolor'): colors['Normal']['bgcolor'] = bg_color color_map = {} for token, styles in colors.iteritems(): if token in TOKENS: tmp = [] if styles.get('noinherit'): tmp.append('noinherit') if 'color' in styles: tmp.append(styles['color']) if 'bgcolor' in styles: tmp.append('bg:' + styles['bgcolor']) if styles.get('bold'): tmp.append('bold') if styles.get('italic'): tmp.append('italic') if styles.get('underline'): tmp.append('underline') tokens = TOKENS[token] if not isinstance(tokens, tuple): tokens = (tokens,) for token in tokens: color_map[token] = ' '.join(tmp) default_token = color_map.pop('') return default_token, color_map class StyleWriter(object): def __init__(self, code, name): self.code = code self.name = name.lower() def write_header(self, out): out.write('# -*- coding: utf-8 -*-\n"""\n') out.write(' %s Colorscheme\n' % self.name.title()) out.write(' %s\n\n' % ('~' * (len(self.name) + 12))) out.write(' Converted by %s\n' % SCRIPT_NAME) out.write('"""\nfrom pygments.style import Style\n') out.write('from pygments.token import Token, %s\n\n' % ', '.join(TOKEN_TYPES)) out.write('class %sStyle(Style):\n\n' % self.name.title()) def write(self, out): self.write_header(out) default_token, tokens = find_colors(self.code) tokens = tokens.items() tokens.sort(lambda a, b: cmp(len(a[0]), len(a[1]))) bg_color = [x[3:] for x in default_token.split() if x.startswith('bg:')] if bg_color: out.write(' background_color = %r\n' % bg_color[0]) out.write(' styles = {\n') out.write(' %-20s%r,\n' % ('Token:', default_token)) for token, definition in tokens: if definition: out.write(' %-20s%r,\n' % (token + ':', definition)) out.write(' }') def __repr__(self): out = StringIO() self.write_style(out) return out.getvalue() def convert(filename, stream=None): name = path.basename(filename) if name.endswith('.vim'): name = name[:-4] f = file(filename) code = f.read() f.close() writer = StyleWriter(code, name) if stream is not 
None: out = stream else: out = StringIO() writer.write(out) if stream is None: return out.getvalue() def main(): if len(sys.argv) != 2 or sys.argv[1] in ('-h', '--help'): print 'Usage: %s ' % sys.argv[0] return 2 if sys.argv[1] in ('-v', '--version'): print '%s %s' % (SCRIPT_NAME, SCRIPT_VERSION) return filename = sys.argv[1] if not (path.exists(filename) and path.isfile(filename)): print 'Error: %s not found' % filename return 1 convert(filename, sys.stdout) sys.stdout.write('\n') if __name__ == '__main__': sys.exit(main() or 0) Pygments-1.6/scripts/epydoc.css0000644000175000017500000003277411713467263015755 0ustar piotrpiotr /* Epydoc CSS Stylesheet * * This stylesheet can be used to customize the appearance of epydoc's * HTML output. * */ /* Adapted for Pocoo API docs by Georg Brandl */ /* Default Colors & Styles * - Set the default foreground & background color with 'body'; and * link colors with 'a:link' and 'a:visited'. * - Use bold for decision list terms. * - The heading styles defined here are used for headings *within* * docstring descriptions. All headings used by epydoc itself use * either class='epydoc' or class='toc' (CSS styles for both * defined below). */ body { background: #ffffff; color: #000000; font-family: Trebuchet MS,Tahoma,sans-serif; font-size: 0.9em; line-height: 140%; margin: 0; padding: 0 1.2em 1.2em 1.2em; } a:link { color: #C87900; text-decoration: none; border-bottom: 1px solid #C87900; } a:visited { color: #C87900; text-decoration: none; border-bottom: 1px dotted #C87900; } a:hover { color: #F8A900; border-bottom-color: #F8A900; } dt { font-weight: bold; } h1 { font-size: +180%; font-style: italic; font-weight: bold; margin-top: 1.5em; } h2 { font-size: +140%; font-style: italic; font-weight: bold; } h3 { font-size: +110%; font-style: italic; font-weight: normal; } p { margin-top: .5em; margin-bottom: .5em; } hr { margin-top: 1.5em; margin-bottom: 1.5em; border: 1px solid #BBB; } tt.literal { background: #F5FFD0; padding: 2px; font-size: 110%; } table.rst-docutils { border: 0; } table.rst-docutils td { border: 0; padding: 5px 20px 5px 0px; } /* Page Header & Footer * - The standard page header consists of a navigation bar (with * pointers to standard pages such as 'home' and 'trees'); a * breadcrumbs list, which can be used to navigate to containing * classes or modules; options links, to show/hide private * variables and to show/hide frames; and a page title (using *
<h1>
). The page title may be followed by a link to the * corresponding source code (using 'span.codelink'). * - The footer consists of a navigation bar, a timestamp, and a * pointer to epydoc's homepage. */ h1.epydoc { margin-top: .4em; margin-bottom: .4em; font-size: +180%; font-weight: bold; font-style: normal; } h2.epydoc { font-size: +130%; font-weight: bold; font-style: normal; } h3.epydoc { font-size: +115%; font-weight: bold; font-style: normal; } table.navbar { background: #E6F8A0; color: #000000; border-top: 1px solid #c0d0d0; border-bottom: 1px solid #c0d0d0; margin: -1px -1.2em 1em -1.2em; } table.navbar th { padding: 2px 7px 2px 0px; } th.navbar-select { background-color: transparent; } th.navbar-select:before { content: ">" } th.navbar-select:after { content: "<" } table.navbar a { border: 0; } span.breadcrumbs { font-size: 95%; font-weight: bold; } span.options { font-size: 80%; } span.codelink { font-size: 85%; } td.footer { font-size: 85%; } /* Table Headers * - Each summary table and details section begins with a 'header' * row. This row contains a section title (marked by * 'span.table-header') as well as a show/hide private link * (marked by 'span.options', defined above). * - Summary tables that contain user-defined groups mark those * groups using 'group header' rows. */ td.table-header { background: #B6C870; color: #000000; border-bottom: 1px solid #FFF; } span.table-header { font-size: 110%; font-weight: bold; } th.group-header { text-align: left; font-style: italic; font-size: 110%; } td.spacer { width: 5%; } /* Summary Tables (functions, variables, etc) * - Each object is described by a single row of the table with * two cells. The left cell gives the object's type, and is * marked with 'code.summary-type'. The right cell gives the * object's name and a summary description. * - CSS styles for the table's header and group headers are * defined above, under 'Table Headers' */ table.summary { border-collapse: collapse; background: #E6F8A0; color: #000000; margin: 1em 0 .5em 0; border: 0; } table.summary tr { border-bottom: 1px solid #BBB; } td.summary a { font-weight: bold; } code.summary-type { font-size: 85%; } /* Details Tables (functions, variables, etc) * - Each object is described in its own single-celled table. * - A single-row summary table w/ table-header is used as * a header for each details section (CSS style for table-header * is defined above, under 'Table Headers'). */ table.detsummary { margin-top: 2em; } table.details { border-collapse: collapse; background: #E6F8A0; color: #000000; border-bottom: 1px solid #BBB; margin: 0; } table.details td { padding: .2em .2em .2em .5em; } table.details table td { padding: 0; } table.details h3 { margin: 5px 0 5px 0; font-size: 105%; font-style: normal; } table.details dd { display: inline; margin-left: 5px; } table.details dl { margin-left: 5px; } /* Index tables (identifier index, term index, etc) * - link-index is used for indices containing lists of links * (namely, the identifier index & term index). * - index-where is used in link indices for the text indicating * the container/source for each link. * - metadata-index is used for indices containing metadata * extracted from fields (namely, the bug index & todo index). 
*/ table.link-index { border-collapse: collapse; background: #F6FFB0; color: #000000; border: 1px solid #608090; } td.link-index { border-width: 0px; } span.index-where { font-size: 70%; } table.metadata-index { border-collapse: collapse; background: #F6FFB0; color: #000000; border: 1px solid #608090; margin: .2em 0 0 0; } td.metadata-index { border-width: 1px; border-style: solid; } /* Function signatures * - sig* is used for the signature in the details section. * - .summary-sig* is used for the signature in the summary * table, and when listing property accessor functions. * */ .sig-name { color: #006080; } .sig-arg { color: #008060; } .sig-default { color: #602000; } .summary-sig-name { font-weight: bold; } .summary-sig-arg { color: #006040; } .summary-sig-default { color: #501800; } /* Variable values * - In the 'variable details' sections, each varaible's value is * listed in a 'pre.variable' box. The width of this box is * restricted to 80 chars; if the value's repr is longer than * this it will be wrapped, using a backslash marked with * class 'variable-linewrap'. If the value's repr is longer * than 3 lines, the rest will be ellided; and an ellipsis * marker ('...' marked with 'variable-ellipsis') will be used. * - If the value is a string, its quote marks will be marked * with 'variable-quote'. * - If the variable is a regexp, it is syntax-highlighted using * the re* CSS classes. */ pre.variable { padding: .5em; margin: 0; background-color: #dce4ec; border: 1px solid #708890; } .variable-linewrap { display: none; } .variable-ellipsis { color: #604000; font-weight: bold; } .variable-quote { color: #604000; font-weight: bold; } .re { color: #000000; } .re-char { color: #006030; } .re-op { color: #600000; } .re-group { color: #003060; } .re-ref { color: #404040; } /* Base tree * - Used by class pages to display the base class hierarchy. */ pre.base-tree { font-size: 90%; margin: 1em 0 2em 0; line-height: 100%;} /* Frames-based table of contents headers * - Consists of two frames: one for selecting modules; and * the other listing the contents of the selected module. * - h1.toc is used for each frame's heading * - h2.toc is used for subheadings within each frame. */ h1.toc { text-align: center; font-size: 105%; margin: 0; font-weight: bold; padding: 0; } h2.toc { font-size: 100%; font-weight: bold; margin: 0.5em 0 0 -0.3em; } /* Syntax Highlighting for Source Code * - doctest examples are displayed in a 'pre.py-doctest' block. * If the example is in a details table entry, then it will use * the colors specified by the 'table pre.py-doctest' line. * - Source code listings are displayed in a 'pre.py-src' block. * Each line is marked with 'span.py-line' (used to draw a line * down the left margin, separating the code from the line * numbers). Line numbers are displayed with 'span.py-lineno'. * The expand/collapse block toggle button is displayed with * 'a.py-toggle' (Note: the CSS style for 'a.py-toggle' should not * modify the font size of the text.) * - If a source code page is opened with an anchor, then the * corresponding code block will be highlighted. The code * block's header is highlighted with 'py-highlight-hdr'; and * the code block's body is highlighted with 'py-highlight'. * - The remaining py-* classes are used to perform syntax * highlighting (py-string for string literals, py-name for names, * etc.) 
*/ pre.rst-literal-block, pre.py-doctest { margin-left: 1em; margin-right: 1.5em; line-height: 150%; background-color: #F5FFD0; padding: .5em; border: 1px solid #B6C870; font-size: 110%; } pre.py-src { border: 1px solid #BBB; margin-top: 3em; background: #f0f0f0; color: #000000; line-height: 150%; } span.py-line { margin-left: .2em; padding-left: .4em; } span.py-lineno { border-right: 1px solid #BBB; padding: .3em .5em .3em .5em; font-style: italic; font-size: 90%; } a.py-toggle { text-decoration: none; } div.py-highlight-hdr { border-top: 1px solid #BBB; background: #d0e0e0; } div.py-highlight { border-bottom: 1px solid #BBB; background: #d0e0e0; } .py-prompt { color: #005050; font-weight: bold;} .py-string { color: #006030; } .py-comment { color: #003060; } .py-keyword { color: #600000; } .py-output { color: #404040; } .py-name { color: #000050; } .py-name:link { color: #000050; } .py-name:visited { color: #000050; } .py-number { color: #005000; } .py-def-name { color: #000060; font-weight: bold; } .py-base-class { color: #000060; } .py-param { color: #000060; } .py-docstring { color: #006030; } .py-decorator { color: #804020; } /* Use this if you don't want links to names underlined: */ /*a.py-name { text-decoration: none; }*/ /* Graphs & Diagrams * - These CSS styles are used for graphs & diagrams generated using * Graphviz dot. 'img.graph-without-title' is used for bare * diagrams (to remove the border created by making the image * clickable). */ img.graph-without-title { border: none; } img.graph-with-title { border: 1px solid #000000; } span.graph-title { font-weight: bold; } span.graph-caption { } /* General-purpose classes * - 'p.indent-wrapped-lines' defines a paragraph whose first line * is not indented, but whose subsequent lines are. * - The 'nomargin-top' class is used to remove the top margin (e.g. * from lists). The 'nomargin' class is used to remove both the * top and bottom margin (but not the left or right margin -- * for lists, that would cause the bullets to disappear.) */ p.indent-wrapped-lines { padding: 0 0 0 7em; text-indent: -7em; margin: 0; } .nomargin-top { margin-top: 0; } .nomargin { margin-top: 0; margin-bottom: 0; } Pygments-1.6/scripts/detect_missing_analyse_text.py0000644000175000017500000000163612103426532022070 0ustar piotrpiotrimport sys from pygments.lexers import get_all_lexers, find_lexer_class from pygments.lexer import Lexer def main(): uses = {} for name, aliases, filenames, mimetypes in get_all_lexers(): cls = find_lexer_class(name) if not cls.aliases: print cls, "has no aliases" for f in filenames: if f not in uses: uses[f] = [] uses[f].append(cls) ret = 0 for k, v in uses.iteritems(): if len(v) > 1: #print "Multiple for", k, v for i in v: if i.analyse_text is None: print i, "has a None analyse_text" ret |= 1 elif Lexer.analyse_text.__doc__ == i.analyse_text.__doc__: print i, "needs analyse_text, multiple lexers for", k ret |= 2 return ret if __name__ == '__main__': sys.exit(main()) Pygments-1.6/scripts/check_sources.py0000755000175000017500000001645312103426532017135 0ustar piotrpiotr#!/usr/bin/env python # -*- coding: utf-8 -*- """ Checker for file headers ~~~~~~~~~~~~~~~~~~~~~~~~ Make sure each Python file has a correct file header including copyright and license information. :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. 
""" import sys, os, re import getopt import cStringIO from os.path import join, splitext, abspath checkers = {} def checker(*suffixes, **kwds): only_pkg = kwds.pop('only_pkg', False) def deco(func): for suffix in suffixes: checkers.setdefault(suffix, []).append(func) func.only_pkg = only_pkg return func return deco name_mail_re = r'[\w ]+(<.*?>)?' copyright_re = re.compile(r'^ :copyright: Copyright 2006-2013 by ' r'the Pygments team, see AUTHORS\.$', re.UNICODE) copyright_2_re = re.compile(r'^ %s(, %s)*[,.]$' % (name_mail_re, name_mail_re), re.UNICODE) coding_re = re.compile(r'coding[:=]\s*([-\w.]+)') not_ix_re = re.compile(r'\bnot\s+\S+?\s+i[sn]\s\S+') is_const_re = re.compile(r'if.*?==\s+(None|False|True)\b') misspellings = ["developement", "adress", "verificate", # ALLOW-MISSPELLING "informations"] # ALLOW-MISSPELLING @checker('.py') def check_syntax(fn, lines): try: compile(''.join(lines), fn, "exec") except SyntaxError, err: yield 0, "not compilable: %s" % err @checker('.py') def check_style_and_encoding(fn, lines): encoding = 'ascii' for lno, line in enumerate(lines): if len(line) > 90: yield lno+1, "line too long" m = not_ix_re.search(line) if m: yield lno+1, '"' + m.group() + '"' if is_const_re.search(line): yield lno+1, 'using == None/True/False' if lno < 2: co = coding_re.search(line) if co: encoding = co.group(1) try: line.decode(encoding) except UnicodeDecodeError, err: yield lno+1, "not decodable: %s\n Line: %r" % (err, line) except LookupError, err: yield 0, "unknown encoding: %s" % encoding encoding = 'latin1' @checker('.py', only_pkg=True) def check_fileheader(fn, lines): # line number correction c = 1 if lines[0:1] == ['#!/usr/bin/env python\n']: lines = lines[1:] c = 2 llist = [] docopen = False for lno, l in enumerate(lines): llist.append(l) if lno == 0: if l == '# -*- coding: rot13 -*-\n': # special-case pony package return elif l != '# -*- coding: utf-8 -*-\n': yield 1, "missing coding declaration" elif lno == 1: if l != '"""\n' and l != 'r"""\n': yield 2, 'missing docstring begin (""")' else: docopen = True elif docopen: if l == '"""\n': # end of docstring if lno <= 4: yield lno+c, "missing module name in docstring" break if l != "\n" and l[:4] != ' ' and docopen: yield lno+c, "missing correct docstring indentation" if lno == 2: # if not in package, don't check the module name modname = fn[:-3].replace('/', '.').replace('.__init__', '') while modname: if l.lower()[4:-1] == modname: break modname = '.'.join(modname.split('.')[1:]) else: yield 3, "wrong module name in docstring heading" modnamelen = len(l.strip()) elif lno == 3: if l.strip() != modnamelen * "~": yield 4, "wrong module name underline, should be ~~~...~" else: yield 0, "missing end and/or start of docstring..." # check for copyright and license fields license = llist[-2:-1] if license != [" :license: BSD, see LICENSE for details.\n"]: yield 0, "no correct license info" ci = -3 copyright = [s.decode('utf-8') for s in llist[ci:ci+1]] while copyright and copyright_2_re.match(copyright[0]): ci -= 1 copyright = llist[ci:ci+1] if not copyright or not copyright_re.match(copyright[0]): yield 0, "no correct copyright info" @checker('.py', '.html', '.js') def check_whitespace_and_spelling(fn, lines): for lno, line in enumerate(lines): if "\t" in line: yield lno+1, "OMG TABS!!!1 " if line[:-1].rstrip(' \t') != line[:-1]: yield lno+1, "trailing whitespace" for word in misspellings: if word in line and 'ALLOW-MISSPELLING' not in line: yield lno+1, '"%s" used' % word bad_tags = ('', '', '', '', '' '
print >>out, "%s:%d: %s" % (fn, lno, msg) num += 1 if verbose: print if num == 0: print "No errors found." else: print out.getvalue().rstrip('\n') print "%d error%s found." % (num, num > 1 and "s" or "") return int(num > 0) if __name__ == '__main__': sys.exit(main(sys.argv)) Pygments-1.6/scripts/reindent.py0000755000175000017500000002330611713467263016124 0ustar  piotrpiotr#! /usr/bin/env python # Released to the public domain, by Tim Peters, 03 October 2000. # -B option added by Georg Brandl, 2006. """reindent [-d][-r][-v] [ path ... ] -d (--dryrun) Dry run. Analyze, but don't make any changes to files. -r (--recurse) Recurse. Search for all .py files in subdirectories too. -B (--no-backup) Don't write .bak backup files. -v (--verbose) Verbose. Print informative msgs; else only names of changed files. -h (--help) Help. Print this usage information and exit. Change Python (.py) files to use 4-space indents and no hard tab characters. Also trim excess spaces and tabs from ends of lines, and remove empty lines at the end of files. Also ensure the last line ends with a newline. If no paths are given on the command line, reindent operates as a filter, reading a single source file from standard input and writing the transformed source to standard output. In this case, the -d, -r and -v flags are ignored. You can pass one or more file and/or directory paths. When a directory path, all .py files within the directory will be examined, and, if the -r option is given, likewise recursively for subdirectories. If output is not to standard output, reindent overwrites files in place, renaming the originals with a .bak extension. If it finds nothing to change, the file is left alone. If reindent does change a file, the changed file is a fixed-point for future runs (i.e., running reindent on the resulting .py file won't change it again). The hard part of reindenting is figuring out what to do with comment lines. So long as the input files get a clean bill of health from tabnanny.py, reindent should do a good job. """ __version__ = "1" import tokenize import os import sys verbose = 0 recurse = 0 dryrun = 0 no_backup = 0 def usage(msg=None): if msg is not None: print >> sys.stderr, msg print >> sys.stderr, __doc__ def errprint(*args): sep = "" for arg in args: sys.stderr.write(sep + str(arg)) sep = " " sys.stderr.write("\n") def main(): import getopt global verbose, recurse, dryrun, no_backup try: opts, args = getopt.getopt(sys.argv[1:], "drvhB", ["dryrun", "recurse", "verbose", "help", "no-backup"]) except getopt.error, msg: usage(msg) return for o, a in opts: if o in ('-d', '--dryrun'): dryrun += 1 elif o in ('-r', '--recurse'): recurse += 1 elif o in ('-v', '--verbose'): verbose += 1 elif o in ('-B', '--no-backup'): no_backup += 1 elif o in ('-h', '--help'): usage() return if not args: r = Reindenter(sys.stdin) r.run() r.write(sys.stdout) return for arg in args: check(arg) def check(file): if os.path.isdir(file) and not os.path.islink(file): if verbose: print "listing directory", file names = os.listdir(file) for name in names: fullname = os.path.join(file, name) if ((recurse and os.path.isdir(fullname) and not os.path.islink(fullname)) or name.lower().endswith(".py")): check(fullname) return if verbose: print "checking", file, "...", try: f = open(file) except IOError, msg: errprint("%s: I/O Error: %s" % (file, str(msg))) return r = Reindenter(f) f.close() if r.run(): if verbose: print "changed." if dryrun: print "But this is a dry run, so leaving it alone."
else: print "reindented", file, (dryrun and "(dry run => not really)" or "") if not dryrun: if not no_backup: bak = file + ".bak" if os.path.exists(bak): os.remove(bak) os.rename(file, bak) if verbose: print "renamed", file, "to", bak f = open(file, "w") r.write(f) f.close() if verbose: print "wrote new", file else: if verbose: print "unchanged." class Reindenter: def __init__(self, f): self.find_stmt = 1 # next token begins a fresh stmt? self.level = 0 # current indent level # Raw file lines. self.raw = f.readlines() # File lines, rstripped & tab-expanded. Dummy at start is so # that we can use tokenize's 1-based line numbering easily. # Note that a line is all-blank iff it's "\n". self.lines = [line.rstrip('\n \t').expandtabs() + "\n" for line in self.raw] self.lines.insert(0, None) self.index = 1 # index into self.lines of next line # List of (lineno, indentlevel) pairs, one for each stmt and # comment line. indentlevel is -1 for comment lines, as a # signal that tokenize doesn't know what to do about them; # indeed, they're our headache! self.stats = [] def run(self): tokenize.tokenize(self.getline, self.tokeneater) # Remove trailing empty lines. lines = self.lines while lines and lines[-1] == "\n": lines.pop() # Sentinel. stats = self.stats stats.append((len(lines), 0)) # Map count of leading spaces to # we want. have2want = {} # Program after transformation. after = self.after = [] # Copy over initial empty lines -- there's nothing to do until # we see a line with *something* on it. i = stats[0][0] after.extend(lines[1:i]) for i in range(len(stats)-1): thisstmt, thislevel = stats[i] nextstmt = stats[i+1][0] have = getlspace(lines[thisstmt]) want = thislevel * 4 if want < 0: # A comment line. if have: # An indented comment line. If we saw the same # indentation before, reuse what it most recently # mapped to. want = have2want.get(have, -1) if want < 0: # Then it probably belongs to the next real stmt. for j in xrange(i+1, len(stats)-1): jline, jlevel = stats[j] if jlevel >= 0: if have == getlspace(lines[jline]): want = jlevel * 4 break if want < 0: # Maybe it's a hanging # comment like this one, # in which case we should shift it like its base # line got shifted. for j in xrange(i-1, -1, -1): jline, jlevel = stats[j] if jlevel >= 0: want = have + getlspace(after[jline-1]) - \ getlspace(lines[jline]) break if want < 0: # Still no luck -- leave it alone. want = have else: want = 0 assert want >= 0 have2want[have] = want diff = want - have if diff == 0 or have == 0: after.extend(lines[thisstmt:nextstmt]) else: for line in lines[thisstmt:nextstmt]: if diff > 0: if line == "\n": after.append(line) else: after.append(" " * diff + line) else: remove = min(getlspace(line), -diff) after.append(line[remove:]) return self.raw != self.after def write(self, f): f.writelines(self.after) # Line-getter for tokenize. def getline(self): if self.index >= len(self.lines): line = "" else: line = self.lines[self.index] self.index += 1 return line # Line-eater for tokenize. def tokeneater(self, type, token, (sline, scol), end, line, INDENT=tokenize.INDENT, DEDENT=tokenize.DEDENT, NEWLINE=tokenize.NEWLINE, COMMENT=tokenize.COMMENT, NL=tokenize.NL): if type == NEWLINE: # A program statement, or ENDMARKER, will eventually follow, # after some (possibly empty) run of tokens of the form # (NL | COMMENT)* (INDENT | DEDENT+)? 
self.find_stmt = 1 elif type == INDENT: self.find_stmt = 1 self.level += 1 elif type == DEDENT: self.find_stmt = 1 self.level -= 1 elif type == COMMENT: if self.find_stmt: self.stats.append((sline, -1)) # but we're still looking for a new stmt, so leave # find_stmt alone elif type == NL: pass elif self.find_stmt: # This is the first "real token" following a NEWLINE, so it # must be the first token of the next program statement, or an # ENDMARKER. self.find_stmt = 0 if line: # not endmarker self.stats.append((sline, self.level)) # Count number of leading blanks. def getlspace(line): i, n = 0, len(line) while i < n and line[i] == " ": i += 1 return i if __name__ == '__main__': main() Pygments-1.6/scripts/get_vimkw.py0000644000175000017500000000266511713467263016312 0ustar  piotrpiotrimport re from pprint import pprint r_line = re.compile(r"^(syn keyword vimCommand contained|syn keyword vimOption " r"contained|syn keyword vimAutoEvent contained)\s+(.*)") r_item = re.compile(r"(\w+)(?:\[(\w+)\])?") def getkw(input, output): out = file(output, 'w') output_info = {'command': [], 'option': [], 'auto': []} for line in file(input): m = r_line.match(line) if m: # Decide which output gets mapped to d if 'vimCommand' in m.group(1): d = output_info['command'] elif 'AutoEvent' in m.group(1): d = output_info['auto'] else: d = output_info['option'] # Extract all the shortened versions for i in r_item.finditer(m.group(2)): d.append('(%r,%r)' % (i.group(1), "%s%s" % (i.group(1), i.group(2) or ''))) output_info['option'].append("('nnoremap','nnoremap')") output_info['option'].append("('inoremap','inoremap')") output_info['option'].append("('vnoremap','vnoremap')") for a, b in output_info.items(): b.sort() print >>out, '%s=[%s]' % (a, ','.join(b)) def is_keyword(w, keywords): for i in range(len(w), 0, -1): if w[:i] in keywords: return keywords[w[:i]][:len(w)] == w return False if __name__ == "__main__": getkw("/usr/share/vim/vim73/syntax/vim.vim", "temp.py") Pygments-1.6/scripts/pylintrc0000644000175000017500000002114711713467263015537 0ustar  piotrpiotr# lint Python modules using external checkers. # # This is the main checker controlling the other ones and the reports # generation. It is itself both a raw checker and an astng checker in order # to: # * handle message activation / deactivation at the module level # * handle some basic but necessary stats data (number of classes, methods...) # [MASTER] # Specify a configuration file. #rcfile= # Profiled execution. profile=no # Add <file or directory> to the black list. It should be a base name, not a # path. You may set this option multiple times. ignore=.svn # Pickle collected data for later comparisons. persistent=yes # Set the cache size for astng objects. cache-size=500 # List of plugins (as comma separated values of Python module names) to load, # usually to register additional checkers. load-plugins= [MESSAGES CONTROL] # Enable only checker(s) with the given id(s). This option conflicts with the # disable-checker option #enable-checker= # Enable all checker(s) except those with the given id(s). This option conflicts # with the disable-checker option #disable-checker= # Enable all messages in the listed categories. #enable-msg-cat= # Disable all messages in the listed categories. #disable-msg-cat= # Enable the message(s) with the given id(s). #enable-msg= # Disable the message(s) with the given id(s). disable-msg=C0323,W0142,C0301,C0103,C0111,E0213,C0302,C0203,W0703,R0201 [REPORTS] # set the output format.
Available formats are text, parseable, colorized and # html output-format=colorized # Include message's id in output include-ids=yes # Put messages in a separate file for each module / package specified on the # command line instead of printing them on stdout. Reports (if any) will be # written in a file named "pylint_global.[txt|html]". files-output=no # Tells whether to display a full report or only the messages reports=yes # Python expression which should return a note less than 10 (10 is the highest # note). You have access to the variables errors warning, statement which # respectively contain the number of errors / warnings messages and the total # number of statements analyzed. This is used by the global evaluation report # (R0004). evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) # Add a comment according to your evaluation note. This is used by the global # evaluation report (R0004). comment=no # Enable the report(s) with the given id(s). #enable-report= # Disable the report(s) with the given id(s). #disable-report= # checks for # * unused variables / imports # * undefined variables # * redefinition of variable from builtins or from an outer scope # * use of variable before assignment # [VARIABLES] # Tells whether we should check for unused import in __init__ files. init-import=no # A regular expression matching names used for dummy variables (i.e. not used). dummy-variables-rgx=_|dummy # List of additional names supposed to be defined in builtins. Remember that # you should avoid defining new builtins when possible. additional-builtins= # try to find bugs in the code using type inference # [TYPECHECK] # Tells whether missing members accessed in mixin class should be ignored. A # mixin class is detected if its name ends with "mixin" (case insensitive). ignore-mixin-members=yes # When zope mode is activated, consider the acquired-members option to ignore # access to some undefined attributes. zope=no # List of members which are usually accessed through zope's acquisition mechanism and # so shouldn't trigger E0201 when accessed (need zope=yes to be considered).
acquired-members=REQUEST,acl_users,aq_parent # checks for : # * doc strings # * modules / classes / functions / methods / arguments / variable names # * number of arguments, local variables, branches, returns and statements in # functions, methods # * required module attributes # * dangerous default values as arguments # * redefinition of function / method / class # * uses of the global statement # [BASIC] # Required attributes for module, separated by a comma required-attributes= # Regular expression which should only match function or class names which do # not require a docstring no-docstring-rgx=__.*__ # Regular expression which should only match correct module names module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ # Regular expression which should only match correct module level names const-rgx=(([A-Z_][A-Z1-9_]*)|(__.*__))$ # Regular expression which should only match correct class names class-rgx=[A-Z_][a-zA-Z0-9]+$ # Regular expression which should only match correct function names function-rgx=[a-z_][a-z0-9_]{2,30}$ # Regular expression which should only match correct method names method-rgx=[a-z_][a-z0-9_]{2,30}$ # Regular expression which should only match correct instance attribute names attr-rgx=[a-z_][a-z0-9_]{2,30}$ # Regular expression which should only match correct argument names argument-rgx=[a-z_][a-z0-9_]{2,30}$ # Regular expression which should only match correct variable names variable-rgx=[a-z_][a-z0-9_]{2,30}$ # Regular expression which should only match correct list comprehension / # generator expression variable names inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ # Good variable names which should always be accepted, separated by a comma good-names=i,j,k,ex,Run,_ # Bad variable names which should always be refused, separated by a comma bad-names=foo,bar,baz,toto,tutu,tata # List of builtin function names that should not be used, separated by a comma bad-functions=apply,input # checks for signs of poor design / misdesign: # * number of methods, attributes, local variables... # * size, complexity of functions, methods # [DESIGN] # Maximum number of arguments for function / method max-args=12 # Maximum number of locals for function / method body max-locals=30 # Maximum number of return / yield for function / method body max-returns=12 # Maximum number of branches for function / method body max-branchs=30 # Maximum number of statements in function / method body max-statements=60 # Maximum number of parents for a class (see R0901). max-parents=7 # Maximum number of attributes for a class (see R0902). max-attributes=20 # Minimum number of public methods for a class (see R0903). min-public-methods=0 # Maximum number of public methods for a class (see R0904). max-public-methods=20 # checks for # * external module dependencies # * relative / wildcard imports # * cyclic imports # * uses of deprecated modules # [IMPORTS] # Deprecated modules which should not be used, separated by a comma deprecated-modules=regsub,string,TERMIOS,Bastion,rexec # Create a graph of every (i.e.
internal and external) dependencies in the # given file (report R0402 must not be disabled) import-graph= # Create a graph of external dependencies in the given file (report R0402 must # not be disabled) ext-import-graph= # Create a graph of internal dependencies in the given file (report R0402 must # not be disabled) int-import-graph= # checks for : # * methods without self as first argument # * overridden method signatures # * access only to existent members via self # * attributes not defined in the __init__ method # * supported interfaces implementation # * unreachable code # [CLASSES] # List of interface methods to ignore, separated by a comma. This is used for # instance to not check methods defined in Zope's Interface base class. ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by # List of method names used to declare (i.e. assign) instance attributes. defining-attr-methods=__init__,__new__,setUp # checks for similarities and duplicated code. This computation may be # memory / CPU intensive, so you should disable it if you experience problems. # [SIMILARITIES] # Minimum number of lines of a similarity. min-similarity-lines=10 # Ignore comments when computing similarities. ignore-comments=yes # Ignore docstrings when computing similarities. ignore-docstrings=yes # checks for: # * warning notes in the code like FIXME, XXX # * PEP 263: source code with non-ASCII characters but no encoding declaration # [MISCELLANEOUS] # List of note tags to take into consideration, separated by a comma. notes=FIXME,XXX,TODO # checks for : # * unauthorized constructions # * strict indentation # * line length # * use of <> instead of != # [FORMAT] # Maximum number of characters on a single line. max-line-length=90 # Maximum number of lines in a module max-module-lines=1000 # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 # tab). indent-string='    ' Pygments-1.6/setup.py0000755000175000017500000000546112103430007013753 0ustar  piotrpiotr#!/usr/bin/env python # -*- coding: utf-8 -*- """ Pygments ~~~~~~~~ Pygments is a syntax highlighting package written in Python. It is a generic syntax highlighter for general use in all kinds of software such as forum systems, wikis or other applications that need to prettify source code. Highlights are: * a wide range of common languages and markup formats is supported * special attention is paid to details, increasing quality by a fair amount * support for new languages and formats is added easily * a number of output formats, presently HTML, LaTeX, RTF, SVG, all image \ formats that PIL supports and ANSI sequences * it is usable as a command-line tool and as a library * ... and it highlights even Brainfuck! The `Pygments tip`_ is installable with ``easy_install Pygments==dev``. .. _Pygments tip: http://bitbucket.org/birkenfeld/pygments-main/get/default.zip#egg=Pygments-dev :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details.
""" try: from setuptools import setup, find_packages have_setuptools = True except ImportError: from distutils.core import setup def find_packages(): return [ 'pygments', 'pygments.lexers', 'pygments.formatters', 'pygments.styles', 'pygments.filters', ] have_setuptools = False try: from distutils.command.build_py import build_py_2to3 as build_py except ImportError: from distutils.command.build_py import build_py if have_setuptools: add_keywords = dict( entry_points = { 'console_scripts': ['pygmentize = pygments.cmdline:main'], }, ) else: add_keywords = dict( scripts = ['pygmentize'], ) setup( name = 'Pygments', version = '1.6', url = 'http://pygments.org/', license = 'BSD License', author = 'Georg Brandl', author_email = 'georg@python.org', description = 'Pygments is a syntax highlighting package written in Python.', long_description = __doc__, keywords = 'syntax highlighting', packages = find_packages(), platforms = 'any', zip_safe = False, include_package_data = True, classifiers = [ 'License :: OSI Approved :: BSD License', 'Intended Audience :: Developers', 'Intended Audience :: End Users/Desktop', 'Intended Audience :: System Administrators', 'Development Status :: 6 - Mature', 'Programming Language :: Python', 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 3', 'Operating System :: OS Independent', 'Topic :: Text Processing :: Filters', 'Topic :: Utilities', ], cmdclass = {'build_py': build_py}, **add_keywords ) Pygments-1.6/AUTHORS0000644000175000017500000001174112103427040013307 0ustar piotrpiotrPygments is written and maintained by Georg Brandl . Major developers are Tim Hatch and Armin Ronacher . Other contributors, listed alphabetically, are: * Sam Aaron -- Ioke lexer * Kumar Appaiah -- Debian control lexer * Ali Afshar -- image formatter * Andreas Amann -- AppleScript lexer * Timothy Armstrong -- Dart lexer fixes * Jeffrey Arnold -- R/S, Rd, BUGS, Jags, and Stan lexers * Jeremy Ashkenas -- CoffeeScript lexer * Stefan Matthias Aust -- Smalltalk lexer * Ben Bangert -- Mako lexers * Max Battcher -- Darcs patch lexer * Paul Baumgart, 280 North, Inc. -- Objective-J lexer * Michael Bayer -- Myghty lexers * John Benediktsson -- Factor lexer * Christopher Bertels -- Fancy lexer * Jarrett Billingsley -- MiniD lexer * Adam Blinkinsop -- Haskell, Redcode lexers * Frits van Bommel -- assembler lexers * Pierre Bourdon -- bugfixes * Hiram Chirino -- Scaml and Jade lexers * Ian Cooper -- VGL lexer * Leaf Corcoran -- MoonScript lexer * Christian Jann -- ShellSession lexer * Christopher Creutzig -- MuPAD lexer * Pete Curry -- bugfixes * Owen Durni -- haXe lexer * Nick Efford -- Python 3 lexer * Sven Efftinge -- Xtend lexer * Artem Egorkine -- terminal256 formatter * James H. Fisher -- PostScript lexer * Carlos Galdino -- Elixir and Elixir Console lexers * Michael Galloy -- IDL lexer * Naveen Garg -- Autohotkey lexer * Laurent Gautier -- R/S lexer * Alex Gaynor -- PyPy log lexer * Alain Gilbert -- TypeScript lexer * Bertrand Goetzmann -- Groovy lexer * Krzysiek Goj -- Scala lexer * Matt Good -- Genshi, Cheetah lexers * Patrick Gotthardt -- PHP namespaces support * Olivier Guibe -- Asymptote lexer * Jordi Gutiérrez Hermoso -- Octave lexer * Martin Harriman -- SNOBOL lexer * Matthew Harrison -- SVG formatter * Steven Hazel -- Tcl lexer * Aslak Hellesøy -- Gherkin lexer * Greg Hendershott -- Racket lexer * David Hess, Fish Software, Inc. 
-- Objective-J lexer * Varun Hiremath -- Debian control lexer * Doug Hogan -- Mscgen lexer * Ben Hollis -- Mason lexer * Dustin Howett -- Logos lexer * Alastair Houghton -- Lexer inheritance facility * Tim Howard -- BlitzMax lexer * Ivan Inozemtsev -- Fantom lexer * Brian R. Jackson -- Tea lexer * Dennis Kaarsemaker -- sources.list lexer * Igor Kalnitsky -- vhdl lexer * Pekka Klärck -- Robot Framework lexer * Eric Knibbe -- Lasso lexer * Adam Koprowski -- Opa lexer * Benjamin Kowarsch -- Modula-2 lexer * Alexander Kriegisch -- Kconfig and AspectJ lexers * Marek Kubica -- Scheme lexer * Jochen Kupperschmidt -- Markdown processor * Gerd Kurzbach -- Modelica lexer * Jon Larimer, Google Inc. -- Smali lexer * Olov Lassus -- Dart lexer * Sylvestre Ledru -- Scilab lexer * Mark Lee -- Vala lexer * Ben Mabey -- Gherkin lexer * Angus MacArthur -- QML lexer * Simone Margaritelli -- Hybris lexer * Kirk McDonald -- D lexer * Gordon McGregor -- SystemVerilog lexer * Stephen McKamey -- Duel/JBST lexer * Brian McKenna -- F# lexer * Charles McLaughlin -- Puppet lexer * Lukas Meuser -- BBCode formatter, Lua lexer * Paul Miller -- LiveScript lexer * Hong Minhee -- HTTP lexer * Michael Mior -- Awk lexer * Bruce Mitchener -- Dylan lexer rewrite * Reuben Morais -- SourcePawn lexer * Jon Morton -- Rust lexer * Paulo Moura -- Logtalk lexer * Mher Movsisyan -- DTD lexer * Ana Nelson -- Ragel, ANTLR, R console lexers * Nam T. Nguyen -- Monokai style * Jesper Noehr -- HTML formatter "anchorlinenos" * Mike Nolta -- Julia lexer * Jonas Obrist -- BBCode lexer * David Oliva -- Rebol lexer * Jon Parise -- Protocol buffers lexer * Ronny Pfannschmidt -- BBCode lexer * Benjamin Peterson -- Test suite refactoring * Dominik Picheta -- Nimrod lexer * Clément Prévost -- UrbiScript lexer * Kashif Rasul -- CUDA lexer * Justin Reidy -- MXML lexer * Norman Richards -- JSON lexer * Lubomir Rintel -- GoodData MAQL and CL lexers * Andre Roberge -- Tango style * Konrad Rudolph -- LaTeX formatter enhancements * Mario Ruggier -- Evoque lexers * Stou Sandalski -- NumPy, FORTRAN, tcsh and XSLT lexers * Matteo Sasso -- Common Lisp lexer * Joe Schafer -- Ada lexer * Ken Schutte -- Matlab lexers * Tassilo Schweyer -- Io, MOOCode lexers * Ted Shaw -- AutoIt lexer * Joerg Sieker -- ABAP lexer * Robert Simmons -- Standard ML lexer * Kirill Simonov -- YAML lexer * Alexander Smishlajev -- Visual FoxPro lexer * Steve Spigarelli -- XQuery lexer * Jerome St-Louis -- eC lexer * James Strachan -- Kotlin lexer * Tom Stuart -- Treetop lexer * Tiberius Teng -- default style overhaul * Jeremy Thurgood -- Erlang, Squid config lexers * Brian Tiffin -- OpenCOBOL lexer * Erick Tryzelaar -- Felix lexer * Daniele Varrazzo -- PostgreSQL lexers * Abe Voelker -- OpenEdge ABL lexer * Pepijn de Vos -- HTML formatter CTags support * Whitney Young -- ObjectiveC lexer * Matthias Vallentin -- Bro lexer * Nathan Weizenbaum -- Haml and Sass lexers * Dietmar Winkler -- Modelica lexer * Nils Winter -- Smalltalk lexer * Davy Wybiral -- Clojure lexer * Diego Zamboni -- CFengine3 lexer * Enrique Zamudio -- Ceylon lexer * Alex Zimin -- Nemerle lexer Many thanks for all contributions! Pygments-1.6/tests/0000755000175000017500000000000012103430105013371 5ustar piotrpiotrPygments-1.6/tests/test_regexlexer.py0000644000175000017500000000235112103426532017166 0ustar piotrpiotr# -*- coding: utf-8 -*- """ Pygments regex lexer tests ~~~~~~~~~~~~~~~~~~~~~~~~~~ :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. 
""" import unittest from pygments.token import Text from pygments.lexer import RegexLexer from pygments.lexer import bygroups class TestLexer(RegexLexer): """Test tuple state transitions including #pop.""" tokens = { 'root': [ ('a', Text.Root, 'rag'), ('e', Text.Root), ], 'beer': [ ('d', Text.Beer, ('#pop', '#pop')), ], 'rag': [ ('b', Text.Rag, '#push'), ('c', Text.Rag, ('#pop', 'beer')), ], } class TupleTransTest(unittest.TestCase): def test(self): lx = TestLexer() toks = list(lx.get_tokens_unprocessed('abcde')) self.assertEqual(toks, [(0, Text.Root, 'a'), (1, Text.Rag, 'b'), (2, Text.Rag, 'c'), (3, Text.Beer, 'd'), (4, Text.Root, 'e')]) def test_multiline(self): lx = TestLexer() toks = list(lx.get_tokens_unprocessed('a\ne')) self.assertEqual(toks, [(0, Text.Root, 'a'), (1, Text, u'\n'), (2, Text.Root, 'e')]) Pygments-1.6/tests/test_html_formatter.py0000644000175000017500000001422712103426532020050 0ustar piotrpiotr# -*- coding: utf-8 -*- """ Pygments HTML formatter tests ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ import os import re import unittest import StringIO import tempfile from os.path import join, dirname, isfile from pygments.lexers import PythonLexer from pygments.formatters import HtmlFormatter, NullFormatter from pygments.formatters.html import escape_html from pygments.util import uni_open import support TESTFILE, TESTDIR = support.location(__file__) fp = uni_open(TESTFILE, encoding='utf-8') try: tokensource = list(PythonLexer().get_tokens(fp.read())) finally: fp.close() class HtmlFormatterTest(unittest.TestCase): def test_correct_output(self): hfmt = HtmlFormatter(nowrap=True) houtfile = StringIO.StringIO() hfmt.format(tokensource, houtfile) nfmt = NullFormatter() noutfile = StringIO.StringIO() nfmt.format(tokensource, noutfile) stripped_html = re.sub('<.*?>', '', houtfile.getvalue()) escaped_text = escape_html(noutfile.getvalue()) self.assertEqual(stripped_html, escaped_text) def test_external_css(self): # test correct behavior # CSS should be in /tmp directory fmt1 = HtmlFormatter(full=True, cssfile='fmt1.css', outencoding='utf-8') # CSS should be in TESTDIR (TESTDIR is absolute) fmt2 = HtmlFormatter(full=True, cssfile=join(TESTDIR, 'fmt2.css'), outencoding='utf-8') tfile = tempfile.NamedTemporaryFile(suffix='.html') fmt1.format(tokensource, tfile) try: fmt2.format(tokensource, tfile) self.assertTrue(isfile(join(TESTDIR, 'fmt2.css'))) except IOError: # test directory not writable pass tfile.close() self.assertTrue(isfile(join(dirname(tfile.name), 'fmt1.css'))) os.unlink(join(dirname(tfile.name), 'fmt1.css')) try: os.unlink(join(TESTDIR, 'fmt2.css')) except OSError: pass def test_all_options(self): for optdict in [dict(nowrap=True), dict(linenos=True), dict(linenos=True, full=True), dict(linenos=True, full=True, noclasses=True)]: outfile = StringIO.StringIO() fmt = HtmlFormatter(**optdict) fmt.format(tokensource, outfile) def test_linenos(self): optdict = dict(linenos=True) outfile = StringIO.StringIO() fmt = HtmlFormatter(**optdict) fmt.format(tokensource, outfile) html = outfile.getvalue() self.assertTrue(re.search("
<pre>\s+1\s+2\s+3", html))

    def test_linenos_with_startnum(self):
        optdict = dict(linenos=True, linenostart=5)
        outfile = StringIO.StringIO()
        fmt = HtmlFormatter(**optdict)
        fmt.format(tokensource, outfile)
        html = outfile.getvalue()
        self.assertTrue(re.search("
<pre>\s+5\s+6\s+7", html))

    def test_lineanchors(self):
        optdict = dict(lineanchors="foo")
        outfile = StringIO.StringIO()
        fmt = HtmlFormatter(**optdict)
        fmt.format(tokensource, outfile)
        html = outfile.getvalue()
        self.assertTrue(re.search("
", html))

    def test_lineanchors_with_startnum(self):
        optdict = dict(lineanchors="foo", linenostart=5)
        outfile = StringIO.StringIO()
        fmt = HtmlFormatter(**optdict)
        fmt.format(tokensource, outfile)
        html = outfile.getvalue()
        self.assertTrue(re.search("
", html))

    def test_valid_output(self):
        # test all available wrappers
        fmt = HtmlFormatter(full=True, linenos=True, noclasses=True,
                            outencoding='utf-8')

        handle, pathname = tempfile.mkstemp('.html')
        tfile = os.fdopen(handle, 'w+b')
        fmt.format(tokensource, tfile)
        tfile.close()
        catname = os.path.join(TESTDIR, 'dtds', 'HTML4.soc')
        try:
            import subprocess
            po = subprocess.Popen(['nsgmls', '-s', '-c', catname, pathname],
                                  stdout=subprocess.PIPE)
            ret = po.wait()
            output = po.stdout.read()
            po.stdout.close()
        except OSError:
            # nsgmls not available
            pass
        else:
            if ret:
                print output
            self.assertFalse(ret, 'nsgmls run reported errors')

        os.unlink(pathname)

    def test_get_style_defs(self):
        fmt = HtmlFormatter()
        sd = fmt.get_style_defs()
        self.assertTrue(sd.startswith('.'))

        fmt = HtmlFormatter(cssclass='foo')
        sd = fmt.get_style_defs()
        self.assertTrue(sd.startswith('.foo'))
        sd = fmt.get_style_defs('.bar')
        self.assertTrue(sd.startswith('.bar'))
        sd = fmt.get_style_defs(['.bar', '.baz'])
        fl = sd.splitlines()[0]
        self.assertTrue('.bar' in fl and '.baz' in fl)

    def test_unicode_options(self):
        fmt = HtmlFormatter(title=u'Föö',
                            cssclass=u'bär',
                            cssstyles=u'div:before { content: \'bäz\' }',
                            encoding='utf-8')
        handle, pathname = tempfile.mkstemp('.html')
        tfile = os.fdopen(handle, 'w+b')
        fmt.format(tokensource, tfile)
        tfile.close()

    def test_ctags(self):
        try:
            import ctags
        except ImportError:
            # we can't check without the ctags module, but at least check the exception
            self.assertRaises(RuntimeError, HtmlFormatter, tagsfile='support/tags')
        else:
            # this tagfile says that test_ctags() is on line 165, even if it isn't
            # anymore in the actual source
            fmt = HtmlFormatter(tagsfile='support/tags', lineanchors='L',
                                tagurlformat='%(fname)s%(fext)s')
            outfile = StringIO.StringIO()
            fmt.format(tokensource, outfile)
            self.assertTrue('test_ctags'
                            in outfile.getvalue())
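

# --- End-of-file usage sketch (an illustrative addition, not part of the
# test suite): the tests above drive HtmlFormatter through format()
# directly; outside a test, the one-call highlight() helper runs the same
# lex-and-format pipeline.  Only public Pygments 1.x API is used here, and
# the option values are arbitrary examples.
if __name__ == '__main__':
    from pygments import highlight
    from pygments.lexers import PythonLexer
    from pygments.formatters import HtmlFormatter
    print highlight(u'print "hi"\n', PythonLexer(),
                    HtmlFormatter(linenos=True, lineanchors='L'))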
Pygments-1.6/tests/run.py0000644000175000017500000000240512103426532014561 0ustar  piotrpiotr# -*- coding: utf-8 -*-
"""
    Pygments unit tests
    ~~~~~~~~~~~~~~~~~~~

    Usage::

        python run.py [testfile ...]


    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import sys, os

if sys.version_info >= (3,):
    # copy test suite over to "build/lib" and convert it
    print ('Copying and converting sources to build/lib/test...')
    from distutils.util import copydir_run_2to3
    testroot = os.path.dirname(__file__)
    newroot = os.path.join(testroot, '..', 'build/lib/test')
    copydir_run_2to3(testroot, newroot)
    # make nose believe that we run from the converted dir
    os.chdir(newroot)
else:
    # only find tests in this directory
    if os.path.dirname(__file__):
        os.chdir(os.path.dirname(__file__))


try:
    import nose
except ImportError:
    print ('nose is required to run the Pygments test suite')
    sys.exit(1)

try:
    # make sure the current source is first on sys.path
    sys.path.insert(0, '..')
    import pygments
except ImportError:
    print ('Cannot find Pygments to test: %s' % sys.exc_info()[1])
    sys.exit(1)
else:
    print ('Pygments %s test suite running (Python %s)...' %
           (pygments.__version__, sys.version.split()[0]))

nose.main()
Pygments-1.6/tests/test_cmdline.py0000644000175000017500000000630612103426532016433 0ustar  piotrpiotr# -*- coding: utf-8 -*-
"""
    Command line test
    ~~~~~~~~~~~~~~~~~

    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

# Test the command line interface

import sys, os
import unittest
import StringIO

from pygments import highlight
from pygments.cmdline import main as cmdline_main

import support

TESTFILE, TESTDIR = support.location(__file__)


def run_cmdline(*args):
    saved_stdout = sys.stdout
    saved_stderr = sys.stderr
    new_stdout = sys.stdout = StringIO.StringIO()
    new_stderr = sys.stderr = StringIO.StringIO()
    try:
        ret = cmdline_main(["pygmentize"] + list(args))
    finally:
        sys.stdout = saved_stdout
        sys.stderr = saved_stderr
    return (ret, new_stdout.getvalue(), new_stderr.getvalue())

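
# Minimal illustration of the capture pattern above (underscore-prefixed so
# nose does not collect it as a test): any pygmentize invocation can be
# checked for exit status and captured output without touching the real
# streams.  Assumption: "-V" is the version flag and its output starts with
# "Pygments version".
def _run_cmdline_example():
    code, out, err = run_cmdline("-V")
    assert code == 0 and out.startswith("Pygments version")
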

class CmdLineTest(unittest.TestCase):

    def test_L_opt(self):
        c, o, e = run_cmdline("-L")
        self.assertEqual(c, 0)
        self.assertTrue("Lexers" in o and "Formatters" in o and
                        "Filters" in o and "Styles" in o)
        c, o, e = run_cmdline("-L", "lexer")
        self.assertEqual(c, 0)
        self.assertTrue("Lexers" in o and "Formatters" not in o)
        c, o, e = run_cmdline("-L", "lexers")
        self.assertEqual(c, 0)

    def test_O_opt(self):
        filename = TESTFILE
        c, o, e = run_cmdline("-Ofull=1,linenos=true,foo=bar",
                              "-fhtml", filename)
        self.assertEqual(c, 0)
        self.assertTrue("foo, bar=baz=," in o)

    def test_F_opt(self):
        filename = TESTFILE
        c, o, e = run_cmdline("-Fhighlight:tokentype=Name.Blubb,"
                              "names=TESTFILE filename",
                              "-fhtml", filename)
        self.assertEqual(c, 0)
        self.assertTrue('<span class="n-Blubb' in o)

    def assert_fast_tokenization(self, s):
        """Show that a given string is tokenized quickly."""
        start = time.time()
        tokens = list(self.lexer.get_tokens_unprocessed(s))
        end = time.time()
        # Isn't 10 seconds kind of a long time?  Yes, but we don't want
        # false positives when the tests are starved for CPU time.
        if end - start > 10:
            self.fail('tokenization took too long')
        return tokens

    ### Strings.

    def test_single_quote_strings(self):
        self.assert_single_token(r"'foo\tbar\\\'baz'", String)
        self.assert_fast_tokenization("'" + '\\'*999)

    def test_double_quote_strings(self):
        self.assert_single_token(r'"foo\tbar\\\"baz"', String)
        self.assert_fast_tokenization('"' + '\\'*999)

    def test_backtick_strings(self):
        self.assert_single_token(r'`foo\tbar\\\`baz`', String.Backtick)
        self.assert_fast_tokenization('`' + '\\'*999)

    ### Regex matches with various delimiters.

    def test_match(self):
        self.assert_single_token(r'/aa\tbb/', String.Regex)
        self.assert_fast_tokenization('/' + '\\'*999)

    def test_match_with_slash(self):
        self.assert_tokens(['m', '/\n\\t\\\\/'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m/xxx\n' + '\\'*999)

    def test_match_with_bang(self):
        self.assert_tokens(['m', r'!aa\t\!bb!'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m!' + '\\'*999)

    def test_match_with_brace(self):
        self.assert_tokens(['m', r'{aa\t\}bb}'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m{' + '\\'*999)

    def test_match_with_angle_brackets(self):
        self.assert_tokens(['m', r'<aa\t\>bb>'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m<' + '\\'*999)

    def test_match_with_parenthesis(self):
        self.assert_tokens(['m', r'(aa\t\)bb)'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m(' + '\\'*999)

    def test_match_with_at_sign(self):
        self.assert_tokens(['m', r'@aa\t\@bb@'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m@' + '\\'*999)

    def test_match_with_percent_sign(self):
        self.assert_tokens(['m', r'%aa\t\%bb%'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m%' + '\\'*999)

    def test_match_with_dollar_sign(self):
        self.assert_tokens(['m', r'$aa\t\$bb$'], [String.Regex, String.Regex])
        self.assert_fast_tokenization('m$' + '\\'*999)

    ### Regex substitutions with various delimiters.

    def test_substitution_with_slash(self):
        self.assert_single_token('s/aaa/bbb/g', String.Regex)
        self.assert_fast_tokenization('s/foo/' + '\\'*999)

    def test_substitution_with_at_sign(self):
        self.assert_single_token(r's@aaa@bbb@g', String.Regex)
        self.assert_fast_tokenization('s@foo@' + '\\'*999)

    def test_substitution_with_percent_sign(self):
        self.assert_single_token(r's%aaa%bbb%g', String.Regex)
        self.assert_fast_tokenization('s%foo%' + '\\'*999)

    def test_substitution_with_brace(self):
        self.assert_single_token(r's{aaa}', String.Regex)
        self.assert_fast_tokenization('s{' + '\\'*999)

    def test_substitution_with_angle_bracket(self):
        self.assert_single_token(r's<aaa>', String.Regex)
        self.assert_fast_tokenization('s<' + '\\'*999)

    def test_substitution_with_square_bracket(self):
        self.assert_single_token(r's[aaa]', String.Regex)
        self.assert_fast_tokenization('s[' + '\\'*999)

    def test_substitution_with_parenthesis(self):
        self.assert_single_token(r's(aaa)', String.Regex)
        self.assert_fast_tokenization('s(' + '\\'*999)
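

# --- Illustrative timing sketch (not part of the suite): the pathological
# inputs above are a short prefix plus hundreds of backslashes; a Perl
# lexer with runaway backtracking would hang on them, while a fixed one
# returns almost immediately.  Uses only the public PerlLexer API.
if __name__ == '__main__':
    import time
    from pygments.lexers import PerlLexer
    start = time.time()
    list(PerlLexer().get_tokens_unprocessed('s/foo/' + '\\' * 999))
    print 'tokenized in %.3f seconds' % (time.time() - start)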
Pygments-1.6/tests/test_clexer.py0000644000175000017500000000160212103426532016274 0ustar  piotrpiotr# -*- coding: utf-8 -*-
"""
    Basic CLexer Test
    ~~~~~~~~~~~~~~~~~

    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import unittest
import os

from pygments.token import Text, Number
from pygments.lexers import CLexer


class CLexerTest(unittest.TestCase):

    def setUp(self):
        self.lexer = CLexer()

    def testNumbers(self):
        code = '42 23.42 23. .42 023 0xdeadbeef 23e+42 42e-23'
        wanted = []
        for item in zip([Number.Integer, Number.Float, Number.Float,
                         Number.Float, Number.Oct, Number.Hex,
                         Number.Float, Number.Float], code.split()):
            wanted.append(item)
            wanted.append((Text, ' '))
        wanted = [(Text, '')] + wanted[:-1] + [(Text, '\n')]
        self.assertEqual(list(self.lexer.get_tokens(code)), wanted)
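

# --- Quick interactive sketch (not a test): prints the same number-token
# classification the test above encodes, using only public API.
if __name__ == '__main__':
    from pygments.lexers import CLexer
    for token, value in CLexer().get_tokens('42 023 0xdeadbeef 23e+42'):
        print token, repr(value)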
Pygments-1.6/tests/test_basic_api.py0000644000175000017500000002421112103427040016720 0ustar  piotrpiotr# -*- coding: utf-8 -*-
"""
    Pygments basic API tests
    ~~~~~~~~~~~~~~~~~~~~~~~~

    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import os
import random
import unittest

from pygments import lexers, formatters, filters, format
from pygments.token import _TokenType, Text
from pygments.lexer import RegexLexer
from pygments.formatters.img import FontNotFound
from pygments.util import BytesIO, StringIO, bytes, b

import support

TESTFILE, TESTDIR = support.location(__file__)

test_content = [chr(i) for i in xrange(33, 128)] * 5
random.shuffle(test_content)
test_content = ''.join(test_content) + '\n'


def test_lexer_import_all():
    # instantiate every lexer, to see if the token type defs are correct
    for x in lexers.LEXERS.keys():
        c = getattr(lexers, x)()


def test_lexer_classes():
    # test that every lexer class has the correct public API
    def verify(cls):
        assert type(cls.name) is str
        for attr in 'aliases', 'filenames', 'alias_filenames', 'mimetypes':
            assert hasattr(cls, attr)
            assert type(getattr(cls, attr)) is list, \
                   "%s: %s attribute wrong" % (cls, attr)
        result = cls.analyse_text("abc")
        assert isinstance(result, float) and 0.0 <= result <= 1.0
        result = cls.analyse_text(".abc")
        assert isinstance(result, float) and 0.0 <= result <= 1.0

        inst = cls(opt1="val1", opt2="val2")
        if issubclass(cls, RegexLexer):
            if not hasattr(cls, '_tokens'):
                # if there's no "_tokens", the lexer has to be one with
                # multiple tokendef variants
                assert cls.token_variants
                for variant in cls.tokens:
                    assert 'root' in cls.tokens[variant]
            else:
                assert 'root' in cls._tokens, \
                       '%s has no root state' % cls

        if cls.name in ['XQuery', 'Opa']:   # XXX temporary
            return

        tokens = list(inst.get_tokens(test_content))
        txt = ""
        for token in tokens:
            assert isinstance(token, tuple)
            assert isinstance(token[0], _TokenType)
            if isinstance(token[1], str):
                print repr(token[1])
            assert isinstance(token[1], unicode)
            txt += token[1]
        assert txt == test_content, "%s lexer roundtrip failed: %r != %r" % \
               (cls.name, test_content, txt)

    for lexer in lexers._iter_lexerclasses():
        yield verify, lexer

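
# Standalone sketch of the roundtrip property verified above (underscore-
# prefixed so nose ignores it): joining the token values must reproduce the
# lexer input exactly.  PythonLexer is an arbitrary choice; every lexer is
# expected to satisfy this.
def _roundtrip_example():
    source = u'def f():\n    return 42\n'
    joined = u''.join(v for _, v in lexers.PythonLexer().get_tokens(source))
    assert joined == source
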

def test_lexer_options():
    # test that the basic options work
    def ensure(tokens, output):
        concatenated = ''.join(token[1] for token in tokens)
        assert concatenated == output, \
               '%s: %r != %r' % (lexer, concatenated, output)
    def verify(cls):
        inst = cls(stripnl=False)
        ensure(inst.get_tokens('a\nb'), 'a\nb\n')
        ensure(inst.get_tokens('\n\n\n'), '\n\n\n')
        inst = cls(stripall=True)
        ensure(inst.get_tokens('   \n  b\n\n\n'), 'b\n')
        # some lexers require full lines in input
        if cls.__name__ not in (
            'PythonConsoleLexer', 'RConsoleLexer', 'RubyConsoleLexer',
            'SqliteConsoleLexer', 'MatlabSessionLexer', 'ErlangShellLexer',
            'BashSessionLexer', 'LiterateHaskellLexer', 'PostgresConsoleLexer',
            'ElixirConsoleLexer', 'JuliaConsoleLexer', 'RobotFrameworkLexer',
            'DylanConsoleLexer', 'ShellSessionLexer'):
            inst = cls(ensurenl=False)
            ensure(inst.get_tokens('a\nb'), 'a\nb')
            inst = cls(ensurenl=False, stripall=True)
            ensure(inst.get_tokens('a\nb\n\n'), 'a\nb')

    for lexer in lexers._iter_lexerclasses():
        if lexer.__name__ == 'RawTokenLexer':
            # this one is special
            continue
        yield verify, lexer


def test_get_lexers():
    # test that the lexers functions work
    def verify(func, args):
        x = func(opt='val', *args)
        assert isinstance(x, lexers.PythonLexer)
        assert x.options["opt"] == "val"

    for func, args in [(lexers.get_lexer_by_name, ("python",)),
                       (lexers.get_lexer_for_filename, ("test.py",)),
                       (lexers.get_lexer_for_mimetype, ("text/x-python",)),
                       (lexers.guess_lexer, ("#!/usr/bin/python -O\nprint",)),
                       (lexers.guess_lexer_for_filename, ("a.py", "<%= @foo %>"))
                       ]:
        yield verify, func, args

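    # LEXERS values are (module, name, aliases, filenames, mimetypes)
    # tuples; every lookup path must resolve to the same lexer class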
    for cls, (_, lname, aliases, _, mimetypes) in lexers.LEXERS.iteritems():
        assert cls == lexers.find_lexer_class(lname).__name__

        for alias in aliases:
            assert cls == lexers.get_lexer_by_name(alias).__class__.__name__

        for mimetype in mimetypes:
            assert cls == lexers.get_lexer_for_mimetype(mimetype).__class__.__name__


def test_formatter_public_api():
    ts = list(lexers.PythonLexer().get_tokens("def f(): pass"))
    out = StringIO()
    # test that every formatter class has the correct public API
    def verify(formatter, info):
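        # info is a (name, aliases, filenames, docstring) tuple; the
        # filenames entry may legitimately be empty, so only the other
        # fields are required to be non-empty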
        assert len(info) == 4
        assert info[0], "missing formatter name"
        assert info[1], "missing formatter aliases"
        assert info[3], "missing formatter docstring"

        if formatter.name == 'Raw tokens':
            # will not work with Unicode output file
            return

        try:
            inst = formatter(opt1="val1")
        except (ImportError, FontNotFound):
            return
        try:
            inst.get_style_defs()
        except NotImplementedError:
            # may be raised by formatters for which it doesn't make sense
            pass
        inst.format(ts, out)

    for formatter, info in formatters.FORMATTERS.iteritems():
        yield verify, formatter, info


def test_formatter_encodings():
    from pygments.formatters import HtmlFormatter

    # unicode output
    fmt = HtmlFormatter()
    tokens = [(Text, u"ä")]
    out = format(tokens, fmt)
    assert type(out) is unicode
    assert u"ä" in out

    # encoding option
    fmt = HtmlFormatter(encoding="latin1")
    tokens = [(Text, u"ä")]
    assert u"ä".encode("latin1") in format(tokens, fmt)

    # encoding and outencoding option
    fmt = HtmlFormatter(encoding="latin1", outencoding="utf8")
    tokens = [(Text, u"ä")]
    assert u"ä".encode("utf8") in format(tokens, fmt)


def test_formatter_unicode_handling():
    # test that the formatter supports encoding and Unicode
    tokens = list(lexers.PythonLexer(encoding='utf-8').
                  get_tokens("def f(): 'ä'"))

    def verify(formatter):
        try:
            inst = formatter(encoding=None)
        except (ImportError, FontNotFound):
            # some dependency or font not installed
            return

        if formatter.name != 'Raw tokens':
            out = format(tokens, inst)
            if formatter.unicodeoutput:
                assert type(out) is unicode

            inst = formatter(encoding='utf-8')
            out = format(tokens, inst)
            assert type(out) is bytes, '%s: %r' % (formatter, out)
            # Cannot test for encoding, since formatters may have to escape
            # non-ASCII characters.
        else:
            inst = formatter()
            out = format(tokens, inst)
            assert type(out) is bytes, '%s: %r' % (formatter, out)

    for formatter, info in formatters.FORMATTERS.iteritems():
        yield verify, formatter


def test_get_formatters():
    # test that the formatters functions work
    x = formatters.get_formatter_by_name("html", opt="val")
    assert isinstance(x, formatters.HtmlFormatter)
    assert x.options["opt"] == "val"

    x = formatters.get_formatter_for_filename("a.html", opt="val")
    assert isinstance(x, formatters.HtmlFormatter)
    assert x.options["opt"] == "val"


def test_styles():
    # minimal style test
    from pygments.formatters import HtmlFormatter
    fmt = HtmlFormatter(style="pastie")


class FiltersTest(unittest.TestCase):

    def test_basic(self):
        filter_args = {
            'whitespace': {'spaces': True, 'tabs': True, 'newlines': True},
            'highlight': {'names': ['isinstance', 'lexers', 'x']},
        }
        for x in filters.FILTERS.keys():
            lx = lexers.PythonLexer()
            lx.add_filter(x, **filter_args.get(x, {}))
            fp = open(TESTFILE, 'rb')
            try:
                text = fp.read().decode('utf-8')
            finally:
                fp.close()
            tokens = list(lx.get_tokens(text))
            roundtext = ''.join([t[1] for t in tokens])
            if x not in ('whitespace', 'keywordcase'):
                # these filters change the text
                self.assertEqual(roundtext, text,
                                 "lexer roundtrip with %s filter failed" % x)

    def test_raiseonerror(self):
        lx = lexers.PythonLexer()
        lx.add_filter('raiseonerror', excclass=RuntimeError)
        self.assertRaises(RuntimeError, list, lx.get_tokens('$'))

    def test_whitespace(self):
        lx = lexers.PythonLexer()
        lx.add_filter('whitespace', spaces='%')
        fp = open(TESTFILE, 'rb')
        try:
            text = fp.read().decode('utf-8')
        finally:
            fp.close()
        lxtext = ''.join([t[1] for t in list(lx.get_tokens(text))])
        self.assertFalse(' ' in lxtext)

    def test_keywordcase(self):
        lx = lexers.PythonLexer()
        lx.add_filter('keywordcase', case='capitalize')
        fp = open(TESTFILE, 'rb')
        try:
            text = fp.read().decode('utf-8')
        finally:
            fp.close()
        lxtext = ''.join([t[1] for t in list(lx.get_tokens(text))])
        self.assertTrue('Def' in lxtext and 'Class' in lxtext)

    def test_codetag(self):
        lx = lexers.PythonLexer()
        lx.add_filter('codetagify')
        text = u'# BUG: text'
        tokens = list(lx.get_tokens(text))
        self.assertEqual('# ', tokens[0][1])
        self.assertEqual('BUG', tokens[1][1])

    def test_codetag_boundary(self):
        # ticket #368
        lx = lexers.PythonLexer()
        lx.add_filter('codetagify')
        text = u'# DEBUG: text'
        tokens = list(lx.get_tokens(text))
        self.assertEqual('# DEBUG: text', tokens[0][1])
Pygments-1.6/tests/test_using_api.py
# -*- coding: utf-8 -*-
"""
    Pygments tests for using()
    ~~~~~~~~~~~~~~~~~~~~~~~~~~

    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import unittest

from pygments.lexer import using, bygroups, this, RegexLexer
from pygments.token import String, Text, Keyword

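# A minimal lexer exercising using(this, ...): quoted content is
# delegated to the 'string' state, while '#' comments point at an
# 'invalid' state that deliberately does not exist.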
class TestLexer(RegexLexer):
    tokens = {
        'root': [
            (r'#.*',
             using(this, state='invalid')),
            (r'(")(.+?)(")',
             bygroups(String, using(this, state='string'), String)),
            (r'[^"]+', Text),
        ],
        'string': [
            (r'.+', Keyword),
        ],
    }


class UsingStateTest(unittest.TestCase):
    def test_basic(self):
        expected = [(Text, 'a'), (String, '"'), (Keyword, 'bcd'),
                    (String, '"'), (Text, 'e\n')]
        t = list(TestLexer().get_tokens('a"bcd"e'))
        self.assertEqual(t, expected)

    def test_error(self):
        def gen():
            return list(TestLexer().get_tokens('#a'))
        self.assertRaises(KeyError, gen)
Pygments-1.6/tests/support/
Pygments-1.6/tests/support/tags
!_TAG_FILE_FORMAT	2	/extended format; --format=1 will not append ;" to lines/
!_TAG_FILE_SORTED	1	/0=unsorted, 1=sorted, 2=foldcase/
!_TAG_PROGRAM_AUTHOR	Darren Hiebert	/dhiebert@users.sourceforge.net/
!_TAG_PROGRAM_NAME	Exuberant Ctags	//
!_TAG_PROGRAM_URL	http://ctags.sourceforge.net	/official site/
!_TAG_PROGRAM_VERSION	5.8	//
HtmlFormatter	test_html_formatter.py	19;"	i
HtmlFormatterTest	test_html_formatter.py	34;"	c
NullFormatter	test_html_formatter.py	19;"	i
PythonLexer	test_html_formatter.py	18;"	i
StringIO	test_html_formatter.py	13;"	i
dirname	test_html_formatter.py	16;"	i
escape_html	test_html_formatter.py	20;"	i
fp	test_html_formatter.py	27;"	v
inspect	test_html_formatter.py	15;"	i
isfile	test_html_formatter.py	16;"	i
join	test_html_formatter.py	16;"	i
os	test_html_formatter.py	10;"	i
re	test_html_formatter.py	11;"	i
subprocess	test_html_formatter.py	125;"	i
support	test_html_formatter.py	23;"	i
tempfile	test_html_formatter.py	14;"	i
test_all_options	test_html_formatter.py	72;"	m	class:HtmlFormatterTest
test_correct_output	test_html_formatter.py	35;"	m	class:HtmlFormatterTest
test_ctags	test_html_formatter.py	165;"	m	class:HtmlFormatterTest
test_external_css	test_html_formatter.py	48;"	m	class:HtmlFormatterTest
test_get_style_defs	test_html_formatter.py	141;"	m	class:HtmlFormatterTest
test_lineanchors	test_html_formatter.py	98;"	m	class:HtmlFormatterTest
test_lineanchors_with_startnum	test_html_formatter.py	106;"	m	class:HtmlFormatterTest
test_linenos	test_html_formatter.py	82;"	m	class:HtmlFormatterTest
test_linenos_with_startnum	test_html_formatter.py	90;"	m	class:HtmlFormatterTest
test_unicode_options	test_html_formatter.py	155;"	m	class:HtmlFormatterTest
test_valid_output	test_html_formatter.py	114;"	m	class:HtmlFormatterTest
tokensource	test_html_formatter.py	29;"	v
uni_open	test_html_formatter.py	21;"	i
unittest	test_html_formatter.py	12;"	i
Pygments-1.6/tests/test_util.py
# -*- coding: utf-8 -*-
"""
    Test suite for the util module
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re
import unittest

from pygments import util


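# analyse() takes no 'self' on purpose: make_analysator wraps it into a
# static method that coerces the return value to a float and maps any
# exception to 0.0, as the tests below demonstrate.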
class FakeLexer(object):
    def analyse(text):
        return float(text)
    analyse = util.make_analysator(analyse)


class UtilTest(unittest.TestCase):

    def test_getoptions(self):
        raises = self.assertRaises
        equals = self.assertEqual

        equals(util.get_bool_opt({}, 'a', True), True)
        equals(util.get_bool_opt({}, 'a', 1), True)
        equals(util.get_bool_opt({}, 'a', 'true'), True)
        equals(util.get_bool_opt({}, 'a', 'no'), False)
        raises(util.OptionError, util.get_bool_opt, {}, 'a', [])
        raises(util.OptionError, util.get_bool_opt, {}, 'a', 'foo')

        equals(util.get_int_opt({}, 'a', 1), 1)
        raises(util.OptionError, util.get_int_opt, {}, 'a', [])
        raises(util.OptionError, util.get_int_opt, {}, 'a', 'bar')

        equals(util.get_list_opt({}, 'a', [1]), [1])
        equals(util.get_list_opt({}, 'a', '1 2'), ['1', '2'])
        raises(util.OptionError, util.get_list_opt, {}, 'a', 1)

    def test_docstring_headline(self):
        def f1():
            """
            docstring headline

            other text
            """
        def f2():
            """
            docstring
            headline

            other text
            """

        self.assertEqual(util.docstring_headline(f1), "docstring headline")
        self.assertEqual(util.docstring_headline(f2), "docstring headline")

    def test_analysator_returns_float(self):
        # If an analysator wrapped by make_analysator returns a floating point
        # number, then that number will be returned by the wrapper.
        self.assertEqual(FakeLexer.analyse('0.5'), 0.5)

    def test_analysator_returns_boolean(self):
        # If an analysator wrapped by make_analysator returns a boolean value,
        # then the wrapper will return 1.0 if the boolean was True or 0.0 if
        # it was False.
        self.assertEqual(FakeLexer.analyse(True), 1.0)
        self.assertEqual(FakeLexer.analyse(False), 0.0)

    def test_analysator_raises_exception(self):
        # If an analysator wrapped by make_analysator raises an exception,
        # then the wrapper will return 0.0.
        class ErrorLexer(object):
            def analyse(text):
                raise RuntimeError('something bad happened')
            analyse = util.make_analysator(analyse)
        self.assertEqual(ErrorLexer.analyse(''), 0.0)

    def test_analysator_value_error(self):
        # When converting the analysator's return value to a float a
        # ValueError may occur.  If that happens 0.0 is returned instead.
        self.assertEqual(FakeLexer.analyse('bad input'), 0.0)

    def test_analysator_type_error(self):
        # When converting the analysator's return value to a float a
        # TypeError may occur.  If that happens 0.0 is returned instead.
        self.assertEqual(FakeLexer.analyse(None), 0.0)

    def test_shebang_matches(self):
        self.assertTrue(util.shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?'))
        self.assertTrue(util.shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?'))
        self.assertTrue(util.shebang_matches('#!/usr/bin/startsomethingwith python',
                                             r'python(2\.\d)?'))
        self.assertTrue(util.shebang_matches('#!C:\\Python2.4\\Python.exe',
                                             r'python(2\.\d)?'))

        self.assertFalse(util.shebang_matches('#!/usr/bin/python-ruby',
                                              r'python(2\.\d)?'))
        self.assertFalse(util.shebang_matches('#!/usr/bin/python/ruby',
                                              r'python(2\.\d)?'))
        self.assertFalse(util.shebang_matches('#!', r'python'))

    def test_doctype_matches(self):
        self.assertTrue(util.doctype_matches(
            '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> ',
            'html.*'))
        self.assertFalse(util.doctype_matches(
            '<?xml ?> <DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> ',
            'html.*'))
        self.assertTrue(util.html_doctype_matches(
            '<?xml ?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN">'))

    def test_xml(self):
        self.assertTrue(util.looks_like_xml(
            '<?xml ?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN">'))
        self.assertTrue(util.looks_like_xml('<html xmlns>abc</html>'))
        self.assertFalse(util.looks_like_xml('<html>'))

    def test_unirange(self):
        first_non_bmp = u'\U00010000'
        r = re.compile(util.unirange(0x10000, 0x20000))
        m = r.match(first_non_bmp)
        self.assertTrue(m)
        self.assertEqual(m.end(), len(first_non_bmp))
        self.assertFalse(r.match(u'\uffff'))
        self.assertFalse(r.match(u'xxx'))
        # Tests that end is inclusive
        r = re.compile(util.unirange(0x10000, 0x10000) + '+')
        # Tests that the plus works for the entire unicode point, if narrow
        # build
        m = r.match(first_non_bmp * 2)
        self.assertTrue(m)
        self.assertEqual(m.end(), len(first_non_bmp) * 2)
Pygments-1.6/tests/dtds/
Pygments-1.6/tests/dtds/HTML4.dcl
Pygments-1.6/tests/dtds/HTML4-f.dtd
%HTML4.dtd;
Pygments-1.6/tests/dtds/HTML4.dtd
<!--
    The URI used as a system identifier with the public identifier allows
    the user agent to download the DTD and entity sets as needed.

    The FPI for the Strict HTML 4.0 DTD is:

        "-//W3C//DTD HTML 4.0//EN"

    and its URI is:

        http://www.w3.org/TR/REC-html40/strict.dtd

    Authors should use the Strict DTD unless they need the
    presentation control for user agents that don't (adequately)
    support style sheets.

    If you are writing a document that includes frames, use 
    the following FPI:

        "-//W3C//DTD HTML 4.0 Frameset//EN"

    with the URI:

        http://www.w3.org/TR/REC-html40/frameset.dtd

    The following URIs are supported in relation to HTML 4.0

    "http://www.w3.org/TR/REC-html40/strict.dtd" (Strict DTD)
    "http://www.w3.org/TR/REC-html40/loose.dtd" (Loose DTD)
    "http://www.w3.org/TR/REC-html40/frameset.dtd" (Frameset DTD)
    "http://www.w3.org/TR/REC-html40/HTMLlat1.ent" (Latin-1 entities)
    "http://www.w3.org/TR/REC-html40/HTMLsymbol.ent" (Symbol entities)
    "http://www.w3.org/TR/REC-html40/HTMLspecial.ent" (Special entities)

    These URIs point to the latest version of each file. To reference
    this specific revision use the following URIs:

    "http://www.w3.org/TR/REC-html40-971218/strict.dtd"
    "http://www.w3.org/TR/REC-html40-971218/loose.dtd"
    "http://www.w3.org/TR/REC-html40-971218/frameset.dtd"
    "http://www.w3.org/TR/REC-html40-971218/HTMLlat1.ent"
    "http://www.w3.org/TR/REC-html40-971218/HTMLsymbol.ent"
    "http://www.w3.org/TR/REC-html40-971218/HTMLspecial.ent"

-->
%HTMLlat1;


%HTMLsymbol;


%HTMLspecial;
Pygments-1.6/tests/dtds/HTMLspec.ent
Pygments-1.6/tests/dtds/HTMLsym.ent
Pygments-1.6/tests/dtds/HTML4-s.dtd
%HTMLlat1;


%HTMLsymbol;


%HTMLspecial;
Pygments-1.6/tests/dtds/HTMLlat1.ent
Pygments-1.6/tests/dtds/HTML4.soc
OVERRIDE YES
SGMLDECL HTML4.dcl
DOCTYPE HTML HTML4.dtd
PUBLIC "-//W3C//DTD HTML 4.0//EN" HTML4-s.dtd
PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" HTML4.dtd
PUBLIC "-//W3C//DTD HTML 4.0 Frameset//EN" HTML4-f.dtd
PUBLIC "-//W3C//ENTITIES Latin1//EN//HTML" HTMLlat1.ent
PUBLIC "-//W3C//ENTITIES Special//EN//HTML" HTMLspec.ent
PUBLIC "-//W3C//ENTITIES Symbols//EN//HTML" HTMLsym.ent
Pygments-1.6/tests/test_latex_formatter.py
# -*- coding: utf-8 -*-
"""
    Pygments LaTeX formatter tests
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import os
import unittest
import tempfile

from pygments.formatters import LatexFormatter
from pygments.lexers import PythonLexer

import support

TESTFILE, TESTDIR = support.location(__file__)


class LatexFormatterTest(unittest.TestCase):

    def test_valid_output(self):
        fp = open(TESTFILE)
        try:
            tokensource = list(PythonLexer().get_tokens(fp.read()))
        finally:
            fp.close()
        fmt = LatexFormatter(full=True, encoding='latin1')

        handle, pathname = tempfile.mkstemp('.tex')
        # place all output files in /tmp too
        old_wd = os.getcwd()
        os.chdir(os.path.dirname(pathname))
        tfile = os.fdopen(handle, 'wb')
        fmt.format(tokensource, tfile)
        tfile.close()
        try:
            import subprocess
            po = subprocess.Popen(['latex', '-interaction=nonstopmode',
                                   pathname], stdout=subprocess.PIPE)
            ret = po.wait()
            output = po.stdout.read()
            po.stdout.close()
        except OSError:
            # latex not available
            pass
        else:
            if ret:
                print output
            self.assertFalse(ret, 'latex run reported errors')

        os.unlink(pathname)
        os.chdir(old_wd)
Pygments-1.6/tests/old_run.py
# -*- coding: utf-8 -*-
"""
    Pygments unit tests
    ~~~~~~~~~~~~~~~~~~~

    Usage::

        python run.py [testfile ...]


    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import sys, os
import unittest

from os.path import dirname, basename, join, abspath

import pygments

try:
    import coverage
except ImportError:
    coverage = None

testdir = abspath(dirname(__file__))

failed = []
total_test_count = 0
error_test_count = 0


def err(file, what, exc):
    print >>sys.stderr, file, 'failed %s:' % what,
    print >>sys.stderr, exc
    failed.append(file[:-3])


class QuietTestRunner(object):
    """Customized test runner for relatively quiet output"""

    def __init__(self, testname, stream=sys.stderr):
        self.testname = testname
        self.stream = unittest._WritelnDecorator(stream)

    def run(self, test):
        global total_test_count
        global error_test_count
        result = unittest._TextTestResult(self.stream, True, 1)
        test(result)
        if not result.wasSuccessful():
            self.stream.write(' FAIL:')
            result.printErrors()
            failed.append(self.testname)
        else:
            self.stream.write(' ok\n')
        total_test_count += result.testsRun
        error_test_count += len(result.errors) + len(result.failures)
        return result


def run_tests(with_coverage=False):
    # needed to avoid confusion involving atexit handlers
    import logging

    if sys.argv[1:]:
        # test only files given on cmdline
        files = [entry + '.py' for entry in sys.argv[1:] if entry.startswith('test_')]
    else:
        files = [entry for entry in os.listdir(testdir)
                 if (entry.startswith('test_') and entry.endswith('.py'))]
        files.sort()

    WIDTH = 85

    print >>sys.stderr, \
        ('Pygments %s Test Suite running%s, stand by...' %
         (pygments.__version__,
          with_coverage and " with coverage analysis" or "")).center(WIDTH)
    print >>sys.stderr, ('(using Python %s)' % sys.version.split()[0]).center(WIDTH)
    print >>sys.stderr, '='*WIDTH

    if with_coverage:
        coverage.erase()
        coverage.start()

    for testfile in files:
        globs = {'__file__': join(testdir, testfile)}
        try:
            execfile(join(testdir, testfile), globs)
        except Exception, exc:
            err(testfile, 'execfile', exc)
            continue
        sys.stderr.write(testfile[:-3] + ': ')
        try:
            runner = QuietTestRunner(testfile[:-3])
            # make a test suite of all TestCases in the file
            tests = []
            for name, thing in globs.iteritems():
                if name.endswith('Test'):
                    tests.append((name, unittest.makeSuite(thing)))
            tests.sort()
            suite = unittest.TestSuite()
            suite.addTests([x[1] for x in tests])
            runner.run(suite)
        except Exception, exc:
            err(testfile, 'running test', exc)

    print >>sys.stderr, '='*WIDTH
    if failed:
        print >>sys.stderr, '%d of %d tests failed.' % \
              (error_test_count, total_test_count)
        print >>sys.stderr, 'Tests failed in:', ', '.join(failed)
        ret = 1
    else:
        if total_test_count == 1:
            print >>sys.stderr, '1 test happy.'
        else:
            print >>sys.stderr, 'All %d tests happy.' % total_test_count
        ret = 0

    if with_coverage:
        coverage.stop()
        modules = [mod for name, mod in sys.modules.iteritems()
                   if name.startswith('pygments.') and mod]
        coverage.report(modules)

    return ret


if __name__ == '__main__':
    with_coverage = False
    if sys.argv[1:2] == ['-C']:
        with_coverage = bool(coverage)
        del sys.argv[1]
    sys.exit(run_tests(with_coverage))
Pygments-1.6/tests/test_token.py
# -*- coding: utf-8 -*-
"""
    Test suite for the token module
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import unittest

from pygments import token


class TokenTest(unittest.TestCase):

    def test_tokentype(self):
        e = self.assertEqual

        t = token.String

        e(t.split(), [token.Token, token.Literal, token.String])

        e(t.__class__, token._TokenType)

    def test_functions(self):
        self.assertTrue(token.is_token_subtype(token.String, token.String))
        self.assertTrue(token.is_token_subtype(token.String, token.Literal))
        self.assertFalse(token.is_token_subtype(token.Literal, token.String))

        self.assertTrue(token.string_to_tokentype(token.String) is token.String)
        self.assertTrue(token.string_to_tokentype('') is token.Token)
        self.assertTrue(token.string_to_tokentype('String') is token.String)

    def test_sanity_check(self):
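        # STANDARD_TYPES maps token types to short CSS class names; no
        # two types may share a name, or formatter output would be
        # ambiguous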
        stp = token.STANDARD_TYPES.copy()
        stp[token.Token] = '---' # Token and Text do conflict, that is okay
        t = {}
        for k, v in stp.iteritems():
            t.setdefault(v, []).append(k)
        if len(t) == len(stp):
            return # Okay

        for k, v in t.iteritems():
            if len(v) > 1:
                self.fail("%r has more than one key: %r" % (k, v))
Pygments-1.6/tests/test_examplefiles.py
# -*- coding: utf-8 -*-
"""
    Pygments tests with example files
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import os
import pprint
import difflib
import cPickle as pickle

from pygments.lexers import get_lexer_for_filename, get_lexer_by_name
from pygments.token import Error
from pygments.util import ClassNotFound, b

STORE_OUTPUT = False
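# Set STORE_OUTPUT to True to pickle each example file's token stream
# into tests/examplefiles/output/ and diff later runs against it.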

# generate methods
def test_example_files():
    testdir = os.path.dirname(__file__)
    outdir = os.path.join(testdir, 'examplefiles', 'output')
    if STORE_OUTPUT and not os.path.isdir(outdir):
        os.makedirs(outdir)
    for fn in os.listdir(os.path.join(testdir, 'examplefiles')):
        if fn.startswith('.') or fn.endswith('#'):
            continue

        absfn = os.path.join(testdir, 'examplefiles', fn)
        if not os.path.isfile(absfn):
            continue
        outfn = os.path.join(outdir, fn)

        try:
            lx = get_lexer_for_filename(absfn)
        except ClassNotFound:
            if "_" not in fn:
                raise AssertionError('file %r has no registered extension, '
                                     'nor is of the form <lexer>_filename '
                                     'for overriding, thus no lexer found.'
                                     % fn)
            try:
                name, rest = fn.split("_", 1)
                lx = get_lexer_by_name(name)
            except ClassNotFound:
                raise AssertionError('no lexer found for file %r' % fn)
        yield check_lexer, lx, absfn, outfn

def check_lexer(lx, absfn, outfn):
    fp = open(absfn, 'rb')
    try:
        text = fp.read()
    finally:
        fp.close()
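    # normalize line endings and the trailing newline, then decode as
    # UTF-8 (dropping any BOM) with a latin-1 fallback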
    text = text.replace(b('\r\n'), b('\n'))
    text = text.strip(b('\n')) + b('\n')
    try:
        text = text.decode('utf-8')
        if text.startswith(u'\ufeff'):
            text = text[len(u'\ufeff'):]
    except UnicodeError:
        text = text.decode('latin1')
    ntext = []
    tokens = []
    for type, val in lx.get_tokens(text):
        ntext.append(val)
        assert type != Error, \
            'lexer %s generated error token for %s: %r at position %d' % \
            (lx, absfn, val, len(u''.join(ntext)))
        tokens.append((type, val))
    if u''.join(ntext) != text:
        print '\n'.join(difflib.unified_diff(u''.join(ntext).splitlines(),
                                             text.splitlines()))
        raise AssertionError('round trip failed for ' + absfn)

    # check output against previous run if enabled
    if STORE_OUTPUT:
        # no previous output -- store it
        if not os.path.isfile(outfn):
            fp = open(outfn, 'wb')
            try:
                pickle.dump(tokens, fp)
            finally:
                fp.close()
            return
        # otherwise load it and compare
        fp = open(outfn, 'rb')
        try:
            stored_tokens = pickle.load(fp)
        finally:
            fp.close()
        if stored_tokens != tokens:
            f1 = pprint.pformat(stored_tokens)
            f2 = pprint.pformat(tokens)
            print '\n'.join(difflib.unified_diff(f1.splitlines(),
                                                 f2.splitlines()))
            assert False, absfn
Pygments-1.6/tests/support.py
# coding: utf-8
"""
Support for Pygments tests
"""

import os


def location(mod_name):
    """
    Return the file and directory that the code for *mod_name* is in.
    """
    source = mod_name.endswith("pyc") and mod_name[:-1] or mod_name
    source = os.path.abspath(source)
    return source, os.path.dirname(source)
Pygments-1.6/TODO
Todo
====

- suggested new lexers
  * IPython sessions

- lexers that need work:
  * review perl lexer (numerous bugs, but so far no one had complaints ;)
  * readd property support for C# lexer? that is, find a regex that doesn't
    backtrack to death...
  * add support for function name highlighting to C++ lexer

- allow "overlay" token types to highlight specials: nth line, a word etc.

- pygmentize option presets, more sophisticated method to output styles?
Pygments-1.6/PKG-INFO
Metadata-Version: 1.1
Name: Pygments
Version: 1.6
Summary: Pygments is a syntax highlighting package written in Python.
Home-page: http://pygments.org/
Author: Georg Brandl
Author-email: georg@python.org
License: BSD License
Description: 
            Pygments
            ~~~~~~~~
        
            Pygments is a syntax highlighting package written in Python.
        
            It is a generic syntax highlighter for general use in all kinds of software
            such as forum systems, wikis or other applications that need to prettify
            source code. Highlights are:
        
            * a wide range of common languages and markup formats is supported
            * special attention is paid to details, increasing quality by a fair amount
            * support for new languages and formats are added easily
            * a number of output formats, presently HTML, LaTeX, RTF, SVG, all
              image formats that PIL supports and ANSI sequences
            * it is usable as a command-line tool and as a library
            * ... and it highlights even Brainfuck!
        
            The `Pygments tip`_ is installable with ``easy_install Pygments==dev``.
        
            .. _Pygments tip:
               http://bitbucket.org/birkenfeld/pygments-main/get/default.zip#egg=Pygments-dev
        
            :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
            :license: BSD, see LICENSE for details.
        
Keywords: syntax highlighting
Platform: any
Classifier: License :: OSI Approved :: BSD License
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: End Users/Desktop
Classifier: Intended Audience :: System Administrators
Classifier: Development Status :: 6 - Mature
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 3
Classifier: Operating System :: OS Independent
Classifier: Topic :: Text Processing :: Filters
Classifier: Topic :: Utilities