pyPEG2-2.15.2/0000755000175000017500000000000012605271064013760 5ustar florianflorian00000000000000pyPEG2-2.15.2/PKG-INFO0000644000175000017500000000346512605271064015065 0ustar florianflorian00000000000000Metadata-Version: 1.1 Name: pyPEG2 Version: 2.15.2 Summary: An intrinsic PEG Parser-Interpreter for Python Home-page: http://fdik.org/pyPEG2 Author: Volker Birk Author-email: vb@dingens.org License: LICENSE.txt Download-URL: http://fdik.org/pyPEG2/pyPEG2-2.15.2.tar.gz Description: ============================== pyPEG 2 for Python 2.7 and 3.x ============================== Python is a nice scripting language. It even gives you access to its own parser and compiler. It also gives you access to different other parsers for special purposes like XML and string templates. But sometimes you may want to have your own parser. This is what's pyPEG for. And pyPEG supports Unicode. The source code for all you can find on bitbucket: https://bitbucket.org/fdik/pypeg/ To build the documentation, you'll need YML 2. You can download YML here: Homepage: http://fdik.org/yml/ Toolchain: http://fdik.org/yml2.tar.bz2 You can install pyPEG 2 with: pip install pypeg2 pyPEG 2 depends on lxml, see http://lxml.de/ Platform: UNKNOWN Classifier: Development Status :: 5 - Production/Stable Classifier: Intended Audience :: Developers Classifier: Intended Audience :: Science/Research Classifier: License :: OSI Approved :: GNU General Public License v2 (GPLv2) Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 2 Classifier: Topic :: Software Development :: Compilers Classifier: Topic :: Software Development :: Interpreters Classifier: Topic :: Software Development :: Libraries :: Python Modules Requires: lxml Provides: pyPEG2 (2.15.2) pyPEG2-2.15.2/CHANGES.txt0000644000175000017500000000007612600574741015577 0ustar florianflorian00000000000000v2.0, 05/12/2012 -- Initial release of rewrite for Python 3.x pyPEG2-2.15.2/samples/0000755000175000017500000000000012605271064015424 5ustar florianflorian00000000000000pyPEG2-2.15.2/samples/sample1.py0000644000175000017500000000757212600575547017363 0ustar florianflorian00000000000000#!/usr/bin/python3 """ Parsing sample To parse we're giving a text to parse and an thing with a grammar. The default setting includes skipping of whitespace, so we don't need to take care of that. The comment parameter is set to C style /* comments */ >>> f = parse("int f(int a, long b) { do_this; do_that; }", Function, comment=comment_c) Because function has a name() in its grammar, we can access this now as an attribute. With Python 2.7 this gives Symbol(u'f'), with Python 3.2 it gives Symbol('f'): >>> f.name Symbol(...'f') A Function has an Attribute "parms" in its grammar, which directs to class Parameters. >>> f.parms Parameters([(Symbol(...'a'), <__main__.Parameter object at 0x...>), (Symbol(...'b'), <__main__.Parameter object at 0x...>), ]) Because Parameters is a Namespace, we can access its content by name. >>> f.parms["a"] <__main__.Parameter object at 0x...> Its content are Parameter instances. Parameter has an Attribute "typing". >>> f.parms["b"].typing Type(...'long') The Instructions of our small sample are just words. Because Function is a List, we can access them one by one. >>> f Function([...'do_this', ...'do_that'], name=Symbol(...'f')) >>> print("f is " + repr(f[0])) f is ...'do_this' The result can be composed to a text again. >>> f.append(Instruction("do_something_else")) >>> print(compose(f)) int f(int a, long b) { /* on level 1 */ do_this; /* on level 1 */ do_that; /* on level 1 */ do_something_else; } ... pyPEG contains an XML backend, too: >>> del f[2] >>> from pypeg2.xmlast import thing2xml >>> xml = thing2xml(f, pretty=True) >>> print(xml.decode()) do_this do_that ... The XML backend can read XML text and create things: >>> from pypeg2.xmlast import xml2thing >>> xml = b'return' >>> g = xml2thing(xml, globals()) >>> g.name Symbol(...'g') >>> g.typing Type(...'long') >>> g.parms["x"].typing Type(...'int') >>> print("g[0] is " + repr(g[0])) g[0] is ...'return' """ from __future__ import unicode_literals, print_function from pypeg2 import * # A Symbol can be an arbitrary word or one word of an Enum. # In this easy example there is an Enum. class Type(Keyword): grammar = Enum( K("int"), K("long") ) # Parsing attributes adds them to the resulting thing. # blank is a callback function. Callback functions are being executed by # compose(). parse() ignores callback functions. blank inserts " ". # name() generates a name attribute. class Parameter(object): grammar = attr("typing", Type), blank, name() # A Namespace is a container for named things. # csl() creates the grammar for a comma separated list. class Parameters(Namespace): grammar = optional(csl(Parameter)) # This is an example for a user defined callback function, heading(). # endl is a special callback function. It is never executed. Instead it # triggers the indention system of compose() and will be replaced by "\n". class Instruction(str): def heading(self, parser): return "/* on level " + str(parser.indention_level) + " */", endl grammar = heading, word, ";", endl # indent() is a function which marks things for being indented by compose(). # indent() raises the indention level by 1 for each thing which is inside. block = "{", endl, maybe_some(indent(Instruction)), "}", endl # If a thing is a List, then parsed things are being put into. class Function(List): grammar = attr("typing", Type), blank, name(), "(", attr("parms", Parameters), ")", endl, block if __name__ == '__main__': import doctest doctest.testmod(optionflags=(doctest.ELLIPSIS | doctest.REPORT_ONLY_FIRST_FAILURE)) pyPEG2-2.15.2/samples/sample2.py0000644000175000017500000000456612600575565017364 0ustar florianflorian00000000000000#!/usr/bin/python3 """ Ini file sample (see end of file for the content of the ini file) To parse an ini file we use the grammar below. Comments in ini files are starting with a semicolon ";". >>> ini_file = parse(ini_file_text, IniFile, comment=(";", restline)) Because IniFile and Section are Namespaces, we can access their content by name. >>> print("found: " + repr(ini_file["Number 1"]["that"])) found: ...'something else' pyPEG is measuring the position of each object in the input text with a tuple (line_number, offset). >>> ini_file["Number 1"]["that"].position_in_text (3, 26) >>> ini_file["Number 2"].position_in_text (6, 85) pyPEG can also do the reverse job, composing a text of an object tree. >>> ini_file["Number 1"]["that"] = Key("new one") >>> ini_file["Number 3"] = Section() >>> print(compose(ini_file)) [Number 1] this=something that=new one [Number 2] once=anything twice=goes [Number 3] ... pyPEG contains an XML backend, too: >>> from pypeg2.xmlast import thing2xml >>> print(thing2xml(ini_file, pretty=True).decode())
something new one
anything goes
... In this sample the tree contains named objects only. Then we can output object names as tag names. Spaces in names will be translated into underscores. >>> print(thing2xml(ini_file, pretty=True, object_names=True).decode()) something new one anything goes ... """ from __future__ import unicode_literals, print_function from pypeg2 import * import re # ini file parser # symbols in ini files can include spaces Symbol.regex = re.compile(r"[\w\s]+") class Key(str): grammar = name(), "=", restline, endl class Section(Namespace): grammar = "[", name(), "]", endl, maybe_some(Key) class IniFile(Namespace): grammar = some(Section) if __name__ == "__main__": ini_file_text = """[Number 1] this=something that=something else ; now for something even more useless [Number 2] once=anything twice=goes """ import doctest doctest.testmod(optionflags=(doctest.ELLIPSIS | doctest.REPORT_ONLY_FIRST_FAILURE)) pyPEG2-2.15.2/README.txt0000644000175000017500000000134312600574741015462 0ustar florianflorian00000000000000============================== pyPEG 2 for Python 2.7 and 3.x ============================== Python is a nice scripting language. It even gives you access to its own parser and compiler. It also gives you access to different other parsers for special purposes like XML and string templates. But sometimes you may want to have your own parser. This is what's pyPEG for. And pyPEG supports Unicode. The source code for all you can find on bitbucket: https://bitbucket.org/fdik/pypeg/ To build the documentation, you'll need YML 2. You can download YML here: Homepage: http://fdik.org/yml/ Toolchain: http://fdik.org/yml2.tar.bz2 You can install pyPEG 2 with: pip install pypeg2 pyPEG 2 depends on lxml, see http://lxml.de/ pyPEG2-2.15.2/LICENSE.txt0000644000175000017500000004310312600574741015607 0ustar florianflorian00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. pyPEG2-2.15.2/TODO.txt0000644000175000017500000000015612600574741015273 0ustar florianflorian00000000000000- omit() includes optional() - thing based memoization - pyPEG 1 compatibility wrapper / grammar transformer pyPEG2-2.15.2/setup.py0000644000175000017500000000210712605265164015476 0ustar florianflorian00000000000000from distutils.core import setup _version = '2.15.2' setup( name='pyPEG2', version=_version, author='Volker Birk', author_email='vb@dingens.org', packages=['pypeg2', 'pypeg2.test'], url='http://fdik.org/pyPEG2', download_url='http://fdik.org/pyPEG2/pyPEG2-' + _version + '.tar.gz', license='LICENSE.txt', description='An intrinsic PEG Parser-Interpreter for Python', long_description=open('README.txt').read(), requires=['lxml',], provides=['pyPEG2 (' + _version + ')',], classifiers=[ 'Development Status :: 5 - Production/Stable', 'Intended Audience :: Developers', 'Intended Audience :: Science/Research', 'License :: OSI Approved :: GNU General Public License v2 (GPLv2)', 'Operating System :: OS Independent', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 2', 'Topic :: Software Development :: Compilers', 'Topic :: Software Development :: Interpreters', 'Topic :: Software Development :: Libraries :: Python Modules', ], ) pyPEG2-2.15.2/docs/0000755000175000017500000000000012605271064014710 5ustar florianflorian00000000000000pyPEG2-2.15.2/docs/grammar_elements.html0000644000175000017500000011311412605270360021117 0ustar florianflorian00000000000000pyPEG – Grammar Elements

Grammar Elements

Caveat: pyPEG 2.x is written for Python 3. That means, it accepts Unicode strings only. You can use it with Python 2.7 by writing u'string' instead of 'string' or with the following import (you don't need that for Python 3):

from __future__ import unicode_literals

The samples in this documentation are written for Python 3, too. To execute them with Python 2.7, you'll need this import:

from __future__ import print_function

pyPEG 2.x supports new-style classes only.

Basic Grammar Elements

str instances and Literal

Parsing

A str instance as well as an instance of pypeg2.Literal is parsed in the source text as a Terminal Symbol. It is removed and no result is put into the Abstract syntax tree. If it does not exist at the correct position in the source text, a SyntaxError is raised.

Example:

>>> class Key(str):
...     grammar = name(), "=", restline, endl
... 
>>> k = parse("this=something", Key)
>>> k.name
Symbol('this')
>>> k
'something'

Composing

str instances and pypeg2.Literal instances are being output literally.

Example:

>>> class Key(str):
...     grammar = name(), "=", restline, endl
... 
>>> k = Key("a value")
>>> k.name = Symbol("give me")
>>> compose(k)
'give me=a value\n'

Regular Expressions

Parsing

pyPEG uses Python's re module. You can use Python Regular Expression Objects purely, or use the pypeg2.RegEx encapsulation. Regular Expressions are parsed as Terminal Symbols. The matching result is put into the AST. If no match can be achieved, a SyntaxError is raised.

pyPEG predefines different RegEx objects:

word = re.compile(r"\w+")

Regular expression for scanning a word.

restline = re.compile(r".*")

Regular expression for rest of line.

whitespace = re.compile("(?m)\s+")

Regular expression for scanning whitespace.

comment_sh = re.compile(r"\#.*")

Shell script style comment.

comment_cpp = re.compile(r"//.*")

C++ style comment.

comment_c = re.compile(r"(?m)/\*.*?\*/")

C style comment without nesting.

comment_pas = re.compile(r"(?m)\(\*.*?\*\)")

Pascal style comment without nesting.

Example:

>>> class Key(str):
...     grammar = name(), "=", restline, endl
... 
>>> k = parse("this=something", Key)
>>> k.name
Symbol('this')
>>> k
'something'

Composing

For RegEx objects their corresponding value in the AST will be output. If this value does not match the RegEx a ValueError is raised.

Example:

>>> class Key(str):
...     grammar = name(), "=", restline, endl
... 
>>> k = Key("a value")
>>> k.name = Symbol("give me")
>>> compose(k)
'give me=a value\n'

tuple instances and Concat

Parsing

A tuple or an instance of pypeg2.Concat specifies, that different things have to be parsed one after another. If not all of them parse in their sequence, a SyntaxError is raised.

Example:

>>> class Key(str):
...     grammar = name(), "=", restline, endl
... 
>>> k = parse("this=something", Key)
>>> k.name
Symbol('this')
>>> k
'something'

In a tuple there may be integers preceding another thing in the tuple. These integers represent a cardinality. For example, to parse three times a word, you can have as a grammar:

grammar = word, word, word

or:

grammar = 3, word

which is equivalent. There are special cardinality values:

-2, thing

some(thing); this represents the plus cardinality, +

-1, thing

maybe_some(thing); this represents the asterisk cardinality, *

0, thing

optional(thing); this represents the question mark cardinality, ?

The special cardinality values can be generated with the Cardinality Functions. Other negative values are reserved and may not be used.

Composing

For tuple instances and instances of pypeg2.Concat all attributes of the corresponding thing (and elements of the corresponding collection if that applies) in the AST will be composed and the result is concatenated.

Example:

>>> class Key(str):
...     grammar = name(), "=", restline, endl
... 
>>> k = Key("a value")
>>> k.name = Symbol("give me")
>>> compose(k)
'give me=a value\n'

list instances

Parsing

A list instance which is not derived from pypeg2.Concat represents different options. They're tested in their sequence. The first option which parses is chosen, the others are not tested any more. If none matches, a SyntaxError is raised.

Example:

>>> number = re.compile(r"\d+")
>>> parse("hello", [number, word])
'hello'

Composing

The elements of the list are tried out in their sequence, if one of them can be composed. If none can a ValueError is raised.

Example:

>>> letters = re.compile(r"[a-zA-Z]")
>>> number = re.compile(r"\d+")
>>> compose(23, [letters, number])
'23'

Constant None

None parses to nothing. And it composes to nothing. It represents the no-operation value.

Grammar Element Classes

Class Symbol

Class definition

Symbol(str)

Used to scan a Symbol.

If you're putting a Symbol somewhere in your grammar, then Symbol.regex is used to scan while parsing. The result will be a Symbol instance. Optionally it is possible to check that a Symbol instance will not be identical to any Keyword instance. This can be helpful if the source language forbids that.

A class which is derived from Symbol can have an Enum as its grammar only. Other values for its grammar are forbidden and will raise a TypeError. If such an Enum is specified, each parsed value will be checked if being a member of this Enum additionally to the RegEx matching.

Class variables

regex

regular expression to scan, default re.compile(r"\w+")

check_keywords

flag if a Symbol has to be checked for not being a Keyword; default: False

Instance variables

name

name of the Keyword as str instance

Method __init__(self, name, namespace=None)

Construct a Symbol with that name in namespace.

Raises:

ValueError

if check_keywords is True and value is identical to a Keyword

TypeError

if namespace is given and not an instance of Namespace

Parsing

Parsing a Symbol is done by scanning with Symbol.regex. In our example we're using the name() function, which is often used to parse a Symbol. name() equals to attr("name", Symbol).

Example:

>>> Symbol.regex = re.compile(r"[\w\s]+")
>>> class Key(str):
...     grammar = name(), "=", restline, endl
...
>>> k = parse("this one=foo bar", Key)
>>> k.name
Symbol('this one')
>>> k
'foo bar'

Composing

Composing a Symbol is done by converting it to text.

Example:

>>> k.name = Symbol("that one")
>>> compose(k)
'that one=foo bar'

Class Keyword

Class definition

Keyword(Symbol)

Used to access the keyword table.

The Keyword class is meant to be instanciated for each Keyword of the source language. The class holds the keyword table as a Namespace instance. There is the abbreviation K for Keyword. The latter is useful for instancing keywords.

Class variables

regex

regular expression to scan; default re.compile(r"\w+")

table

Namespace with keyword table

Instance variables

name

name of the Keyword as str instance

Method __init__(self, keyword)

Adds keyword to the keyword table.

Parsing

When a Keyword instance is parsed, it is removed and nothing is put into the resulting AST. When a Keyword class is parsed, an instance is created and put into the AST.

Example:

>>> class Type(Keyword):
...     grammar = Enum( K("int"), K("long") )
... 
>>> k = parse("long", Type)
>>> k.name
'long'

Composing

When a Keyword instance is in a grammar, it is converted into a str instance, and the resulting text is added to the result. When a Keyword class is in the grammar, the correspoding instance in the AST is converted into a str instance and added to the result.

Example:

>>> k = K("do")
>>> compose(k)
'do'

Class List

Class definition

List(list)

A List of things.

A List is a collection for parsed things. It can be used as a base class for collections in the grammar. If a List class has no class variable grammar, grammar = csl(Symbol) is assumed.

Method __init__(self, L=[], **kwargs)

Construct a List, and construct its attributes from keyword arguments.

Parsing

A List is parsed by following its grammar. If a List is parsed, then all things which are parsed and which are not attributes are appended to the List.

Example:

>>> class Instruction(str): pass
...
>>> class Block(List):
...     grammar = "{", maybe_some(Instruction), "}"
... 
>>> b = parse("{ hello world }", Block)
>>> b[0]
'hello'
>>> b[1]
'world'
>>> 

Composing

If a List is composed, then its grammar is followed and composed.

Example:

>>> class Instruction(str): pass
... 
>>> class Block(List):
...     grammar = "{", blank, csl(Instruction), blank, "}"
... 
>>> b = Block()
>>> b.append(Instruction("hello"))
>>> b.append(Instruction("world"))
>>> compose(b)
'{ hello, world }'

Class Namespace

Class definition

Namespace(_UserDict)

A dictionary of things, indexed by their name.

A Namespace holds an OrderedDict mapping the name attributes of the collected things to their respective representation instance. Unnamed things cannot be collected with a Namespace.

Method __init__(self, *args, **kwargs)

Initialize an OrderedDict containing the data of the Namespace. Arguments are put into the Namespace, keyword arguments give the attributes of the Namespace.

Parsing

A Namespace is parsed by following its grammar. If a Namespace is parsed, then all things which are parsed and which are not attributes are appended to the Namespace and indexed by their name attribute.

Example:

>>> Symbol.regex = re.compile(r"[\w\s]+")
>>> class Key(str):
...     grammar = name(), "=", restline, endl
... 
>>> class Section(Namespace):
...     grammar = "[", name(), "]", endl, maybe_some(Key)
... 
>>> class IniFile(Namespace):
...     grammar = some(Section)
... 
>>> ini_file_text = """[Number 1]
... this=something
... that=something else
... [Number 2]
... once=anything
... twice=goes
... """
>>> ini_file = parse(ini_file_text, IniFile)
>>> ini_file["Number 2"]["once"]
'anything'

Composing

If a Namespace is composed, then its grammar is followed and composed.

Example:

>>> ini_file["Number 1"]["that"] = Key("new one")
>>> ini_file["Number 3"] = Section()
>>> print(compose(ini_file))
[Number 1]
this=something
that=new one
[Number 2]
once=anything
twice=goes
[Number 3]

Class Enum

Class definition

Enum(Namespace)

A Namespace which is treated as an Enum. Enums can only contain Keyword or Symbol instances. An Enum cannot be modified after creation. An Enum is allowed as the grammar of a Symbol only.

Method __init__(self, *things)

Construct an Enum using a tuple of things.

Parsing

An Enum is parsed as a selection for possible values for a Symbol. If a value is parsed which is not member of the Enum, a SyntaxError is raised.

Example:

>>> class Type(Keyword):
...     grammar = Enum( K("int"), K("long") )
... 
>>> parse("int", Type)
Type('int')
>>> parse("string", Type)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "pypeg2/__init__.py", line 382, in parse
    t, r = parser.parse(text, thing)
  File "pypeg2/__init__.py", line 469, in parse
    raise r
  File "<string>", line 1
    string
    ^
SyntaxError: 'string' is not a member of Enum([Keyword('int'),
Keyword('long')])
>>> 

Composing

When a Symbol is composed which has an Enum as its grammar, the composed value is checked if it is a member of the Enum. If not, a ValueError is raised.

>>> class Type(Keyword):
...     grammar = Enum( K("int"), K("long") )
... 
>>> t = Type("int")
>>> compose(t)
'int'
>>> t = Type("string")
>>> compose(t)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "pypeg2/__init__.py", line 403, in compose
    return parser.compose(thing, grammar)
  File "pypeg2/__init__.py", line 819, in compose
    raise ValueError(repr(thing) + " is not in " + repr(grammar))
ValueError: Type('string') is not in Enum([Keyword('int'),
Keyword('long')])

Grammar generator functions

Grammar generator function generate a piece of a grammar. They're meant to be used in a grammar directly.

Function some()

Synopsis

some(*thing)

At least one occurrence of thing, + operator. Inserts -2 as cardinality before thing.

Parsing

Parsing some() parses at least one occurence of thing, or as many as there are. If there aren't things then a SyntaxError is generated.

Example:

>>> w = parse("hello world", some(word))
>>> w
['hello', 'world']
>>> w = parse("", some(word))
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "pypeg2/__init__.py", line 390, in parse
    t, r = parser.parse(text, thing)
  File "pypeg2/__init__.py", line 477, in parse
    raise r
  File "<string>", line 1
    
    ^
SyntaxError: expecting match on \w+

Composing

Composing some() composes as many things as there are, but at least one. If there is no matching thing, a ValueError is raised.

Example:

>>> class Words(List):
...     grammar = some(word, blank)
... 
>>> compose(Words("hello", "world"))
'hello world '
>>> compose(Words())
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "pypeg2/__init__.py", line 414, in compose
    return parser.compose(thing, grammar)
  File "pypeg2/__init__.py", line 931, in compose
    result = compose_tuple(thing, thing[:], grammar)
  File "pypeg2/__init__.py", line 886, in compose_tuple
    raise ValueError("not enough things to compose")
ValueError: not enough things to compose
>>> 

Function maybe_some()

Synopsis

maybe_some(*thing)

No thing or some of them, * operator. Inserts -1 as cardinality before thing.

Parsing

Parsing maybe_some() parses all occurrences of thing. If there aren't things then the result is empty.

Example:

>>> parse("hello world", maybe_some(word))
['hello', 'world']
>>> parse("", maybe_some(word))
[]

Composing

Composing maybe_some() composes as many things as there are.

>>> class Words(List):
...     grammar = maybe_some(word, blank)
... 
>>> compose(Words("hello", "world"))
'hello world '
>>> compose(Words())
''

Function optional()

Synopsis

optional(*thing)

Thing or no thing, ? operator. Inserts 0 as cardinality before thing.

Parsing

Parsing optional() parses one occurrence of thing. If there aren't things then the result is empty.

Example:

>>> parse("hello", optional(word))
['hello']
>>> parse("", optional(word))
[]
>>> number = re.compile("[-+]?\d+")
>>> parse("-23 world", (optional(word), number, word))
['-23', 'world']

Composing

Composing optional() composes one thing if there is any.

Example:

>>> class OptionalWord(str):
...     grammar = optional(word)
... 
>>> compose(OptionalWord("hello"))
'hello'
>>> compose(OptionalWord())
''

Function csl()

Synopsis

Python 3.x:

csl(*thing, separator=",")

Python 2.7:

csl(*thing)

Generate a grammar for a simple comma separated list.

csl(Something) generates Something, maybe_some(",", blank, Something)

Function attr()

Synopsis

attr(name, thing=word, subtype=None)

Generate an Attribute with that name, referencing the thing. An Attribute is a namedtuple("Attribute", ("name", "thing")).

Instance variables

Class

reference to Attribute class generated by namedtuple()

Parsing

An Attribute is parsed following its grammar in thing. The result is not put into another thing directly; instead the result is added as an attribute to containing thing.

Example:

>>> class Type(Keyword):
...     grammar = Enum( K("int"), K("long") )
... 
>>> class Parameter:
...     grammar = attr("typing", Type), blank, name()
... 
>>> p = parse("int a", Parameter)
>>> p.typing
Type('int')

Composing

An Attribute is cmposed following its grammar in thing.

Example:

>>> p = Parameter()
>>> p.typing = K("int")
>>> p.name = "x"
>>> compose(p)
'int x'

Function flag()

Synopsis

flag(name, thing=None)

Generate an Attribute with that name which is valued True or False. If no thing is given, Keyword(name) is assumed.

Parsing

A flag is usually a Keyword which can be there or not. If it is there, the resulting value is True. If it is not there, the resulting value is False.

Example:

>>> class BoolLiteral(Symbol):
...     grammar = Enum( K("True"), K("False") )
... 
>>> class Fact:
...     grammar = name(), K("is"), flag("negated", K("not")), \
...             attr("value", BoolLiteral)
... 
>>> f1 = parse("a is not True", Fact)
>>> f2 = parse("b is False", Fact)
>>> f1.name
Symbol('a')
>>> f1.value
BoolLiteral('True')
>>> f1.negated
True
>>> f2.negated
False

Composing

If the flag is True compose the grammar. If the flag is False don't compose anything.

Example:

>>> class ValidSign:
...     grammar = flag("invalid", K("not")), blank, "valid"
... 
>>> v = ValidSign()
>>> v.invalid = True
>>> compose(v)
'not valid'

Function name()

Synopsis

name()

Generate a grammar for a Symbol with a name. This is a shortcut for attr("name", Symbol).

Function ignore()

Synopsis

ignore(*grammar)

Ignore what matches to the grammar.

Parsing

Parse what's to be ignored. The result is added to an attribute named "_ignore" + str(i) with i as a serial number.

Composing

Compose the result as with any attr().

Function indent()

Synopsis

indent(*thing)

Indent thing by one level.

Parsing

The indent function has no meaning while parsing. The parameters are parsed as if they would be in a tuple.

Composing

While composing the indent function increases the level of indention.

Example:

>>> class Instruction(str):
...     grammar = word, ";", endl
... 
>>> class Block(List):
...     grammar = "{", endl, maybe_some(indent(Instruction)), "}"
... 
>>> print(compose(Block(Instruction("first"), \
...         Instruction("second"))))
{
    first;
    second;
}

Function contiguous()

Synopsis

contiguous(*thing)

Temporary disable automated whitespace removing while parsing thing.

Parsing

While parsing whitespace removing is disabled. That means, if whitespace is not part of the grammar, it will lead to a SyntaxError if whitespace will be found between the parsed objects.

Example:

class Path(List):
    grammar = flag("relative", "."), maybe_some(Symbol, ".")

class Reference(GrammarElement):
    grammar = contiguous(attr("path", Path), name())

Composing

While composing the contiguous function has no effect.

Function separated()

Synopsis

separated(*thing)

Temporary enable automated whitespace removing while parsing thing. Whitespace removing is enabled by default. This function is for temporary enabling whitespace removing after it was disabled with the contiguous function.

Parsing

While parsing whitespace removing is enabled again. That means, if whitespace is not part of the grammar, it will be omitted if whitespace will be found between parsed objects.

Composing

While composing the separated function has no effect.

Function omit()

Synopsis

omit(*thing)

Omit what matches the grammar. This function cuts out thing and throws it away.

Parsing

While parsing omit() cuts out what matches the grammar thing and throws it away.

Example:

>>> p = parse("hello", omit(Symbol))
>>> print(p)
None
>>> _

Composing

While composing omit() does not compose text for what matches the grammar thing.

Example:

>>> compose(Symbol('hello'), omit(Symbol))
''
>>> _

Callback functions

Callback functions are called while composing only. They're ignored while parsing.

Callback function blank()

Synopsis

blank(thing, parser)

Space marker for composing text.

blank is outputting a space character (ASCII 32) when called.

Callback function endl()

Synopsis

endl(thing, parser)

End of line marker for composing text.

endl is outputting a linefeed charater (ASCII 10) when called. The indention system reacts when reading endl while composing.

User defined callback functions

Synopsis

callback_function(thing, parser)

Arbitrary callback functions can be defined and put into the grammar. They will be called while composing.

Example:

>>> class Instruction(str):
...     def heading(self, parser):
...         return "/* on level " + str(parser.indention_level) \
...                 + " */", endl
...     grammar = heading, word, ";", endl
... 
>>> print(compose(Instruction("do_this")))
/* on level 0 */
do_this;

Common class methods for grammar elements

If a method of the following is present in a grammar element, it will override the standard behaviour.

parse() class method of a grammar element

Synopsis

parse(cls, parser, text, pos)

Overwrites the parsing behaviour. If present, this class method is called at each place the grammar references the grammar element instead of automatic parsing.

cls

class object of the grammar element

parser

parser object which is calling

text

text to be parsed

pos

(lineNo, charInText) with positioning information

compose() method of a grammar element

Synopsis

compose(cls, parser)

Overwrites the composing behaviour. If present, this class method is called at each place the grammar references the grammar element instead of automatic composing.

cls

class object of the grammar element

parser

parser object which is calling

Want to download? Go to the ^Top^ and look to the right ;-)
pyPEG2-2.15.2/docs/format.css0000644000175000017500000000502612600574741016720 0ustar florianflorian00000000000000html { background-color: brightwhite; } .mark { background:#ffff80; } .red { background:#ffc0c0; } .green { background:#c0ffc0; } .blue { background:#c0c0ff; } .orange { background:#ffe0c0; } #python1 { position: absolute; top: 40px; left: 910px; width: 200px; background: #f0f0f0; font-size: 12pt; font-weight: normal; padding: 10px; } body { counter-reset: chapter; margin-left: auto; margin-right: auto; margin-top: 0; width: 900px; min-height: 768px; background-color: white; font-family: Sans-serif; font-size: 12pt; } em { color: darkblue; font-weight: bold; font-style: normal; } code, pre { white-space: pre; background: #f0f0f0; font-size: 11pt; line-height: 120%; vertical-align: 2%; } #headline { color: black; font-size: 18pt; font-weight: normal; border-bottom-width: 1px; border-bottom-style: solid; padding: 10px; } table.glossary { padding: 0; border-collapse: collapse; border: none; } td.glossary { vertical-align: baseline; margin: 0; padding-left: 0.3em; padding-right: 0.3em; border: solid gray 1px; border-spacing: 0; } #navigation { position: relative; float: right; width: 200px; border-left-width: 1px; border-left-style: dotted; padding: 10px; font-size: 10pt; } .head { font-size: 12pt; font-weight: bold; } #entries { width: 569px; padding: 10px; } .statusline { width: 569px; padding-left: 10px; padding-right: 10px; font-size: 10pt; } #bottom { clear: both; color: grey; padding: 10px; } #entries h1:before { counter-increment: chapter; content: counter(chapter) ". "; } h1 { counter-reset: section; } h2 { counter-reset: subsection; } h2:before { counter-increment: section; content: counter(chapter) "." counter(section) " "; } h1, h2 { font-size: 12pt; color: darkblue; } h3:before { counter-increment: subsection; content: counter(chapter) "." counter(section) "." counter(subsection) " "; } h3 { font-size: 12pt; color: black; } h4 { font-size: 12pt; color: black; } h5 { font-size: 12pt; font-weight: normal; font-style: italic; color: black; } .subscript { font-size: 10pt; border-bottom-width: 1px; border-bottom-style: dotted; margin-bottom: 1em; padding-bottom: 1em; } .small { font-size: 10pt; margin-bottom: 1em; padding-bottom: 1em; } pyPEG2-2.15.2/docs/index.html0000644000175000017500000005326012605270356016716 0ustar florianflorian00000000000000pyPEG – a PEG Parser-Interpreter in Python

Introduction

Python is a nice scripting language. It even gives you access to its own parser and compiler. It also gives you access to different other parsers for special purposes like XML and string templates.

But sometimes you may want to have your own parser. This is what's pyPEG for. And pyPEG supports Unicode.

pyPEG is a plain and simple intrinsic parser interpreter framework for Python version 2.7 and 3.x. It is based on Parsing Expression Grammar, PEG. With pyPEG you can parse many formal languages in a very easy way. How does that work?

Installation

You can install a 2.x series pyPEG release from PyPY with:

pip install pypeg2

Parsing text with pyPEG

PEG is something like Regular Expressions with recursion. The grammars are like templates. Let's make an example. Let's say, you want to parse a function declaration in a C like language. Such a function declaration consists of:

    type declaration
    name
    parameters
    block with instructions
int f(int a, long b)
{
    do_this;
    do_that;
}

With pyPEG you're declaring a Python class for each object type you want to parse. This class is then instanciated for each parsed object. This class gets an attribute grammar with a description what should be parsed in what way. In our simple example, we are supporting two different things declared as keywords in our language: int and long. So we're writing a class declaration for the typing, which supports an Enum of the two possible keywords as its grammar:

class Type(Keyword):
    grammar = Enum( K("int"), K("long") )

Common parsing tasks are included in the pyPEG framework. In this example, we're using the Keyword class because the result will be a keyword, and we're using Keyword objects (with the abbreviation K), because what we parse will be one of the enlisted keywords.

The total result will be a Function. So we're declaring a Function class:

class Function:
    grammar = Type, …

The next thing will be the name of the Function to parse. Names are somewhat special in pyPEG. But they're easy to handle: to parse a name, there is a ready made name() function you can call in your grammar to generate a .name Attribute:

class Function:
    grammar = Type, name(), …

Now for the Parameters part. First let's declare a class for the parameters. Parameters has to be a collection, because there may be many of them. pyPEG has some ready made collections. For the case of the Parameters, the Namespace collection will fit. It provides indexed access by name, and Parameters have names (in our example: a and b). We write it like this:

class Parameters(Namespace):
    grammar = …

A single Parameter has a structure itself. It has a Type and a name(). So let's define:

class Parameter:
    grammar = Type, name()

class Parameters(Namespace):
    grammar = …

pyPEG will instantiate the Parameter class for each parsed parameter. Where will the Type go to? The name() function will generate a .name Attribute, but the Type object? Well, let's move it to an Attribute, too, named .typing. To generate an Attribute, pyPEG offers the attr() function:

class Parameter:
    grammar = attr("typing", Type), name()

class Parameters(Namespace):
    grammar = …

By the way: name() is just a shortcut for attr("name", Symbol). It generates a Symbol.

How can we fill our Namespace collection named Parameters? Well, we have to declare, how a list of Parameter objects will look like in our source text. An easy way is offered by pyPEG with the cardinality functions. In this case we can use maybe_some(). This function represents the asterisk cardinality, *

class Parameter:
    grammar = attr("typing", Type), name()

class Parameters(Namespace):
    grammar = Parameter, maybe_some(",", Parameter)

This is how we express a comma separated list. Because this task is so common, there is a shortcut generator function again, csl(). The code below will do the same as the code above:

class Parameter:
    grammar = attr("typing", Type), name()

class Parameters(Namespace):
    grammar = csl(Parameter)

Maybe a function has no parameters. This is a case we have to consider. What should happen then? In our example, then the Parameters Namespace should be empty. We're using another cardinality function for that case, optional(). It represents the question mark cardinality, ?

class Parameter:
    grammar = attr("typing", Type), name()

class Parameters(Namespace):
    grammar = optional(csl(Parameter))

We can continue with our Function class. The Parameters will be in parantheses, we just put that into the grammar:

class Function:
    grammar = Type, name(), "(", Parameters, ")", …

Now for the block of instructions. We could declare another collection for the Instructions. But the function itself can be seen as a list of instructions. So let us declare it this way. First we make the Function class itself a List:

class Function(List):
    grammar = Type, name(), "(", Parameters, ")", …

If a class is a List, pyPEG will put everything inside this list, which will be parsed and does not generate an Attribute. So with that modification, our Parameters now will be put into that List, too. And so will be the Type. This is an option, but in our example, it is not what we want. So let's move them to an Attribute .typing and an Attribute .parms respectively:

class Function(List):
    grammar = attr("typing", Type), name(), \
            "(", attr("parms", Parameters), ")", …

Now we can define what a block will look like, and put it just behind into the grammar of a Function. The Instruction class we have plain and simple. Of course, in a real world example, it can be pretty complex ;-) Here we just have it as a word. A word is a predefined RegEx; it is re.compile(r"\w+").

class Instruction(str):
    grammar = word, ";"

block = "{", maybe_some(Instruction), "}"

Now let's put that to the tail of our Function.grammar:

class Function(List):
    grammar = attr("typing", Type), name(), \
            "(", attr("parms", Parameters), ")", block

Caveat: pyPEG 2.x is written for Python 3. You can use it with Python 2.7 with the following import (you don't need that for Python 3):

from __future__ import unicode_literals, print_function

Well, that looks pretty good now. Let's try it out using the parse() function:

>>> from pypeg2 import *
>>> class Type(Keyword):
...     grammar = Enum( K("int"), K("long") )
... 
>>> class Parameter:
...     grammar = attr("typing", Type), name()
... 
>>> class Parameters(Namespace):
...     grammar = optional(csl(Parameter))
... 
>>> class Instruction(str):
...     grammar = word, ";"
... 
>>> block = "{", maybe_some(Instruction), "}"
>>> class Function(List):
...     grammar = attr("typing", Type), name(), \
...             "(", attr("parms", Parameters), ")", block
... 
>>> f = parse("int f(int a, long b) { do_this; do_that; }",
...         Function)
>>> f.name
Symbol('f')
>>> f.typing
Symbol('int')
>>> f.parms["b"].typing
Symbol('long')
>>> f[0]
'do_this'
>>> f[1]
'do_that'

Composing text

pyPEG can do more. It is not only a framework for parsing text, it can compose source code, too. A pyPEG grammar is not only “just like” a template, it can actually be used as a template for composing text. Just call the compose() function:

>>> compose(f, autoblank=False)
'intf(inta, longb){do_this;do_that;}'

As you can see, for composing first there is a lack of whitespace. This is because we used the automated whitespace removing functionality of pyPEG while parsing (which is enabled by default) but we disabled the automated adding of blanks if violating syntax otherwise. To improve on that we have to extend our grammar templates a little bit. For that case, there are callback function objects in pyPEG. They're only executed by compose() and ignored by parse(). And as usual, there are predefined ones for the common cases. Let's try that out. First let's add blank between things which should be separated:

class Parameter:
    grammar = attr("typing", Type), blank, name()

class Function(List):
    grammar = attr("typing", Type), blank, name(), \
            "(", attr("parms", Parameters), ")", block

After resetting everything, this will lead to the output:

>>> compose(f, autoblank=False)
'int f(int a, long b){do_this;do_that;}'

The blank after the comma int a, long b was generated by the csl() function; csl(Parameter) generates:

Parameter, maybe_some(",", blank, Parameter)

Indenting text

In C like languages (like our example) we like to indent blocks. Indention is something, which is relative to a current position. If something is inside a block already, and should be indented, it has to be indented two times (and so on). For that case pyPEG has an indention system.

The indention system basically is using the generating function indent() and the callback function object endl. With indent we can mark what should be indented, sending endl means here should start the next line of the source code being output. We can use this for our block:

class Instruction(str):
    grammar = word, ";", endl

block = "{", endl, maybe_some(indent(Instruction)), "}", endl

class Function(List):
    grammar = attr("typing", Type), blank, name(), \
            "(", attr("parms", Parameters), ")", endl, block

This changes the output to:

>>> print(compose(f))
int f(int a, long b)
{
    do_this;
    do_that;
}

User defined Callback Functions

With User defined Callback Functions pyPEG offers the needed flexibility to be useful as a general purpose template system for code generation. In our simple example let's say we want to have processing information in comments in the Function declaration, i.e. the indention level in a comment bevor each Instruction. For that we can define our own Callback Function:

class Instruction(str):
    def heading(self, parser):
        return "/* on level " + str(parser.indention_level) \
                + " */", endl

Such a Callback Function is called with two arguments. The first argument is the object to output. The second argument is the parser object to get state information of the composing process. Because this fits the convention for Python methods, you can write it as a method of the class where it belongs to.

The return value of such a Callback Function must be the resulting text. In our example, a C comment shell be generated with notes. We can put this now into the grammar.

class Instruction(str):
    def heading(self, parser):
        return "/* on level " + str(parser.indention_level) \
                + " */", endl

    grammar = heading, word, ";", endl

The result is corresponding:

>>> print(compose(f))
int f(int a, long b)
{
    /* on level 1 */
    do_this;
    /* on level 1 */
    do_that;
}

XML output

Sometimes you want to process what you parsed with the XML toolchain, or with the YML toolchain. Because of that, pyPEG has an XML backend. Just call the thing2xml() function to get bytes with encoded XML:

>>> from pypeg2.xmlast import thing2xml
>>> print(thing2xml(f, pretty=True).decode())
<Function typing="int" name="f">
  <Parameters>
    <Parameter typing="int" name="a"/>
    <Parameter typing="long" name="b"/>
  </Parameters>
  <Instruction>do_this</Instruction>
  <Instruction>do_that</Instruction>
</Function>

The complete sample code you can download here.

Want to download? Go to the ^Top^ and look to the right ;-)
pyPEG2-2.15.2/docs/parser_engine.html0000644000175000017500000004777412605270355020444 0ustar florianflorian00000000000000pyPEG – the Parser Engine

Parser Engine

Class Parser

Offers parsing and composing capabilities. Implements an intrinsic Packrat parser.

pyPEG uses memoization as speed enhancement. Create a Parser instance to have a reset cache memory. Usually this is recommended if you're parsing another text – the cache memory will not provide wrong results but a reset will save memory consumption. If you're altering the grammar then clearing the cache memory for the respective things is required for having correct parsing results. Please use the clear_memory() method in that case.

Instance variables

The instance variables are representing the parser's state.

whitespace

Regular expression to scan whitespace; default: re.compile(r"(?m)\s+"). Set to None to disable automatic whitespace removing.

comment

grammar to parse comments; default: None. If a grammar is set here, comments will be removed from the source text automatically.

last_error

after parsing, SyntaxError which ended parsing

indent

string to use to indent while composing; default: four spaces

indention_level

level to indent to; default: 0

text

original text to parse; set for decorated syntax errors

filename

filename where text is origin from

autoblank

add blanks while composing if grammar would possibly be violated otherwise; default: True

keep_feeble_things

keep otherwise cropped things like comments and whitespace; these things are being put into the feeble_things attribute

Method __init__()

Synopsis

__init__(self)

Initialize instance variables to their defaults.

Method clear_memory()

Synopsis

clear_memory(self, thing=None)

Clear cache memory for packrat parsing.

This method clears the cache memory for thing. If None is given as thing, it clears the cache completely.

Arguments

thing

thing for which cache memory is cleared; default: None

Method parse()

Synopsis

parse(self, text, thing, filename=None)

(Partially) parse text following thing as grammar and return the resulting things.

This method parses as far as possible. It does not raise a SyntaxError if the source text does not parse completely. It returns a SyntaxError object as result part of the return value if the beginning of the source text does not comply with grammar thing.

Arguments

text

text to parse

thing

grammar for things to parse

filename

filename where text is origin from

Returns

Returns (text, result) with:

text

unparsed text

result

generated objects

Raises

ValueError

if input does not match types

TypeError

if output classes have wrong syntax for their respective __init__(self, ...)

GrammarTypeError

if grammar contains an object of unkown type

GrammarValueError

if grammar contains an illegal cardinality value

Example:

>>> from pypeg2 import Parser, csl, word
>>> p = Parser()
>>> p.parse("hello, world!", csl(word))
('!', ['hello', 'world'])

Method compose()

Synopsis

compose(self, thing, grammar=None)

Compose text using thing with grammar. If thing.compose() exists, execute it, otherwise use grammar to compose.

Arguments

thing

thing containing other things with grammar

grammar

grammar to use for composing thing; default: type(thing).grammar

Returns

Composed text

Raises

ValueError

if thing does not match grammar

GrammarTypeError

if grammar contains an object of unkown type

GrammarValueError

if grammar contains an illegal cardinality value

Example:

>>> from pypeg2 import Parser, csl, word
>>> p = Parser()
>>> p.compose(['hello', 'world'], csl(word))
'hello, world'

Method generate_syntax_error()

Synopsis

generate_syntax_error(self, msg, pos)

Generate a syntax error construct.

msg

string with error message

pos

(lineNo, charInText) with positioning information

Returns

Instance of SyntaxError with error text

Convenience functions

Function parse()

Synopsis

parse(text, thing, filename=None, whitespace=whitespace,
        comment=None, keep_feeble_things=False)

Parse text following thing as grammar and return the resulting things or raise an error.

Arguments

text

text to parse

thing

grammar for things to parse

filename

filename where text is origin from

whitespace

regular expression to skip whitespace; default: re.compile(r"(?m)\s+")

comment

grammar to parse comments; default: None

keep_feeble_things

keep otherwise cropped things like comments and whitespace; these things are being put into the feeble_things attribute; default: False

Returns

generated things

Raises

SyntaxError

if text does not match the grammar in thing

ValueError

if input does not match types

TypeError

if output classes have wrong syntax for __init__()

GrammarTypeError

if grammar contains an object of unkown type

GrammarValueError

if grammar contains an illegal cardinality value

Example:

>>> from pypeg2 import parse, csl, word
>>> parse("hello, world", csl(word))
['hello', 'world']

Function compose()

Synopsis

compose(thing, grammar=None, indent=" ", autoblank=True)

Compose text using thing with grammar.

Arguments

thing

thing containing other things with grammar

grammar

grammar to use to compose thing; default: thing.grammar

indent

string to use to indent while composing; default: four spaces

autoblank

add blanks if grammar would possibly be violated otherwise; default: True

Returns

composed text

Raises

ValueError

if input does not match grammar

GrammarTypeError

if grammar contains an object of unkown type

GrammarValueError

if grammar contains an illegal cardinality value

Example:

>>> from pypeg2 import compose, csl, word
>>> compose(['hello', 'world'], csl(word))
'hello, world'

Function attributes()

Synopsis

attributes(grammar, invisible=False)

Iterates all attributes of a grammar.

This function can be used to iterate through all attributes which will be generated for the top level object of the grammar. If invisible is False omit attributes whose names are starting with an underscore _.

Example:

>>> from pypeg2 import attr, name, attributes, word, restline
>>> class Me:
...     grammar = name(), attr("typing", word), restline
... 
>>> for a in attributes(Me.grammar): print(a.name)
... 
name
typing
>>> 

Function how_many()

Synopsis

how_many(grammar)

Determines the possibly parsed objects of grammar.

This function is meant to check if the results of a grammar can be stored in a single object or a collection will be needed.

Returns

0

if there will be no objects

1

if there will be a maximum of one object

2

if there can be more than one object

Raises

GrammarTypeError

if grammar contains an object of unkown type

GrammarValueError

if grammar contains an illegal cardinality value

Example:

>>> from pypeg2 import how_many, word, csl
>>> how_many("some")
0
>>> how_many(word)
1
>>> how_many(csl(word))
2

Exceptions

GrammarError

Base class for all errors pyPEG delivers.

GrammarTypeError

A grammar contains an object of a type which cannot be parsed, for example an instance of an unknown class or of a basic type like float. It can be caused by an int at the wrong place, too.

GrammarValueError

A grammar contains an object with an illegal value, for example an undefined cardinality.

Want to download? Go to the ^Top^ and look to the right ;-)
pyPEG2-2.15.2/docs/xml_backend.html0000644000175000017500000002556312605270355020062 0ustar florianflorian00000000000000pyPEG – XML Backend

XML Backend of pyPEG

etree functions

The pyPEG XML Backend uses Python's etree semantic. This way it can easily be integrated into existing working code using XML. The usage of lxml is recommended. If the module lxml is installed, pyPEG uses it automatically.

Function create_tree()

Synopsis

create_tree(thing, parent=None, object_names=False)

Create an XML etree from a thing.

Arguments

thing

thing to interpret

parent

etree.Element to put subtree into; default: create a new Element tree

object_names

experimental feature: if True tag names are object names instead of types

Returns

etree.Element instance created

Example:

>>> from pypeg2.xmlast import create_tree
>>> from pypeg2 import name, restline
>>> class Key(str):
...     grammar = name(), "=", restline
... 
>>> k = Key("world")
>>> k.name = "hello"
>>> t = create_tree(k)
>>> t.attrib["name"]
'hello'
>>> t.text
'world'
>>> type(t)
<class 'lxml.etree._Element'>

Function create_thing()

Synopsis

create_thing(element, symbol_table)

Create thing from an XML element.

Arguments

element

etree.Element instance to read

symbol_table

symbol table where the classes can be found; usually call globals()

Returns

thing created

Example:

>>> from pypeg2.xmlast import create_thing, etree
>>> from pypeg2 import name, restline
>>> class Key(str):
...     grammar = name(), "=", restline
... 
>>> e = etree.fromstring("<Key name='hello'>world</Key>")
>>> k = create_thing(e, globals())
>>> k.name
Symbol('hello')
>>> k
'world'
>>> type(k)
<class '__main__.Key'>

XML convenience functions

Function thing2xml()

Synopsis

thing2xml(thing, pretty=False, object_names=False)

Create XML text from a thing.

Arguments

thing

thing to interpret

pretty

True if XML should be indented, False if XML should be plain (this feature requires lxml)

object_names

experimental feature: if True tag names are object names instead of types

Returns

bytes with encoded XML

Example:

>>> from pypeg2 import name, restline
>>> from pypeg2.xmlast import thing2xml
>>> class Key(str):
...     grammar = name(), "=", restline
... 
>>> k = Key("world")
>>> k.name = "hello"
>>> thing2xml(k)
b'<Key name="hello">world</Key>'

Function xml2thing()

Synopsis

xml2thing(xml, symbol_table)

Create thing from XML text.

Arguments

xml

bytes with encoded XML

symbol_table

symbol table where the classes can be found; usually call globals()

Returns

created thing

Example:

>>> from pypeg2 import name, restline
>>> from pypeg2.xmlast import xml2thing
>>> class Key(str):
...     grammar = name(), "=", restline
... 
>>> k = xml2thing(b"<Key name='hello'>world</Key>", globals())
>>> k.name
Symbol('hello')
>>> k
'world'
Want to download? Go to the ^Top^ and look to the right ;-)
pyPEG2-2.15.2/pypeg2/0000755000175000017500000000000012605271064015166 5ustar florianflorian00000000000000pyPEG2-2.15.2/pypeg2/xmlast.py0000644000175000017500000001254412600574741017061 0ustar florianflorian00000000000000""" XML AST generator pyPEG parsing framework Copyleft 2012, Volker Birk. This program is under GNU General Public License 2.0. """ from __future__ import unicode_literals try: str = unicode except NameError: pass __version__ = 2.15 __author__ = "Volker Birk" __license__ = "This program is under GNU General Public License 2.0." __url__ = "http://fdik.org/pyPEG" try: import lxml from lxml import etree except ImportError: import xml.etree.ElementTree as etree if __debug__: import warnings import pypeg2 def create_tree(thing, parent=None, object_names=False): """Create an XML etree from a thing. Arguments: thing thing to interpret parent etree.Element to put subtree into default: create a new Element tree object_names experimental feature: if True tag names are object names instead of types Returns: etree.Element instance created """ try: grammar = type(thing).grammar except AttributeError: if isinstance(thing, list): grammar = pypeg2.csl(pypeg2.name()) else: grammar = pypeg2.word name = type(thing).__name__ if object_names: try: name = str(thing.name) name = name.replace(" ", "_") except AttributeError: pass if parent is None: me = etree.Element(name) else: me = etree.SubElement(parent, name) for e in pypeg2.attributes(grammar): if object_names and e.name == "name": if name != type(thing).__name__: continue key, value = e.name, getattr(thing, e.name, None) if value is not None: if pypeg2._issubclass(e.thing, (str, int, pypeg2.Literal)) \ or type(e.thing) == pypeg2._RegEx: me.set(key, str(value)) else: create_tree(value, me, object_names) if isinstance(thing, list): things = thing elif isinstance(thing, pypeg2.Namespace): things = thing.values() else: things = [] last = None for t in things: if type(t) == str: if last is not None: last.tail = str(t) else: me.text = str(t) else: last = create_tree(t, me, object_names) if isinstance(thing, str): me.text = str(thing) return me def thing2xml(thing, pretty=False, object_names=False): """Create XML text from a thing. Arguments: thing thing to interpret pretty True if XML should be indented False if XML should be plain object_names experimental feature: if True tag names are object names instead of types Returns: bytes with encoded XML """ tree = create_tree(thing, None, object_names) try: if lxml: return etree.tostring(tree, pretty_print=pretty) except NameError: if __debug__: if pretty: warnings.warn("lxml is needed for pretty printing", ImportWarning) return etree.tostring(tree) def create_thing(element, symbol_table): """Create thing from an XML element. Arguments: element etree.Element instance to read symbol_table symbol table where the classes can be found Returns: thing created """ C = symbol_table[element.tag] if element.text: thing = C(element.text) else: thing = C() subs = iter(element) iterated_already = False try: grammar = C.grammar except AttributeError: pass else: for e in pypeg2.attributes(grammar): key = e.name if pypeg2._issubclass(e.thing, (str, int, pypeg2.Literal)) \ or type(e.thing) == pypeg2._RegEx: try: value = element.attrib[e.name] except KeyError: pass else: setattr(thing, key, e.thing(value)) else: try: if not iterated_already: iterated_already = True sub = next(subs) except StopIteration: pass if sub.tag == e.thing.__name__: iterated_already = False t = create_thing(sub, symbol_table) setattr(thing, key, t) if issubclass(C, list) or issubclass(C, pypeg2.Namespace): try: while True: if iterated_already: iterated_alread = False else: sub = next(subs) t = create_thing(sub, symbol_table) if isinstance(thing, pypeg2.List): thing.append(t) else: thing[t.name] = t except StopIteration: pass return thing def xml2thing(xml, symbol_table): """Create thing from XML text. Arguments: xml bytes with encoded XML symbol_table symbol table where the classes can be found Returns: created thing """ element = etree.fromstring(xml) return create_thing(element, symbol_table) pyPEG2-2.15.2/pypeg2/__init__.py0000644000175000017500000014220712600574741017310 0ustar florianflorian00000000000000""" pyPEG parsing framework pyPEG offers a packrat parser as well as a framework to parse and output languages for Python 2.7 and 3.x, see http://fdik.org/pyPEG2 Copyleft 2012, Volker Birk. This program is under GNU General Public License 2.0. """ from __future__ import unicode_literals try: range = xrange str = unicode except NameError: pass __version__ = 2.15 __author__ = "Volker Birk" __license__ = "This program is under GNU General Public License 2.0." __url__ = "http://fdik.org/pyPEG" import re import sys try: maxsize = sys.maxint except AttributeError: maxsize = sys.maxsize import weakref if __debug__: import warnings from types import FunctionType from collections import namedtuple try: from collections import OrderedDict except ImportError: from ordereddict import OrderedDict word = re.compile(r"\w+") """Regular expression for scanning a word.""" _RegEx = type(word) restline = re.compile(r".*") """Regular expression for rest of line.""" whitespace = re.compile("(?m)\s+") """Regular expression for scanning whitespace.""" comment_sh = re.compile(r"\#.*") """Shell script style comment.""" comment_cpp = re.compile(r"//.*") """C++ style comment.""" comment_c = re.compile(r"(?ms)/\*.*?\*/") """C style comment without nesting comments.""" comment_pas = re.compile(r"(?ms)\(\*.*?\*\)") """Pascal style comment without nesting comments.""" def _card(n, thing): # Reduce unnecessary recursions if len(thing) == 1: return n, thing[0] else: return n, thing def some(*thing): """At least one occurrence of thing, + operator. Inserts -2 as cardinality before thing. """ return _card(-2, thing) def maybe_some(*thing): """No thing or some of them, * operator. Inserts -1 as cardinality before thing. """ return _card(-1, thing) def optional(*thing): """Thing or no thing, ? operator. Inserts 0 as cardinality before thing. """ return _card(0, thing) def _csl(separator, *thing): # reduce unnecessary recursions if len(thing) == 1: L = [thing[0]] L.extend(maybe_some(separator, blank, thing[0])) return tuple(L) else: L = list(thing) L.append(-1) L2 = [separator, blank] L2.extend(tuple(thing)) L.append(tuple(L2)) return tuple(L) try: # Python 3.x _exec = eval("exec") _exec(''' def csl(*thing, separator=","): """Generate a grammar for a simple comma separated list.""" return _csl(separator, *thing) ''') except SyntaxError: # Python 2.7 def csl(*thing): """Generate a grammar for a simple comma separated list.""" return _csl(",", *thing) def attr(name, thing=word, subtype=None): """Generate an Attribute with that name, referencing the thing. Instance variables: Class Attribute class generated by namedtuple() """ # if __debug__: # if isinstance(thing, (tuple, list)): # warnings.warn(type(thing).__name__ # + " not recommended as grammar of attribute " # + repr(name), SyntaxWarning) return attr.Class(name, thing, subtype) attr.Class = namedtuple("Attribute", ("name", "thing", "subtype")) def flag(name, thing=None): """Generate an Attribute with that name which is valued True or False.""" if thing is None: thing = Keyword(name) return attr(name, thing, "Flag") def attributes(grammar, invisible=False): """Iterates all attributes of a grammar.""" if type(grammar) == attr.Class and (invisible or grammar.name[0] != "_"): yield grammar elif type(grammar) == tuple: for e in grammar: for a in attributes(e, invisible): yield a class Whitespace(str): grammar = whitespace class RegEx(object): """Regular Expression. Instance Variables: regex pre-compiled object from re.compile() """ def __init__(self, value, **kwargs): self.regex = re.compile(value, re.U) self.search = self.regex.search self.match = self.regex.match self.split = self.regex.split self.findall = self.regex.findall self.finditer = self.regex.finditer self.sub = self.regex.sub self.subn = self.regex.subn self.flags = self.regex.flags self.groups = self.regex.groups self.groupindex = self.regex.groupindex self.pattern = value for k, v in kwargs.items(): setattr(self, k, v) def __str__(self): return self.pattern def __repr__(self): result = type(self).__name__ + "(" + repr(self.pattern) try: result += ", name=" + repr(self.name) except: pass return result + ")" class Literal(object): """Literal value.""" _basic_types = (bool, int, float, complex, str, bytes, bytearray, list, tuple, slice, set, frozenset, dict) def __init__(self, value, **kwargs): if isinstance(self, Literal._basic_types): pass else: self.value = value for k, v in kwargs.items(): setattr(self, k, v) def __str__(self): if isinstance(self, Literal._basic_types): return super(Literal, self).__str__() else: return str(self.value) def __repr__(self): if isinstance(self, Literal._basic_types): return type(self).__name__ + "(" + \ super(Literal, self).__repr__() + ")" else: return type(self).__name__ + "(" + repr(self.value) + ")" def __eq__(self, other): if isinstance(self, Literal._basic_types): if type(self) == type(other) and super().__eq__(other): return True else: return False else: if type(self) == type(other) and str(self) == str(other): return True else: return False class Plain(object): """A plain object""" def __init__(self, name=None, **kwargs): """Construct a plain object with an optional name and optional other attributes """ if name is not None: self.name = Symbol(name) for k, v in kwargs: setattr(self, k, v) def __repr__(self): """x.__repr__() <==> repr(x)""" try: return self.__class__.__name__ + "(name=" + repr(self.name) + ")" except AttributeError: return self.__class__.__name__ + "()" class List(list): """A List of things.""" def __init__(self, *args, **kwargs): """Construct a List, and construct its attributes from keyword arguments. """ _args = [] if len(args) == 1: if isinstance(args[0], str): self.append(args[0]) elif isinstance(args[0], (tuple, list)): for e in args[0]: if isinstance(e, attr.Class): setattr(self, e.name, e.value) else: _args.append(e) super(List, self).__init__(_args) else: raise ValueError("initializer of List should be collection or string") else: for e in args: if isinstance(e, attr.Class): setattr(self, e.name, e.value) else: _args.append(e) super(List, self).__init__(_args) for k, v in kwargs.items(): setattr(self, k, v) def __repr__(self): """x.__repr__() <==> repr(x)""" result = type(self).__name__ + "(" + super(List, self).__repr__() try: result += ", name=" + repr(self.name) except: pass return result + ")" def __eq__(self, other): return super(List, self).__eq__(list(other)) class _UserDict(object): # UserDict cannot be used because of metaclass conflicts def __init__(self, *args, **kwargs): self.data = dict(*args, **kwargs) def __len__(self): return len(self.data) def __getitem__(self, key): return self.data[key] def __setitem__(self, key, value): self.data[key] = value def __delitem__(self, key): del self.data[key] def __iter__(self): return self.data.keys() def __contains__(self, item): return item in self.data def items(self): return self.data.items() def keys(self): return self.data.keys() def values(self): return self.data.values() def clear(self): self.data.clear() def copy(self): return self.data.copy() class Namespace(_UserDict): """A dictionary of things, indexed by their name.""" name_by = lambda value: "#" + str(id(value)) def __init__(self, *args, **kwargs): """Initialize an OrderedDict containing the data of the Namespace. Arguments are being put into the Namespace, keyword arguments give the attributes of the Namespace. """ if args: self.data = OrderedDict(args) else: self.data = OrderedDict() for k, v in kwargs.items(): setattr(self, k, v) def __setitem__(self, key, value): """x.__setitem__(i, y) <==> x[i]=y""" if key is None: name = Symbol(Namespace.name_by(value)) else: name = Symbol(key) try: value.name = name except AttributeError: pass try: value.namespace except AttributeError: try: value.namespace = weakref.ref(self) except AttributeError: pass else: if not value.namespace: value.namespace = weakref.ref(self) super(Namespace, self).__setitem__(name, value) def __delitem__(self, key): """x.__delitem__(y) <==> del x[y]""" self[key].namespace = None super(Namespace, self).__delitem__(key) def __repr__(self): """x.__repr__() <==> repr(x)""" result = type(self).__name__ + "([" for key, value in self.data.items(): result += "(" + repr(key) + ", " + repr(value) + ")" result += ", " result += "]" try: result += ", name=" + repr(self.name) except: pass return result + ")" class Enum(Namespace): """A Namespace which is being treated as an Enum. Enums can only contain Keywords or Symbols.""" def __init__(self, *things, **kwargs): """Construct an Enum using a tuple of things.""" self.data = OrderedDict() for thing in things: if type(thing) == str: thing = Symbol(thing) if not isinstance(thing, Symbol): raise TypeError(repr(thing) + " is not a Symbol") super(Enum, self).__setitem__(thing, thing) for k, v in kwargs.items(): setattr(self, k, v) def __repr__(self): """x.__repr__() <==> repr(x)""" v = [e for e in self.values()] result = type(self).__name__ + "(" + repr(v) try: result += ", name=" + repr(self.name) except: pass return result + ")" def __setitem__(self, key, value): """x.__setitem__(i, y) <==> x[i]=y""" if not isinstance(value, Keyword) and not isinstance(value, Symbol): raise TypeError("Enums can only contain Keywords or Symbols") raise ValueError("Enums cannot be modified after creation.") class Symbol(str): """Use to scan Symbols. Class variables: regex regular expression to scan, default r"\w+" check_keywords flag if a Symbol is checked for not being a Keyword default: False """ regex = word check_keywords = False def __init__(self, name, namespace=None): """Construct a Symbol with that name in Namespace namespace. Raises: ValueError if check_keywords is True and value is identical to a Keyword TypeError if namespace is given and not a Namespace """ if Symbol.check_keywords and name in Keyword.table: raise ValueError(repr(name) + " is a Keyword, but is used as a Symbol") if namespace: if isinstance(namespace, Namespace): namespace[name] = self else: raise TypeError(repr(namespace) + " is not a Namespace") else: self.name = name self.namespace = None def __repr__(self): """x.__repr__() <==> repr(x)""" return type(self).__name__ + "(" + str(self).__repr__() + ")" class Keyword(Symbol): """Use to access the keyword table. Class variables: regex regular expression to scan, default r"\w+" table Namespace with keyword table """ regex = word table = Namespace() def __init__(self, keyword): """Adds keyword to the keyword table.""" if keyword not in Keyword.table: Keyword.table[keyword] = self self.name = keyword K = Keyword """Shortcut for Keyword.""" class IKeyword(Keyword): """Use for case-insensitive keyword.""" def parse(self, parser, text, pos): m = type(self).regex.match(text) if m: if m.group(0).upper() == str(self).upper(): return text[len(str(self)):], None else: return text, SyntaxError("expecting " + repr(self)) else: return text, SyntaxError("expecting " + repr(self)) IK = IKeyword """Shortcut for case-insensitive Keyword.""" class Concat(List): """Concatenation of things. This class exists as a mutable alternative to using a tuple. """ def name(): """Generate a grammar for a symbol with name.""" return attr("name", Symbol) def ignore(grammar): """Ignore what matches to the grammar.""" try: ignore.serial += 1 except AttributeError: ignore.serial = 1 return attr("_ignore" + str(ignore.serial), grammar) def indent(*thing): """Indent thing by one level. Inserts -3 as cardinality before thing. """ return _card(-3, thing) def contiguous(*thing): """Disable automated whitespace matching. Inserts -4 as cardinality before thing. """ return _card(-4, thing) def separated(*thing): """Enable automated whitespace matching. Inserts -5 as cardinality before thing. """ return _card(-5, thing) def omit(*thing): """Omit what matches to the grammar.""" return _card(-6, thing) endl = lambda thing, parser: "\n" """End of line marker for composing text.""" blank = lambda thing, parser: " " """Space marker for composing text.""" class GrammarError(Exception): """Base class for errors in grammars.""" class GrammarTypeError(TypeError, GrammarError): """Raised if grammar contains an object of unkown type.""" class GrammarValueError(ValueError, GrammarError): """Raised if grammar contains an illegal value.""" def how_many(grammar): """Determines the possibly parsed objects of grammar. Returns: 0 if there will be no objects 1 if there will be a maximum of one object 2 if there can be more than one object Raises: GrammarTypeError if grammar contains an object of unkown type GrammarValueError if grammar contains an illegal cardinality value """ if grammar is None: return 0 elif type(grammar) == int: return grammar elif _issubclass(grammar, Symbol) or isinstance(grammar, (RegEx, _RegEx)): return 1 elif isinstance(grammar, (str, Literal)): return 0 elif isinstance(grammar, attr.Class): return 0 elif type(grammar) == FunctionType: return 0 elif isinstance(grammar, (tuple, Concat)): length, card = 0, 1 for e in grammar: if type(e) == int: if e < -6: raise GrammarValueError( "illegal cardinality value in grammar: " + str(e)) if e in (-5, -4, -3): pass elif e in (-1, -2): card = 2 elif e == 0: card = 1 elif e == -6: return 0 else: card = min(e, 2) else: length += card * how_many(e) if length >= 2: return 2 return length elif isinstance(grammar, list): m = 0 for e in grammar: m = max(m, how_many(e)) if m == 2: return m return m elif _issubclass(grammar, object): return 1 else: raise GrammarTypeError("grammar contains an illegal type: " + type(grammar).__name__ + ": " + repr(grammar)) def parse(text, thing, filename=None, whitespace=whitespace, comment=None, keep_feeble_things=False): """Parse text following thing as grammar and return the resulting things or raise an error. Arguments: text text to parse thing grammar for things to parse filename filename where text is origin from whitespace regular expression to skip whitespace default: regex "(?m)\s+" comment grammar to parse comments default: None keep_feeble_things put whitespace and comments into the .feeble_things attribute instead of dumping them Returns generated things. Raises: SyntaxError if text does not match the grammar in thing ValueError if input does not match types TypeError if output classes have wrong syntax for __init__() GrammarTypeError if grammar contains an object of unkown type GrammarValueError if grammar contains an illegal cardinality value """ parser = Parser() parser.whitespace = whitespace parser.comment = comment parser.text = text parser.filename = filename parser.keep_feeble_things = keep_feeble_things t, r = parser.parse(text, thing) if t: raise parser.last_error return r def compose(thing, grammar=None, indent=" ", autoblank=True): """Compose text using thing with grammar. Arguments: thing thing containing other things with grammar grammar grammar to use to compose thing default: thing.grammar indent string to use to indent while composing default: four spaces autoblank add blanks if grammar would possibly be violated otherwise default: True Returns text Raises: ValueError if input does not match grammar GrammarTypeError if grammar contains an object of unkown type GrammarValueError if grammar contains an illegal cardinality value """ parser = Parser() parser.indent = indent parser.autoblank = autoblank return parser.compose(thing, grammar) def _issubclass(obj, cls): # If obj is not a class, just return False try: return issubclass(obj, cls) except TypeError: return False class Parser(object): """Offers parsing and composing capabilities. Implements a Packrat parser. Instance variables: whitespace regular expression to scan whitespace default: "(?m)\s+" comment grammar to parse comments last_error syntax error which ended parsing indent string to use to indent while composing default: four spaces indention_level level to indent to default: 0 text original text to parse; set for decorated syntax errors filename filename where text is origin from autoblank add blanks while composing if grammar would possibly be violated otherwise default: True keep_feeble_things put whitespace and comments into the .feeble_things attribute instead of dumping them """ def __init__(self): """Initialize instance variables to their defaults.""" self.whitespace = whitespace self.comment = None self.last_error = None self.indent = " " self.indention_level = 0 self.text = None self.filename = None self.autoblank = True self.keep_feeble_things = False self._memory = {} self._got_endl = True self._contiguous = False self._got_regex = False def clear_memory(self, thing=None): """Clear cache memory for packrat parsing. Arguments: thing thing for which cache memory is cleared, None if cache memory should be cleared for all things """ if thing is None: self._memory = {} else: try: del self._memory[id(thing)] except KeyError: pass def parse(self, text, thing, filename=None): """(Partially) parse text following thing as grammar and return the resulting things. Arguments: text text to parse thing grammar for things to parse filename filename where text is origin from Returns (text, result) with: text unparsed text result generated objects or SyntaxError object Raises: ValueError if input does not match types TypeError if output classes have wrong syntax for __init__() GrammarTypeError if grammar contains an object of unkown type GrammarValueError if grammar contains an illegal cardinality value """ self.text = text if filename: self.filename = filename pos = [1, 0] t, skip_result = self._skip(text, pos) t, r = self._parse(t, thing, pos) if type(r) == SyntaxError: raise r else: if self.keep_feeble_things and skip_result: try: r.feeble_things except AttributeError: try: r.feeble_things = skip_result except AttributeError: pass else: r.feeble_things = skip_result + r.feeble_things return t, r def _skip(self, text, pos=None): # Skip whitespace and comments from input text t2 = None t = text result = [] while t2 != t: if self.whitespace and not self._contiguous: t, r = self._parse(t, self.whitespace, pos) if self.keep_feeble_things and r and not isinstance(r, SyntaxError): result.append(r) t2 = t if self.comment: t, r = self._parse(t, self.comment, pos) if self.keep_feeble_things and r and not isinstance(r, SyntaxError): result.append(r) return t, result def generate_syntax_error(self, msg, pos): """Generate a syntax error construct with msg string with error message pos (lineNo, charInText) with positioning information """ result = SyntaxError(msg) if pos: result.lineno = pos[0] start = max(pos[1] - 19, 0) end = min(pos[1] + 20, len(self.text)) result.text = self.text[start:end] result.offset = pos[1] - start + 1 while "\n" in result.text: lf = result.text.find("\n") if lf >= result.offset: result.text = result.text[:result.offset-1] break; else: L = len(result.text) result.text = result.text[lf+1:] result.offset -= L - len(result.text) if self.filename: result.filename = self.filename return result def _parse(self, text, thing, pos=[1, 0]): # Parser implementation def update_pos(text, t, pos): # Calculate where we are in the text if not pos: return if text == t: return d_text = text[:len(text) - len(t)] pos[0] += d_text.count("\n") pos[1] += len(d_text) try: return self._memory[id(thing)][text] except: pass if pos: current_pos = tuple(pos) else: current_pos = None def syntax_error(msg): return self.generate_syntax_error(msg, pos) try: thing.parse except AttributeError: pass else: t, r = thing.parse(self, text, pos) if not isinstance(r, SyntaxError): t, skip_result = self._skip(t) update_pos(text, t, pos) if self.keep_feeble_things: try: r.feeble_things except AttributeError: try: r.feeble_things = skip_result except AttributeError: pass else: r.feeble_things += skip_result return t, r skip_result = None # terminal symbols if thing is None or type(thing) == FunctionType: result = text, None elif isinstance(thing, Symbol): m = type(thing).regex.match(text) if m and m.group(0) == str(thing): t, r = text[len(thing):], None t, skip_result = self._skip(t) result = t, r update_pos(text, t, pos) else: result = text, syntax_error("expecting " + repr(thing)) elif isinstance(thing, (RegEx, _RegEx)): m = thing.match(text) if m: t, r = text[len(m.group(0)):], m.group(0) t, skip_result = self._skip(t) result = t, r update_pos(text, t, pos) else: result = text, syntax_error("expecting match on " + thing.pattern) elif isinstance(thing, (str, Literal)): if text.startswith(str(thing)): t, r = text[len(str(thing)):], None t, skip_result = self._skip(t) result = t, r update_pos(text, t, pos) else: result = text, syntax_error("expecting " + repr(thing)) elif _issubclass(thing, Symbol): m = thing.regex.match(text) if m: result = None try: thing.grammar except AttributeError: pass else: if thing.grammar is None: pass elif isinstance(thing.grammar, Enum): if not m.group(0) in thing.grammar: result = text, syntax_error(repr(m.group(0)) + " is not a member of " + repr(thing.grammar)) else: raise GrammarValueError( "Symbol " + type(thing).__name__ + " has a grammar which is not an Enum: " + repr(thing.grammar)) if not result: t, r = text[len(m.group(0)):], thing(m.group(0)) t, skip_result = self._skip(t) result = t, r update_pos(text, t, pos) else: result = text, syntax_error("expecting " + thing.__name__) # non-terminal constructs elif isinstance(thing, attr.Class): t, r = self._parse(text, thing.thing, pos) if type(r) == SyntaxError: if thing.subtype == "Flag": result = t, attr(thing.name, False) else: result = text, r else: if thing.subtype == "Flag": result = t, attr(thing.name, True) else: result = t, attr(thing.name, r) elif isinstance(thing, (tuple, Concat)): if self.keep_feeble_things: L = List() else: L = [] t = text flag = True _min, _max = 1, 1 contiguous = self._contiguous omit = False for e in thing: if type(e) == int: if e < -6: raise GrammarValueError( "illegal cardinality value in grammar: " + str(e)) if e == -6: omit = True elif e == -5: self._contiguous = False t, skip_result = self._skip(t) if self.keep_feeble_things and skip_result: try: L.feeble_things except AttributeError: try: L.feeble_things = skip_result except AttributeError: pass else: L.feeble_things += skip_result elif e == -4: self._contiguous = True elif e == -3: pass elif e == -2: _min, _max = 1, maxsize elif e == -1: _min, _max = 0, maxsize elif e == 0: _min, _max = 0, 1 else: _min, _max = e, e continue for i in range(_max): t2, r = self._parse(t, e, pos) if type(r) == SyntaxError: i -= 1 break elif omit: t = t2 r = None else: t = t2 if r is not None: if type(r) is list: L.extend(r) else: L.append(r) if i+1 < _min: if type(r) != SyntaxError: r = syntax_error("expecting " + str(_min) + " occurrence(s) of " + repr(e) + " (" + str(i+1) + " found)") flag = False break _min, _max = 1, 1 omit = False if flag: if self._contiguous and not contiguous: self._contiguous = False t, skip_result = self._skip(t) if self.keep_feeble_things and skip_result: try: L.feeble_things except AttributeError: try: L.feeble_things = skip_result except AttributeError: pass else: L.feeble_things += skip_result if len(L) > 1 or how_many(thing) > 1: result = t, L elif not L: if not self.keep_feeble_things: return t, None try: L.feeble_things except AttributeError: return t, None if len(L.feeble_things): return t, L else: return t, None else: if self.keep_feeble_things: try: L.feeble_things except AttributeError: pass else: if L.feeble_things: try: L[0].feeble_things except AttributeError: try: L[0].feeble_things = L.feeble_things except AttributeError: pass else: L[0].feeble_things = L.feeble_things + \ L[0].feeble_things result = t, L[0] else: result = text, r self._contiguous = contiguous elif isinstance(thing, list): found = False for e in thing: try: t, r = self._parse(text, e, pos) if type(r) != SyntaxError: found = True break except GrammarValueError: raise except ValueError: pass if found: result = t, r else: result = text, syntax_error("expecting one of " + repr(thing)) elif _issubclass(thing, Namespace): t, r = self._parse(text, thing.grammar, pos) if type(r) != SyntaxError: if isinstance(r, thing): result = t, r else: obj = thing() for e in r: if type(e) == attr.Class: setattr(obj, e.name, e.thing) else: try: obj[e.name] = e except AttributeError: obj[None] = e try: obj.polish() except AttributeError: pass result = t, obj else: result = text, r elif _issubclass(thing, list): try: g = thing.grammar except AttributeError: g = csl(Symbol) t, r = self._parse(text, g, pos) if type(r) != SyntaxError: if isinstance(r, thing): result = t, r else: obj = thing() if type(r) == list: for e in r: if type(e) == attr.Class: setattr(obj, e.name, e.thing) else: obj.append(e) else: if type(r) == attr.Class: setattr(obj, r.name, r.thing) else: obj.append(r) try: obj.polish() except AttributeError: pass result = t, obj else: result = text, r elif _issubclass(thing, object): try: g = thing.grammar except AttributeError: g = word t, r = self._parse(text, g, pos) if type(r) != SyntaxError: if isinstance(r, thing): result = t, r else: try: if type(r) == list: L, a = [], [] for e in r: if type(e) == attr.Class: a.append(e) else: L.append(e) if L: lg = how_many(thing.grammar) if lg == 0: obj = None elif lg == 1: obj = thing(L[0]) else: obj = thing(L) else: obj = thing() for e in a: setattr(obj, e.name, e.thing) else: if type(r) == attr.Class: obj = thing() setattr(obj, r.name, r.thing) else: if r is None: obj = thing() else: obj = thing(r) except TypeError as t: L = list(t.args) L[0] = thing.__name__ + ": " + L[0] t.args = tuple(L) raise t try: obj.polish() except AttributeError: pass result = t, obj else: result = text, r else: raise GrammarTypeError("in grammar: " + repr(thing)) if pos: if type(result[1]) == SyntaxError: pos[0] = current_pos[0] pos[1] = current_pos[1] self.last_error = result[1] else: try: result[1].position_in_text = current_pos except AttributeError: pass if self.keep_feeble_things and skip_result: try: result[1].feeble_things except AttributeError: try: result[1].feeble_things = skip_result except AttributeError: pass else: result[1].feeble_things += skip_result try: self._memory[id(thing)] except KeyError: self._memory[id(thing)] = { text: result } else: self._memory[id(thing)][text] = result return result def compose(self, thing, grammar=None, attr_of=None): """Compose text using thing with grammar. Arguments: thing thing containing other things with grammar grammar grammar to use for composing thing default: type(thing).grammar attr_of if composing the value of an attribute, this is a reference to the thing where this value is an attribute of; None if this is not an attribute value Returns text Raises: ValueError if thing does not match grammar GrammarTypeError if grammar contains an object of unkown type GrammarValueError if grammar contains an illegal cardinality value """ if __debug__: # make sure that we're not having this typing error compose = None def terminal_indent(do_blank=False): self._got_regex = False if self._got_endl: result = self.indent * self.indention_level self._got_endl = False return result elif do_blank and self.whitespace: if self._contiguous or not self.autoblank: return "" else: return blank(thing, self) else: return "" try: thing.compose except AttributeError: pass else: return terminal_indent() + thing.compose(self, attr_of=attr_of) if not grammar: try: grammar = type(thing).grammar except AttributeError: if isinstance(thing, Symbol): grammar = type(thing).regex elif isinstance(thing, list): grammar = csl(Symbol) else: grammar = word else: if isinstance(thing, Symbol): grammar = type(thing).regex if grammar is None: result = "" elif type(grammar) == FunctionType: if grammar == endl: result = endl(thing, self) self._got_endl = True elif grammar == blank: result = terminal_indent() + blank(thing, self) else: result = self.compose(thing, grammar(thing, self)) elif isinstance(grammar, (RegEx, _RegEx)): m = grammar.match(str(thing)) if m: result = terminal_indent(do_blank=self._got_regex) + str(thing) else: raise ValueError(repr(thing) + " does not match " + grammar.pattern) self._got_regex = True elif isinstance(grammar, Keyword): result = terminal_indent(do_blank=self._got_regex) + str(grammar) self._got_regex = True elif isinstance(grammar, (str, int, Literal)): result = terminal_indent() + str(grammar) elif isinstance(grammar, Enum): if thing in grammar: if isinstance(thing, Keyword): result = terminal_indent(do_blank=self._got_regex) + str(thing) self._got_regex = True else: result = terminal_indent() + str(thing) else: raise ValueError(repr(thing) + " is not in " + repr(grammar)) elif isinstance(grammar, attr.Class): if grammar.subtype == "Flag": if getattr(thing, grammar.name): result = self.compose(thing, grammar.thing, attr_of=thing) else: result = terminal_indent() else: result = self.compose(getattr(thing, grammar.name), grammar.thing, attr_of=thing) elif isinstance(grammar, (tuple, list)): def compose_tuple(thing, things, grammar): text = [] multiple, card = 1, 1 indenting = 0 if isinstance(grammar, (tuple, Concat)): # concatenation for g in grammar: if g is None: multiple = 1 if self.indenting: self.indention_level -= indenting self.indenting = 0 elif type(g) == int: if g < -6: raise GrammarValueError( "illegal cardinality value in grammar: " + str(g)) card = g if g in (-2, -1): multiple = maxsize elif g in (-5, -4, -3, 0): multiple = 1 if g == -3: self.indention_level += 1 indenting += 1 elif g == -6: multiple = 0 else: multiple = g else: passes = 0 try: for r in range(multiple): if isinstance(g, (str, Symbol, Literal)): text.append(self.compose(thing, g)) if card < 1: break elif isinstance(g, FunctionType): text.append(self.compose(thing, g)) if card < 1: break elif isinstance(g, attr.Class): text.append(self.compose(getattr(thing, g.name), g.thing, attr_of=thing)) if card < 1: break elif isinstance(g, (tuple, list)): text.append(compose_tuple(thing, things, g)) if not things: break else: text.append(self.compose(things.pop(), g)) passes += 1 except (IndexError, ValueError): if card == -2: if passes < 1: raise ValueError(repr(g) + " has to be there at least once") elif card > 0: if passes < multiple: raise ValueError(repr(g) + " has to be there exactly " + str(multiple) + " times") multiple = 1 if indenting: self.indention_level -= indenting indenting = 0 return ''.join(text) else: # options for g in grammar: try: if isinstance(g, (str, Symbol, Literal)): return self.compose(thing, g) elif isinstance(g, FunctionType): return self.compose(thing, g) elif isinstance(g, attr.Class): return self.compose(getattr(thing, g.name), g.thing) elif isinstance(g, (tuple, list)): return compose_tuple(thing, things, g) else: try: text = self.compose(things[-1], g) except Exception as e: raise e things.pop() return text except GrammarTypeError: raise except AttributeError: pass except KeyError: pass except TypeError: pass except ValueError: pass raise ValueError("none of the options in " + repr(grammar) + " found") if isinstance(thing, Namespace): L = [e for e in thing.values()] L.reverse() elif isinstance(thing, list): L = thing[:] L.reverse() else: L = [thing] result = compose_tuple(thing, L, grammar) elif _issubclass(grammar, object): if isinstance(thing, grammar): try: grammar.grammar except AttributeError: if _issubclass(grammar, Symbol): result = self.compose(thing, grammar.regex) else: result = self.compose(thing) else: result = self.compose(thing, grammar.grammar) else: if grammar == Symbol and isinstance(thing, str): result = self.compose(str(thing), Symbol.regex) else: raise ValueError(repr(thing) + " is not a " + repr(grammar)) else: raise GrammarTypeError("in grammar: " + repr(grammar)) return result pyPEG2-2.15.2/pypeg2/test/0000755000175000017500000000000012605271064016145 5ustar florianflorian00000000000000pyPEG2-2.15.2/pypeg2/test/test_pyPEG2.py0000644000175000017500000003004612600574741020632 0ustar florianflorian00000000000000from __future__ import unicode_literals import unittest import pypeg2 import re class GrammarTestCase1(unittest.TestCase): def runTest(self): x = pypeg2.some("thing") y = pypeg2.maybe_some("thing") z = pypeg2.optional("hello", "world") self.assertEqual(x, (-2, "thing")) self.assertEqual(y, (-1, "thing")) self.assertEqual(z, (0, ("hello", "world"))) class GrammarTestCase2(unittest.TestCase): def runTest(self): L1 = pypeg2.csl("thing") L2 = pypeg2.csl("hello", "world") self.assertEqual(L1, ("thing", -1, (",", pypeg2.blank, "thing"))) self.assertEqual(L2, ("hello", "world", -1, (",", pypeg2.blank, "hello", "world"))) class ParserTestCase(unittest.TestCase): pass class TypeErrorTestCase(ParserTestCase): def runTest(self): parser = pypeg2.Parser() with self.assertRaises(pypeg2.GrammarTypeError): parser.parse("hello, world", 23) class ParseTerminalStringTestCase1(ParserTestCase): def runTest(self): parser = pypeg2.Parser() r = parser.parse("hello, world", "hello") self.assertEqual(r, (", world", None)) class ParseTerminalStringTestCase2(ParserTestCase): def runTest(self): parser = pypeg2.Parser() with self.assertRaises(SyntaxError): r = parser.parse("hello, world", "world") class ParseKeywordTestCase1(ParserTestCase): def runTest(self): parser = pypeg2.Parser() r = parser.parse("hallo, world", pypeg2.K("hallo")) self.assertEqual(r, (", world", None)) pypeg2.Keyword.table[pypeg2.K("hallo")] class ParseKeywordTestCase2(ParserTestCase): def runTest(self): parser = pypeg2.Parser() with self.assertRaises(SyntaxError): r = parser.parse("hello, world", pypeg2.K("werld")) pypeg2.Keyword.table[pypeg2.K("werld")] class ParseKeywordTestCase3(ParserTestCase): def runTest(self): parser = pypeg2.Parser() with self.assertRaises(SyntaxError): r = parser.parse(", world", pypeg2.K("hallo")) pypeg2.Keyword.table[pypeg2.K("hallo")] class ParseRegexTestCase1(ParserTestCase): def runTest(self): parser = pypeg2.Parser() r = parser.parse("hello, world", re.compile(r"h.[lx]l\S", re.U)) self.assertEqual(r, (", world", "hello")) class ParseRegexTestCase2(ParserTestCase): def runTest(self): parser = pypeg2.Parser() with self.assertRaises(SyntaxError): r = parser.parse("hello, world", re.compile(r"\d", re.U)) class ParseSymbolTestCase1(ParserTestCase): def runTest(self): parser = pypeg2.Parser() r = parser.parse("hello, world", pypeg2.Symbol) self.assertEqual(r, (", world", pypeg2.Symbol("hello"))) class ParseSymbolTestCase2(ParserTestCase): def runTest(self): parser = pypeg2.Parser() with self.assertRaises(SyntaxError): r = parser.parse(", world", pypeg2.Symbol) class ParseAttributeTestCase(ParserTestCase): def runTest(self): parser = pypeg2.Parser() r = parser.parse("hello, world", pypeg2.attr("some", pypeg2.Symbol)) self.assertEqual( r, ( ', world', pypeg2.attr.Class(name='some', thing=pypeg2.Symbol('hello'), subtype=None) ) ) class ParseTupleTestCase1(ParserTestCase): def runTest(self): parser = pypeg2.Parser() r = parser.parse("hello, world", (pypeg2.name(), ",", pypeg2.name())) self.assertEqual( r, ( '', [ pypeg2.attr.Class(name='name', thing=pypeg2.Symbol('hello'), subtype=None), pypeg2.attr.Class(name='name', thing=pypeg2.Symbol('world'), subtype=None) ] ) ) class ParseTupleTestCase2(ParserTestCase): def runTest(self): parser = pypeg2.Parser() with self.assertRaises(ValueError): parser.parse("hello, world", (-23, "x")) class ParseSomeTestCase1(ParserTestCase): def runTest(self): parser = pypeg2.Parser() r = parser.parse("hello, world", pypeg2.some(re.compile(r"\w", re.U))) self.assertEqual(r, (', world', ['h', 'e', 'l', 'l', 'o'])) class ParseSomeTestCase2(ParserTestCase): def runTest(self): parser = pypeg2.Parser() with self.assertRaises(SyntaxError): r = parser.parse("hello, world", pypeg2.some(re.compile(r"\d", re.U))) class ParseMaybeSomeTestCase1(ParserTestCase): def runTest(self): parser = pypeg2.Parser() r = parser.parse("hello, world", pypeg2.maybe_some(re.compile(r"\w", re.U))) self.assertEqual(r, (', world', ['h', 'e', 'l', 'l', 'o'])) class ParseMaybeSomeTestCase2(ParserTestCase): def runTest(self): parser = pypeg2.Parser() r = parser.parse("hello, world", pypeg2.maybe_some(re.compile(r"\d", re.U))) self.assertEqual(r, ('hello, world', [])) class ParseCardinalityTestCase1(ParserTestCase): def runTest(self): parser = pypeg2.Parser() r = parser.parse("hello, world", (5, re.compile(r"\w", re.U))) self.assertEqual(r, (', world', ['h', 'e', 'l', 'l', 'o'])) class ParseCardinalityTestCase2(ParserTestCase): def runTest(self): parser = pypeg2.Parser() with self.assertRaises(SyntaxError): r = parser.parse("hello, world", (6, re.compile(r"\w", re.U))) class ParseOptionsTestCase1(ParserTestCase): def runTest(self): parser = pypeg2.Parser() r = parser.parse("hello, world", [re.compile(r"\d+", re.U), pypeg2.word]) self.assertEqual(r, (', world', 'hello')) class ParseOptionsTestCase2(ParserTestCase): def runTest(self): parser = pypeg2.Parser() with self.assertRaises(SyntaxError): r = parser.parse("hello, world", ["x", "y"]) class ParseListTestCase1(ParserTestCase): class Chars(pypeg2.List): grammar = pypeg2.some(re.compile(r"\w", re.U)), pypeg2.attr("comma", ",") def runTest(self): parser = pypeg2.Parser() r = parser.parse("hello, world", ParseListTestCase1.Chars) self.assertEqual(r, ( 'world', ParseListTestCase1.Chars(['h', 'e', 'l', 'l', 'o'])) ) self.assertEqual(r[1].comma, None) class ParseListTestCase2(ParserTestCase): class Digits(pypeg2.List): grammar = pypeg2.some(re.compile(r"\d", re.U)) def runTest(self): parser = pypeg2.Parser() with self.assertRaises(SyntaxError): r = parser.parse("hello, world", ParseListTestCase2.Digits) class ParseClassTestCase1(ParserTestCase): class Word(str): grammar = pypeg2.word def runTest(self): parser = pypeg2.Parser() r = parser.parse("hello, world", ParseClassTestCase1.Word) self.assertEqual(type(r[1]), ParseClassTestCase1.Word) self.assertEqual(r[1], "hello") class ParseClassTestCase2(ParserTestCase): class Word(str): grammar = pypeg2.word, pypeg2.attr("comma", ",") def __init__(self, data): self.polished = False def polish(self): self.polished = True def runTest(self): parser = pypeg2.Parser() r = parser.parse("hello, world", ParseClassTestCase2.Word) self.assertEqual(type(r[1]), ParseClassTestCase2.Word) self.assertEqual(r[1], "hello") self.assertTrue(r[1].polished) self.assertEqual(r[1].comma, None) class Parm(object): grammar = pypeg2.name(), "=", pypeg2.attr("value", int) class Parms(pypeg2.Namespace): grammar = (pypeg2.csl(Parm), pypeg2.flag("fullstop", "."), pypeg2.flag("semicolon", ";")) class ParseNLTestCase1(ParserTestCase): def runTest(self): parser = pypeg2.Parser() parser.comment = pypeg2.comment_c t, parms = parser.parse("x=23 /* Illuminati */, y=42 /* the answer */;", Parms) self.assertEqual(parms["x"].value, 23) self.assertEqual(parms["y"].value, 42) self.assertEqual(parms.fullstop, False) self.assertEqual(parms.semicolon, True) class EnumTest(pypeg2.Symbol): grammar = pypeg2.Enum( pypeg2.K("int"), pypeg2.K("long") ) class ParseEnumTestCase1(ParserTestCase): def runTest(self): parser = pypeg2.Parser() t, r = parser.parse("int", EnumTest) self.assertEqual(r, "int") class ParseEnumTestCase2(ParserTestCase): def runTest(self): parser = pypeg2.Parser() with self.assertRaises(SyntaxError): t, r = parser.parse("float", EnumTest) class ParseInvisibleTestCase(ParserTestCase): class C1(str): grammar = pypeg2.ignore("!"), pypeg2.restline def runTest(self): r = pypeg2.parse("!all", type(self).C1) self.assertEqual(str(r), "all") self.assertEqual(r._ignore1, None) class ParseOmitTestCase(ParserTestCase): def runTest(self): r = pypeg2.parse("hello", pypeg2.omit(pypeg2.word)) self.assertEqual(r, None) class ComposeTestCase(unittest.TestCase): pass class ComposeString(object): grammar = "something" class ComposeStringTestCase(ComposeTestCase): def runTest(self): x = ComposeString() t = pypeg2.compose(x) self.assertEqual(t, "something") class ComposeRegex(str): grammar = pypeg2.word class ComposeRegexTestCase(ComposeTestCase): def runTest(self): x = ComposeRegex("something") t = pypeg2.compose(x) self.assertEqual(t, "something") class ComposeKeyword(object): grammar = pypeg2.K("hallo") class ComposeKeywordTestCase(ComposeTestCase): def runTest(self): x = ComposeKeyword() t = pypeg2.compose(x) self.assertEqual(t, "hallo") class ComposeSymbol(pypeg2.Symbol): pass class ComposeSymbolTestCase(ComposeTestCase): def runTest(self): x = ComposeSymbol("hello") t = pypeg2.compose(x) self.assertEqual(t, "hello") class ComposeAttribute(object): grammar = pypeg2.name() class ComposeAttributeTestCase(ComposeTestCase): def runTest(self): x = ComposeAttribute() x.name = pypeg2.Symbol("hello") t = pypeg2.compose(x) self.assertEqual(t, "hello") class ComposeFlag(object): grammar = pypeg2.flag("mark", "MARK") class ComposeFlagTestCase1(ComposeTestCase): def runTest(self): x = ComposeFlag() x.mark = True t = pypeg2.compose(x) self.assertEqual(t, "MARK") class ComposeFlagTestCase2(ComposeTestCase): def runTest(self): x = ComposeFlag() x.mark = False t = pypeg2.compose(x) self.assertEqual(t, "") class ComposeTuple(pypeg2.List): grammar = pypeg2.csl(pypeg2.word) class ComposeTupleTestCase(ComposeTestCase): def runTest(self): x = ComposeTuple(["hello", "world"]) t = pypeg2.compose(x) self.assertEqual(t, "hello, world") class ComposeList(str): grammar = [ re.compile(r"\d+", re.U), pypeg2.word ] class ComposeListTestCase(ComposeTestCase): def runTest(self): x = ComposeList("hello") t = pypeg2.compose(x) self.assertEqual(t, "hello") class ComposeIntTestCase(ComposeTestCase): def runTest(self): x = pypeg2.compose(23, int) self.assertEqual(x, "23") class C2(str): grammar = pypeg2.attr("some", "!"), pypeg2.restline class ComposeInvisibleTestCase(ParserTestCase): def runTest(self): r = pypeg2.parse("!all", C2) self.assertEqual(str(r), "all") self.assertEqual(r.some, None) t = pypeg2.compose(r, C2) self.assertEqual(t, "!all") class ComposeOmitTestCase(ParserTestCase): def runTest(self): t = pypeg2.compose('hello', pypeg2.omit(pypeg2.word)) self.assertEqual(t, "") class CslPython32Compatibility(ParserTestCase): def runTest(self): try: g = eval("pypeg2.csl('hello', 'world', separator=';')") except TypeError: return self.assertEqual(g, ("hello", "world", -1, (";", pypeg2.blank, "hello", "world"))) if __name__ == '__main__': unittest.main() pyPEG2-2.15.2/pypeg2/test/__init__.py0000644000175000017500000000000012600574741020247 0ustar florianflorian00000000000000pyPEG2-2.15.2/pypeg2/test/test_xmlast.py0000644000175000017500000000724312600574741021077 0ustar florianflorian00000000000000from __future__ import unicode_literals try: str = unicode except NameError: pass import unittest import re, sys import pypeg2, pypeg2.xmlast class Another(object): grammar = pypeg2.name(), "=", pypeg2.attr("value") class Something(pypeg2.List): grammar = pypeg2.name(), pypeg2.some(Another), str class Thing2etreeTestCase1(unittest.TestCase): def runTest(self): s = Something() s.name = "hello" a1 = Another() a1.name = "bla" a1.value = "blub" a2 = Another() a2.name = "foo" a2.value = "bar" s.append(a1) s.append(a2) s.append("hello, world") root = pypeg2.xmlast.create_tree(s) self.assertEqual(root.tag, "Something") self.assertEqual(root.attrib["name"], "hello") try: import lxml except ImportError: self.assertEqual(pypeg2.xmlast.etree.tostring(root), b'hello, world') else: self.assertEqual(pypeg2.xmlast.etree.tostring(root), b'hello, world') class SomethingElse(pypeg2.Namespace): grammar = pypeg2.name(), pypeg2.some(Another) class Thing2etreeTestCase2(unittest.TestCase): def runTest(self): s = SomethingElse() s.name = "hello" a1 = Another() a1.name = "bla" a1.value = "blub" a2 = Another() a2.name = "foo" a2.value = "bar" s[a1.name] = a1 s[a2.name] = a2 root = pypeg2.xmlast.create_tree(s) self.assertEqual(root.tag, "SomethingElse") self.assertEqual(root.attrib["name"], "hello") try: import lxml except ImportError: self.assertEqual(pypeg2.xmlast.etree.tostring(root), b'') else: self.assertEqual(pypeg2.xmlast.etree.tostring(root), b'') class Thing2XMLTestCase3(unittest.TestCase): class C1(str): grammar = pypeg2.ignore("!"), pypeg2.restline def runTest(self): r = pypeg2.parse("!all", type(self).C1) xml = pypeg2.xmlast.thing2xml(r) self.assertEqual(xml, b"all") class Key(str): grammar = pypeg2.name(), "=", pypeg2.restline class XML2ThingTestCase1(unittest.TestCase): def runTest(self): xml = b'bar' thing = pypeg2.xmlast.xml2thing(xml, globals()) self.assertEqual(thing.name, pypeg2.Symbol("foo")) self.assertEqual(thing, "bar") class Instruction(str): pass class Parameter(object): grammar = pypeg2.attr("typing", str), pypeg2.name() class Parameters(pypeg2.Namespace): grammar = pypeg2.optional(pypeg2.csl(Parameter)) class Function(pypeg2.List): grammar = pypeg2.name(), pypeg2.attr("parms", Parameters), "{", pypeg2.maybe_some(Instruction), "}" class XML2ThingTestCase2(unittest.TestCase): def runTest(self): xml = b'do_this' f = pypeg2.xmlast.xml2thing(xml, globals()) self.assertEqual(f.name, pypeg2.Symbol("f")) self.assertEqual(f.parms["a"].name, pypeg2.Symbol("a")) self.assertEqual(f.parms["a"].typing, pypeg2.Symbol("int")) self.assertEqual(f[0], "do_this") if __name__ == '__main__': unittest.main()