===== untokenize-0.1.1/.gitignore =====

.*.swp
*.pyc
MANIFEST
README.html
__pycache__/
.travis-solo/
build/
dist/
htmlcov/

===== untokenize-0.1.1/.travis.yml =====

language: python

python:
  - "2.6"
  - "2.7"
  - "pypy"
  - "3.2"
  - "3.3"

install:
  - if [ "$TRAVIS_PYTHON_VERSION" == "2.6" ]; then pip --quiet install argparse unittest2; fi
  - python setup.py --quiet install

script:
  - python test_untokenize.py
  - python test_acid.py .

===== untokenize-0.1.1/MANIFEST.in =====

include MANIFEST.in
include README.rst
include test_untokenize.py
exclude .travis.yml
exclude Makefile
exclude test_acid.py

===== untokenize-0.1.1/Makefile =====

check:
	pep8 untokenize.py setup.py test_acid.py
	pep257 untokenize.py setup.py test_acid.py
	pylint \
		--rcfile=/dev/null \
		--reports=no \
		--disable=invalid-name \
		untokenize.py setup.py test_acid.py
	check-manifest
	python setup.py --long-description | rst2html --strict > /dev/null
	scspell untokenize.py setup.py test_untokenize.py test_acid.py README.rst

coverage:
	@rm -f .coverage
	@coverage run test_untokenize.py
	@coverage report
	@coverage html
	@rm -f .coverage
	@python -m webbrowser -n "file://${PWD}/htmlcov/index.html"

mutant:
	@mut.py -t untokenize -u test_untokenize -mc

readme:
	@restview --long-description --strict

===== untokenize-0.1.1/README.rst =====

============
 untokenize
============

*untokenize* transforms tokens into source code.

Unlike the standard library's ``tokenize.untokenize()``, it preserves the
original whitespace between tokens.

.. image:: https://travis-ci.org/myint/untokenize.png?branch=master
    :target: https://travis-ci.org/myint/untokenize
    :alt: Build status


Usage
=====

.. code-block:: python

    import untokenize
    source_code = untokenize.untokenize(tokens)


Tests
=====

To run the unit tests::

    $ ./test_untokenize.py

There is also an acid test. It tokenizes Python code and confirms that the
code generated by untokenize exactly matches the original source code from
before tokenization::

    $ ./test_acid.py
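
The round-trip property that the acid test checks can be sketched in a few
lines; the ``source_code`` string below is only an illustrative stand-in for
the contents of a real file:

.. code-block:: python

    import io
    import tokenize

    import untokenize

    source_code = 'x  =  1  # irregular spacing\n'
    tokens = tokenize.generate_tokens(io.StringIO(source_code).readline)

    # The spacing between tokens survives the round trip unchanged.
    assert untokenize.untokenize(tokens) == source_code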

===== untokenize-0.1.1/setup.py =====

#!/usr/bin/env python

"""Setup for untokenize."""

import ast

from distutils import core


def version():
    """Return version string."""
    with open('untokenize.py') as input_file:
        for line in input_file:
            if line.startswith('__version__'):
                return ast.parse(line).body[0].value.s


with open('README.rst') as readme:
    core.setup(name='untokenize',
               version=version(),
               description='Transforms tokens into original source code '
                           '(while preserving whitespace).',
               long_description=readme.read(),
               license='Expat License',
               author='Steven Myint',
               url='https://github.com/myint/untokenize',
               classifiers=['Intended Audience :: Developers',
                            'Environment :: Console',
                            'Programming Language :: Python :: 2.6',
                            'Programming Language :: Python :: 2.7',
                            'Programming Language :: Python :: 3',
                            'License :: OSI Approved :: MIT License'],
               keywords='tokenize,untokenize,transform,generate',
               py_modules=['untokenize'])

===== untokenize-0.1.1/test_acid.py =====

#!/usr/bin/env python

"""Test that untokenize always generates the expected output.

That is, the output of
untokenize.untokenize(tokenize.generate_tokens(file_input.readline))
is exactly equal to the input file.

"""

from __future__ import print_function
from __future__ import unicode_literals

import io
import os
import sys
import tokenize

import untokenize


try:
    unicode
except NameError:
    unicode = str


def open_with_encoding(filename, encoding, mode='r'):
    """Return opened file with a specific encoding."""
    return io.open(filename, mode=mode, encoding=encoding,
                   newline='')  # Preserve line endings


def detect_encoding(filename):
    """Return file encoding."""
    try:
        with open(filename, 'rb') as input_file:
            from lib2to3.pgen2 import tokenize as lib2to3_tokenize
            encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]

            # Check for correctness of encoding.
            with open_with_encoding(filename, encoding) as input_file:
                input_file.read()

        return encoding
    except (SyntaxError, LookupError, UnicodeDecodeError):
        return 'latin-1'


def diff(before, after):
    """Return diff of two files."""
    import difflib
    return ''.join(difflib.unified_diff(
        before.splitlines(True),
        after.splitlines(True),
        lineterm='\n'))


def run(filename):
    """Check untokenize with file.

    Return True on success.

    """
    with open_with_encoding(filename,
                            encoding=detect_encoding(filename)) as input_file:
        source_code = input_file.read()

    # Handle files with trailing whitespace, but no final newline.
    # tokenize.generate_tokens() will not report the trailing whitespace in
    # such a case.
    if source_code.endswith((' ', '\t')):
        source_code = source_code.rstrip()

    string_io = io.StringIO(source_code)
    generated = untokenize.untokenize(
        tokenize.generate_tokens(string_io.readline))

    if source_code == generated:
        return True
    else:
        print('untokenize failed on ' + filename, file=sys.stderr)
        print(diff(source_code, generated), file=sys.stderr)


def process_args():
    """Return processed arguments (options and positional arguments)."""
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('files', nargs='*', help='files to test')
    return parser.parse_args()


def check(args):
    """Run test recursively on directory of files.

    Return False if untokenize's output does not exactly match the input
    for any file.
""" if args.files: dir_paths = args.files else: dir_paths = [path for path in sys.path if os.path.isdir(path)] filenames = dir_paths completed_filenames = set() while filenames: try: name = os.path.realpath(filenames.pop(0)) if not os.path.exists(name): # Invalid symlink. continue if name in completed_filenames: print('---> Skipping previously tested ' + name, file=sys.stderr) continue else: completed_filenames.update(name) if os.path.isdir(name): for root, directories, children in os.walk(unicode(name)): filenames += [os.path.join(root, f) for f in children if f.endswith('.py') and not f.startswith('.')] directories[:] = [d for d in directories if not d.startswith('.')] else: print('---> Testing with ' + name, file=sys.stderr) if not run(os.path.join(name)): return False except (IndentationError, tokenize.TokenError, UnicodeDecodeError, UnicodeEncodeError) as exception: print('---> Skipping bad file {0} ({1})'.format(name, exception), file=sys.stderr) continue return True def main(): """Run main.""" return 0 if check(process_args()) else 1 if __name__ == '__main__': try: sys.exit(main()) except KeyboardInterrupt: sys.exit(1) untokenize-0.1.1/test_untokenize.py000077500000000000000000000031521227545556400175330ustar00rootroot00000000000000#!/usr/bin/env python """Test suite for untokenize.""" from __future__ import (absolute_import, division, print_function, unicode_literals) import io import sys import tokenize if sys.version_info < (2, 7): import unittest2 as unittest else: import unittest import untokenize class TestUnits(unittest.TestCase): def check(self, source_code): string_io = io.StringIO(source_code) self.assertEqual( source_code, untokenize.untokenize( tokenize.generate_tokens(string_io.readline))) def test_untokenize(self): self.check(''' def zap(): """Hello zap. """; 1 x \t= \t\t \t 1 ''') def test_untokenize_with_tab_indentation(self): self.check(""" if True: \tdef zap(): \t\tx \t= \t\t \t 1 """) def test_untokenize_with_backslash_in_comment(self): self.check(r''' def foo(): """Hello foo.""" def zap(): bar(1) # \ ''') def test_untokenize_with_escaped_newline(self): self.check(r'''def foo(): """Hello foo.""" x = \ 1 ''') def test_untokenize_with_empty_string(self): self.check('') @unittest.skipIf(sys.version_info < (3, 0), 'We are testing tokenize.ENCODING in Python 3') def test_untokenize_with_encoding(self): source = '0' bytes_io = io.BytesIO(source.encode('us-ascii')) self.assertEqual( source, untokenize.untokenize(tokenize.tokenize(bytes_io.readline))) if __name__ == '__main__': unittest.main() untokenize-0.1.1/untokenize.py000066400000000000000000000051121227545556400164670ustar00rootroot00000000000000# Copyright (C) 2013-2014 Steven Myint # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the # "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. 
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

"""Transform tokens into original source code."""

import tokenize


__version__ = '0.1.1'


def untokenize(tokens):
    """Return source code based on tokens.

    This is like tokenize.untokenize(), but it preserves spacing between
    tokens. So if the original source code had multiple spaces between
    some tokens or if escaped newlines were used, those things will be
    reflected by untokenize().

    """
    text = ''
    previous_line = ''
    last_row = 0
    last_column = -1
    last_non_whitespace_token_type = None

    for (token_type, token_string, start, end, line) in tokens:
        if hasattr(tokenize, 'ENCODING') and token_type == tokenize.ENCODING:
            continue

        (start_row, start_column) = start
        (end_row, end_column) = end

        # Preserve escaped newlines.
        if (
            last_non_whitespace_token_type != tokenize.COMMENT and
            start_row > last_row and
            previous_line.endswith(('\\\n', '\\\r\n', '\\\r'))
        ):
            text += previous_line[len(previous_line.rstrip(' \t\n\r\\')):]

        # Preserve spacing.
        if start_row > last_row:
            last_column = 0
        if start_column > last_column:
            text += line[last_column:start_column]

        text += token_string

        previous_line = line
        last_row = end_row
        last_column = end_column

        if token_type not in [tokenize.INDENT,
                              tokenize.NEWLINE,
                              tokenize.NL]:
            last_non_whitespace_token_type = token_type

    return text
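
A short usage sketch for the module above. The example source string is
arbitrary and chosen only to illustrate what the docstring describes
(uneven spacing and an escaped newline); it is not part of the package:

    import io
    import tokenize

    import untokenize

    # Uneven spacing plus an escaped newline, neither of which the plain
    # tokenize.untokenize() is guaranteed to reproduce.
    source = 'x  =  \\\n    1\n'

    tokens = tokenize.generate_tokens(io.StringIO(source).readline)
    result = untokenize.untokenize(tokens)

    # The reconstructed text matches the original character for character.
    assert result == source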