binaryornot-0.2.0/0000755000076500000240000000000012217613330014653 5ustar audreyrstaff00000000000000binaryornot-0.2.0/AUTHORS.rst0000644000076500000240000000035212217612202016527 0ustar audreyrstaff00000000000000======= Credits ======= Development Lead ---------------- * Audrey Roy (`@audreyr`_) Contributors ------------ * Nick Coghlan (`@ncoghlan`_) .. _`@audreyr`: https://github.com/audreyr .. _`@ncoghlan`: https://github.com/ncoghlan binaryornot-0.2.0/binaryornot/0000755000076500000240000000000012217613330017221 5ustar audreyrstaff00000000000000binaryornot-0.2.0/binaryornot/__init__.py0000644000076500000240000000012012217612606021330 0ustar audreyrstaff00000000000000__author__ = 'Audrey Roy' __email__ = 'audreyr@gmail.com' __version__ = '0.2.0' binaryornot-0.2.0/binaryornot/check.py0000755000076500000240000000064112217611401020651 0ustar audreyrstaff00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- """ binaryornot.check ----------------- Main code for checking if a file is binary or text. """ from .helpers import get_starting_chunk, is_binary_string def is_binary(filename): """ :param filename: File to check. :returns: True if it's a binary file, otherwise False. """ chunk = get_starting_chunk(filename) return is_binary_string(chunk) binaryornot-0.2.0/binaryornot/helpers.py0000755000076500000240000000364212217611401021242 0ustar audreyrstaff00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- """ binaryornot.helpers ------------------- Helper utilities used by BinaryOrNot. """ def print_as_hex(s): """ Print a string as hex bytes. """ print(":".join("{0:x}".format(ord(c)) for c in s)) def get_starting_chunk(filename): """ :param filename: File to open and get the first little chunk of. :returns: Starting chunk of bytes. """ # Ensure we open the file in binary mode with open(filename, 'rb') as f: chunk = f.read(1024) return chunk def is_binary_string(bytes_to_check): """ :param bytes: A chunk of bytes to check. :returns: True if appears to be a binary, otherwise False. """ # Uses a simplified version of the Perl detection algorithm, # based roughly on Eli Bendersky's translation to Python: # http://eli.thegreenplace.net/2011/10/19/perls-guess-if-file-is-text-or-binary-implemented-in-python/ # This is biased slightly more in favour of deeming files as text # files than the Perl algorithm, since all ASCII compatible character # sets are accepted as text, not just utf-8 # Empty files are considered text files if not bytes_to_check: return False # Check for NUL bytes first if b'\x00' in bytes_to_check: return True # Now check for a high percentage of ASCII control characters printable_extended_ascii = b'\n\r\t\f\b' if bytes is str: # Python 2 means we need to invoke chr() explicitly printable_extended_ascii += b''.join(map(chr, range(32, 256))) else: # Python 3 means bytes accepts integer input directly printable_extended_ascii += bytes(range(32, 256)) # Binary if control chars are > 30% of the string control_chars = bytes_to_check.translate(None, printable_extended_ascii) nontext_ratio = float(len(control_chars)) / float(len(bytes_to_check)) return nontext_ratio > 0.3 binaryornot-0.2.0/binaryornot.egg-info/0000755000076500000240000000000012217613330020713 5ustar audreyrstaff00000000000000binaryornot-0.2.0/binaryornot.egg-info/dependency_links.txt0000644000076500000240000000000112217613330024761 0ustar audreyrstaff00000000000000 binaryornot-0.2.0/binaryornot.egg-info/not-zip-safe0000644000076500000240000000000112217613256023150 0ustar audreyrstaff00000000000000 binaryornot-0.2.0/binaryornot.egg-info/PKG-INFO0000644000076500000240000000715312217613330022016 0ustar audreyrstaff00000000000000Metadata-Version: 1.0 Name: binaryornot Version: 0.2.0 Summary: Ultra-lightweight pure Python package to check if a file is binary or text. Home-page: https://github.com/audreyr/binaryornot Author: Audrey Roy Author-email: audreyr@gmail.com License: BSD Description: ============================= BinaryOrNot ============================= .. image:: https://badge.fury.io/py/binaryornot.png :target: http://badge.fury.io/py/binaryornot .. image:: https://travis-ci.org/audreyr/binaryornot.png?branch=master :target: https://travis-ci.org/audreyr/binaryornot .. image:: https://pypip.in/d/binaryornot/badge.png :target: https://crate.io/packages/binaryornot?version=latest Ultra-lightweight pure Python package to guess whether a file is binary or text, using a heuristic similar to Perl's `pp_fttext` and its analysis by @eliben. * Free software: BSD license * Documentation: http://binaryornot.readthedocs.org Status ------ It works, and I'm using this package in various places. But it doesn't cover all edge cases yet. The code could be improved. Pull requests welcome! As of now, it is based on these snippets, but that may change: * http://stackoverflow.com/questions/898669/how-can-i-detect-if-a-file-is-binary-non-text-in-python * http://stackoverflow.com/questions/1446549/how-to-identify-binary-and-text-files-using-python * http://code.activestate.com/recipes/173220/ * http://eli.thegreenplace.net/2011/10/19/perls-guess-if-file-is-text-or-binary-implemented-in-python/ Features -------- Has tests for these file types: * Text: .css, .json, .txt, .svg * Binary: .eot, .otf, ttf, .woff, .png, .jpg, .tiff, .bmp Why? ---- You may be thinking, "I can write this in 2 lines of code?!" It's actually not that easy. Here's a great article about how *perldoc*'s heuristic to guess file types works: http://eli.thegreenplace.net/2011/10/19/perls-guess-if-file-is-text-or-binary-implemented-in-python/ Also, this package saves you from having to write and thoroughly test those 2 lines of code with all sorts of weird file types, cross-platform. Credits ------- * Special thanks to Eli Bendersky (@eliben) for his writeup explaining the heuristic and his implementation, which this is largely based on. * Source code from Perl's `pp_fttext`: https://github.com/mirrors/perl/blob/blead/pp_sys.c#L3287 History ------- 0.2.0 (2013-09-22) ++++++++++++++++++ * Complete rewrite of everything. 0.1.1 (2013-08-17) ++++++++++++++++++ * Tests pass under Python 2.6, 2.7, 3.3, PyPy. 0.1.0 (2013-08-17) ++++++++++++++++++ * First release on PyPI. Keywords: binaryornot Platform: UNKNOWN Classifier: Development Status :: 3 - Alpha Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: BSD License Classifier: Natural Language :: English Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.6 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.3 binaryornot-0.2.0/binaryornot.egg-info/SOURCES.txt0000644000076500000240000000050212217613330022574 0ustar audreyrstaff00000000000000AUTHORS.rst CONTRIBUTING.rst HISTORY.rst LICENSE MANIFEST.in README.rst setup.py binaryornot/__init__.py binaryornot/check.py binaryornot/helpers.py binaryornot.egg-info/PKG-INFO binaryornot.egg-info/SOURCES.txt binaryornot.egg-info/dependency_links.txt binaryornot.egg-info/not-zip-safe binaryornot.egg-info/top_level.txtbinaryornot-0.2.0/binaryornot.egg-info/top_level.txt0000644000076500000240000000001412217613330023440 0ustar audreyrstaff00000000000000binaryornot binaryornot-0.2.0/CONTRIBUTING.rst0000644000076500000240000000616312203530360017316 0ustar audreyrstaff00000000000000============ Contributing ============ Contributions are welcome, and they are greatly appreciated! Every little bit helps, and credit will always be given. You can contribute in many ways: Types of Contributions ---------------------- Report Bugs ~~~~~~~~~~~ Report bugs at https://github.com/audreyr/binaryornot/issues. If you are reporting a bug, please include: * Your operating system name and version. * Any details about your local setup that might be helpful in troubleshooting. * Detailed steps to reproduce the bug. Fix Bugs ~~~~~~~~ Look through the GitHub issues for bugs. Anything tagged with "bug" is open to whoever wants to implement it. Implement Features ~~~~~~~~~~~~~~~~~~ Look through the GitHub issues for features. Anything tagged with "feature" is open to whoever wants to implement it. Write Documentation ~~~~~~~~~~~~~~~~~~~ BinaryOrNot could always use more documentation, whether as part of the official BinaryOrNot docs, in docstrings, or even on the web in blog posts, articles, and such. Submit Feedback ~~~~~~~~~~~~~~~ The best way to send feedback is to file an issue at https://github.com/audreyr/binaryornot/issues. If you are proposing a feature: * Explain in detail how it would work. * Keep the scope as narrow as possible, to make it easier to implement. * Remember that this is a volunteer-driven project, and that contributions are welcome :) Get Started! ------------ Ready to contribute? Here's how to set up `binaryornot` for local development. 1. Fork the `binaryornot` repo on GitHub. 2. Clone your fork locally:: $ git clone git@github.com:your_name_here/binaryornot.git 3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development:: $ mkvirtualenv binaryornot $ cd binaryornot/ $ python setup.py develop 4. Create a branch for local development:: $ git checkout -b name-of-your-bugfix-or-feature Now you can make your changes locally. 5. When you're done making changes, check that your changes pass flake8 and the tests, including testing other Python versions with tox:: $ flake8 binaryornot tests $ python setup.py test $ tox To get flake8 and tox, just pip install them into your virtualenv. 6. Commit your changes and push your branch to GitHub:: $ git add . $ git commit -m "Your detailed description of your changes." $ git push origin name-of-your-bugfix-or-feature 7. Submit a pull request through the GitHub website. Pull Request Guidelines ----------------------- Before you submit a pull request, check that it meets these guidelines: 1. The pull request should include tests. 2. If the pull request adds functionality, the docs should be updated. Put your new functionality into a function with a docstring, and add the feature to the list in README.rst. 3. The pull request should work for Python 2.6, 2.7, and 3.3, and for PyPy. Check https://travis-ci.org/audreyr/binaryornot/pull_requests and make sure that the tests pass for all supported Python versions. Tips ---- To run a subset of tests:: $ python -m unittest tests.test_binaryornotbinaryornot-0.2.0/HISTORY.rst0000644000076500000240000000040412217612517016552 0ustar audreyrstaff00000000000000.. :changelog: History ------- 0.2.0 (2013-09-22) ++++++++++++++++++ * Complete rewrite of everything. 0.1.1 (2013-08-17) ++++++++++++++++++ * Tests pass under Python 2.6, 2.7, 3.3, PyPy. 0.1.0 (2013-08-17) ++++++++++++++++++ * First release on PyPI. binaryornot-0.2.0/LICENSE0000644000076500000240000000267412203530360015665 0ustar audreyrstaff00000000000000Copyright (c) 2013, Audrey Roy All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of BinaryOrNot nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.binaryornot-0.2.0/MANIFEST.in0000644000076500000240000000014312203530360016403 0ustar audreyrstaff00000000000000include AUTHORS.rst include CONTRIBUTING.rst include HISTORY.rst include LICENSE include README.rstbinaryornot-0.2.0/PKG-INFO0000644000076500000240000000715312217613330015756 0ustar audreyrstaff00000000000000Metadata-Version: 1.0 Name: binaryornot Version: 0.2.0 Summary: Ultra-lightweight pure Python package to check if a file is binary or text. Home-page: https://github.com/audreyr/binaryornot Author: Audrey Roy Author-email: audreyr@gmail.com License: BSD Description: ============================= BinaryOrNot ============================= .. image:: https://badge.fury.io/py/binaryornot.png :target: http://badge.fury.io/py/binaryornot .. image:: https://travis-ci.org/audreyr/binaryornot.png?branch=master :target: https://travis-ci.org/audreyr/binaryornot .. image:: https://pypip.in/d/binaryornot/badge.png :target: https://crate.io/packages/binaryornot?version=latest Ultra-lightweight pure Python package to guess whether a file is binary or text, using a heuristic similar to Perl's `pp_fttext` and its analysis by @eliben. * Free software: BSD license * Documentation: http://binaryornot.readthedocs.org Status ------ It works, and I'm using this package in various places. But it doesn't cover all edge cases yet. The code could be improved. Pull requests welcome! As of now, it is based on these snippets, but that may change: * http://stackoverflow.com/questions/898669/how-can-i-detect-if-a-file-is-binary-non-text-in-python * http://stackoverflow.com/questions/1446549/how-to-identify-binary-and-text-files-using-python * http://code.activestate.com/recipes/173220/ * http://eli.thegreenplace.net/2011/10/19/perls-guess-if-file-is-text-or-binary-implemented-in-python/ Features -------- Has tests for these file types: * Text: .css, .json, .txt, .svg * Binary: .eot, .otf, ttf, .woff, .png, .jpg, .tiff, .bmp Why? ---- You may be thinking, "I can write this in 2 lines of code?!" It's actually not that easy. Here's a great article about how *perldoc*'s heuristic to guess file types works: http://eli.thegreenplace.net/2011/10/19/perls-guess-if-file-is-text-or-binary-implemented-in-python/ Also, this package saves you from having to write and thoroughly test those 2 lines of code with all sorts of weird file types, cross-platform. Credits ------- * Special thanks to Eli Bendersky (@eliben) for his writeup explaining the heuristic and his implementation, which this is largely based on. * Source code from Perl's `pp_fttext`: https://github.com/mirrors/perl/blob/blead/pp_sys.c#L3287 History ------- 0.2.0 (2013-09-22) ++++++++++++++++++ * Complete rewrite of everything. 0.1.1 (2013-08-17) ++++++++++++++++++ * Tests pass under Python 2.6, 2.7, 3.3, PyPy. 0.1.0 (2013-08-17) ++++++++++++++++++ * First release on PyPI. Keywords: binaryornot Platform: UNKNOWN Classifier: Development Status :: 3 - Alpha Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: BSD License Classifier: Natural Language :: English Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.6 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.3 binaryornot-0.2.0/README.rst0000644000076500000240000000403612217577103016354 0ustar audreyrstaff00000000000000============================= BinaryOrNot ============================= .. image:: https://badge.fury.io/py/binaryornot.png :target: http://badge.fury.io/py/binaryornot .. image:: https://travis-ci.org/audreyr/binaryornot.png?branch=master :target: https://travis-ci.org/audreyr/binaryornot .. image:: https://pypip.in/d/binaryornot/badge.png :target: https://crate.io/packages/binaryornot?version=latest Ultra-lightweight pure Python package to guess whether a file is binary or text, using a heuristic similar to Perl's `pp_fttext` and its analysis by @eliben. * Free software: BSD license * Documentation: http://binaryornot.readthedocs.org Status ------ It works, and I'm using this package in various places. But it doesn't cover all edge cases yet. The code could be improved. Pull requests welcome! As of now, it is based on these snippets, but that may change: * http://stackoverflow.com/questions/898669/how-can-i-detect-if-a-file-is-binary-non-text-in-python * http://stackoverflow.com/questions/1446549/how-to-identify-binary-and-text-files-using-python * http://code.activestate.com/recipes/173220/ * http://eli.thegreenplace.net/2011/10/19/perls-guess-if-file-is-text-or-binary-implemented-in-python/ Features -------- Has tests for these file types: * Text: .css, .json, .txt, .svg * Binary: .eot, .otf, ttf, .woff, .png, .jpg, .tiff, .bmp Why? ---- You may be thinking, "I can write this in 2 lines of code?!" It's actually not that easy. Here's a great article about how *perldoc*'s heuristic to guess file types works: http://eli.thegreenplace.net/2011/10/19/perls-guess-if-file-is-text-or-binary-implemented-in-python/ Also, this package saves you from having to write and thoroughly test those 2 lines of code with all sorts of weird file types, cross-platform. Credits ------- * Special thanks to Eli Bendersky (@eliben) for his writeup explaining the heuristic and his implementation, which this is largely based on. * Source code from Perl's `pp_fttext`: https://github.com/mirrors/perl/blob/blead/pp_sys.c#L3287 binaryornot-0.2.0/setup.cfg0000644000076500000240000000007312217613330016474 0ustar audreyrstaff00000000000000[egg_info] tag_build = tag_date = 0 tag_svn_revision = 0 binaryornot-0.2.0/setup.py0000755000076500000240000000251012217612557016400 0ustar audreyrstaff00000000000000#!/usr/bin/env python import os import sys import binaryornot try: from setuptools import setup except ImportError: from distutils.core import setup if sys.argv[-1] == 'publish': os.system('python setup.py sdist upload') sys.exit() readme = open('README.rst').read() history = open('HISTORY.rst').read().replace('.. :changelog:', '') setup( name='binaryornot', version='0.2.0', description='Ultra-lightweight pure Python package to check if a file is binary or text.', long_description=readme + '\n\n' + history, author='Audrey Roy', author_email='audreyr@gmail.com', url='https://github.com/audreyr/binaryornot', packages=[ 'binaryornot', ], package_dir={'binaryornot': 'binaryornot'}, include_package_data=True, install_requires=[ ], license="BSD", zip_safe=False, keywords='binaryornot', classifiers=[ 'Development Status :: 3 - Alpha', 'Intended Audience :: Developers', 'License :: OSI Approved :: BSD License', 'Natural Language :: English', "Programming Language :: Python :: 2", 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.3', ], test_suite='tests', )