././@PaxHeader0000000000000000000000000000003400000000000011452 xustar000000000000000028 mtime=1602718803.7976947 patiencediff-0.2.1/0000755000175000017500000000000000000000000014712 5ustar00jelmerjelmer00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1590202056.0 patiencediff-0.2.1/.bzrignore0000644000175000017500000000004500000000000016713 0ustar00jelmerjelmer00000000000000build patiencediff.egg-info dist/ *~ ././@PaxHeader0000000000000000000000000000003400000000000011452 xustar000000000000000028 mtime=1602718803.7976947 patiencediff-0.2.1/.github/0000755000175000017500000000000000000000000016252 5ustar00jelmerjelmer00000000000000././@PaxHeader0000000000000000000000000000003400000000000011452 xustar000000000000000028 mtime=1602718803.7976947 patiencediff-0.2.1/.github/workflows/0000755000175000017500000000000000000000000020307 5ustar00jelmerjelmer00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1591046155.0 patiencediff-0.2.1/.github/workflows/pythonpackage.yml0000644000175000017500000000176700000000000023702 0ustar00jelmerjelmer00000000000000name: Python package on: [push, pull_request] jobs: build: runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] python-version: [2.7, 3.5, 3.6, 3.7, 3.8, pypy3] fail-fast: false steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip pip install -U pip flake8 setuptools - name: Style checks run: | python -m flake8 - name: Build run: | python setup.py build_ext -i # Building C extensions doesn't work for Python 2.7 on Windows. if: "!(matrix.os == 'windows-latest' && matrix.python-version == '2.7')" - name: Test suite run run: | python -m unittest patiencediff.test_patiencediff env: PYTHONHASHSEED: random ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1591046376.0 patiencediff-0.2.1/.github/workflows/pythonpublish.yml0000644000175000017500000000316100000000000023743 0ustar00jelmerjelmer00000000000000name: Upload Python Package on: push: tags: - v* jobs: deploy: runs-on: ${{ matrix.os }} strategy: matrix: os: [macos-latest, windows-latest] python-version: ['3.5', '3.6', '3.7', '3.8'] include: - os: ubuntu-latest python-version: '3.x' # path encoding exclude: - os: windows-latest python-version: 3.5 fail-fast: false steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip pip install setuptools wheel twine - name: Run test suite run: | python -m unittest patiencediff.test_patiencediff - name: Build run: | python setup.py sdist bdist_wheel mkdir wheelhouse mv dist/*.whl wheelhouse if: "matrix.os != 'ubuntu-latest'" - name: Build and publish (Linux) uses: RalfG/python-wheels-manylinux-build@v0.2.2 if: "matrix.os == 'ubuntu-latest'" - name: Publish (Linux) env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | twine upload wheelhouse/*manylinux* if: "matrix.os == 'ubuntu-latest'" - name: Publish env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | twine upload wheelhouse/* if: "matrix.os != 'ubuntu-latest'" ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1602718739.0 patiencediff-0.2.1/.gitignore0000644000175000017500000000007300000000000016702 0ustar00jelmerjelmer00000000000000dist build __pycache__ *~ *.so *.pyc patiencediff.egg-info ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1590202056.0 patiencediff-0.2.1/AUTHORS0000644000175000017500000000022700000000000015763 0ustar00jelmerjelmer00000000000000John Arbash Meinel Lukáš Lalinský Martin Pool Jelmer Vernooij ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1590202056.0 patiencediff-0.2.1/COPYING0000644000175000017500000004325400000000000015755 0ustar00jelmerjelmer00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1590202056.0 patiencediff-0.2.1/MANIFEST.in0000644000175000017500000000004300000000000016445 0ustar00jelmerjelmer00000000000000include AUTHORS include README.rst ././@PaxHeader0000000000000000000000000000003400000000000011452 xustar000000000000000028 mtime=1602718803.7976947 patiencediff-0.2.1/PKG-INFO0000644000175000017500000000446200000000000016015 0ustar00jelmerjelmer00000000000000Metadata-Version: 1.2 Name: patiencediff Version: 0.2.1 Summary: Python implementation of the patiencediff algorithm. Home-page: https://www.breezy-vcs.org/ Maintainer: Breezy Developers Maintainer-email: team@breezy-vcs.org License: GNU GPLv2 or later Description: This package contains the implementation of the ``patiencediff`` algorithm, as `first described `_ by Bram Cohen. Like Python's ``difflib``, this module provides both a convience ``unified_diff`` function for the generation of unified diffs of text files as well as a SequenceMatcher that can be used on arbitrary lists. Patiencediff provides a good balance of performance, nice output for humans, and implementation simplicity. The code in this package was extracted from the `Bazaar `_ code base. The package comes with two implementations: * A Python implementation (_patiencediff_py.py); this implementation only requires a Python interpreter and is the more readable version of the two * A C implementation implementation (_patiencediff_c.c); this implementation is faster, but requires a C compiler and is less readable Usage ===== To invoke patiencediff from the command-line:: python -m patiencediff file_a file_b Or from Python: >>> import patiencediff >>> print ''.join(patiencediff.unified_diff( ... ['a\n', 'b\n', 'b\n', 'c\n'], ... ['a\n', 'c\n', 'b\n'])) --- +++ @@ -1,4 +1,3 @@ a +c b -b -c Platform: UNKNOWN Classifier: Development Status :: 4 - Beta Classifier: License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+) Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Programming Language :: Python :: Implementation :: PyPy Classifier: Operating System :: POSIX ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1590202056.0 patiencediff-0.2.1/README.rst0000644000175000017500000000237200000000000016405 0ustar00jelmerjelmer00000000000000This package contains the implementation of the ``patiencediff`` algorithm, as `first described `_ by Bram Cohen. Like Python's ``difflib``, this module provides both a convience ``unified_diff`` function for the generation of unified diffs of text files as well as a SequenceMatcher that can be used on arbitrary lists. Patiencediff provides a good balance of performance, nice output for humans, and implementation simplicity. The code in this package was extracted from the `Bazaar `_ code base. The package comes with two implementations: * A Python implementation (_patiencediff_py.py); this implementation only requires a Python interpreter and is the more readable version of the two * A C implementation implementation (_patiencediff_c.c); this implementation is faster, but requires a C compiler and is less readable Usage ===== To invoke patiencediff from the command-line:: python -m patiencediff file_a file_b Or from Python: >>> import patiencediff >>> print ''.join(patiencediff.unified_diff( ... ['a\n', 'b\n', 'b\n', 'c\n'], ... ['a\n', 'c\n', 'b\n'])) --- +++ @@ -1,4 +1,3 @@ a +c b -b -c ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1590202056.0 patiencediff-0.2.1/build.cmd0000644000175000017500000000150100000000000016473 0ustar00jelmerjelmer00000000000000@echo off :: To build extensions for 64 bit Python 3, we need to configure environment :: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: :: MS Windows SDK for Windows 7 and .NET Framework 4 :: :: More details at: :: https://github.com/cython/cython/wiki/CythonExtensionsOnWindows IF "%DISTUTILS_USE_SDK%"=="1" ( ECHO Configuring environment to build with MSVC on a 64bit architecture ECHO Using Windows SDK 7.1 "C:\Program Files\Microsoft SDKs\Windows\v7.1\Setup\WindowsSdkVer.exe" -q -version:v7.1 CALL "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64 /release SET MSSdk=1 REM Need the following to allow tox to see the SDK compiler SET TOX_TESTENV_PASSENV=DISTUTILS_USE_SDK MSSdk INCLUDE LIB ) ELSE ( ECHO Using default MSVC build environment ) CALL %* ././@PaxHeader0000000000000000000000000000003400000000000011452 xustar000000000000000028 mtime=1602718803.7976947 patiencediff-0.2.1/patiencediff/0000755000175000017500000000000000000000000017333 5ustar00jelmerjelmer00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1602718795.0 patiencediff-0.2.1/patiencediff/__init__.py0000644000175000017500000001161100000000000021444 0ustar00jelmerjelmer00000000000000# Copyright (C) 2005, 2006, 2007 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import import os import sys import time import difflib __all__ = ['PatienceSequenceMatcher', 'unified_diff', 'unified_diff_files'] __version__ = (0, 2, 1) # This is a version of unified_diff which only adds a factory parameter # so that you can override the default SequenceMatcher # this has been submitted as a patch to python def unified_diff(a, b, fromfile='', tofile='', fromfiledate='', tofiledate='', n=3, lineterm='\n', sequencematcher=None): r""" Compare two sequences of lines; generate the delta as a unified diff. Unified diffs are a compact way of showing line changes and a few lines of context. The number of context lines is set by 'n' which defaults to three. By default, the diff control lines (those with ---, +++, or @@) are created with a trailing newline. This is helpful so that inputs created from file.readlines() result in diffs that are suitable for file.writelines() since both the inputs and outputs have trailing newlines. For inputs that do not have trailing newlines, set the lineterm argument to "" so that the output will be uniformly newline free. The unidiff format normally has a header for filenames and modification times. Any or all of these may be specified using strings for 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'. The modification times are normally expressed in the format returned by time.ctime(). Example: >>> for line in unified_diff('one two three four'.split(), ... 'zero one tree four'.split(), 'Original', 'Current', ... 'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003', ... lineterm=''): ... print line --- Original Sat Jan 26 23:30:50 1991 +++ Current Fri Jun 06 10:20:52 2003 @@ -1,4 +1,4 @@ +zero one -two -three +tree four """ if sequencematcher is None: sequencematcher = difflib.SequenceMatcher if fromfiledate: fromfiledate = '\t' + str(fromfiledate) if tofiledate: tofiledate = '\t' + str(tofiledate) started = False for group in sequencematcher(None, a, b).get_grouped_opcodes(n): if not started: yield '--- %s%s%s' % (fromfile, fromfiledate, lineterm) yield '+++ %s%s%s' % (tofile, tofiledate, lineterm) started = True i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4] yield "@@ -%d,%d +%d,%d @@%s" % (i1+1, i2-i1, j1+1, j2-j1, lineterm) for tag, i1, i2, j1, j2 in group: if tag == 'equal': for line in a[i1:i2]: yield ' ' + line continue if tag == 'replace' or tag == 'delete': for line in a[i1:i2]: yield '-' + line if tag == 'replace' or tag == 'insert': for line in b[j1:j2]: yield '+' + line def unified_diff_files(a, b, sequencematcher=None): """Generate the diff for two files. """ # Should this actually be an error? if a == b: return [] if a == '-': lines_a = sys.stdin.readlines() time_a = time.time() else: with open(a, 'r') as f: lines_a = f.readlines() time_a = os.stat(a).st_mtime # noqa: F841 if b == '-': lines_b = sys.stdin.readlines() time_b = time.time() else: with open(b, 'r') as f: lines_b = f.readlines() time_b = os.stat(b).st_mtime # noqa: F841 # TODO: Include fromfiledate and tofiledate return unified_diff(lines_a, lines_b, fromfile=a, tofile=b, sequencematcher=sequencematcher) try: from ._patiencediff_c import ( unique_lcs_c as unique_lcs, recurse_matches_c as recurse_matches, PatienceSequenceMatcher_c as PatienceSequenceMatcher ) except ImportError: from ._patiencediff_py import ( # noqa: F401 unique_lcs_py as unique_lcs, recurse_matches_py as recurse_matches, PatienceSequenceMatcher_py as PatienceSequenceMatcher ) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1602718660.0 patiencediff-0.2.1/patiencediff/__main__.py0000644000175000017500000000356000000000000021431 0ustar00jelmerjelmer00000000000000#!/usr/bin/env python # Copyright (C) 2005, 2006, 2007 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import import sys import difflib from . import PatienceSequenceMatcher, unified_diff_files def main(args): import optparse p = optparse.OptionParser(usage='%prog [options] file_a file_b' '\nFiles can be "-" to read from stdin') p.add_option('--patience', dest='matcher', action='store_const', const='patience', default='patience', help='Use the patience difference algorithm') p.add_option('--difflib', dest='matcher', action='store_const', const='difflib', default='patience', help='Use python\'s difflib algorithm') algorithms = { 'patience': PatienceSequenceMatcher, 'difflib': difflib.SequenceMatcher} (opts, args) = p.parse_args(args) matcher = algorithms[opts.matcher] if len(args) != 2: print('You must supply 2 filenames to diff') return -1 for line in unified_diff_files(args[0], args[1], sequencematcher=matcher): sys.stdout.write(line) if __name__ == '__main__': sys.exit(main(sys.argv[1:])) ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1590202056.0 patiencediff-0.2.1/patiencediff/_patiencediff_c.c0000644000175000017500000011035000000000000022561 0ustar00jelmerjelmer00000000000000/* Copyright (C) 2007, 2010 Canonical Ltd This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Function equate_lines based on bdiff.c from Mercurial. Copyright (C) 2005, 2006 Matt Mackall Functions unique_lcs/recurse_matches based on _patiencediff_py.py. Copyright (C) 2005 Bram Cohen, Copyright (C) 2005, 2006 Canonical Ltd */ #include #include #include #if defined(__GNUC__) # define inline __inline__ #elif defined(_MSC_VER) # define inline __inline #else # define inline #endif #define MIN(a, b) (((a) > (b)) ? (b) : (a)) #define MAX(a, b) (((a) > (b)) ? (a) : (b)) #define SENTINEL -1 /* malloc returns NULL on some platforms if you try to allocate nothing, * causing and * . On glibc it passes, but * let's make it fail to aid testing. */ #define guarded_malloc(x) ( ((x) > 0) ? malloc(x) : NULL ) enum { OP_EQUAL = 0, OP_INSERT, OP_DELETE, OP_REPLACE }; /* values from this array need to correspont to the order of the enum above */ static char *opcode_names[] = { "equal", "insert", "delete", "replace", }; struct line { long hash; /* hash code of the string/object */ Py_ssize_t next; /* next line from the same equivalence class */ Py_ssize_t equiv; /* equivalence class */ PyObject *data; }; struct bucket { Py_ssize_t a_head; /* first item in `a` from this equivalence class */ Py_ssize_t a_count; Py_ssize_t b_head; /* first item in `b` from this equivalence class */ Py_ssize_t b_count; Py_ssize_t a_pos; Py_ssize_t b_pos; }; struct hashtable { Py_ssize_t last_a_pos; Py_ssize_t last_b_pos; Py_ssize_t size; struct bucket *table; }; struct matching_line { Py_ssize_t a; /* index of the line in `a` */ Py_ssize_t b; /* index of the line in `b` */ }; struct matching_block { Py_ssize_t a; /* index of the first line in `a` */ Py_ssize_t b; /* index of the first line in `b` */ Py_ssize_t len; /* length of the block */ }; struct matching_blocks { struct matching_block *matches; Py_ssize_t count; }; struct opcode { int tag; Py_ssize_t i1; Py_ssize_t i2; Py_ssize_t j1; Py_ssize_t j2; }; typedef struct { PyObject_HEAD Py_ssize_t asize; Py_ssize_t bsize; struct line *a; struct line *b; struct hashtable hashtable; Py_ssize_t *backpointers; } PatienceSequenceMatcher; static inline Py_ssize_t bisect_left(Py_ssize_t *list, Py_ssize_t item, Py_ssize_t lo, Py_ssize_t hi) { while (lo < hi) { Py_ssize_t mid = lo / 2 + hi / 2 + (lo % 2 + hi % 2) / 2; if (list[mid] < item) lo = mid + 1; else hi = mid; } return lo; } static inline int compare_lines(struct line *a, struct line *b) { return ((a->hash != b->hash) || PyObject_RichCompareBool(a->data, b->data, Py_EQ) == 0); } static inline int find_equivalence_class(struct bucket *hashtable, Py_ssize_t hsize, struct line *lines, struct line *ref_lines, Py_ssize_t i) { Py_ssize_t j; for (j = lines[i].hash & hsize; hashtable[j].b_head != SENTINEL; j = (j + 1) & hsize) { if (!compare_lines(lines + i, ref_lines + hashtable[j].b_head)) { break; } } return j; } static int equate_lines(struct hashtable *result, struct line *lines_a, struct line *lines_b, Py_ssize_t asize, Py_ssize_t bsize) { Py_ssize_t i, j, hsize; struct bucket *hashtable; /* check for overflow, we need the table to be at least bsize+1 */ if (bsize == PY_SSIZE_T_MAX) { PyErr_SetNone(PyExc_OverflowError); return 0; } /* build a hash table of the next highest power of 2 */ hsize = 1; while (hsize < bsize + 1) hsize *= 2; /* can't be 0 */ hashtable = (struct bucket *) guarded_malloc(sizeof(struct bucket) * hsize); if (hashtable == NULL) { PyErr_NoMemory(); return 0; } /* initialise the hashtable */ for (i = 0; i < hsize; i++) { hashtable[i].a_count = 0; hashtable[i].b_count = 0; hashtable[i].a_head = SENTINEL; hashtable[i].b_head = SENTINEL; } hsize--; /* add lines from lines_b to the hash table chains. iterating backwards so the matching lines are sorted to the linked list by the line number (because we are adding new lines to the head of the list) */ for (i = bsize - 1; i >= 0; i--) { /* find the first hashtable entry, which is either empty or contains the same line as lines_b[i] */ j = find_equivalence_class(hashtable, hsize, lines_b, lines_b, i); /* set the equivalence class */ lines_b[i].equiv = j; /* add to the head of the equivalence class */ lines_b[i].next = hashtable[j].b_head; hashtable[j].b_head = i; hashtable[j].b_count++; } /* match items from lines_a to their equivalence class in lines_b. again, iterating backwards for the right order of the linked lists */ for (i = asize - 1; i >= 0; i--) { /* find the first hash entry, which is either empty or contains the same line as lines_a[i] */ j = find_equivalence_class(hashtable, hsize, lines_a, lines_b, i); /* set the equivalence class, even if we are not interested in this line, because the values are not pre-filled */ lines_a[i].equiv = j; /* we are not interested in lines which are not also in lines_b */ if (hashtable[j].b_head == SENTINEL) continue; /* add to the head of the equivalence class */ lines_a[i].next = hashtable[j].a_head; hashtable[j].a_head = i; hashtable[j].a_count++; } result->last_a_pos = -1; result->last_b_pos = -1; result->size = hsize + 1; result->table = hashtable; return 1; } /* Finds longest common subsequence of unique lines in a[alo:ahi] and b[blo:bhi]. Parameter backpointers must have allocated memory for at least 4 * (bhi - blo) ints. */ Py_ssize_t unique_lcs(struct matching_line *answer, struct hashtable *hashtable, Py_ssize_t *backpointers, struct line *lines_a, struct line *lines_b, Py_ssize_t alo, Py_ssize_t blo, Py_ssize_t ahi, Py_ssize_t bhi) { Py_ssize_t i, k, equiv, apos, bpos, norm_apos, norm_bpos, bsize, stacksize; Py_ssize_t *stacks, *lasts, *btoa; struct bucket *h; k = 0; stacksize = 0; bsize = bhi - blo; h = hashtable->table; /* "unpack" the allocated memory */ stacks = backpointers + bsize; lasts = stacks + bsize; btoa = lasts + bsize; /* initialise the backpointers */ for (i = 0; i < bsize; i++) backpointers[i] = SENTINEL; if (hashtable->last_a_pos == -1 || hashtable->last_a_pos > alo) for (i = 0; i < hashtable->size; i++) h[i].a_pos = h[i].a_head; hashtable->last_a_pos = alo; if (hashtable->last_b_pos == -1 || hashtable->last_b_pos > blo) for (i = 0; i < hashtable->size; i++) h[i].b_pos = h[i].b_head; hashtable->last_b_pos = blo; for (bpos = blo; bpos < bhi; bpos++) { equiv = lines_b[bpos].equiv; /* no lines in a or b */ if (h[equiv].a_count == 0 || h[equiv].b_count == 0) continue; /* find an unique line in lines_a that matches lines_b[bpos] if we find more than one line within the range alo:ahi, jump to the next line from lines_b immediately */ apos = SENTINEL; /* loop through all lines in the linked list */ for (i = h[equiv].a_pos; i != SENTINEL; i = lines_a[i].next) { /* the index is lower than alo, continue to the next line */ if (i < alo) { h[equiv].a_pos = i; continue; } /* the index is higher than ahi, stop searching */ if (i >= ahi) break; /* if the line is within our range, check if it's a duplicate */ if (apos != SENTINEL) goto nextb; /* save index to the line */ apos = i; } /* this line has no equivalent in lines_a[alo:ahi] */ if (apos == SENTINEL) goto nextb; /* check for duplicates of this line in lines_b[blo:bhi] */ /* loop through all lines in the linked list */ for (i = h[equiv].b_pos; i != SENTINEL; i = lines_b[i].next) { /* the index is lower than blo, continue to the next line */ if (i < blo) { h[equiv].b_pos = i; continue; } /* the index is higher than bhi, stop searching */ if (i >= bhi) break; /* if this isn't the line with started with and it's within our range, it's a duplicate */ if (i != bpos) goto nextb; } /* use normalised indexes ([0,ahi-alo) instead of [alo,ahi)) for the patience sorting algorithm */ norm_bpos = bpos - blo; norm_apos = apos - alo; btoa[norm_bpos] = norm_apos; /* Ok, how does this work... We have a list of matching lines from two lists, a and b. These matches are stored in variable `btoa`. As we are iterating over this table by bpos, the lines from b already form an increasing sequence. We need to "sort" also the lines from a using the patience sorting algorithm, ignoring the lines which would need to be swapped. http://en.wikipedia.org/wiki/Patience_sorting For each pair of lines, we need to place the line from a on either an existing pile that has higher value on the top or create a new pile. Variable `stacks` represents the tops of these piles and in variable `lasts` we store the lines from b, that correspond to the lines from a in `stacks`. Whenever we place a new line on top of a pile, we store a backpointer to the line (b) from top of the previous pile. This means that after the loop, variable `backpointers` will contain an index to the previous matching lines that forms an increasing sequence (over both indexes a and b) with the current matching lines. If either index a or b of the previous matching lines would be higher than indexes of the current one or if the indexes of the current one are 0, it will contain SENTINEL. To construct the LCS, we will just need to follow these backpointers from the top of the last pile and stop when we reach SENTINEL. */ /* as an optimization, check if the next line comes at the end, because it usually does */ if (stacksize && stacks[stacksize - 1] < norm_apos) k = stacksize; /* as an optimization, check if the next line comes right after the previous line, because usually it does */ else if (stacksize && (stacks[k] < norm_apos) && (k == stacksize - 1 || stacks[k + 1] > norm_apos)) k += 1; else k = bisect_left(stacks, norm_apos, 0, stacksize); if (k > 0) backpointers[norm_bpos] = lasts[k - 1]; if (k < stacksize) { stacks[k] = norm_apos; lasts[k] = norm_bpos; } else { stacks[stacksize] = norm_apos; lasts[stacksize] = norm_bpos; stacksize += 1; } nextb: ; } if (stacksize == 0) return 0; /* backtrace the structures to find the LCS */ i = 0; k = lasts[stacksize - 1]; while (k != SENTINEL) { answer[i].a = btoa[k]; answer[i].b = k; k = backpointers[k]; i++; } return i; } /* Adds a new line to the list of matching blocks, either extending the current block or adding a new one. */ static inline void add_matching_line(struct matching_blocks *answer, Py_ssize_t a, Py_ssize_t b) { Py_ssize_t last_index = answer->count - 1; if ((last_index >= 0) && (a == answer->matches[last_index].a + answer->matches[last_index].len) && (b == answer->matches[last_index].b + answer->matches[last_index].len)) { /* enlarge the last block */ answer->matches[last_index].len++; } else { /* create a new block */ last_index++; answer->matches[last_index].a = a; answer->matches[last_index].b = b; answer->matches[last_index].len = 1; answer->count++; } } static int recurse_matches(struct matching_blocks *answer, struct hashtable *hashtable, Py_ssize_t *backpointers, struct line *a, struct line *b, Py_ssize_t alo, Py_ssize_t blo, Py_ssize_t ahi, Py_ssize_t bhi, int maxrecursion) { int res; Py_ssize_t new, last_a_pos, last_b_pos, lcs_size, nahi, nbhi, i, apos, bpos; struct matching_line *lcs; if (maxrecursion < 0) return 1; if (alo == ahi || blo == bhi) return 1; new = 0; last_a_pos = alo - 1; last_b_pos = blo - 1; lcs = (struct matching_line *)guarded_malloc(sizeof(struct matching_line) * (bhi - blo)); if (lcs == NULL) return 0; lcs_size = unique_lcs(lcs, hashtable, backpointers, a, b, alo, blo, ahi, bhi); /* recurse between lines which are unique in each file and match */ for (i = lcs_size - 1; i >= 0; i--) { apos = alo + lcs[i].a; bpos = blo + lcs[i].b; if (last_a_pos + 1 != apos || last_b_pos + 1 != bpos) { res = recurse_matches(answer, hashtable, backpointers, a, b, last_a_pos + 1, last_b_pos + 1, apos, bpos, maxrecursion - 1); if (!res) goto error; } last_a_pos = apos; last_b_pos = bpos; add_matching_line(answer, apos, bpos); new = 1; } free(lcs); lcs = NULL; /* find matches between the last match and the end */ if (new > 0) { res = recurse_matches(answer, hashtable, backpointers, a, b, last_a_pos + 1, last_b_pos + 1, ahi, bhi, maxrecursion - 1); if (!res) goto error; } /* find matching lines at the very beginning */ else if (a[alo].equiv == b[blo].equiv) { while (alo < ahi && blo < bhi && a[alo].equiv == b[blo].equiv) add_matching_line(answer, alo++, blo++); res = recurse_matches(answer, hashtable, backpointers, a, b, alo, blo, ahi, bhi, maxrecursion - 1); if (!res) goto error; } /* find matching lines at the very end */ else if (a[ahi - 1].equiv == b[bhi - 1].equiv) { nahi = ahi - 1; nbhi = bhi - 1; while (nahi > alo && nbhi > blo && a[nahi - 1].equiv == b[nbhi - 1].equiv) { nahi--; nbhi--; } res = recurse_matches(answer, hashtable, backpointers, a, b, last_a_pos + 1, last_b_pos + 1, nahi, nbhi, maxrecursion - 1); if (!res) goto error; for (i = 0; i < ahi - nahi; i++) add_matching_line(answer, nahi + i, nbhi + i); } return 1; error: free(lcs); return 0; } static void delete_lines(struct line *lines, Py_ssize_t size) { struct line *line = lines; while (size-- > 0) { Py_XDECREF(line->data); line++; } free(lines); } static Py_ssize_t load_lines(PyObject *orig, struct line **lines) { Py_ssize_t size, i; struct line *line; PyObject *seq, *item; seq = PySequence_Fast(orig, "sequence expected"); if (seq == NULL) { return -1; } size = PySequence_Fast_GET_SIZE(seq); if (size == 0) { Py_DECREF(seq); return 0; } /* Allocate a memory block for line data, initialized to 0 */ line = *lines = (struct line *)calloc(size, sizeof(struct line)); if (line == NULL) { PyErr_NoMemory(); Py_DECREF(seq); return -1; } for (i = 0; i < size; i++) { item = PySequence_Fast_GET_ITEM(seq, i); Py_INCREF(item); line->data = item; line->hash = PyObject_Hash(item); if (line->hash == (-1)) { /* Propogate the hash exception */ size = -1; goto cleanup; } line->next = SENTINEL; line++; } cleanup: Py_DECREF(seq); if (size == -1) { /* Error -- cleanup unused object references */ delete_lines(*lines, i); *lines = NULL; } return size; } static PyObject * py_unique_lcs(PyObject *self, PyObject *args) { PyObject *aseq, *bseq, *res, *item; Py_ssize_t asize, bsize, i, nmatches, *backpointers = NULL; struct line *a = NULL, *b = NULL; struct matching_line *matches = NULL; struct hashtable hashtable; if (!PyArg_ParseTuple(args, "OO", &aseq, &bseq)) return NULL; hashtable.table = NULL; asize = load_lines(aseq, &a); bsize = load_lines(bseq, &b); if (asize == -1 || bsize == -1) goto error; if (!equate_lines(&hashtable, a, b, asize, bsize)) goto error; if (bsize > 0) { matches = (struct matching_line *)guarded_malloc(sizeof(struct matching_line) * bsize); if (matches == NULL) goto error; backpointers = (Py_ssize_t *)guarded_malloc(sizeof(Py_ssize_t) * bsize * 4); if (backpointers == NULL) goto error; } nmatches = unique_lcs(matches, &hashtable, backpointers, a, b, 0, 0, asize, bsize); res = PyList_New(nmatches); for (i = 0; i < nmatches; i++) { item = Py_BuildValue("nn", matches[nmatches - i - 1].a, matches[nmatches - i - 1].b); if (item == NULL) goto error; if (PyList_SetItem(res, i, item) != 0) goto error; } free(backpointers); free(matches); free(hashtable.table); delete_lines(b, bsize); delete_lines(a, asize); return res; error: free(backpointers); free(matches); free(hashtable.table); delete_lines(b, bsize); delete_lines(a, asize); return NULL; } static PyObject * py_recurse_matches(PyObject *self, PyObject *args) { PyObject *aseq, *bseq, *item, *answer; int maxrecursion, res; Py_ssize_t i, j, asize, bsize, alo, blo, ahi, bhi; Py_ssize_t *backpointers = NULL; struct line *a = NULL, *b = NULL; struct hashtable hashtable; struct matching_blocks matches; if (!PyArg_ParseTuple(args, "OOnnnnOi", &aseq, &bseq, &alo, &blo, &ahi, &bhi, &answer, &maxrecursion)) return NULL; hashtable.table = NULL; matches.matches = NULL; asize = load_lines(aseq, &a); bsize = load_lines(bseq, &b); if (asize == -1 || bsize == -1) goto error; if (!equate_lines(&hashtable, a, b, asize, bsize)) goto error; matches.count = 0; if (bsize > 0) { matches.matches = (struct matching_block *)guarded_malloc(sizeof(struct matching_block) * bsize); if (matches.matches == NULL) goto error; backpointers = (Py_ssize_t *)guarded_malloc(sizeof(Py_ssize_t) * bsize * 4); if (backpointers == NULL) goto error; } else { matches.matches = NULL; backpointers = NULL; } res = recurse_matches(&matches, &hashtable, backpointers, a, b, alo, blo, ahi, bhi, maxrecursion); if (!res) goto error; for (i = 0; i < matches.count; i++) { for (j = 0; j < matches.matches[i].len; j++) { item = Py_BuildValue("nn", matches.matches[i].a + j, matches.matches[i].b + j); if (item == NULL) goto error; if (PyList_Append(answer, item) != 0) goto error; } } free(backpointers); free(matches.matches); free(hashtable.table); delete_lines(b, bsize); delete_lines(a, asize); Py_RETURN_NONE; error: free(backpointers); free(matches.matches); free(hashtable.table); delete_lines(b, bsize); delete_lines(a, asize); return NULL; } static PyObject * PatienceSequenceMatcher_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { PyObject *junk, *a, *b; PatienceSequenceMatcher *self; self = (PatienceSequenceMatcher *)type->tp_alloc(type, 0); if (self != NULL) { if (!PyArg_ParseTuple(args, "OOO", &junk, &a, &b)) { Py_DECREF(self); return NULL; } self->asize = load_lines(a, &(self->a)); self->bsize = load_lines(b, &(self->b)); if (self->asize == -1 || self->bsize == -1) { Py_DECREF(self); return NULL; } if (!equate_lines(&self->hashtable, self->a, self->b, self->asize, self->bsize)) { Py_DECREF(self); return NULL; } if (self->bsize > 0) { self->backpointers = (Py_ssize_t *)guarded_malloc(sizeof(Py_ssize_t) * self->bsize * 4); if (self->backpointers == NULL) { Py_DECREF(self); PyErr_NoMemory(); return NULL; } } else { self->backpointers = NULL; } } return (PyObject *)self; } static void PatienceSequenceMatcher_dealloc(PatienceSequenceMatcher* self) { free(self->backpointers); free(self->hashtable.table); delete_lines(self->b, self->bsize); delete_lines(self->a, self->asize); ((PyObject *)self)->ob_type->tp_free((PyObject *)self); } static char PatienceSequenceMatcher_get_matching_blocks_doc[] = "Return list of triples describing matching subsequences.\n" "\n" "Each triple is of the form (i, j, n), and means that\n" "a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in\n" "i and in j.\n" "\n" "The last triple is a dummy, (len(a), len(b), 0), and is the only\n" "triple with n==0.\n" "\n" ">>> s = PatienceSequenceMatcher(None, \"abxcd\", \"abcd\")\n" ">>> s.get_matching_blocks()\n" "[(0, 0, 2), (3, 2, 2), (5, 4, 0)]\n"; static PyObject * PatienceSequenceMatcher_get_matching_blocks(PatienceSequenceMatcher* self) { PyObject *answer, *item; int res; Py_ssize_t i; struct matching_blocks matches; matches.count = 0; if (self->bsize > 0) { matches.matches = (struct matching_block *) guarded_malloc(sizeof(struct matching_block) * self->bsize); if (matches.matches == NULL) return PyErr_NoMemory(); } else matches.matches = NULL; res = recurse_matches(&matches, &self->hashtable, self->backpointers, self->a, self->b, 0, 0, self->asize, self->bsize, 10); if (!res) { free(matches.matches); return PyErr_NoMemory(); } answer = PyList_New(matches.count + 1); if (answer == NULL) { free(matches.matches); return NULL; } for (i = 0; i < matches.count; i++) { item = Py_BuildValue("nnn", matches.matches[i].a, matches.matches[i].b, matches.matches[i].len); if (item == NULL) goto error; if (PyList_SetItem(answer, i, item) != 0) goto error; } item = Py_BuildValue("nnn", self->asize, self->bsize, 0); if (item == NULL) goto error; if (PyList_SetItem(answer, i, item) != 0) goto error; free(matches.matches); return answer; error: free(matches.matches); Py_DECREF(answer); return NULL; } static char PatienceSequenceMatcher_get_opcodes_doc[] = "Return list of 5-tuples describing how to turn a into b.\n" "\n" "Each tuple is of the form (tag, i1, i2, j1, j2). The first tuple\n" "has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the\n" "tuple preceding it, and likewise for j1 == the previous j2.\n" "\n" "The tags are strings, with these meanings:\n" "\n" "'replace': a[i1:i2] should be replaced by b[j1:j2]\n" "'delete': a[i1:i2] should be deleted.\n" " Note that j1==j2 in this case.\n" "'insert': b[j1:j2] should be inserted at a[i1:i1].\n" " Note that i1==i2 in this case.\n" "'equal': a[i1:i2] == b[j1:j2]\n" "\n" ">>> a = \"qabxcd\"\n" ">>> b = \"abycdf\"\n" ">>> s = PatienceSequenceMatcher(None, a, b)\n" ">>> for tag, i1, i2, j1, j2 in s.get_opcodes():\n" "... print (\"%7s a[%d:%d] (%s) b[%d:%d] (%s)\" %\n" "... (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2]))\n" " delete a[0:1] (q) b[0:0] ()\n" " equal a[1:3] (ab) b[0:2] (ab)\n" "replace a[3:4] (x) b[2:3] (y)\n" " equal a[4:6] (cd) b[3:5] (cd)\n" " insert a[6:6] () b[5:6] (f)\n"; static PyObject * PatienceSequenceMatcher_get_opcodes(PatienceSequenceMatcher* self) { PyObject *answer, *item; Py_ssize_t i, j, k, ai, bj; int tag, res; struct matching_blocks matches; matches.count = 0; matches.matches = (struct matching_block *)guarded_malloc(sizeof(struct matching_block) * (self->bsize + 1)); if (matches.matches == NULL) return PyErr_NoMemory(); res = recurse_matches(&matches, &self->hashtable, self->backpointers, self->a, self->b, 0, 0, self->asize, self->bsize, 10); if (!res) { free(matches.matches); return PyErr_NoMemory(); } matches.matches[matches.count].a = self->asize; matches.matches[matches.count].b = self->bsize; matches.matches[matches.count].len = 0; matches.count++; answer = PyList_New(0); if (answer == NULL) { free(matches.matches); return NULL; } i = j = 0; for (k = 0; k < matches.count; k++) { ai = matches.matches[k].a; bj = matches.matches[k].b; tag = -1; if (i < ai && j < bj) tag = OP_REPLACE; else if (i < ai) tag = OP_DELETE; else if (j < bj) tag = OP_INSERT; if (tag != -1) { item = Py_BuildValue("snnnn", opcode_names[tag], i, ai, j, bj); if (item == NULL) goto error; if (PyList_Append(answer, item) != 0) goto error; } i = ai + matches.matches[k].len; j = bj + matches.matches[k].len; if (matches.matches[k].len > 0) { item = Py_BuildValue("snnnn", opcode_names[OP_EQUAL], ai, i, bj, j); if (item == NULL) goto error; if (PyList_Append(answer, item) != 0) goto error; } } free(matches.matches); return answer; error: free(matches.matches); Py_DECREF(answer); return NULL; } static char PatienceSequenceMatcher_get_grouped_opcodes_doc[] = "Isolate change clusters by eliminating ranges with no changes.\n" "\n" "Return a list of groups with upto n lines of context.\n" "Each group is in the same format as returned by get_opcodes().\n" "\n" ">>> from pprint import pprint\n" ">>> a = map(str, range(1,40))\n" ">>> b = a[:]\n" ">>> b[8:8] = ['i'] # Make an insertion\n" ">>> b[20] += 'x' # Make a replacement\n" ">>> b[23:28] = [] # Make a deletion\n" ">>> b[30] += 'y' # Make another replacement\n" ">>> pprint(PatienceSequenceMatcher(None,a,b).get_grouped_opcodes())\n" "[[('equal', 5, 8, 5, 8), ('insert', 8, 8, 8, 9), ('equal', 8, 11, 9, 12)],\n" " [('equal', 16, 19, 17, 20),\n" " ('replace', 19, 20, 20, 21),\n" " ('equal', 20, 22, 21, 23),\n" " ('delete', 22, 27, 23, 23),\n" " ('equal', 27, 30, 23, 26)],\n" " [('equal', 31, 34, 27, 30),\n" " ('replace', 34, 35, 30, 31),\n" " ('equal', 35, 38, 31, 34)]]\n"; static PyObject * PatienceSequenceMatcher_get_grouped_opcodes(PatienceSequenceMatcher* self, PyObject *args) { PyObject *answer, *group, *item; Py_ssize_t i, j, k, ai, bj, size, ncodes, tag; Py_ssize_t i1, i2, j1, j2; int n = 3, nn, res; struct matching_blocks matches; struct opcode *codes; if (!PyArg_ParseTuple(args, "|i", &n)) return NULL; matches.count = 0; matches.matches = (struct matching_block *)guarded_malloc(sizeof(struct matching_block) * (self->bsize + 1)); if (matches.matches == NULL) return PyErr_NoMemory(); res = recurse_matches(&matches, &self->hashtable, self->backpointers, self->a, self->b, 0, 0, self->asize, self->bsize, 10); if (!res) { free(matches.matches); return PyErr_NoMemory(); } matches.matches[matches.count].a = self->asize; matches.matches[matches.count].b = self->bsize; matches.matches[matches.count].len = 0; matches.count++; ncodes = 0; codes = (struct opcode *)guarded_malloc(sizeof(struct opcode) * matches.count * 2); if (codes == NULL) { free(matches.matches); return PyErr_NoMemory(); } i = j = 0; for (k = 0; k < matches.count; k++) { ai = matches.matches[k].a; bj = matches.matches[k].b; tag = -1; if (i < ai && j < bj) tag = OP_REPLACE; else if (i < ai) tag = OP_DELETE; else if (j < bj) tag = OP_INSERT; if (tag != -1) { codes[ncodes].tag = tag; codes[ncodes].i1 = i; codes[ncodes].i2 = ai; codes[ncodes].j1 = j; codes[ncodes].j2 = bj; ncodes++; } i = ai + matches.matches[k].len; j = bj + matches.matches[k].len; if (matches.matches[k].len > 0) { codes[ncodes].tag = OP_EQUAL; codes[ncodes].i1 = ai; codes[ncodes].i2 = i; codes[ncodes].j1 = bj; codes[ncodes].j2 = j; ncodes++; } } if (ncodes == 0) { codes[ncodes].tag = OP_EQUAL; codes[ncodes].i1 = 0; codes[ncodes].i2 = 1; codes[ncodes].j1 = 0; codes[ncodes].j2 = 1; ncodes++; } /* fixup leading and trailing groups if they show no changes. */ if (codes[0].tag == OP_EQUAL) { codes[0].i1 = MAX(codes[0].i1, codes[0].i2 - n); codes[0].j1 = MAX(codes[0].j1, codes[0].j2 - n); } if (codes[ncodes - 1].tag == OP_EQUAL) { codes[ncodes - 1].i2 = MIN(codes[ncodes - 1].i2, codes[ncodes - 1].i1 + n); codes[ncodes - 1].j2 = MIN(codes[ncodes - 1].j2, codes[ncodes - 1].j1 + n); } group = NULL; answer = PyList_New(0); if (answer == NULL) goto error; group = PyList_New(0); if (group == NULL) goto error; nn = n + n; tag = -1; for (i = 0; i < ncodes; i++) { tag = codes[i].tag; i1 = codes[i].i1; i2 = codes[i].i2; j1 = codes[i].j1; j2 = codes[i].j2; /* end the current group and start a new one whenever there is a large range with no changes. */ if (tag == OP_EQUAL && i2 - i1 > nn) { item = Py_BuildValue("snnnn", opcode_names[tag], i1, MIN(i2, i1 + n), j1, MIN(j2, j1 + n)); if (item == NULL) goto error; if (PyList_Append(group, item) != 0) goto error; if (PyList_Append(answer, group) != 0) goto error; group = PyList_New(0); if (group == NULL) goto error; i1 = MAX(i1, i2 - n); j1 = MAX(j1, j2 - n); } item = Py_BuildValue("snnnn", opcode_names[tag], i1, i2, j1 ,j2); if (item == NULL) goto error; if (PyList_Append(group, item) != 0) goto error; } size = PyList_Size(group); if (size > 0 && !(size == 1 && tag == OP_EQUAL)) { if (PyList_Append(answer, group) != 0) goto error; } else Py_DECREF(group); free(codes); free(matches.matches); return answer; error: free(codes); free(matches.matches); Py_DECREF(group); Py_DECREF(answer); return NULL; } static PyMethodDef PatienceSequenceMatcher_methods[] = { {"get_matching_blocks", (PyCFunction)PatienceSequenceMatcher_get_matching_blocks, METH_NOARGS, PatienceSequenceMatcher_get_matching_blocks_doc}, {"get_opcodes", (PyCFunction)PatienceSequenceMatcher_get_opcodes, METH_NOARGS, PatienceSequenceMatcher_get_opcodes_doc}, {"get_grouped_opcodes", (PyCFunction)PatienceSequenceMatcher_get_grouped_opcodes, METH_VARARGS, PatienceSequenceMatcher_get_grouped_opcodes_doc}, {NULL} }; static char PatienceSequenceMatcher_doc[] = "C implementation of PatienceSequenceMatcher"; static PyTypeObject PatienceSequenceMatcherType = { PyVarObject_HEAD_INIT(NULL, 0) "PatienceSequenceMatcher", /* tp_name */ sizeof(PatienceSequenceMatcher), /* tp_basicsize */ 0, /* tp_itemsize */ (destructor)PatienceSequenceMatcher_dealloc, /* tp_dealloc */ NULL, /* tp_print */ NULL, /* tp_getattr */ NULL, /* tp_setattr */ NULL, /* tp_compare */ NULL, /* tp_repr */ NULL, /* tp_as_number */ NULL, /* tp_as_sequence */ NULL, /* tp_as_mapping */ NULL, /* tp_hash */ NULL, /* tp_call */ NULL, /* tp_str */ NULL, /* tp_getattro */ NULL, /* tp_setattro */ NULL, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT, /* tp_flags */ PatienceSequenceMatcher_doc, /* tp_doc */ NULL, /* tp_traverse */ NULL, /* tp_clear */ NULL, /* tp_richcompare */ 0, /* tp_weaklistoffset */ NULL, /* tp_iter */ NULL, /* tp_iternext */ PatienceSequenceMatcher_methods, /* tp_methods */ NULL, /* tp_members */ NULL, /* tp_getset */ NULL, /* tp_base */ NULL, /* tp_dict */ NULL, /* tp_descr_get */ NULL, /* tp_descr_set */ 0, /* tp_dictoffset */ NULL, /* tp_init */ NULL, /* tp_alloc */ PatienceSequenceMatcher_new, /* NULL */ NULL, /* tp_free */ }; static PyMethodDef cpatiencediff_methods[] = { {"unique_lcs_c", py_unique_lcs, METH_VARARGS}, {"recurse_matches_c", py_recurse_matches, METH_VARARGS}, {NULL, NULL} }; static PyObject * moduleinit(void) { PyObject* m; if (PyType_Ready(&PatienceSequenceMatcherType) < 0) return NULL; #if PY_MAJOR_VERSION >= 3 static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "_patiencediff_c", /* m_name */ "C implementation of PatienceSequenceMatcher", /* m_doc */ -1, /* m_size */ cpatiencediff_methods, /* m_methods */ NULL, /* m_reload */ NULL, /* m_traverse */ NULL, /* m_clear*/ NULL, /* m_free */ }; m = PyModule_Create(&moduledef); #else m = Py_InitModule3("_patiencediff_c", cpatiencediff_methods, "C implementation of PatienceSequenceMatcher"); #endif if (m == NULL) return NULL; Py_INCREF(&PatienceSequenceMatcherType); PyModule_AddObject(m, "PatienceSequenceMatcher_c", (PyObject *)&PatienceSequenceMatcherType); return m; } #if PY_MAJOR_VERSION >= 3 PyMODINIT_FUNC PyInit__patiencediff_c(void) { return moduleinit(); } #else PyMODINIT_FUNC init_patiencediff_c(void) { moduleinit(); } #endif /* vim: sw=4 et */ ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1590202904.0 patiencediff-0.2.1/patiencediff/_patiencediff_py.py0000644000175000017500000002175000000000000023202 0ustar00jelmerjelmer00000000000000#!/usr/bin/env python # Copyright (C) 2005 Bram Cohen, Copyright (C) 2005, 2006 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA from __future__ import absolute_import from bisect import bisect import difflib class MaxRecursionDepth(Exception): def __init__(self): super(MaxRecursionDepth, self).__init__('max recursion depth reached') def unique_lcs_py(a, b): """Find the longest common subset for unique lines. :param a: An indexable object (such as string or list of strings) :param b: Another indexable object (such as string or list of strings) :return: A list of tuples, one for each line which is matched. [(line_in_a, line_in_b), ...] This only matches lines which are unique on both sides. This helps prevent common lines from over influencing match results. The longest common subset uses the Patience Sorting algorithm: http://en.wikipedia.org/wiki/Patience_sorting """ # set index[line in a] = position of line in a unless # a is a duplicate, in which case it's set to None index = {} for i, line in enumerate(a): if line in index: index[line] = None else: index[line] = i # make btoa[i] = position of line i in a, unless # that line doesn't occur exactly once in both, # in which case it's set to None btoa = [None] * len(b) index2 = {} for pos, line in enumerate(b): next = index.get(line) if next is not None: if line in index2: # unset the previous mapping, which we now know to # be invalid because the line isn't unique btoa[index2[line]] = None del index[line] else: index2[line] = pos btoa[pos] = next # this is the Patience sorting algorithm # see http://en.wikipedia.org/wiki/Patience_sorting backpointers = [None] * len(b) stacks = [] lasts = [] k = 0 for bpos, apos in enumerate(btoa): if apos is None: continue # as an optimization, check if the next line comes at the end, # because it usually does if stacks and stacks[-1] < apos: k = len(stacks) # as an optimization, check if the next line comes right after # the previous line, because usually it does elif stacks and stacks[k] < apos and (k == len(stacks) - 1 or stacks[k+1] > apos): k += 1 else: k = bisect(stacks, apos) if k > 0: backpointers[bpos] = lasts[k-1] if k < len(stacks): stacks[k] = apos lasts[k] = bpos else: stacks.append(apos) lasts.append(bpos) if len(lasts) == 0: return [] result = [] k = lasts[-1] while k is not None: result.append((btoa[k], k)) k = backpointers[k] result.reverse() return result def recurse_matches_py(a, b, alo, blo, ahi, bhi, answer, maxrecursion): """Find all of the matching text in the lines of a and b. :param a: A sequence :param b: Another sequence :param alo: The start location of a to check, typically 0 :param ahi: The start location of b to check, typically 0 :param ahi: The maximum length of a to check, typically len(a) :param bhi: The maximum length of b to check, typically len(b) :param answer: The return array. Will be filled with tuples indicating [(line_in_a, line_in_b)] :param maxrecursion: The maximum depth to recurse. Must be a positive integer. :return: None, the return value is in the parameter answer, which should be a list """ if maxrecursion < 0: # this will never happen normally, this check is to prevent DOS attacks raise MaxRecursionDepth() oldlength = len(answer) if alo == ahi or blo == bhi: return last_a_pos = alo-1 last_b_pos = blo-1 for apos, bpos in unique_lcs_py(a[alo:ahi], b[blo:bhi]): # recurse between lines which are unique in each file and match apos += alo bpos += blo # Most of the time, you will have a sequence of similar entries if last_a_pos+1 != apos or last_b_pos+1 != bpos: recurse_matches_py( a, b, last_a_pos+1, last_b_pos+1, apos, bpos, answer, maxrecursion - 1) last_a_pos = apos last_b_pos = bpos answer.append((apos, bpos)) if len(answer) > oldlength: # find matches between the last match and the end recurse_matches_py(a, b, last_a_pos+1, last_b_pos+1, ahi, bhi, answer, maxrecursion - 1) elif a[alo] == b[blo]: # find matching lines at the very beginning while alo < ahi and blo < bhi and a[alo] == b[blo]: answer.append((alo, blo)) alo += 1 blo += 1 recurse_matches_py(a, b, alo, blo, ahi, bhi, answer, maxrecursion - 1) elif a[ahi - 1] == b[bhi - 1]: # find matching lines at the very end nahi = ahi - 1 nbhi = bhi - 1 while nahi > alo and nbhi > blo and a[nahi - 1] == b[nbhi - 1]: nahi -= 1 nbhi -= 1 recurse_matches_py(a, b, last_a_pos+1, last_b_pos+1, nahi, nbhi, answer, maxrecursion - 1) for i in range(ahi - nahi): answer.append((nahi + i, nbhi + i)) def _collapse_sequences(matches): """Find sequences of lines. Given a sequence of [(line_in_a, line_in_b),] find regions where they both increment at the same time """ answer = [] start_a = start_b = None length = 0 for i_a, i_b in matches: if (start_a is not None and (i_a == start_a + length) and (i_b == start_b + length)): length += 1 else: if start_a is not None: answer.append((start_a, start_b, length)) start_a = i_a start_b = i_b length = 1 if length != 0: answer.append((start_a, start_b, length)) return answer def _check_consistency(answer): # For consistency sake, make sure all matches are only increasing next_a = -1 next_b = -1 for (a, b, match_len) in answer: if a < next_a: raise ValueError('Non increasing matches for a') if b < next_b: raise ValueError('Non increasing matches for b') next_a = a + match_len next_b = b + match_len class PatienceSequenceMatcher_py(difflib.SequenceMatcher): """Compare a pair of sequences using longest common subset.""" _do_check_consistency = True def __init__(self, isjunk=None, a='', b=''): if isjunk is not None: raise NotImplementedError('Currently we do not support' ' isjunk for sequence matching') difflib.SequenceMatcher.__init__(self, isjunk, a, b) def get_matching_blocks(self): """Return list of triples describing matching subsequences. Each triple is of the form (i, j, n), and means that a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in i and in j. The last triple is a dummy, (len(a), len(b), 0), and is the only triple with n==0. >>> s = PatienceSequenceMatcher(None, "abxcd", "abcd") >>> s.get_matching_blocks() [(0, 0, 2), (3, 2, 2), (5, 4, 0)] """ # jam 20060525 This is the python 2.4.1 difflib get_matching_blocks # implementation which uses __helper. 2.4.3 got rid of helper for # doing it inline with a queue. # We should consider doing the same for recurse_matches if self.matching_blocks is not None: return self.matching_blocks matches = [] recurse_matches_py(self.a, self.b, 0, 0, len(self.a), len(self.b), matches, 10) # Matches now has individual line pairs of # line A matches line B, at the given offsets self.matching_blocks = _collapse_sequences(matches) self.matching_blocks.append((len(self.a), len(self.b), 0)) if PatienceSequenceMatcher_py._do_check_consistency: if __debug__: _check_consistency(self.matching_blocks) return self.matching_blocks ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1590202848.0 patiencediff-0.2.1/patiencediff/test_patiencediff.py0000644000175000017500000005467700000000000023410 0ustar00jelmerjelmer00000000000000# Copyright (C) 2005, 2006, 2007 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA import os import patiencediff import shutil import sys import tempfile import unittest from . import _patiencediff_py if sys.version_info[0] == 3: unichr = chr class TestPatienceDiffLib(unittest.TestCase): def setUp(self): super(TestPatienceDiffLib, self).setUp() self._unique_lcs = _patiencediff_py.unique_lcs_py self._recurse_matches = _patiencediff_py.recurse_matches_py self._PatienceSequenceMatcher = \ _patiencediff_py.PatienceSequenceMatcher_py def test_diff_unicode_string(self): a = ''.join([unichr(i) for i in range(4000, 4500, 3)]) b = ''.join([unichr(i) for i in range(4300, 4800, 2)]) sm = self._PatienceSequenceMatcher(None, a, b) mb = sm.get_matching_blocks() self.assertEqual(35, len(mb)) def test_unique_lcs(self): unique_lcs = self._unique_lcs self.assertEqual(unique_lcs('', ''), []) self.assertEqual(unique_lcs('', 'a'), []) self.assertEqual(unique_lcs('a', ''), []) self.assertEqual(unique_lcs('a', 'a'), [(0, 0)]) self.assertEqual(unique_lcs('a', 'b'), []) self.assertEqual(unique_lcs('ab', 'ab'), [(0, 0), (1, 1)]) self.assertEqual( unique_lcs('abcde', 'cdeab'), [(2, 0), (3, 1), (4, 2)]) self.assertEqual( unique_lcs('cdeab', 'abcde'), [(0, 2), (1, 3), (2, 4)]) self.assertEqual( unique_lcs('abXde', 'abYde'), [(0, 0), (1, 1), (3, 3), (4, 4)]) self.assertEqual(unique_lcs('acbac', 'abc'), [(2, 1)]) def test_recurse_matches(self): def test_one(a, b, matches): test_matches = [] self._recurse_matches( a, b, 0, 0, len(a), len(b), test_matches, 10) self.assertEqual(test_matches, matches) test_one(['a', '', 'b', '', 'c'], ['a', 'a', 'b', 'c', 'c'], [(0, 0), (2, 2), (4, 4)]) test_one(['a', 'c', 'b', 'a', 'c'], ['a', 'b', 'c'], [(0, 0), (2, 1), (4, 2)]) # Even though 'bc' is not unique globally, and is surrounded by # non-matching lines, we should still match, because they are locally # unique test_one('abcdbce', 'afbcgdbce', [(0, 0), (1, 2), (2, 3), (3, 5), (4, 6), (5, 7), (6, 8)]) # recurse_matches doesn't match non-unique # lines surrounded by bogus text. # The update has been done in patiencediff.SequenceMatcher instead # This is what it could be # test_one('aBccDe', 'abccde', [(0,0), (2,2), (3,3), (5,5)]) # This is what it currently gives: test_one('aBccDe', 'abccde', [(0, 0), (5, 5)]) def assertDiffBlocks(self, a, b, expected_blocks): """Check that the sequence matcher returns the correct blocks. :param a: A sequence to match :param b: Another sequence to match :param expected_blocks: The expected output, not including the final matching block (len(a), len(b), 0) """ matcher = self._PatienceSequenceMatcher(None, a, b) blocks = matcher.get_matching_blocks() last = blocks.pop() self.assertEqual((len(a), len(b), 0), last) self.assertEqual(expected_blocks, blocks) def test_matching_blocks(self): # Some basic matching tests self.assertDiffBlocks('', '', []) self.assertDiffBlocks([], [], []) self.assertDiffBlocks('abc', '', []) self.assertDiffBlocks('', 'abc', []) self.assertDiffBlocks('abcd', 'abcd', [(0, 0, 4)]) self.assertDiffBlocks('abcd', 'abce', [(0, 0, 3)]) self.assertDiffBlocks('eabc', 'abce', [(1, 0, 3)]) self.assertDiffBlocks('eabce', 'abce', [(1, 0, 4)]) self.assertDiffBlocks('abcde', 'abXde', [(0, 0, 2), (3, 3, 2)]) self.assertDiffBlocks('abcde', 'abXYZde', [(0, 0, 2), (3, 5, 2)]) self.assertDiffBlocks('abde', 'abXYZde', [(0, 0, 2), (2, 5, 2)]) # This may check too much, but it checks to see that # a copied block stays attached to the previous section, # not the later one. # difflib would tend to grab the trailing longest match # which would make the diff not look right self.assertDiffBlocks('abcdefghijklmnop', 'abcdefxydefghijklmnop', [(0, 0, 6), (6, 11, 10)]) # make sure it supports passing in lists self.assertDiffBlocks( ['hello there\n', 'world\n', 'how are you today?\n'], ['hello there\n', 'how are you today?\n'], [(0, 0, 1), (2, 1, 1)]) # non unique lines surrounded by non-matching lines # won't be found self.assertDiffBlocks('aBccDe', 'abccde', [(0, 0, 1), (5, 5, 1)]) # But they only need to be locally unique self.assertDiffBlocks( 'aBcDec', 'abcdec', [(0, 0, 1), (2, 2, 1), (4, 4, 2)]) # non unique blocks won't be matched self.assertDiffBlocks('aBcdEcdFg', 'abcdecdfg', [(0, 0, 1), (8, 8, 1)]) # but locally unique ones will self.assertDiffBlocks( 'aBcdEeXcdFg', 'abcdecdfg', [(0, 0, 1), (2, 2, 2), (5, 4, 1), (7, 5, 2), (10, 8, 1)]) self.assertDiffBlocks('abbabbXd', 'cabbabxd', [(7, 7, 1)]) self.assertDiffBlocks('abbabbbb', 'cabbabbc', []) self.assertDiffBlocks('bbbbbbbb', 'cbbbbbbc', []) def test_matching_blocks_tuples(self): # Some basic matching tests self.assertDiffBlocks([], [], []) self.assertDiffBlocks([('a',), ('b',), ('c,')], [], []) self.assertDiffBlocks([], [('a',), ('b',), ('c,')], []) self.assertDiffBlocks([('a',), ('b',), ('c,')], [('a',), ('b',), ('c,')], [(0, 0, 3)]) self.assertDiffBlocks([('a',), ('b',), ('c,')], [('a',), ('b',), ('d,')], [(0, 0, 2)]) self.assertDiffBlocks([('d',), ('b',), ('c,')], [('a',), ('b',), ('c,')], [(1, 1, 2)]) self.assertDiffBlocks([('d',), ('a',), ('b',), ('c,')], [('a',), ('b',), ('c,')], [(1, 0, 3)]) self.assertDiffBlocks([('a', 'b'), ('c', 'd'), ('e', 'f')], [('a', 'b'), ('c', 'X'), ('e', 'f')], [(0, 0, 1), (2, 2, 1)]) self.assertDiffBlocks([('a', 'b'), ('c', 'd'), ('e', 'f')], [('a', 'b'), ('c', 'dX'), ('e', 'f')], [(0, 0, 1), (2, 2, 1)]) def test_opcodes(self): def chk_ops(a, b, expected_codes): s = self._PatienceSequenceMatcher(None, a, b) self.assertEqual(expected_codes, s.get_opcodes()) chk_ops('', '', []) chk_ops([], [], []) chk_ops('abc', '', [('delete', 0, 3, 0, 0)]) chk_ops('', 'abc', [('insert', 0, 0, 0, 3)]) chk_ops('abcd', 'abcd', [('equal', 0, 4, 0, 4)]) chk_ops('abcd', 'abce', [('equal', 0, 3, 0, 3), ('replace', 3, 4, 3, 4) ]) chk_ops('eabc', 'abce', [('delete', 0, 1, 0, 0), ('equal', 1, 4, 0, 3), ('insert', 4, 4, 3, 4) ]) chk_ops('eabce', 'abce', [('delete', 0, 1, 0, 0), ('equal', 1, 5, 0, 4) ]) chk_ops('abcde', 'abXde', [('equal', 0, 2, 0, 2), ('replace', 2, 3, 2, 3), ('equal', 3, 5, 3, 5) ]) chk_ops('abcde', 'abXYZde', [('equal', 0, 2, 0, 2), ('replace', 2, 3, 2, 5), ('equal', 3, 5, 5, 7) ]) chk_ops('abde', 'abXYZde', [('equal', 0, 2, 0, 2), ('insert', 2, 2, 2, 5), ('equal', 2, 4, 5, 7) ]) chk_ops('abcdefghijklmnop', 'abcdefxydefghijklmnop', [('equal', 0, 6, 0, 6), ('insert', 6, 6, 6, 11), ('equal', 6, 16, 11, 21) ]) chk_ops( ['hello there\n', 'world\n', 'how are you today?\n'], ['hello there\n', 'how are you today?\n'], [('equal', 0, 1, 0, 1), ('delete', 1, 2, 1, 1), ('equal', 2, 3, 1, 2), ]) chk_ops('aBccDe', 'abccde', [('equal', 0, 1, 0, 1), ('replace', 1, 5, 1, 5), ('equal', 5, 6, 5, 6), ]) chk_ops('aBcDec', 'abcdec', [('equal', 0, 1, 0, 1), ('replace', 1, 2, 1, 2), ('equal', 2, 3, 2, 3), ('replace', 3, 4, 3, 4), ('equal', 4, 6, 4, 6), ]) chk_ops('aBcdEcdFg', 'abcdecdfg', [('equal', 0, 1, 0, 1), ('replace', 1, 8, 1, 8), ('equal', 8, 9, 8, 9) ]) chk_ops('aBcdEeXcdFg', 'abcdecdfg', [('equal', 0, 1, 0, 1), ('replace', 1, 2, 1, 2), ('equal', 2, 4, 2, 4), ('delete', 4, 5, 4, 4), ('equal', 5, 6, 4, 5), ('delete', 6, 7, 5, 5), ('equal', 7, 9, 5, 7), ('replace', 9, 10, 7, 8), ('equal', 10, 11, 8, 9) ]) def test_grouped_opcodes(self): def chk_ops(a, b, expected_codes, n=3): s = self._PatienceSequenceMatcher(None, a, b) self.assertEqual(expected_codes, list(s.get_grouped_opcodes(n))) chk_ops('', '', []) chk_ops([], [], []) chk_ops('abc', '', [[('delete', 0, 3, 0, 0)]]) chk_ops('', 'abc', [[('insert', 0, 0, 0, 3)]]) chk_ops('abcd', 'abcd', []) chk_ops('abcd', 'abce', [[('equal', 0, 3, 0, 3), ('replace', 3, 4, 3, 4) ]]) chk_ops('eabc', 'abce', [[('delete', 0, 1, 0, 0), ('equal', 1, 4, 0, 3), ('insert', 4, 4, 3, 4)]]) chk_ops('abcdefghijklmnop', 'abcdefxydefghijklmnop', [[('equal', 3, 6, 3, 6), ('insert', 6, 6, 6, 11), ('equal', 6, 9, 11, 14) ]]) chk_ops('abcdefghijklmnop', 'abcdefxydefghijklmnop', [[('equal', 2, 6, 2, 6), ('insert', 6, 6, 6, 11), ('equal', 6, 10, 11, 15) ]], 4) chk_ops('Xabcdef', 'abcdef', [[('delete', 0, 1, 0, 0), ('equal', 1, 4, 0, 3) ]]) chk_ops('abcdef', 'abcdefX', [[('equal', 3, 6, 3, 6), ('insert', 6, 6, 6, 7) ]]) def test_multiple_ranges(self): # There was an earlier bug where we used a bad set of ranges, # this triggers that specific bug, to make sure it doesn't regress self.assertDiffBlocks('abcdefghijklmnop', 'abcXghiYZQRSTUVWXYZijklmnop', [(0, 0, 3), (6, 4, 3), (9, 20, 7)]) self.assertDiffBlocks('ABCd efghIjk L', 'AxyzBCn mo pqrstuvwI1 2 L', [(0, 0, 1), (1, 4, 2), (9, 19, 1), (12, 23, 3)]) # These are rot13 code snippets. self.assertDiffBlocks('''\ trg nqqrq jura lbh nqq n svyr va gur qverpgbel. """ gnxrf_netf = ['svyr*'] gnxrf_bcgvbaf = ['ab-erphefr'] qrs eha(frys, svyr_yvfg, ab_erphefr=Snyfr): sebz omeyvo.nqq vzcbeg fzneg_nqq, nqq_ercbegre_cevag, nqq_ercbegre_ahyy vs vf_dhvrg(): ercbegre = nqq_ercbegre_ahyy ryfr: ercbegre = nqq_ercbegre_cevag fzneg_nqq(svyr_yvfg, abg ab_erphefr, ercbegre) pynff pzq_zxqve(Pbzznaq): '''.splitlines(True), '''\ trg nqqrq jura lbh nqq n svyr va gur qverpgbel. --qel-eha jvyy fubj juvpu svyrf jbhyq or nqqrq, ohg abg npghnyyl nqq gurz. """ gnxrf_netf = ['svyr*'] gnxrf_bcgvbaf = ['ab-erphefr', 'qel-eha'] qrs eha(frys, svyr_yvfg, ab_erphefr=Snyfr, qel_eha=Snyfr): vzcbeg omeyvo.nqq vs qel_eha: vs vf_dhvrg(): # Guvf vf cbvagyrff, ohg V'q engure abg envfr na reebe npgvba = omeyvo.nqq.nqq_npgvba_ahyy ryfr: npgvba = omeyvo.nqq.nqq_npgvba_cevag ryvs vf_dhvrg(): npgvba = omeyvo.nqq.nqq_npgvba_nqq ryfr: npgvba = omeyvo.nqq.nqq_npgvba_nqq_naq_cevag omeyvo.nqq.fzneg_nqq(svyr_yvfg, abg ab_erphefr, npgvba) pynff pzq_zxqve(Pbzznaq): '''.splitlines(True), [(0, 0, 1), (1, 4, 2), (9, 19, 1), (12, 23, 3)]) def test_patience_unified_diff(self): txt_a = ['hello there\n', 'world\n', 'how are you today?\n'] txt_b = ['hello there\n', 'how are you today?\n'] unified_diff = patiencediff.unified_diff psm = self._PatienceSequenceMatcher self.assertEqual(['--- \n', '+++ \n', '@@ -1,3 +1,2 @@\n', ' hello there\n', '-world\n', ' how are you today?\n' ], list(unified_diff( txt_a, txt_b, sequencematcher=psm))) txt_a = [x+'\n' for x in 'abcdefghijklmnop'] txt_b = [x+'\n' for x in 'abcdefxydefghijklmnop'] # This is the result with LongestCommonSubstring matching self.assertEqual(['--- \n', '+++ \n', '@@ -1,6 +1,11 @@\n', ' a\n', ' b\n', ' c\n', '+d\n', '+e\n', '+f\n', '+x\n', '+y\n', ' d\n', ' e\n', ' f\n'], list(unified_diff(txt_a, txt_b))) # And the patience diff self.assertEqual(['--- \n', '+++ \n', '@@ -4,6 +4,11 @@\n', ' d\n', ' e\n', ' f\n', '+x\n', '+y\n', '+d\n', '+e\n', '+f\n', ' g\n', ' h\n', ' i\n', ], list(unified_diff( txt_a, txt_b, sequencematcher=psm))) def test_patience_unified_diff_with_dates(self): txt_a = ['hello there\n', 'world\n', 'how are you today?\n'] txt_b = ['hello there\n', 'how are you today?\n'] unified_diff = patiencediff.unified_diff psm = self._PatienceSequenceMatcher self.assertEqual(['--- a\t2008-08-08\n', '+++ b\t2008-09-09\n', '@@ -1,3 +1,2 @@\n', ' hello there\n', '-world\n', ' how are you today?\n' ], list(unified_diff( txt_a, txt_b, fromfile='a', tofile='b', fromfiledate='2008-08-08', tofiledate='2008-09-09', sequencematcher=psm))) class TestPatienceDiffLib_c(TestPatienceDiffLib): def setUp(self): super(TestPatienceDiffLib_c, self).setUp() try: from . import _patiencediff_c except ImportError: self.skipTest('C extension not built') self._unique_lcs = _patiencediff_c.unique_lcs_c self._recurse_matches = _patiencediff_c.recurse_matches_c self._PatienceSequenceMatcher = \ _patiencediff_c.PatienceSequenceMatcher_c def test_unhashable(self): """We should get a proper exception here.""" # We need to be able to hash items in the sequence, lists are # unhashable, and thus cannot be diffed self.assertRaises( TypeError, self._PatienceSequenceMatcher, None, [[]], []) self.assertRaises( TypeError, self._PatienceSequenceMatcher, None, ['valid', []], []) self.assertRaises( TypeError, self._PatienceSequenceMatcher, None, ['valid'], [[]]) self.assertRaises( TypeError, self._PatienceSequenceMatcher, None, ['valid'], ['valid', []]) class TestPatienceDiffLibFiles(unittest.TestCase): def setUp(self): super(TestPatienceDiffLibFiles, self).setUp() self._PatienceSequenceMatcher = \ _patiencediff_py.PatienceSequenceMatcher_py self.test_dir = tempfile.mkdtemp() self.addCleanup(lambda: shutil.rmtree(self.test_dir)) def test_patience_unified_diff_files(self): txt_a = [b'hello there\n', b'world\n', b'how are you today?\n'] txt_b = [b'hello there\n', b'how are you today?\n'] with open(os.path.join(self.test_dir, 'a1'), 'wb') as f: f.writelines(txt_a) with open(os.path.join(self.test_dir, 'b1'), 'wb') as f: f.writelines(txt_b) unified_diff_files = patiencediff.unified_diff_files psm = self._PatienceSequenceMatcher old_pwd = os.getcwd() os.chdir(self.test_dir) try: self.assertEqual(['--- a1\n', '+++ b1\n', '@@ -1,3 +1,2 @@\n', ' hello there\n', '-world\n', ' how are you today?\n', ], list(unified_diff_files( 'a1', 'b1', sequencematcher=psm))) finally: os.chdir(old_pwd) txt_a = [x+'\n' for x in 'abcdefghijklmnop'] txt_b = [x+'\n' for x in 'abcdefxydefghijklmnop'] with open(os.path.join(self.test_dir, 'a2'), 'w') as f: f.writelines(txt_a) with open(os.path.join(self.test_dir, 'b2'), 'w') as f: f.writelines(txt_b) # This is the result with LongestCommonSubstring matching os.chdir(self.test_dir) try: self.assertEqual(['--- a2\n', '+++ b2\n', '@@ -1,6 +1,11 @@\n', ' a\n', ' b\n', ' c\n', '+d\n', '+e\n', '+f\n', '+x\n', '+y\n', ' d\n', ' e\n', ' f\n'], list(unified_diff_files('a2', 'b2'))) # And the patience diff self.assertEqual(['--- a2\n', '+++ b2\n', '@@ -4,6 +4,11 @@\n', ' d\n', ' e\n', ' f\n', '+x\n', '+y\n', '+d\n', '+e\n', '+f\n', ' g\n', ' h\n', ' i\n'], list(unified_diff_files('a2', 'b2', sequencematcher=psm))) finally: os.chdir(old_pwd) class TestPatienceDiffLibFiles_c(TestPatienceDiffLibFiles): def setUp(self): super(TestPatienceDiffLibFiles_c, self).setUp() try: from . import _patiencediff_c except ImportError: self.skipTest('C extension not built') self._PatienceSequenceMatcher = \ _patiencediff_c.PatienceSequenceMatcher_c class TestUsingCompiledIfAvailable(unittest.TestCase): def test_PatienceSequenceMatcher(self): try: from ._patiencediff_c import PatienceSequenceMatcher_c except ImportError: from ._patiencediff_py import PatienceSequenceMatcher_py self.assertIs(PatienceSequenceMatcher_py, patiencediff.PatienceSequenceMatcher) else: self.assertIs(PatienceSequenceMatcher_c, patiencediff.PatienceSequenceMatcher) def test_unique_lcs(self): try: from ._patiencediff_c import unique_lcs_c except ImportError: from ._patiencediff_py import unique_lcs_py self.assertIs(unique_lcs_py, patiencediff.unique_lcs) else: self.assertIs(unique_lcs_c, patiencediff.unique_lcs) def test_recurse_matches(self): try: from ._patiencediff_c import recurse_matches_c except ImportError: from ._patiencediff_py import recurse_matches_py self.assertIs(recurse_matches_py, patiencediff.recurse_matches) else: self.assertIs(recurse_matches_c, patiencediff.recurse_matches) ././@PaxHeader0000000000000000000000000000003400000000000011452 xustar000000000000000028 mtime=1602718803.7976947 patiencediff-0.2.1/patiencediff.egg-info/0000755000175000017500000000000000000000000021025 5ustar00jelmerjelmer00000000000000././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1602718803.0 patiencediff-0.2.1/patiencediff.egg-info/PKG-INFO0000644000175000017500000000446200000000000022130 0ustar00jelmerjelmer00000000000000Metadata-Version: 1.2 Name: patiencediff Version: 0.2.1 Summary: Python implementation of the patiencediff algorithm. Home-page: https://www.breezy-vcs.org/ Maintainer: Breezy Developers Maintainer-email: team@breezy-vcs.org License: GNU GPLv2 or later Description: This package contains the implementation of the ``patiencediff`` algorithm, as `first described `_ by Bram Cohen. Like Python's ``difflib``, this module provides both a convience ``unified_diff`` function for the generation of unified diffs of text files as well as a SequenceMatcher that can be used on arbitrary lists. Patiencediff provides a good balance of performance, nice output for humans, and implementation simplicity. The code in this package was extracted from the `Bazaar `_ code base. The package comes with two implementations: * A Python implementation (_patiencediff_py.py); this implementation only requires a Python interpreter and is the more readable version of the two * A C implementation implementation (_patiencediff_c.c); this implementation is faster, but requires a C compiler and is less readable Usage ===== To invoke patiencediff from the command-line:: python -m patiencediff file_a file_b Or from Python: >>> import patiencediff >>> print ''.join(patiencediff.unified_diff( ... ['a\n', 'b\n', 'b\n', 'c\n'], ... ['a\n', 'c\n', 'b\n'])) --- +++ @@ -1,4 +1,3 @@ a +c b -b -c Platform: UNKNOWN Classifier: Development Status :: 4 - Beta Classifier: License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+) Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Programming Language :: Python :: Implementation :: PyPy Classifier: Operating System :: POSIX ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1602718803.0 patiencediff-0.2.1/patiencediff.egg-info/SOURCES.txt0000644000175000017500000000067300000000000022717 0ustar00jelmerjelmer00000000000000.bzrignore .gitignore AUTHORS COPYING MANIFEST.in README.rst build.cmd setup.py .github/workflows/pythonpackage.yml .github/workflows/pythonpublish.yml patiencediff/__init__.py patiencediff/__main__.py patiencediff/_patiencediff_c.c patiencediff/_patiencediff_py.py patiencediff/test_patiencediff.py patiencediff.egg-info/PKG-INFO patiencediff.egg-info/SOURCES.txt patiencediff.egg-info/dependency_links.txt patiencediff.egg-info/top_level.txt././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1602718803.0 patiencediff-0.2.1/patiencediff.egg-info/dependency_links.txt0000644000175000017500000000000100000000000025073 0ustar00jelmerjelmer00000000000000 ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1602718803.0 patiencediff-0.2.1/patiencediff.egg-info/top_level.txt0000644000175000017500000000001500000000000023553 0ustar00jelmerjelmer00000000000000patiencediff ././@PaxHeader0000000000000000000000000000003400000000000011452 xustar000000000000000028 mtime=1602718803.7976947 patiencediff-0.2.1/setup.cfg0000644000175000017500000000004600000000000016533 0ustar00jelmerjelmer00000000000000[egg_info] tag_build = tag_date = 0 ././@PaxHeader0000000000000000000000000000002600000000000011453 xustar000000000000000022 mtime=1602718774.0 patiencediff-0.2.1/setup.py0000755000175000017500000000315300000000000016431 0ustar00jelmerjelmer00000000000000#!/usr/bin/env python3 # encoding: utf-8 from setuptools import setup, Extension from distutils import core with open('README.rst', 'r') as f: long_description = f.read() ext_modules = [ Extension( 'patiencediff._patiencediff_c', ['patiencediff/_patiencediff_c.c'])] class Distribution(core.Distribution): def is_pure(self): if self.pure: return True def has_ext_modules(self): return not self.pure global_options = core.Distribution.global_options + [ ('pure', None, "use pure Python code instead of C " "extensions (slower on CPython)")] pure = False setup(name="patiencediff", description="Python implementation of the patiencediff algorithm.", long_description=long_description, version="0.2.1", maintainer="Breezy Developers", maintainer_email="team@breezy-vcs.org", license="GNU GPLv2 or later", url="https://www.breezy-vcs.org/", packages=['patiencediff'], test_suite='patiencediff.test_patiencediff', distclass=Distribution, classifiers=[ 'Development Status :: 4 - Beta', 'License :: OSI Approved :: ' 'GNU General Public License v2 or later (GPLv2+)', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: PyPy', 'Operating System :: POSIX', ], ext_modules=ext_modules)