pax_global_header00006660000000000000000000000064140306371610014513gustar00rootroot0000000000000052 comment=b109c3c837fadb842552215a57c38041a4ef9422 moreorless-0.4.0/000077500000000000000000000000001403063716100137065ustar00rootroot00000000000000moreorless-0.4.0/.github/000077500000000000000000000000001403063716100152465ustar00rootroot00000000000000moreorless-0.4.0/.github/workflows/000077500000000000000000000000001403063716100173035ustar00rootroot00000000000000moreorless-0.4.0/.github/workflows/build.yml000066400000000000000000000013611403063716100211260ustar00rootroot00000000000000name: Build on: push: branches: - master - tmp-* tags: - v* pull_request: jobs: moreorless: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: python-version: [3.6, 3.7, 3.8] os: [macOS-latest, ubuntu-latest, windows-latest] steps: - name: Checkout uses: actions/checkout@v1 - name: Set Up Python ${{ matrix.python-version }} uses: actions/setup-python@v1 with: python-version: ${{ matrix.python-version }} - name: Install run: | python -m pip install --upgrade pip make setup pip install -U . - name: Test run: make test - name: Lint run: make lint moreorless-0.4.0/.gitignore000066400000000000000000000023221403063716100156750ustar00rootroot00000000000000# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ # Visual Studio Code .vscode/ moreorless-0.4.0/.vars.ini000066400000000000000000000004231403063716100154370ustar00rootroot00000000000000[vars] pypi_name = moreorless short_desc = Python diff wrapper url = https://github.com/thatch/moreorless/ author = Tim Hatch author_email = tim@timhatch.com package = moreorless envdir = {envdir} year = 2020 package_name = morelorless author_website = http://timhatch.com/ moreorless-0.4.0/LICENSE000066400000000000000000000020521403063716100147120ustar00rootroot00000000000000MIT License Copyright (c) 2020 Tim Hatch Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. moreorless-0.4.0/MANIFEST.in000066400000000000000000000001011403063716100154340ustar00rootroot00000000000000include *.md LICENSE recursive-include moreorless *.txt py.typed moreorless-0.4.0/Makefile000066400000000000000000000013521403063716100153470ustar00rootroot00000000000000PYTHON?=python SOURCES=moreorless setup.py .PHONY: venv venv: $(PYTHON) -m venv .venv source .venv/bin/activate && make setup @echo 'run `source .venv/bin/activate` to use virtualenv' # The rest of these are intended to be run within the venv, where python points # to whatever was used to set up the venv. .PHONY: setup setup: python -m pip install -Ur requirements-dev.txt .PHONY: test test: python -m coverage run -m moreorless.tests $(TESTOPTS) python -m coverage report .PHONY: format format: python -m ufmt format $(SOURCES) .PHONY: lint lint: python -m ufmt check $(SOURCES) python -m flake8 $(SOURCES) mypy --strict moreorless .PHONY: release release: rm -rf dist python setup.py sdist bdist_wheel twine upload dist/* moreorless-0.4.0/README.md000066400000000000000000000021221403063716100151620ustar00rootroot00000000000000# morelorless This is a thin wrapper around `difflib.unified_diff` that Does The Right Thing for "No newline at eof". The args are also simplified compared to `difflib`: ```py moreorless.unified_diff( astr: str, bstr: str, filename: str, n: int = 3, ) -> str: ... # raises moreorless.patch.PatchException moreorless.patch.apply_single_file( contents: str, patch: str, allow_offsets: bool = True, ) -> str: ... # uses click to write to stdout with colors moreorless.click.echo_color_unified_diff( astr: str, bstr: str, filename: str, n: int = 3 ) -> None: ... # if you want to use unified_diff yourself first (e.g. in another process) moreorless.click.echo_color_precomputed_diff( diff: str, ) -> None: ... ``` # License morelorless is copyright [Tim Hatch](http://timhatch.com/), and licensed under the MIT license. I am providing code in this repository to you under an open source license. This is my personal repository; the license you receive to my code is from me and not from my employer. See the `LICENSE` file for details. moreorless-0.4.0/moreorless/000077500000000000000000000000001403063716100161005ustar00rootroot00000000000000moreorless-0.4.0/moreorless/__init__.py000066400000000000000000000017511403063716100202150ustar00rootroot00000000000000import difflib import os.path __all__ = ["unified_diff"] def unified_diff( astr: str, bstr: str, filename: str, n: int = 3, ) -> str: """ Returns a unified diff string for the two inputs. Does not currently support creation or deletion where one of the filenames is `/dev/null` or patchlevels other than `-p1`. Does handle the "no newline at end of file" properly UNLIKE DIFFLIB. """ if os.path.isabs(filename): a_filename = filename b_filename = filename else: a_filename = f"a/{filename}" b_filename = f"b/{filename}" buf = [] gen = difflib.unified_diff( astr.splitlines(True), bstr.splitlines(True), a_filename, b_filename, n=n, ) for line in gen: buf.append(line) if not line.endswith("\n"): # Assume this is the only case where it can happen buf.append("\n\\ No newline at end of file\n") return "".join(buf) moreorless-0.4.0/moreorless/click.py000066400000000000000000000023121403063716100175350ustar00rootroot00000000000000import sys from pathlib import Path import click from . import unified_diff def echo_color_unified_diff(astr: str, bstr: str, filename: str, n: int = 3) -> None: """ Just like `moreorless.unified_diff` except using `click.secho`. """ echo_color_precomputed_diff(unified_diff(astr, bstr, filename, n)) def echo_color_precomputed_diff(diff: str) -> None: """ Like `echo_color_unified_diff`, but for precomputed diff results. """ for line in diff.splitlines(True): # TODO benchmark and see if constructing the string up front is faster if line.startswith("---") or line.startswith("+++"): click.secho(line, bold=True, nl=False) elif line.startswith("@@"): click.secho(line, fg="cyan", nl=False) elif line.startswith("-"): click.secho(line, fg="red", nl=False) elif line.startswith("+"): click.secho(line, fg="green", nl=False) else: click.secho(line, nl=False) def main(afile: str, bfile: str) -> None: # pragma: no cover echo_color_unified_diff(Path(afile).read_text(), Path(bfile).read_text(), afile) if __name__ == "__main__": # pragma: no cover main(*sys.argv[1:]) moreorless-0.4.0/moreorless/patch.py000066400000000000000000000136411403063716100175560ustar00rootroot00000000000000import logging import re from dataclasses import dataclass, field from typing import List, Optional, Sequence __all__ = ["apply_single_file", "PatchException"] LOG = logging.getLogger(__name__) def apply_single_file(contents: str, patch: str, allow_offsets: bool = True) -> str: """ Apply a clean patch, no fuzz, no rejects. """ lines = contents.splitlines(True) hunks = _split_hunks(patch.splitlines(True)[2:]) return "".join(_apply_hunks(lines, hunks, allow_offsets)) POSITION_LINE_RE = re.compile(r"@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@") class PatchException(Exception): pass class ContextException(PatchException): pass def _parse_position_line(position_line: str) -> List[int]: """Given an `@@` line, return the four numbers within.""" match = POSITION_LINE_RE.match(position_line) if not match: raise PatchException(f"Position line {position_line!r} failed to parse") return [ int(match.group(1)), int(match.group(2) or "1"), int(match.group(3)), int(match.group(4) or "1"), ] # TODO store the offsets too, to make filtering easier @dataclass class Hunk: position: Optional[List[int]] = None lines: List[str] = field(default_factory=list) def _split_hunks(diff_lines: Sequence[str]) -> List[Hunk]: """ Splits unified diff lines (after the file header) into hunks. """ hunks: List[Hunk] = [] hunk: Optional[Hunk] = None for line in diff_lines: if line.startswith("@@"): # Start a new hunk if hunk: hunks.append(hunk) hunk = Hunk(_parse_position_line(line)) # There should not be '---' or '+++' lines here, they are stripped off # in apply_single_file. if not hunk: raise PatchException(f"Lines without hunk header at {line!r}") hunk.lines.append(line) if hunk and hunk.lines: hunks.append(hunk) return hunks def _apply_hunks(lines: List[str], hunks: List[Hunk], allow_offsets: bool) -> List[str]: work = lines[:] file_offset = 0 # accumulation of delta prev_line = 0 for hunk in hunks: assert hunk.position is not None pos = hunk.position[:] # If length is zero, this is a no-context deletion and per # https://www.artima.com/weblogs/viewpost.jsp?thread=164293 the numbers # are off by one from being actual line numbers. :/ if pos[3] == 0: pos[2] += 1 if pos[1] == 0: pos[0] += 1 cur_line = pos[0] + file_offset - 1 # Meld "No newline at end of file" up a line tmp = hunk.lines[:] for i in range(len(tmp) - 1, 0, -1): if tmp[i].startswith("\\ No newline"): del tmp[i] # strips newline (including dos newlines, although we don't # produce a those in moreorless.unified_diff) if tmp[i - 1].endswith("\r\n"): tmp[i - 1] = tmp[i - 1][:-2] else: tmp[i - 1] = tmp[i - 1][:-1] if allow_offsets: tmp2 = [t[1:] for t in tmp if t[0] in (" ", "-")] # TODO if hunks overlap, this checks against the already-modified # one for context, which seems wrong. Unmodified file is something # like _context_match(lines, tmp2, ..., prev_line+file_offset)-file_offset # On a proper patch this always takes in cur_line and returns cur_line new_line = _context_match(work, tmp2, prev_line, len(work), cur_line) if new_line is None: raise PatchException(f"Failed to apply with offset at {cur_line}") if cur_line != new_line: LOG.info(f"Offset {new_line-cur_line}") cur_line = new_line for line in tmp[1:]: if line.startswith("-"): if line[1:] != work[cur_line]: raise PatchException(f"DELETE fail at {cur_line}") del work[cur_line] elif line.startswith("+"): work.insert(cur_line, line[1:]) cur_line += 1 elif line.startswith(" "): if line[1:] != work[cur_line]: raise PatchException(f"EQUAL fail at {cur_line}") cur_line += 1 elif line.startswith("?"): # pragma: no cover pass # human readable line else: raise PatchException(f"Unknown line {line!r} at {cur_line}") file_offset += pos[3] - pos[1] prev_line = cur_line return work def _context_match( file_lines: List[str], context_lines: List[str], range_start: int, range_end: int, start: int, ) -> Optional[int]: """ Finds an offset within file_lines to match context. Returns i such that: * file_lines[i:i+len] == context_lines * i >= range_start * i <= range_end - len * minimizes abs(i-start) * minimizes i if there's a tie on abs """ cl = len(context_lines) if not range_start >= 0: raise ContextException("context error 1: negative range_start") if not range_end >= range_start: raise ContextException("context error 2: flipped range") if not range_end <= len(file_lines): raise ContextException("context error 3: past end") if not start >= range_start: raise ContextException("context error 4: start before range_start") if not start <= range_end - cl: raise ContextException("context error 5: start past range_end") for di in range(0, max(start - range_start + 1, range_end - start - cl + 1)): t1 = start - di t2 = start + di if t1 >= range_start: if all(context_lines[j] == file_lines[t1 + j] for j in range(cl)): return t1 if t2 + cl <= range_end: if all(context_lines[j] == file_lines[t2 + j] for j in range(cl)): return t2 return None moreorless-0.4.0/moreorless/py.typed000066400000000000000000000000001403063716100175650ustar00rootroot00000000000000moreorless-0.4.0/moreorless/tests/000077500000000000000000000000001403063716100172425ustar00rootroot00000000000000moreorless-0.4.0/moreorless/tests/__init__.py000066400000000000000000000000001403063716100213410ustar00rootroot00000000000000moreorless-0.4.0/moreorless/tests/__main__.py000066400000000000000000000002461403063716100213360ustar00rootroot00000000000000import unittest from .click import ColorTest # noqa: F401 from .general import ParityTest # noqa: F401 from .patch import PatchTest # noqa: F401 unittest.main() moreorless-0.4.0/moreorless/tests/click.py000066400000000000000000000024621403063716100207050ustar00rootroot00000000000000import unittest from typing import Any from unittest.mock import call, patch from .. import unified_diff from ..click import echo_color_precomputed_diff, echo_color_unified_diff class ColorTest(unittest.TestCase): @patch("click.secho") def test_echo_color_unified_diff(self, secho: Any) -> None: echo_color_unified_diff("a\nb\n", "a\nc\n", "x") secho.assert_has_calls( [ call("--- a/x\n", bold=True, nl=False), call("+++ b/x\n", bold=True, nl=False), call("@@ -1,2 +1,2 @@\n", fg="cyan", nl=False), call(" a\n", nl=False), call("-b\n", fg="red", nl=False), call("+c\n", fg="green", nl=False), ] ) @patch("click.secho") def test_echo_color_precomputed_diff(self, secho: Any) -> None: diff = unified_diff("a\nb\n", "a\nc\n", "x") echo_color_precomputed_diff(diff) secho.assert_has_calls( [ call("--- a/x\n", bold=True, nl=False), call("+++ b/x\n", bold=True, nl=False), call("@@ -1,2 +1,2 @@\n", fg="cyan", nl=False), call(" a\n", nl=False), call("-b\n", fg="red", nl=False), call("+c\n", fg="green", nl=False), ] ) moreorless-0.4.0/moreorless/tests/general.py000066400000000000000000000030021403063716100212240ustar00rootroot00000000000000import subprocess import tempfile import unittest from pathlib import Path from parameterized import parameterized from .. import unified_diff class ParityTest(unittest.TestCase): @parameterized.expand( # type: ignore [ ("a", "a"), ("a", "b"), ("a\n", "b"), ("a", "b\n"), ("a\n", "b\n"), ] ) def test_parity(self, a: str, b: str) -> None: with tempfile.TemporaryDirectory() as d: a_path = Path(d) / "a" a_path.mkdir() b_path = Path(d) / "b" b_path.mkdir() (a_path / "file").write_text(a) (b_path / "file").write_text(b) # Notably, diff exits 1 when the files are different :/ # Force the labels because it would otherwise include timestamps. proc = subprocess.run( ["diff", "--label", "a/file", "--label", "b/file", "-u", "a", "b"], cwd=d, encoding="utf-8", stdout=subprocess.PIPE, ) if "\n" in proc.stdout: expected = proc.stdout[proc.stdout.index("\n") + 1 :] else: expected = "" actual = unified_diff(a, b, "file") self.assertEqual(expected, actual) def test_absolute_paths(self) -> None: actual = unified_diff("a\n", "a\nb\n", "/file") self.assertEqual( """\ --- /file +++ /file @@ -1 +1,2 @@ a +b """, actual, ) moreorless-0.4.0/moreorless/tests/patch.py000066400000000000000000000131761403063716100207230ustar00rootroot00000000000000import random import unittest from typing import Any, List, Optional from unittest.mock import patch from parameterized import parameterized from .. import unified_diff from ..patch import ( PatchException, _context_match, _parse_position_line, _split_hunks, apply_single_file, ) class PatchTest(unittest.TestCase): @parameterized.expand( # type: ignore [ ("a", "b"), ("", "b"), ("a", ""), ("", "b\n"), ("a\n", ""), ("a\nb\n", "a\n"), ("a\nb\n", "b\n"), ("a\nb\n", "a\nb"), ("a\nb", "a\nb\n"), ] ) def test_patch(self, a: str, b: str) -> None: diff = unified_diff(a, b, "foo") result = apply_single_file(a, diff) self.assertEqual(b, result) # Although we don't produce these, allow CRLF on the "No newline" line # to strip the full previous newline. if "No newline" in diff: dos_diff = diff.replace("\n\\ No newline", "\r\n\\ No newline") result = apply_single_file(a, dos_diff) self.assertEqual(b, result) @parameterized.expand( # type: ignore [ ("", "b\r\n"), ("a\r\n", ""), ("a\r\nb\r\n", "a\r\n"), ("a\r\nb\r\n", "b\r\n"), ("a\r\nb\r\n", "a\r\nb"), ("a\r\nb", "a\r\nb\r\n"), ] ) def test_patch_crlf(self, a: str, b: str) -> None: diff = unified_diff(a, b, "foo") result = apply_single_file(a, diff) self.assertEqual(b, result) @parameterized.expand( # type: ignore [ (0,), (1,), (2,), (3,), ] ) def test_exhaustive(self, context: int) -> None: for i in range(100): a = "".join( [random.choice(["a\n", "b\n", "c\n", "d\n"]) for x in range(10)] ) b = "".join( [random.choice(["a\n", "b\n", "c\n", "d\n"]) for x in range(10)] ) diff = unified_diff(a, b, "file", context) result = apply_single_file(a, diff) self.assertEqual(b, result) @parameterized.expand( # type: ignore [ ("@@ -5 +9 @@", [5, 1, 9, 1]), ("@@ -5,2 +9,3 @@", [5, 2, 9, 3]), ("@@ invalid @@", None), ] ) def test_parse_position_line( self, line: str, expected: Optional[List[int]] ) -> None: if expected is None: with self.assertRaises(PatchException): _parse_position_line(line) else: self.assertEqual(expected, _parse_position_line(line)) @parameterized.expand( # type: ignore [ ("---\n+++\n@@ -1 +1 @@\n-invalid\n", "Failed to apply with offset at 0"), ("---\n+++\n@@ -1 +1 @@\n invalid\n", "Failed to apply with offset at 0"), ("---\n+++\n@@ -1 +1 @@\nxinvalid\n", "Unknown line 'xinvalid\\\\n' at 0"), ] ) def test_exceptions(self, diff: str, msg: str) -> None: with self.assertRaisesRegex(PatchException, msg): apply_single_file("foo\n", diff) @parameterized.expand( # type: ignore [ ("---\n+++\n@@ -1 +1 @@\n-invalid\n", "DELETE fail at 0"), ("---\n+++\n@@ -1 +1 @@\n invalid\n", "EQUAL fail at 0"), ("---\n+++\n@@ -1 +1 @@\nxinvalid\n", "Unknown line 'xinvalid\\\\n' at 0"), ] ) def test_exceptions_no_offset(self, diff: str, msg: str) -> None: with self.assertRaisesRegex(PatchException, msg): apply_single_file("foo\n", diff, allow_offsets=False) def test_split_hunks_edge_cases(self) -> None: with self.assertRaisesRegex(PatchException, "Lines without hunk header.*"): _split_hunks(["foo\n"]) self.assertEqual([], _split_hunks([])) @patch("moreorless.patch.LOG.info") def test_patch_small_offset(self, log_info: Any) -> None: a = "a\nb\nc\n" b = "a\nB\nc\n" modified = "x\n" + a expected = "x\n" + b diff = unified_diff(a, b, "foo") result = apply_single_file(modified, diff) self.assertEqual(expected, result) log_info.assert_called_with("Offset 1") @parameterized.expand( # type: ignore [ ((["0", "1", "2", "3"], 0, 5, 0), 0), # can match at start ((["0", "1", "2", "3"], 0, 5, 1), 0), # can match earlier ((["1", "2", "3", "4"], 0, 5, 0), 1), # can match later ((["4"], 0, 5, 0), 4), # can match later ((["5"], 0, 4, 3), None), # no possible match, starts past mid ] ) def test_context_match(self, args: Any, expected: Optional[int]) -> None: self.assertEqual(expected, _context_match(["0", "1", "2", "3", "4"], *args)) def test_context_match_tie(self) -> None: # ties resolve earlier self.assertEqual(0, _context_match(["0", "1", "0"], ["0"], 0, 3, 1)) def test_edge_cases(self) -> None: with self.assertRaisesRegex(PatchException, "negative range_start"): _context_match(["0", "1", "2"], ["0"], -1, 3, 0) with self.assertRaisesRegex(PatchException, "flipped range"): _context_match(["0", "1", "2"], ["0"], 3, 0, 0) with self.assertRaisesRegex(PatchException, "past end"): _context_match(["0", "1", "2"], ["0"], 0, 4, 0) with self.assertRaisesRegex(PatchException, "start before range_start"): _context_match(["0", "1", "2"], ["0"], 1, 3, 0) with self.assertRaisesRegex(PatchException, "start past range_end"): _context_match(["0", "1", "2"], ["0"], 0, 3, 3) moreorless-0.4.0/requirements-dev.txt000066400000000000000000000002131403063716100177420ustar00rootroot00000000000000black==20.8b1 coverage==4.5.4 flake8==3.7.9 mypy==0.750 parameterized==0.8.1 tox==3.14.1 twine==3.1.1 ufmt==1.1 usort==0.6.3 wheel==0.33.6 moreorless-0.4.0/setup.cfg000066400000000000000000000021551403063716100155320ustar00rootroot00000000000000[metadata] name = moreorless description = Python diff wrapper long_description = file: README.md long_description_content_type = text/markdown license = MIT url = https://github.com/thatch/moreorless/ author = Tim Hatch author_email = tim@timhatch.com [options] packages = moreorless setup_requires = setuptools_scm setuptools >= 38.3.0 python_requires = >=3.6 include_package_data = true install_requires = dataclasses; python_version < "3.7" click [bdist_wheel] universal = true [check] metadata = true strict = true [coverage:run] branch = True include = moreorless/* omit = moreorless/tests/* [coverage:report] fail_under = 100 precision = 1 show_missing = True skip_covered = True [isort] line_length = 88 multi_line_output = 3 force_grid_wrap = False include_trailing_comma = True use_parentheses = True [mypy] ignore_missing_imports = True [tox:tox] envlist = py36, py37, py38 [testenv] deps = -rrequirements-dev.txt whitelist_externals = make commands = make test setenv = py{36,37,38}: COVERAGE_FILE={envdir}/.coverage [flake8] ignore = E203, E231, E266, E302, E501, W503 max-line-length = 88 moreorless-0.4.0/setup.py000066400000000000000000000000721403063716100154170ustar00rootroot00000000000000from setuptools import setup setup(use_scm_version=True)