pax_global_header00006660000000000000000000000064146316614010014514gustar00rootroot0000000000000052 comment=978894d9aa3ac0bddc49c63cc46d64d0fd28a2e9 golang-github-aymanbagabas-go-udiff-0.2.0/000077500000000000000000000000001463166140100203445ustar00rootroot00000000000000golang-github-aymanbagabas-go-udiff-0.2.0/.github/000077500000000000000000000000001463166140100217045ustar00rootroot00000000000000golang-github-aymanbagabas-go-udiff-0.2.0/.github/UPSTREAM000066400000000000000000000000511463166140100230630ustar00rootroot000000000000003fd334ce9bcd0ffac28669e1d3b1379996ac37b1 golang-github-aymanbagabas-go-udiff-0.2.0/.github/workflows/000077500000000000000000000000001463166140100237415ustar00rootroot00000000000000golang-github-aymanbagabas-go-udiff-0.2.0/.github/workflows/build.yml000066400000000000000000000014431463166140100255650ustar00rootroot00000000000000name: build on: push: branches: - master pull_request: jobs: build: strategy: matrix: go-version: ["^1", "1.18"] os: [ubuntu-latest, macos-latest] #windows-latest runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 - uses: actions/setup-go@v3 with: go-version: ${{ matrix.go-version }} - run: | diff --version patch --version - if: "${{ matrix.os == 'windows-latest' }}" run: | git config --global core.autocrlf input git config --global core.eol lf - run: go build -v ./... - run: go test -v ./... - run: go test -v -fuzz=FuzzRoundTrip -fuzztime=1m . 
# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json golang-github-aymanbagabas-go-udiff-0.2.0/.github/workflows/import.yml000066400000000000000000000050011463166140100257720ustar00rootroot00000000000000name: import upstream on: schedule: - cron: "0 0 * * 0" workflow_dispatch: permissions: write-all jobs: import: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - run: | git config --global user.name "github-actions[bot]" git config --global user.email "github-actions[bot]@users.noreply.github.com" - id: tools run: | # Clone the repository to a temporary directory. tools=$(mktemp -d)/tools git clone --depth 1 "https://github.com/golang/tools" "$tools" echo "tools=$tools" >> $GITHUB_ENV # Copy the diff package to the current directory. cp -r "$tools/internal/diff/"* . # Replace the import path. find . -type f -name '*.go' -exec sed -i'' 's|golang.org/x/tools/internal/diff/|github.com/aymanbagabas/go-udiff/|g' {} + find . -type f -name '*.go' -exec sed -i'' 's|"golang.org/x/tools/internal/diff|diff "github.com/aymanbagabas/go-udiff|g' {} + # Change package name to udiff. sed -i'' 's|package diff|package udiff|g' *.go # Apply patches for p in _patches/*; do git apply $p done # Set output variables. echo "base=$(cat .github/UPSTREAM | head -n1)" >> $GITHUB_OUTPUT cd "$tools" commit=$(git rev-parse HEAD) echo "commit=$commit" >> $GITHUB_OUTPUT cd - # Update the upstream commit. if ! git update-index --refresh || ! git diff-index --quiet HEAD --; then echo "$commit" > .github/UPSTREAM fi - name: Create Pull Request id: cpr uses: peter-evans/create-pull-request@v5 with: commit-message: "feat: import upstream package" title: "Import upstream package [create-pull-request]" body: | Import upstream changes from [tree](https://github.com/golang/tools/tree/${{ steps.tools.outputs.commit }}/internal/diff). Take a look at the [diff](https://github.com/golang/tools/commits/master/internal/diff) to see what changed. 
branch: "import-upstream" delete-branch: true - name: Check outputs if: ${{ steps.cpr.outputs.pull-request-number }} run: | echo "Pull Request Number - ${{ steps.cpr.outputs.pull-request-number }}" echo "Pull Request URL - ${{ steps.cpr.outputs.pull-request-url }}" # yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json golang-github-aymanbagabas-go-udiff-0.2.0/LICENSE-BSD000066400000000000000000000027071463166140100217650ustar00rootroot00000000000000Copyright (c) 2009 The Go Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
golang-github-aymanbagabas-go-udiff-0.2.0/LICENSE-MIT000066400000000000000000000020561463166140100220030ustar00rootroot00000000000000MIT License Copyright (c) 2023 Ayman Bagabas Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. golang-github-aymanbagabas-go-udiff-0.2.0/README.md000066400000000000000000000075431463166140100216340ustar00rootroot00000000000000# µDiff

Latest Release Go Docs Build Status Go Report Card

Micro diff (µDiff) is a Go library that implements the [Myers'](http://www.xmailserver.org/diff2.pdf) diffing algorithm. It aims to provide a minimal API to compute and apply diffs with zero dependencies. It also supports generating diffs in the [Unified Format](https://www.gnu.org/software/diffutils/manual/html_node/Unified-Format.html). If you are looking for a way to parse unified diffs, check out [sourcegraph/go-diff](https://github.com/sourcegraph/go-diff). This is merely a copy of the [Golang tools internal diff package](https://github.com/golang/tools/tree/master/internal/diff) with a few modifications to export package symbols. All credit goes to the [Go authors](https://go.dev/AUTHORS). ## Usage You can import the package using the following command: ```bash go get github.com/aymanbagabas/go-udiff ``` ## Examples Generate a unified diff for strings `a` and `b` with the default number of context lines (3). Use `udiff.ToUnified` to specify the number of context lines. ```go package main import ( "fmt" "github.com/aymanbagabas/go-udiff" ) func main() { a := "Hello, world!\n" b := "Hello, Go!\nSay hi to µDiff" unified := udiff.Unified("a.txt", "b.txt", a, b) fmt.Println(unified) } ``` ``` --- a.txt +++ b.txt @@ -1 +1,2 @@ -Hello, world! +Hello, Go! +Say hi to µDiff \ No newline at end of file ``` Apply changes to a string. ```go package main import ( "fmt" "github.com/aymanbagabas/go-udiff" "github.com/aymanbagabas/go-udiff/myers" ) func main() { a := "Hello, world!\n" b := "Hello, Go!\nSay hi to µDiff" edits := myers.ComputeEdits(a, b) final, err := udiff.Apply(a, edits) if err != nil { panic(err) } fmt.Println(final) } ``` ``` Hello, Go! 
Say hi to µDiff ``` To get a line-by-line diff and edits: ```go package main import ( "fmt" "github.com/aymanbagabas/go-udiff" "github.com/aymanbagabas/go-udiff/myers" ) func main() { a := "Hello, world!\n" b := "Hello, Go!\nSay hi to µDiff" edits := myers.ComputeEdits(a, b) d, err := udiff.ToUnifiedDiff("a.txt", "b.txt", a, edits, udiff.DefaultContextLines) if err != nil { panic(err) } for _, h := range d.Hunks { fmt.Printf("hunk: -%d, +%d\n", h.FromLine, h.ToLine) for _, l := range h.Lines { fmt.Printf("%s %q\n", l.Kind, l.Content) } } } ``` ``` hunk: -1, +1 delete "Hello, world!\n" insert "Hello, Go!\n" insert "Say hi to µDiff" ``` ## Alternatives - [sergi/go-diff](https://github.com/sergi/go-diff) No longer reliable. See [#123](https://github.com/sergi/go-diff/issues/123) and [#141](https://github.com/sergi/go-diff/pull/141). - [hexops/gotextdiff](https://github.com/hexops/gotextdiff) Takes the same approach but looks like the project is abandoned. - [sourcegraph/go-diff](https://github.com/sourcegraph/go-diff) It doesn't compute diffs. Great package for parsing and printing unified diffs. ## Contributing Please send any contributions [upstream](https://github.com/golang/tools). Pull requests made against [the upstream diff package](https://github.com/golang/tools/tree/master/internal/diff) are welcome. ## License [BSD 3-Clause](./LICENSE-BSD) and [MIT](./LICENSE-MIT). 
golang-github-aymanbagabas-go-udiff-0.2.0/_examples/000077500000000000000000000000001463166140100223215ustar00rootroot00000000000000golang-github-aymanbagabas-go-udiff-0.2.0/_examples/apply/000077500000000000000000000000001463166140100234465ustar00rootroot00000000000000golang-github-aymanbagabas-go-udiff-0.2.0/_examples/apply/main.go000066400000000000000000000004151463166140100247210ustar00rootroot00000000000000package main import ( "fmt" "github.com/aymanbagabas/go-udiff" ) func main() { a := "Hello, world!\n" b := "Hello, Go!\nSay hi to µDiff" edits := udiff.Strings(a, b) final, err := udiff.Apply(a, edits) if err != nil { panic(err) } fmt.Println(final) } golang-github-aymanbagabas-go-udiff-0.2.0/_examples/changes/000077500000000000000000000000001463166140100237315ustar00rootroot00000000000000golang-github-aymanbagabas-go-udiff-0.2.0/_examples/changes/main.go000066400000000000000000000006641463166140100252120ustar00rootroot00000000000000package main import ( "fmt" "github.com/aymanbagabas/go-udiff" ) func main() { a := "Hello, world!\n" b := "Hello, Go!\nSay hi to µDiff" edits := udiff.Strings(a, b) d, err := udiff.ToUnifiedDiff("a.txt", "b.txt", a, edits) if err != nil { panic(err) } for _, h := range d.Hunks { fmt.Printf("hunk: -%d, +%d\n", h.FromLine, h.ToLine) for _, l := range h.Lines { fmt.Printf("%s %q\n", l.Kind, l.Content) } } } golang-github-aymanbagabas-go-udiff-0.2.0/_examples/hello-world/000077500000000000000000000000001463166140100245515ustar00rootroot00000000000000golang-github-aymanbagabas-go-udiff-0.2.0/_examples/hello-world/main.go000066400000000000000000000003171463166140100260250ustar00rootroot00000000000000package main import ( "fmt" "github.com/aymanbagabas/go-udiff" ) func main() { a := "Hello, world!\n" b := "Hello, Go!\nSay hi to µDiff" d := udiff.Unified("a.txt", "b.txt", a, b) fmt.Println(d) } 
golang-github-aymanbagabas-go-udiff-0.2.0/_patches/000077500000000000000000000000001463166140100221325ustar00rootroot00000000000000golang-github-aymanbagabas-go-udiff-0.2.0/_patches/0001-fix-remove-testenv-package.patch000066400000000000000000000032061463166140100306720ustar00rootroot00000000000000From 225b4fb5cb35cf333d28ce64b9bb9f9bc492cce7 Mon Sep 17 00:00:00 2001 From: Ayman Bagabas Date: Wed, 13 Dec 2023 12:38:49 -0500 Subject: [PATCH 1/2] fix: remove testenv package Signed-off-by: Ayman Bagabas --- diff_test.go | 3 --- difftest/difftest_test.go | 2 -- 2 files changed, 5 deletions(-) diff --git a/diff_test.go b/diff_test.go index a5e18dbc06eb..dc0063db13ef 100644 --- a/diff_test.go +++ b/diff_test.go @@ -17,7 +17,6 @@ import ( diff "github.com/aymanbagabas/go-udiff" "github.com/aymanbagabas/go-udiff/difftest" - "golang.org/x/tools/internal/testenv" ) func TestApply(t *testing.T) { @@ -120,7 +119,6 @@ func TestLineEdits(t *testing.T) { } func TestToUnified(t *testing.T) { - testenv.NeedsTool(t, "patch") for _, tc := range difftest.TestCases { t.Run(tc.Name, func(t *testing.T) { unified, err := diff.ToUnified(difftest.FileA, difftest.FileB, tc.In, tc.Edits, diff.DefaultContextLines) @@ -156,7 +154,6 @@ func TestToUnified(t *testing.T) { t.Errorf("applying unified failed: got\n%q, wanted\n%q unified\n%q", got, tc.Out, unified) } - }) } } diff --git a/difftest/difftest_test.go b/difftest/difftest_test.go index 02cfca0ccc3b..4ccfe7c43bba 100644 --- a/difftest/difftest_test.go +++ b/difftest/difftest_test.go @@ -15,11 +15,9 @@ import ( "testing" "github.com/aymanbagabas/go-udiff/difftest" - "golang.org/x/tools/internal/testenv" ) func TestVerifyUnified(t *testing.T) { - testenv.NeedsTool(t, "diff") for _, test := range difftest.TestCases { t.Run(test.Name, func(t *testing.T) { if test.NoDiff { -- 2.43.0 
golang-github-aymanbagabas-go-udiff-0.2.0/_patches/0002-fix-re-export-symbols.patch000066400000000000000000000173101463166140100277330ustar00rootroot00000000000000From b347a6a816ada3af6e374a480d13d469979d8c34 Mon Sep 17 00:00:00 2001 From: Ayman Bagabas Date: Wed, 13 Dec 2023 12:42:58 -0500 Subject: [PATCH 2/2] fix: re-export symbols Signed-off-by: Ayman Bagabas --- unified.go | 122 ++++++++++++++++++++++++++--------------------------- 1 file changed, 61 insertions(+), 61 deletions(-) diff --git a/unified.go b/unified.go index 85523a1b435a..268ad228825b 100644 --- a/unified.go +++ b/unified.go @@ -41,57 +41,57 @@ func ToUnified(oldLabel, newLabel, content string, edits []Edit, contextLines in // unified represents a set of edits as a unified diff. type unified struct { - // from is the name of the original file. - from string - // to is the name of the modified file. - to string - // hunks is the set of edit hunks needed to transform the file content. - hunks []*hunk + // From is the name of the original file. + From string + // To is the name of the modified file. + To string + // Hunks is the set of edit Hunks needed to transform the file content. + Hunks []*hunk } // Hunk represents a contiguous set of line edits to apply. type hunk struct { // The line in the original source where the hunk starts. - fromLine int + FromLine int // The line in the original source where the hunk finishes. - toLine int + ToLine int // The set of line based edits to apply. - lines []line + Lines []line } // Line represents a single line operation to apply as part of a Hunk. type line struct { - // kind is the type of line this represents, deletion, insertion or copy. - kind opKind - // content is the content of this line. + // Kind is the type of line this represents, deletion, insertion or copy. + Kind OpKind + // Content is the Content of this line. // For deletion it is the line being removed, for all others it is the line // to put in the output. 
- content string + Content string } -// opKind is used to denote the type of operation a line represents. -type opKind int +// OpKind is used to denote the type of operation a line represents. +type OpKind int const ( - // opDelete is the operation kind for a line that is present in the input + // Delete is the operation kind for a line that is present in the input // but not in the output. - opDelete opKind = iota - // opInsert is the operation kind for a line that is new in the output. - opInsert - // opEqual is the operation kind for a line that is the same in the input and + Delete OpKind = iota + // Insert is the operation kind for a line that is new in the output. + Insert + // Equal is the operation kind for a line that is the same in the input and // output, often used to provide context around edited lines. - opEqual + Equal ) // String returns a human readable representation of an OpKind. It is not // intended for machine processing. -func (k opKind) String() string { +func (k OpKind) String() string { switch k { - case opDelete: + case Delete: return "delete" - case opInsert: + case Insert: return "insert" - case opEqual: + case Equal: return "equal" default: panic("unknown operation kind") @@ -103,8 +103,8 @@ func (k opKind) String() string { func toUnified(fromName, toName string, content string, edits []Edit, contextLines int) (unified, error) { gap := contextLines * 2 u := unified{ - from: fromName, - to: toName, + From: fromName, + To: toName, } if len(edits) == 0 { return u, nil @@ -129,35 +129,35 @@ func toUnified(fromName, toName string, content string, edits []Edit, contextLin switch { case h != nil && start == last: - //direct extension + // direct extension case h != nil && start <= last+gap: - //within range of previous lines, add the joiners + // within range of previous lines, add the joiners addEqualLines(h, lines, last, start) default: - //need to start a new hunk + // need to start a new hunk if h != nil { // add the edge to the previous 
hunk addEqualLines(h, lines, last, last+contextLines) - u.hunks = append(u.hunks, h) + u.Hunks = append(u.Hunks, h) } toLine += start - last h = &hunk{ - fromLine: start + 1, - toLine: toLine + 1, + FromLine: start + 1, + ToLine: toLine + 1, } // add the edge to the new hunk delta := addEqualLines(h, lines, start-contextLines, start) - h.fromLine -= delta - h.toLine -= delta + h.FromLine -= delta + h.ToLine -= delta } last = start for i := start; i < end; i++ { - h.lines = append(h.lines, line{kind: opDelete, content: lines[i]}) + h.Lines = append(h.Lines, line{Kind: Delete, Content: lines[i]}) last++ } if edit.New != "" { for _, content := range splitLines(edit.New) { - h.lines = append(h.lines, line{kind: opInsert, content: content}) + h.Lines = append(h.Lines, line{Kind: Insert, Content: content}) toLine++ } } @@ -165,7 +165,7 @@ func toUnified(fromName, toName string, content string, edits []Edit, contextLin if h != nil { // add the edge to the final hunk addEqualLines(h, lines, last, last+contextLines) - u.hunks = append(u.hunks, h) + u.Hunks = append(u.Hunks, h) } return u, nil } @@ -187,7 +187,7 @@ func addEqualLines(h *hunk, lines []string, start, end int) int { if i >= len(lines) { return delta } - h.lines = append(h.lines, line{kind: opEqual, content: lines[i]}) + h.Lines = append(h.Lines, line{Kind: Equal, Content: lines[i]}) delta++ } return delta @@ -196,19 +196,19 @@ func addEqualLines(h *hunk, lines []string, start, end int) int { // String converts a unified diff to the standard textual form for that diff. // The output of this function can be passed to tools like patch. 
func (u unified) String() string { - if len(u.hunks) == 0 { + if len(u.Hunks) == 0 { return "" } b := new(strings.Builder) - fmt.Fprintf(b, "--- %s\n", u.from) - fmt.Fprintf(b, "+++ %s\n", u.to) - for _, hunk := range u.hunks { + fmt.Fprintf(b, "--- %s\n", u.From) + fmt.Fprintf(b, "+++ %s\n", u.To) + for _, hunk := range u.Hunks { fromCount, toCount := 0, 0 - for _, l := range hunk.lines { - switch l.kind { - case opDelete: + for _, l := range hunk.Lines { + switch l.Kind { + case Delete: fromCount++ - case opInsert: + case Insert: toCount++ default: fromCount++ @@ -217,32 +217,32 @@ func (u unified) String() string { } fmt.Fprint(b, "@@") if fromCount > 1 { - fmt.Fprintf(b, " -%d,%d", hunk.fromLine, fromCount) - } else if hunk.fromLine == 1 && fromCount == 0 { + fmt.Fprintf(b, " -%d,%d", hunk.FromLine, fromCount) + } else if hunk.FromLine == 1 && fromCount == 0 { // Match odd GNU diff -u behavior adding to empty file. fmt.Fprintf(b, " -0,0") } else { - fmt.Fprintf(b, " -%d", hunk.fromLine) + fmt.Fprintf(b, " -%d", hunk.FromLine) } if toCount > 1 { - fmt.Fprintf(b, " +%d,%d", hunk.toLine, toCount) - } else if hunk.toLine == 1 && toCount == 0 { + fmt.Fprintf(b, " +%d,%d", hunk.ToLine, toCount) + } else if hunk.ToLine == 1 && toCount == 0 { // Match odd GNU diff -u behavior adding to empty file. 
fmt.Fprintf(b, " +0,0") } else { - fmt.Fprintf(b, " +%d", hunk.toLine) + fmt.Fprintf(b, " +%d", hunk.ToLine) } fmt.Fprint(b, " @@\n") - for _, l := range hunk.lines { - switch l.kind { - case opDelete: - fmt.Fprintf(b, "-%s", l.content) - case opInsert: - fmt.Fprintf(b, "+%s", l.content) + for _, l := range hunk.Lines { + switch l.Kind { + case Delete: + fmt.Fprintf(b, "-%s", l.Content) + case Insert: + fmt.Fprintf(b, "+%s", l.Content) default: - fmt.Fprintf(b, " %s", l.content) + fmt.Fprintf(b, " %s", l.Content) } - if !strings.HasSuffix(l.content, "\n") { + if !strings.HasSuffix(l.Content, "\n") { fmt.Fprintf(b, "\n\\ No newline at end of file\n") } } -- 2.43.0 golang-github-aymanbagabas-go-udiff-0.2.0/diff.go000066400000000000000000000121641463166140100216070ustar00rootroot00000000000000// Copyright 2019 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package diff computes differences between text files or strings. package udiff import ( "fmt" "sort" "strings" ) // An Edit describes the replacement of a portion of a text file. type Edit struct { Start, End int // byte offsets of the region to replace New string // the replacement } func (e Edit) String() string { return fmt.Sprintf("{Start:%d,End:%d,New:%q}", e.Start, e.End, e.New) } // Apply applies a sequence of edits to the src buffer and returns the // result. Edits are applied in order of start offset; edits with the // same start offset are applied in they order they were provided. // // Apply returns an error if any edit is out of bounds, // or if any pair of edits is overlapping. func Apply(src string, edits []Edit) (string, error) { edits, size, err := validate(src, edits) if err != nil { return "", err } // Apply edits. out := make([]byte, 0, size) lastEnd := 0 for _, edit := range edits { if lastEnd < edit.Start { out = append(out, src[lastEnd:edit.Start]...) } out = append(out, edit.New...) 
lastEnd = edit.End } out = append(out, src[lastEnd:]...) if len(out) != size { panic("wrong size") } return string(out), nil } // ApplyBytes is like Apply, but it accepts a byte slice. // The result is always a new array. func ApplyBytes(src []byte, edits []Edit) ([]byte, error) { res, err := Apply(string(src), edits) return []byte(res), err } // validate checks that edits are consistent with src, // and returns the size of the patched output. // It may return a different slice. func validate(src string, edits []Edit) ([]Edit, int, error) { if !sort.IsSorted(editsSort(edits)) { edits = append([]Edit(nil), edits...) SortEdits(edits) } // Check validity of edits and compute final size. size := len(src) lastEnd := 0 for _, edit := range edits { if !(0 <= edit.Start && edit.Start <= edit.End && edit.End <= len(src)) { return nil, 0, fmt.Errorf("diff has out-of-bounds edits") } if edit.Start < lastEnd { return nil, 0, fmt.Errorf("diff has overlapping edits") } size += len(edit.New) + edit.Start - edit.End lastEnd = edit.End } return edits, size, nil } // SortEdits orders a slice of Edits by (start, end) offset. // This ordering puts insertions (end = start) before deletions // (end > start) at the same point, but uses a stable sort to preserve // the order of multiple insertions at the same point. // (Apply detects multiple deletions at the same point as an error.) func SortEdits(edits []Edit) { sort.Stable(editsSort(edits)) } type editsSort []Edit func (a editsSort) Len() int { return len(a) } func (a editsSort) Less(i, j int) bool { if cmp := a[i].Start - a[j].Start; cmp != 0 { return cmp < 0 } return a[i].End < a[j].End } func (a editsSort) Swap(i, j int) { a[i], a[j] = a[j], a[i] } // lineEdits expands and merges a sequence of edits so that each // resulting edit replaces one or more complete lines. // See ApplyEdits for preconditions. 
func lineEdits(src string, edits []Edit) ([]Edit, error) { edits, _, err := validate(src, edits) if err != nil { return nil, err } // Do all deletions begin and end at the start of a line, // and all insertions end with a newline? // (This is merely a fast path.) for _, edit := range edits { if edit.Start >= len(src) || // insertion at EOF edit.Start > 0 && src[edit.Start-1] != '\n' || // not at line start edit.End > 0 && src[edit.End-1] != '\n' || // not at line start edit.New != "" && edit.New[len(edit.New)-1] != '\n' { // partial insert goto expand // slow path } } return edits, nil // aligned expand: if len(edits) == 0 { return edits, nil // no edits (unreachable due to fast path) } expanded := make([]Edit, 0, len(edits)) // a guess prev := edits[0] // TODO(adonovan): opt: start from the first misaligned edit. // TODO(adonovan): opt: avoid quadratic cost of string += string. for _, edit := range edits[1:] { between := src[prev.End:edit.Start] if !strings.Contains(between, "\n") { // overlapping lines: combine with previous edit. prev.New += between + edit.New prev.End = edit.End } else { // non-overlapping lines: flush previous edit. expanded = append(expanded, expandEdit(prev, src)) prev = edit } } return append(expanded, expandEdit(prev, src)), nil // flush final edit } // expandEdit returns edit expanded to complete whole lines. func expandEdit(edit Edit, src string) Edit { // Expand start left to start of line. // (delta is the zero-based column number of start.) start := edit.Start if delta := start - 1 - strings.LastIndex(src[:start], "\n"); delta > 0 { edit.Start -= delta edit.New = src[start-delta:start] + edit.New } // Expand end right to end of line. 
end := edit.End if end > 0 && src[end-1] != '\n' || edit.New != "" && edit.New[len(edit.New)-1] != '\n' { if nl := strings.IndexByte(src[end:], '\n'); nl < 0 { edit.End = len(src) // extend to EOF } else { edit.End = end + nl + 1 // extend beyond \n } } edit.New += src[end:edit.End] return edit } golang-github-aymanbagabas-go-udiff-0.2.0/diff_test.go000066400000000000000000000133201463166140100226410ustar00rootroot00000000000000// Copyright 2019 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package udiff_test import ( "bytes" "math/rand" "os" "os/exec" "path/filepath" "reflect" "strings" "testing" "unicode/utf8" diff "github.com/aymanbagabas/go-udiff" "github.com/aymanbagabas/go-udiff/difftest" ) func TestApply(t *testing.T) { for _, tc := range difftest.TestCases { t.Run(tc.Name, func(t *testing.T) { got, err := diff.Apply(tc.In, tc.Edits) if err != nil { t.Fatalf("Apply(Edits) failed: %v", err) } if got != tc.Out { t.Errorf("Apply(Edits): got %q, want %q", got, tc.Out) } if tc.LineEdits != nil { got, err := diff.Apply(tc.In, tc.LineEdits) if err != nil { t.Fatalf("Apply(LineEdits) failed: %v", err) } if got != tc.Out { t.Errorf("Apply(LineEdits): got %q, want %q", got, tc.Out) } } }) } } func TestNEdits(t *testing.T) { for _, tc := range difftest.TestCases { edits := diff.Strings(tc.In, tc.Out) got, err := diff.Apply(tc.In, edits) if err != nil { t.Fatalf("Apply failed: %v", err) } if got != tc.Out { t.Fatalf("%s: got %q wanted %q", tc.Name, got, tc.Out) } if len(edits) < len(tc.Edits) { // should find subline edits t.Errorf("got %v, expected %v for %#v", edits, tc.Edits, tc) } } } func TestNRandom(t *testing.T) { rand.Seed(1) for i := 0; i < 1000; i++ { a := randstr("abω", 16) b := randstr("abωc", 16) edits := diff.Strings(a, b) got, err := diff.Apply(a, edits) if err != nil { t.Fatalf("Apply failed: %v", err) } if got != b { t.Fatalf("%d: got %q, wanted %q, starting 
with %q", i, got, b, a) } } } // $ go test -fuzz=FuzzRoundTrip ./internal/diff func FuzzRoundTrip(f *testing.F) { f.Fuzz(func(t *testing.T, a, b string) { if !utf8.ValidString(a) || !utf8.ValidString(b) { return // inputs must be text } edits := diff.Strings(a, b) got, err := diff.Apply(a, edits) if err != nil { t.Fatalf("Apply failed: %v", err) } if got != b { t.Fatalf("applying diff(%q, %q) gives %q; edits=%v", a, b, got, edits) } }) } func TestLineEdits(t *testing.T) { for _, tc := range difftest.TestCases { t.Run(tc.Name, func(t *testing.T) { want := tc.LineEdits if want == nil { want = tc.Edits // already line-aligned } got, err := diff.LineEdits(tc.In, tc.Edits) if err != nil { t.Fatalf("LineEdits: %v", err) } if !reflect.DeepEqual(got, want) { t.Errorf("in=<<%s>>\nout=<<%s>>\nraw edits=%s\nline edits=%s\nwant: %s", tc.In, tc.Out, tc.Edits, got, want) } // make sure that applying the edits gives the expected result fixed, err := diff.Apply(tc.In, got) if err != nil { t.Error(err) } if fixed != tc.Out { t.Errorf("Apply(LineEdits): got %q, want %q", fixed, tc.Out) } }) } } func TestToUnified(t *testing.T) { for _, tc := range difftest.TestCases { t.Run(tc.Name, func(t *testing.T) { unified, err := diff.ToUnified(difftest.FileA, difftest.FileB, tc.In, tc.Edits, diff.DefaultContextLines) if err != nil { t.Fatal(err) } if unified == "" { return } orig := filepath.Join(t.TempDir(), "original") err = os.WriteFile(orig, []byte(tc.In), 0644) if err != nil { t.Fatal(err) } temp := filepath.Join(t.TempDir(), "patched") err = os.WriteFile(temp, []byte(tc.In), 0644) if err != nil { t.Fatal(err) } cmd := exec.Command("patch", "-p0", "-u", "-s", "-o", temp, orig) cmd.Stdin = strings.NewReader(unified) cmd.Stdout = new(bytes.Buffer) cmd.Stderr = new(bytes.Buffer) if err = cmd.Run(); err != nil { t.Fatalf("%v: %q (%q) (%q)", err, cmd.String(), cmd.Stderr, cmd.Stdout) } got, err := os.ReadFile(temp) if err != nil { t.Fatal(err) } if string(got) != tc.Out { t.Errorf("applying 
unified failed: got\n%q, wanted\n%q unified\n%q", got, tc.Out, unified) } }) } } func TestRegressionOld001(t *testing.T) { a := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage udiff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"github.com/aymanbagabas/go-udiff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n" b := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage udiff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"github.com/aymanbagabas/go-udiff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n" diffs := diff.Strings(a, b) got, err := diff.Apply(a, diffs) if err != nil { t.Fatalf("Apply failed: %v", err) } if got != b { i := 0 for ; i < len(a) && i < len(b) && got[i] == b[i]; i++ { } t.Errorf("oops %vd\n%q\n%q", diffs, got, b) t.Errorf("\n%q\n%q", got[i:], b[i:]) } } func TestRegressionOld002(t *testing.T) { a := "n\"\n)\n" b := "n\"\n\t\"golang.org/x//nnal/stack\"\n)\n" diffs := diff.Strings(a, b) got, err := diff.Apply(a, diffs) if err != nil { t.Fatalf("Apply failed: %v", err) } if got != b { i := 0 for ; i < len(a) && i < len(b) && got[i] == b[i]; i++ { } t.Errorf("oops %vd\n%q\n%q", diffs, got, b) t.Errorf("\n%q\n%q", got[i:], b[i:]) } } // return a random string of length n made of characters from s func randstr(s string, n int) string { src := []rune(s) x := make([]rune, n) for i := 0; i < n; i++ { x[i] = src[rand.Intn(len(src))] } return string(x) } 
golang-github-aymanbagabas-go-udiff-0.2.0/difftest/000077500000000000000000000000001463166140100221545ustar00rootroot00000000000000golang-github-aymanbagabas-go-udiff-0.2.0/difftest/difftest.go000066400000000000000000000164571463166140100243300ustar00rootroot00000000000000// Copyright 2019 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package difftest supplies a set of tests that will operate on any // implementation of a diff algorithm as exposed by // diff "github.com/aymanbagabas/go-udiff" package difftest // There are two kinds of tests, semantic tests, and 'golden data' tests. // The semantic tests check that the computed diffs transform the input to // the output, and that 'patch' accepts the computed unified diffs. // The other tests just check that Edits and LineEdits haven't changed // unexpectedly. These fields may need to be changed when the diff algorithm // changes. import ( "testing" diff "github.com/aymanbagabas/go-udiff" ) const ( FileA = "from" FileB = "to" UnifiedPrefix = "--- " + FileA + "\n+++ " + FileB + "\n" ) var TestCases = []struct { Name, In, Out, Unified string Edits, LineEdits []diff.Edit // expectation (LineEdits=nil => already line-aligned) NoDiff bool }{{ Name: "empty", In: "", Out: "", }, { Name: "no_diff", In: "gargantuan\n", Out: "gargantuan\n", }, { Name: "replace_all", In: "fruit\n", Out: "cheese\n", Unified: UnifiedPrefix + ` @@ -1 +1 @@ -fruit +cheese `[1:], Edits: []diff.Edit{{Start: 0, End: 5, New: "cheese"}}, LineEdits: []diff.Edit{{Start: 0, End: 6, New: "cheese\n"}}, }, { Name: "insert_rune", In: "gord\n", Out: "gourd\n", Unified: UnifiedPrefix + ` @@ -1 +1 @@ -gord +gourd `[1:], Edits: []diff.Edit{{Start: 2, End: 2, New: "u"}}, LineEdits: []diff.Edit{{Start: 0, End: 5, New: "gourd\n"}}, }, { Name: "delete_rune", In: "groat\n", Out: "goat\n", Unified: UnifiedPrefix + ` @@ -1 +1 @@ -groat +goat `[1:], Edits: []diff.Edit{{Start: 
1, End: 2, New: ""}}, LineEdits: []diff.Edit{{Start: 0, End: 6, New: "goat\n"}}, }, { Name: "replace_rune", In: "loud\n", Out: "lord\n", Unified: UnifiedPrefix + ` @@ -1 +1 @@ -loud +lord `[1:], Edits: []diff.Edit{{Start: 2, End: 3, New: "r"}}, LineEdits: []diff.Edit{{Start: 0, End: 5, New: "lord\n"}}, }, { Name: "replace_partials", In: "blanket\n", Out: "bunker\n", Unified: UnifiedPrefix + ` @@ -1 +1 @@ -blanket +bunker `[1:], Edits: []diff.Edit{ {Start: 1, End: 3, New: "u"}, {Start: 6, End: 7, New: "r"}, }, LineEdits: []diff.Edit{{Start: 0, End: 8, New: "bunker\n"}}, }, { Name: "insert_line", In: "1: one\n3: three\n", Out: "1: one\n2: two\n3: three\n", Unified: UnifiedPrefix + ` @@ -1,2 +1,3 @@ 1: one +2: two 3: three `[1:], Edits: []diff.Edit{{Start: 7, End: 7, New: "2: two\n"}}, }, { Name: "replace_no_newline", In: "A", Out: "B", Unified: UnifiedPrefix + ` @@ -1 +1 @@ -A \ No newline at end of file +B \ No newline at end of file `[1:], Edits: []diff.Edit{{Start: 0, End: 1, New: "B"}}, }, { Name: "delete_empty", In: "meow", Out: "", // GNU diff -u special case: +0,0 Unified: UnifiedPrefix + ` @@ -1 +0,0 @@ -meow \ No newline at end of file `[1:], Edits: []diff.Edit{{Start: 0, End: 4, New: ""}}, LineEdits: []diff.Edit{{Start: 0, End: 4, New: ""}}, }, { Name: "append_empty", In: "", // GNU diff -u special case: -0,0 Out: "AB\nC", Unified: UnifiedPrefix + ` @@ -0,0 +1,2 @@ +AB +C \ No newline at end of file `[1:], Edits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}}, LineEdits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}}, }, // TODO(adonovan): fix this test: GNU diff -u prints "+1,2", Unifies prints "+1,3". 
// { // Name: "add_start", // In: "A", // Out: "B\nCA", // Unified: UnifiedPrefix + ` // @@ -1 +1,2 @@ // -A // \ No newline at end of file // +B // +CA // \ No newline at end of file // `[1:], // Edits: []diff.TextEdit{{Span: newSpan(0, 0), NewText: "B\nC"}}, // LineEdits: []diff.TextEdit{{Span: newSpan(0, 0), NewText: "B\nC"}}, // }, { Name: "add_end", In: "A", Out: "AB", Unified: UnifiedPrefix + ` @@ -1 +1 @@ -A \ No newline at end of file +AB \ No newline at end of file `[1:], Edits: []diff.Edit{{Start: 1, End: 1, New: "B"}}, LineEdits: []diff.Edit{{Start: 0, End: 1, New: "AB"}}, }, { Name: "add_empty", In: "", Out: "AB\nC", Unified: UnifiedPrefix + ` @@ -0,0 +1,2 @@ +AB +C \ No newline at end of file `[1:], Edits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}}, LineEdits: []diff.Edit{{Start: 0, End: 0, New: "AB\nC"}}, }, { Name: "add_newline", In: "A", Out: "A\n", Unified: UnifiedPrefix + ` @@ -1 +1 @@ -A \ No newline at end of file +A `[1:], Edits: []diff.Edit{{Start: 1, End: 1, New: "\n"}}, LineEdits: []diff.Edit{{Start: 0, End: 1, New: "A\n"}}, }, { Name: "delete_front", In: "A\nB\nC\nA\nB\nB\nA\n", Out: "C\nB\nA\nB\nA\nC\n", Unified: UnifiedPrefix + ` @@ -1,7 +1,6 @@ -A -B C +B A B -B A +C `[1:], NoDiff: true, // unified diff is different but valid Edits: []diff.Edit{ {Start: 0, End: 4, New: ""}, {Start: 6, End: 6, New: "B\n"}, {Start: 10, End: 12, New: ""}, {Start: 14, End: 14, New: "C\n"}, }, LineEdits: []diff.Edit{ {Start: 0, End: 4, New: ""}, {Start: 6, End: 6, New: "B\n"}, {Start: 10, End: 12, New: ""}, {Start: 14, End: 14, New: "C\n"}, }, }, { Name: "replace_last_line", In: "A\nB\n", Out: "A\nC\n\n", Unified: UnifiedPrefix + ` @@ -1,2 +1,3 @@ A -B +C + `[1:], Edits: []diff.Edit{{Start: 2, End: 3, New: "C\n"}}, LineEdits: []diff.Edit{{Start: 2, End: 4, New: "C\n\n"}}, }, { Name: "multiple_replace", In: "A\nB\nC\nD\nE\nF\nG\n", Out: "A\nH\nI\nJ\nE\nF\nK\n", Unified: UnifiedPrefix + ` @@ -1,7 +1,7 @@ A -B -C -D +H +I +J E F -G +K `[1:], Edits: 
[]diff.Edit{ {Start: 2, End: 8, New: "H\nI\nJ\n"}, {Start: 12, End: 14, New: "K\n"}, }, NoDiff: true, // diff algorithm produces different delete/insert pattern }, { Name: "extra_newline", In: "\nA\n", Out: "A\n", Edits: []diff.Edit{{Start: 0, End: 1, New: ""}}, Unified: UnifiedPrefix + `@@ -1,2 +1 @@ - A `, }, { Name: "unified_lines", In: "aaa\nccc\n", Out: "aaa\nbbb\nccc\n", Edits: []diff.Edit{{Start: 3, End: 3, New: "\nbbb"}}, LineEdits: []diff.Edit{{Start: 0, End: 4, New: "aaa\nbbb\n"}}, Unified: UnifiedPrefix + "@@ -1,2 +1,3 @@\n aaa\n+bbb\n ccc\n", }, { Name: "60379", In: `package a type S struct { s fmt.Stringer } `, Out: `package a type S struct { s fmt.Stringer } `, Edits: []diff.Edit{{Start: 27, End: 27, New: "\t"}}, LineEdits: []diff.Edit{{Start: 27, End: 42, New: "\ts fmt.Stringer\n"}}, Unified: UnifiedPrefix + "@@ -1,5 +1,5 @@\n package a\n \n type S struct {\n-s fmt.Stringer\n+\ts fmt.Stringer\n }\n", }, } func DiffTest(t *testing.T, compute func(before, after string) []diff.Edit) { for _, test := range TestCases { t.Run(test.Name, func(t *testing.T) { edits := compute(test.In, test.Out) got, err := diff.Apply(test.In, edits) if err != nil { t.Fatalf("Apply failed: %v", err) } unified, err := diff.ToUnified(FileA, FileB, test.In, edits, diff.DefaultContextLines) if err != nil { t.Fatalf("ToUnified: %v", err) } if got != test.Out { t.Errorf("Apply: got patched:\n%v\nfrom diff:\n%v\nexpected:\n%v", got, unified, test.Out) } if !test.NoDiff && unified != test.Unified { t.Errorf("Unified: got diff:\n%q\nexpected:\n%q diffs:%v", unified, test.Unified, edits) } }) } } golang-github-aymanbagabas-go-udiff-0.2.0/difftest/difftest_test.go000066400000000000000000000037261463166140100253620ustar00rootroot00000000000000// Copyright 2019 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
// Package difftest supplies a set of tests that will operate on any // implementation of a diff algorithm as exposed by // diff "github.com/aymanbagabas/go-udiff" package difftest_test import ( "fmt" "os" "os/exec" "strings" "testing" "github.com/aymanbagabas/go-udiff/difftest" ) func TestVerifyUnified(t *testing.T) { for _, test := range difftest.TestCases { t.Run(test.Name, func(t *testing.T) { if test.NoDiff { t.Skip("diff tool produces expected different results") } diff, err := getDiffOutput(test.In, test.Out) if err != nil { t.Fatal(err) } if len(diff) > 0 { diff = difftest.UnifiedPrefix + diff } if diff != test.Unified { t.Errorf("unified:\n%s\ndiff -u:\n%s", test.Unified, diff) } }) } } func getDiffOutput(a, b string) (string, error) { fileA, err := os.CreateTemp("", "myers.in") if err != nil { return "", err } defer os.Remove(fileA.Name()) if _, err := fileA.Write([]byte(a)); err != nil { return "", err } if err := fileA.Close(); err != nil { return "", err } fileB, err := os.CreateTemp("", "myers.in") if err != nil { return "", err } defer os.Remove(fileB.Name()) if _, err := fileB.Write([]byte(b)); err != nil { return "", err } if err := fileB.Close(); err != nil { return "", err } cmd := exec.Command("diff", "-u", fileA.Name(), fileB.Name()) cmd.Env = append(cmd.Env, "LANG=en_US.UTF-8") out, err := cmd.CombinedOutput() if err != nil { if _, ok := err.(*exec.ExitError); !ok { return "", fmt.Errorf("failed to run diff -u %v %v: %v\n%v", fileA.Name(), fileB.Name(), err, string(out)) } } diff := string(out) if len(diff) <= 0 { return diff, nil } bits := strings.SplitN(diff, "\n", 3) if len(bits) != 3 { return "", fmt.Errorf("diff output did not have file prefix:\n%s", diff) } return bits[2], nil } golang-github-aymanbagabas-go-udiff-0.2.0/export.go000066400000000000000000000006021463166140100222120ustar00rootroot00000000000000package udiff // UnifiedDiff is a unified diff. 
type UnifiedDiff = unified // ToUnifiedDiff takes a file contents and a sequence of edits, and calculates // a unified diff that represents those edits. func ToUnifiedDiff(fromName, toName string, content string, edits []Edit, contextLines int) (UnifiedDiff, error) { return toUnified(fromName, toName, content, edits, contextLines) } golang-github-aymanbagabas-go-udiff-0.2.0/export_test.go000066400000000000000000000004031463166140100232500ustar00rootroot00000000000000// Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package udiff // This file exports some private declarations to tests. var LineEdits = lineEdits golang-github-aymanbagabas-go-udiff-0.2.0/go.mod000066400000000000000000000000611463166140100214470ustar00rootroot00000000000000module github.com/aymanbagabas/go-udiff go 1.18 golang-github-aymanbagabas-go-udiff-0.2.0/lcs/000077500000000000000000000000001463166140100211255ustar00rootroot00000000000000golang-github-aymanbagabas-go-udiff-0.2.0/lcs/common.go000066400000000000000000000111631463166140100227460ustar00rootroot00000000000000// Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
package lcs

import (
	"log"
	"sort"
)

// lcs is a longest common sequence
type lcs []diag

// A diag is a piece of the edit graph where A[X+i] == B[Y+i], for 0<=i<Len.
//
// NOTE(review): this declaration and the body of sort below were lost to
// text extraction and have been rebuilt from how the rest of the package
// uses them (field names and positional order X, Y, Len; int arithmetic on
// all three) — confirm against the upstream file.
type diag struct {
	X, Y int
	Len  int
}

// sort orders the diagonals in place by increasing X, placing the longer
// diagonal first when two share the same X, and returns the receiver.
func (l lcs) sort() lcs {
	sort.Slice(l, func(i, j int) bool {
		if l[i].X != l[j].X {
			return l[i].X < l[j].X
		}
		return l[i].Len > l[j].Len
	})
	return l
}

// validate that the elements of the lcs do not overlap
// (can only happen when the two-sided algorithm ends early)
// expects the lcs to be sorted
func (l lcs) valid() bool {
	for i := 1; i < len(l); i++ {
		// each diagonal must end, in both X and Y, no later than
		// the point where its successor begins
		if l[i-1].X+l[i-1].Len > l[i].X {
			return false
		}
		if l[i-1].Y+l[i-1].Len > l[i].Y {
			return false
		}
	}
	return true
}

// repair overlapping lcs
// only called if two-sided stops early
//
// fix returns nil for an empty lcs. Otherwise it reorders l in place
// (longest diagonal first) and returns a separate, sorted lcs built from
// the diagonals, trimmed where needed, that could be kept.
func (l lcs) fix() lcs {
	// from the set of diagonals in l, find a maximal non-conflicting set
	// this problem may be NP-complete, but we use a greedy heuristic,
	// which is quadratic, but with a better data structure, could be D log D.
	// independent is not enough: {0,3,1} and {3,0,2} can't both occur in an lcs
	// which has to have monotone x and y
	if len(l) == 0 {
		return nil
	}
	// greedy order: consider the longest diagonals first
	sort.Slice(l, func(i, j int) bool { return l[i].Len > l[j].Len })
	tmp := make(lcs, 0, len(l))
	tmp = append(tmp, l[0])
	for i := 1; i < len(l); i++ {
		var dir direction
		nxt := l[i]
		// trim nxt against every diagonal accepted so far; give up on it
		// as soon as it is trimmed to nothing or found inconsistent
		for _, in := range tmp {
			if dir, nxt = overlap(in, nxt); dir == empty || dir == bad {
				break
			}
		}
		if nxt.Len > 0 && dir != bad {
			tmp = append(tmp, nxt)
		}
	}
	tmp.sort()
	if false && !tmp.valid() { // debug checking
		log.Fatalf("here %d", len(tmp))
	}
	return tmp
}

// direction is the verdict of overlap on a proposed diagonal relative to
// one that is already part of the lcs.
type direction int

const (
	empty    direction = iota // diag is empty (so not in lcs)
	leftdown                  // proposed diag is acceptably to the left and below
	rightup                   // proposed diag is acceptably to the right and above
	bad                       // proposed diag is inconsistent with the lcs so far
)

// overlap trims the proposed diag prop so it doesn't overlap with
// the existing diag that has already been added to the lcs.
func overlap(exist, prop diag) (direction, diag) { if prop.X <= exist.X && exist.X < prop.X+prop.Len { // remove the end of prop where it overlaps with the X end of exist delta := prop.X + prop.Len - exist.X prop.Len -= delta if prop.Len <= 0 { return empty, prop } } if exist.X <= prop.X && prop.X < exist.X+exist.Len { // remove the beginning of prop where overlaps with exist delta := exist.X + exist.Len - prop.X prop.Len -= delta if prop.Len <= 0 { return empty, prop } prop.X += delta prop.Y += delta } if prop.Y <= exist.Y && exist.Y < prop.Y+prop.Len { // remove the end of prop that overlaps (in Y) with exist delta := prop.Y + prop.Len - exist.Y prop.Len -= delta if prop.Len <= 0 { return empty, prop } } if exist.Y <= prop.Y && prop.Y < exist.Y+exist.Len { // remove the beginning of peop that overlaps with exist delta := exist.Y + exist.Len - prop.Y prop.Len -= delta if prop.Len <= 0 { return empty, prop } prop.X += delta // no test reaches this code prop.Y += delta } if prop.X+prop.Len <= exist.X && prop.Y+prop.Len <= exist.Y { return leftdown, prop } if exist.X+exist.Len <= prop.X && exist.Y+exist.Len <= prop.Y { return rightup, prop } // prop can't be in an lcs that contains exist return bad, prop } // manipulating Diag and lcs // prepend a diagonal (x,y)-(x+1,y+1) segment either to an empty lcs // or to its first Diag. prepend is only called to extend diagonals // the backward direction. func (lcs lcs) prepend(x, y int) lcs { if len(lcs) > 0 { d := &lcs[0] if int(d.X) == x+1 && int(d.Y) == y+1 { // extend the diagonal down and to the left d.X, d.Y = int(x), int(y) d.Len++ return lcs } } r := diag{X: int(x), Y: int(y), Len: 1} lcs = append([]diag{r}, lcs...) return lcs } // append appends a diagonal, or extends the existing one. // by adding the edge (x,y)-(x+1.y+1). append is only called // to extend diagonals in the forward direction. func (lcs lcs) append(x, y int) lcs { if len(lcs) > 0 { last := &lcs[len(lcs)-1] // Expand last element if adjoining. 
if last.X+last.Len == x && last.Y+last.Len == y { last.Len++ return lcs } } return append(lcs, diag{X: x, Y: y, Len: 1}) } // enforce constraint on d, k func ok(d, k int) bool { return d >= 0 && -d <= k && k <= d } golang-github-aymanbagabas-go-udiff-0.2.0/lcs/common_test.go000066400000000000000000000075461463166140100240170ustar00rootroot00000000000000// Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package lcs import ( "log" "math/rand" "strings" "testing" ) type Btest struct { a, b string lcs []string } var Btests = []Btest{ {"aaabab", "abaab", []string{"abab", "aaab"}}, {"aabbba", "baaba", []string{"aaba"}}, {"cabbx", "cbabx", []string{"cabx", "cbbx"}}, {"c", "cb", []string{"c"}}, {"aaba", "bbb", []string{"b"}}, {"bbaabb", "b", []string{"b"}}, {"baaabb", "bbaba", []string{"bbb", "baa", "bab"}}, {"baaabb", "abbab", []string{"abb", "bab", "aab"}}, {"baaba", "aaabba", []string{"aaba"}}, {"ca", "cba", []string{"ca"}}, {"ccbcbc", "abba", []string{"bb"}}, {"ccbcbc", "aabba", []string{"bb"}}, {"ccb", "cba", []string{"cb"}}, {"caef", "axe", []string{"ae"}}, {"bbaabb", "baabb", []string{"baabb"}}, // Example from Myers: {"abcabba", "cbabac", []string{"caba", "baba", "cbba"}}, {"3456aaa", "aaa", []string{"aaa"}}, {"aaa", "aaa123", []string{"aaa"}}, {"aabaa", "aacaa", []string{"aaaa"}}, {"1a", "a", []string{"a"}}, {"abab", "bb", []string{"bb"}}, {"123", "ab", []string{""}}, {"a", "b", []string{""}}, {"abc", "123", []string{""}}, {"aa", "aa", []string{"aa"}}, {"abcde", "12345", []string{""}}, {"aaa3456", "aaa", []string{"aaa"}}, {"abcde", "12345a", []string{"a"}}, {"ab", "123", []string{""}}, {"1a2", "a", []string{"a"}}, // for two-sided {"babaab", "cccaba", []string{"aba"}}, {"aabbab", "cbcabc", []string{"bab"}}, {"abaabb", "bcacab", []string{"baab"}}, {"abaabb", "abaaaa", []string{"abaa"}}, {"bababb", "baaabb", []string{"baabb"}}, {"abbbaa", "cabacc", 
[]string{"aba"}}, {"aabbaa", "aacaba", []string{"aaaa", "aaba"}}, } func init() { log.SetFlags(log.Lshortfile) } func check(t *testing.T, str string, lcs lcs, want []string) { t.Helper() if !lcs.valid() { t.Errorf("bad lcs %v", lcs) } var got strings.Builder for _, dd := range lcs { got.WriteString(str[dd.X : dd.X+dd.Len]) } ans := got.String() for _, w := range want { if ans == w { return } } t.Fatalf("str=%q lcs=%v want=%q got=%q", str, lcs, want, ans) } func checkDiffs(t *testing.T, before string, diffs []Diff, after string) { t.Helper() var ans strings.Builder sofar := 0 // index of position in before for _, d := range diffs { if sofar < d.Start { ans.WriteString(before[sofar:d.Start]) } ans.WriteString(after[d.ReplStart:d.ReplEnd]) sofar = d.End } ans.WriteString(before[sofar:]) if ans.String() != after { t.Fatalf("diff %v took %q to %q, not to %q", diffs, before, ans.String(), after) } } func lcslen(l lcs) int { ans := 0 for _, d := range l { ans += int(d.Len) } return ans } // return a random string of length n made of characters from s func randstr(s string, n int) string { src := []rune(s) x := make([]rune, n) for i := 0; i < n; i++ { x[i] = src[rand.Intn(len(src))] } return string(x) } func TestLcsFix(t *testing.T) { tests := []struct{ before, after lcs }{ {lcs{diag{0, 0, 3}, diag{2, 2, 5}, diag{3, 4, 5}, diag{8, 9, 4}}, lcs{diag{0, 0, 2}, diag{2, 2, 1}, diag{3, 4, 5}, diag{8, 9, 4}}}, {lcs{diag{1, 1, 6}, diag{6, 12, 3}}, lcs{diag{1, 1, 5}, diag{6, 12, 3}}}, {lcs{diag{0, 0, 4}, diag{3, 5, 4}}, lcs{diag{0, 0, 3}, diag{3, 5, 4}}}, {lcs{diag{0, 20, 1}, diag{0, 0, 3}, diag{1, 20, 4}}, lcs{diag{0, 0, 3}, diag{3, 22, 2}}}, {lcs{diag{0, 0, 4}, diag{1, 1, 2}}, lcs{diag{0, 0, 4}}}, {lcs{diag{0, 0, 4}}, lcs{diag{0, 0, 4}}}, {lcs{}, lcs{}}, {lcs{diag{0, 0, 4}, diag{1, 1, 6}, diag{3, 3, 2}}, lcs{diag{0, 0, 1}, diag{1, 1, 6}}}, } for n, x := range tests { got := x.before.fix() if len(got) != len(x.after) { t.Errorf("got %v, expected %v, for %v", got, x.after, 
x.before) } olen := lcslen(x.after) glen := lcslen(got) if olen != glen { t.Errorf("%d: lens(%d,%d) differ, %v, %v, %v", n, glen, olen, got, x.after, x.before) } } } golang-github-aymanbagabas-go-udiff-0.2.0/lcs/doc.go000066400000000000000000000275271463166140100222360ustar00rootroot00000000000000// Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // package lcs contains code to find longest-common-subsequences // (and diffs) package lcs /* Compute longest-common-subsequences of two slices A, B using algorithms from Myers' paper. A longest-common-subsequence (LCS from now on) of A and B is a maximal set of lexically increasing pairs of subscripts (x,y) with A[x]==B[y]. There may be many LCS, but they all have the same length. An LCS determines a sequence of edits that changes A into B. The key concept is the edit graph of A and B. If A has length N and B has length M, then the edit graph has vertices v[i][j] for 0 <= i <= N, 0 <= j <= M. There is a horizontal edge from v[i][j] to v[i+1][j] whenever both are in the graph, and a vertical edge from v[i][j] to v[i][j+1] similarly. When A[i] == B[j] there is a diagonal edge from v[i][j] to v[i+1][j+1]. A path in the graph between (0,0) and (N,M) determines a sequence of edits converting A into B: each horizontal edge corresponds to removing an element of A, and each vertical edge corresponds to inserting an element of B. A vertex (x,y) is on (forward) diagonal k if x-y=k. A path in the graph is of length D if it has D non-diagonal edges. The algorithms generate forward paths (in which at least one of x,y increases at each edge), or backward paths (in which at least one of x,y decreases at each edge), or a combination. (Note that the orientation is the traditional mathematical one, with the origin in the lower-left corner.) Here is the edit graph for A:"aabbaa", B:"aacaba". (I know the diagonals look weird.)
⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ b | | | ___/‾‾‾ | ___/‾‾‾ | | | ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ c | | | | | | | ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ a a b b a a The algorithm labels a vertex (x,y) with D,k if it is on diagonal k and at the end of a maximal path of length D. (Because x-y=k it suffices to remember only the x coordinate of the vertex.) The forward algorithm: Find the longest diagonal starting at (0,0) and label its end with D=0,k=0. From that vertex take a vertical step and then follow the longest diagonal (up and to the right), and label that vertex with D=1,k=-1. From the D=0,k=0 point take a horizontal step and the follow the longest diagonal (up and to the right) and label that vertex D=1,k=1. In the same way, having labelled all the D vertices, from a vertex labelled D,k find two vertices tentatively labelled D+1,k-1 and D+1,k+1. There may be two on the same diagonal, in which case take the one with the larger x. Eventually the path gets to (N,M), and the diagonals on it are the LCS. Here is the edit graph with the ends of D-paths labelled. (So, for instance, 0/2,2 indicates that x=2,y=2 is labelled with 0, as it should be, since the first step is to go up the longest diagonal from (0,0).) 
A:"aabbaa", B:"aacaba" ⊙ ------- ⊙ ------- ⊙ -------(3/3,6)------- ⊙ -------(3/5,6)-------(4/6,6) a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | ⊙ ------- ⊙ ------- ⊙ -------(2/3,5)------- ⊙ ------- ⊙ ------- ⊙ b | | | ___/‾‾‾ | ___/‾‾‾ | | | ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ -------(3/5,4)------- ⊙ a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | ⊙ ------- ⊙ -------(1/2,3)-------(2/3,3)------- ⊙ ------- ⊙ ------- ⊙ c | | | | | | | ⊙ ------- ⊙ -------(0/2,2)-------(1/3,2)-------(2/4,2)-------(3/5,2)-------(4/6,2) a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ a | ___/‾‾‾ | ___/‾‾‾ | | | ___/‾‾‾ | ___/‾‾‾ | ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ ------- ⊙ a a b b a a The 4-path is reconstructed starting at (4/6,6), horizontal to (3/5,6), diagonal to (3,4), vertical to (2/3,3), horizontal to (1/2,3), vertical to (0/2,2), and diagonal to (0,0). As expected, there are 4 non-diagonal steps, and the diagonals form an LCS. 
There is a symmetric backward algorithm, which gives (backwards labels are prefixed with a colon): A:"aabbaa", B:"aacaba" ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ | ⊙ -------- ⊙ -------- ⊙ -------- ⊙ -------- ⊙ --------(:0/5,5)-------- ⊙ b | | | ____/‾‾‾ | ____/‾‾‾ | | | ⊙ -------- ⊙ -------- ⊙ --------(:1/3,4)-------- ⊙ -------- ⊙ -------- ⊙ a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ | (:3/0,3)--------(:2/1,3)-------- ⊙ --------(:2/3,3)--------(:1/4,3)-------- ⊙ -------- ⊙ c | | | | | | | ⊙ -------- ⊙ -------- ⊙ --------(:3/3,2)--------(:2/4,2)-------- ⊙ -------- ⊙ a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ | (:3/0,1)-------- ⊙ -------- ⊙ -------- ⊙ --------(:3/4,1)-------- ⊙ -------- ⊙ a | ____/‾‾‾ | ____/‾‾‾ | | | ____/‾‾‾ | ____/‾‾‾ | (:4/0,0)-------- ⊙ -------- ⊙ -------- ⊙ --------(:4/4,0)-------- ⊙ -------- ⊙ a a b b a a Neither of these is ideal for use in an editor, where it is undesirable to send very long diffs to the front end. It's tricky to decide exactly what 'very long diffs' means, as "replace A by B" is very short. We want to control how big D can be, by stopping when it gets too large. The forward algorithm then privileges common prefixes, and the backward algorithm privileges common suffixes. Either is an undesirable asymmetry. Fortunately there is a two-sided algorithm, implied by results in Myers' paper. Here's what the labels in the edit graph look like. 
A:"aabbaa", B:"aacaba" ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ | ⊙ --------- ⊙ --------- ⊙ --------- (2/3,5) --------- ⊙ --------- (:0/5,5)--------- ⊙ b | | | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ⊙ --------- ⊙ --------- ⊙ --------- (:1/3,4)--------- ⊙ --------- ⊙ --------- ⊙ a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ | ⊙ --------- (:2/1,3)--------- (1/2,3) ---------(2:2/3,3)--------- (:1/4,3)--------- ⊙ --------- ⊙ c | | | | | | | ⊙ --------- ⊙ --------- (0/2,2) --------- (1/3,2) ---------(2:2/4,2)--------- ⊙ --------- ⊙ a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ | ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ a | ____/‾‾‾‾ | ____/‾‾‾‾ | | | ____/‾‾‾‾ | ____/‾‾‾‾ | ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ --------- ⊙ a a b b a a The algorithm stopped when it saw the backwards 2-path ending at (1,3) and the forwards 2-path ending at (3,5). The criterion is a backwards path ending at (u,v) and a forward path ending at (x,y), where u <= x and the two points are on the same diagonal. (Here the edgegraph has a diagonal, but the criterion is x-y=u-v.) Myers proves there is a forward 2-path from (0,0) to (1,3), and that together with the backwards 2-path ending at (1,3) gives the expected 4-path. Unfortunately the forward path has to be constructed by another run of the forward algorithm; it can't be found from the computed labels. That is the worst case. Had the code noticed (x,y)=(u,v)=(3,3) the whole path could be reconstructed from the edgegraph. The implementation looks for a number of special cases to try to avoid computing an extra forward path. If the two-sided algorithm has stop early (because D has become too large) it will have found a forward LCS and a backwards LCS. Ideally these go with disjoint prefixes and suffixes of A and B, but disjointness may fail and the two computed LCS may conflict. 
(An easy example is where A is a suffix of B, and shares a short prefix. The backwards LCS is all of A, and the forward LCS is a prefix of A.) The algorithm combines the two to form a best-effort LCS. In the worst case the forward partial LCS may have to be recomputed. */ /* Eugene Myers' paper is titled "An O(ND) Difference Algorithm and Its Variations" and can be found at http://www.xmailserver.org/diff2.pdf (There is a generic implementation of the algorithm in the repository with git hash b9ad7e4ade3a686d608e44475390ad428e60e7fc) */ golang-github-aymanbagabas-go-udiff-0.2.0/lcs/git.sh000066400000000000000000000016541463166140100222520ustar00rootroot00000000000000#!/bin/bash # # Copyright 2022 The Go Authors. All rights reserved. # Use of this source code is governed by a BSD-style # license that can be found in the LICENSE file. # # Creates a zip file containing all numbered versions # of the commit history of a large source file, for use # as input data for the tests of the diff algorithm. # # Run script from root of the x/tools repo. set -eu # WARNING: This script will install the latest version of $file # The largest real source file in the x/tools repo. # file=internal/lsp/source/completion/completion.go # file=internal/lsp/source/diagnostics.go file=internal/lsp/protocol/tsprotocol.go tmp=$(mktemp -d) git log $file | awk '/^commit / {print $2}' | nl -ba -nrz | while read n hash; do git checkout --quiet $hash $file cp -f $file $tmp/$n done (cd $tmp && zip -q - *) > testdata.zip rm -fr $tmp git restore --staged $file git restore $file echo "Created testdata.zip" golang-github-aymanbagabas-go-udiff-0.2.0/lcs/labels.go000066400000000000000000000021231463166140100227140ustar00rootroot00000000000000// Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file.
package lcs

import (
	"fmt"
)

// label is a triangular table of x coordinates indexed by (D, k).
// Row D holds D+1 entries, and the entry for (D, k) sits at
// vec[D][(D+k)/2].
type label struct {
	vec [][]int
}

// Temporary checking DO NOT COMMIT true TO PRODUCTION CODE
const debug = false

// checkDK is a debugging aid: it panics unless -D <= k <= D and D+k is even.
func checkDK(D, k int) {
	inRange := -D <= k && k <= D
	if !inRange || (D+k)%2 != 0 {
		panic(fmt.Sprintf("out of range, d=%d,k=%d", D, k))
	}
}

// set records x as the label for (D, k), growing the table and allocating
// row D the first time that row is written.
func (t *label) set(D, k, x int) {
	if debug {
		checkDK(D, k)
	}
	for D >= len(t.vec) {
		t.vec = append(t.vec, nil)
	}
	row := t.vec[D]
	if row == nil {
		row = make([]int, D+1)
		t.vec[D] = row
	}
	// D+k is known to be even, so the division is exact
	row[(D+k)/2] = x
}

// get returns the label stored for (d, k). The row must already exist.
func (t *label) get(d, k int) int {
	if debug {
		checkDK(d, k)
	}
	row := t.vec[d]
	return row[(d+k)/2]
}

// newtriang returns an empty label table. For limits below 100 the outer
// slice is allocated up front with limit (still nil) rows; for larger
// limits the table starts with no rows at all.
func newtriang(limit int) label {
	var tbl label
	if limit < 100 {
		tbl.vec = make([][]int, limit)
	}
	return tbl
}
golang-github-aymanbagabas-go-udiff-0.2.0/lcs/old.go000066400000000000000000000315321463166140100222360ustar00rootroot00000000000000// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package lcs

// TODO(adonovan): remove unclear references to "old" in this package.

import (
	"fmt"
)

// A Diff describes one edit: the range [Start, End) of A is replaced by the
// range [ReplStart, ReplEnd) of B.
type Diff struct {
	Start, End         int // offsets of portion to delete in A
	ReplStart, ReplEnd int // offset of replacement text in B
}

// DiffStrings computes the edits that turn string a into string b.
// Offsets are byte offsets; rune boundaries are not respected.
func DiffStrings(a, b string) []Diff {
	pair := stringSeqs{a, b}
	return diff(pair)
}

// DiffBytes computes the edits that turn byte slice a into byte slice b.
// Offsets are byte offsets; rune boundaries are not respected.
func DiffBytes(a, b []byte) []Diff {
	pair := bytesSeqs{a, b}
	return diff(pair)
}

// DiffRunes returns the differences between two rune sequences.
func DiffRunes(a, b []rune) []Diff { return diff(runesSeqs{a, b}) } func diff(seqs sequences) []Diff { // A limit on how deeply the LCS algorithm should search. The value is just a guess. const maxDiffs = 100 diff, _ := compute(seqs, twosided, maxDiffs/2) return diff } // compute computes the list of differences between two sequences, // along with the LCS. It is exercised directly by tests. // The algorithm is one of {forward, backward, twosided}. func compute(seqs sequences, algo func(*editGraph) lcs, limit int) ([]Diff, lcs) { if limit <= 0 { limit = 1 << 25 // effectively infinity } alen, blen := seqs.lengths() g := &editGraph{ seqs: seqs, vf: newtriang(limit), vb: newtriang(limit), limit: limit, ux: alen, uy: blen, delta: alen - blen, } lcs := algo(g) diffs := lcs.toDiffs(alen, blen) return diffs, lcs } // editGraph carries the information for computing the lcs of two sequences. type editGraph struct { seqs sequences vf, vb label // forward and backward labels limit int // maximal value of D // the bounding rectangle of the current edit graph lx, ly, ux, uy int delta int // common subexpression: (ux-lx)-(uy-ly) } // toDiffs converts an LCS to a list of edits. func (lcs lcs) toDiffs(alen, blen int) []Diff { var diffs []Diff var pa, pb int // offsets in a, b for _, l := range lcs { if pa < l.X || pb < l.Y { diffs = append(diffs, Diff{pa, l.X, pb, l.Y}) } pa = l.X + l.Len pb = l.Y + l.Len } if pa < alen || pb < blen { diffs = append(diffs, Diff{pa, alen, pb, blen}) } return diffs } // --- FORWARD --- // fdone decides if the forwward path has reached the upper right // corner of the rectangle. If so, it also returns the computed lcs. func (e *editGraph) fdone(D, k int) (bool, lcs) { // x, y, k are relative to the rectangle x := e.vf.get(D, k) y := x - k if x == e.ux && y == e.uy { return true, e.forwardlcs(D, k) } return false, nil } // run the forward algorithm, until success or up to the limit on D. 
func forward(e *editGraph) lcs { e.setForward(0, 0, e.lx) if ok, ans := e.fdone(0, 0); ok { return ans } // from D to D+1 for D := 0; D < e.limit; D++ { e.setForward(D+1, -(D + 1), e.getForward(D, -D)) if ok, ans := e.fdone(D+1, -(D + 1)); ok { return ans } e.setForward(D+1, D+1, e.getForward(D, D)+1) if ok, ans := e.fdone(D+1, D+1); ok { return ans } for k := -D + 1; k <= D-1; k += 2 { // these are tricky and easy to get backwards lookv := e.lookForward(k, e.getForward(D, k-1)+1) lookh := e.lookForward(k, e.getForward(D, k+1)) if lookv > lookh { e.setForward(D+1, k, lookv) } else { e.setForward(D+1, k, lookh) } if ok, ans := e.fdone(D+1, k); ok { return ans } } } // D is too large // find the D path with maximal x+y inside the rectangle and // use that to compute the found part of the lcs kmax := -e.limit - 1 diagmax := -1 for k := -e.limit; k <= e.limit; k += 2 { x := e.getForward(e.limit, k) y := x - k if x+y > diagmax && x <= e.ux && y <= e.uy { diagmax, kmax = x+y, k } } return e.forwardlcs(e.limit, kmax) } // recover the lcs by backtracking from the farthest point reached func (e *editGraph) forwardlcs(D, k int) lcs { var ans lcs for x := e.getForward(D, k); x != 0 || x-k != 0; { if ok(D-1, k-1) && x-1 == e.getForward(D-1, k-1) { // if (x-1,y) is labelled D-1, x--,D--,k--,continue D, k, x = D-1, k-1, x-1 continue } else if ok(D-1, k+1) && x == e.getForward(D-1, k+1) { // if (x,y-1) is labelled D-1, x, D--,k++, continue D, k = D-1, k+1 continue } // if (x-1,y-1)--(x,y) is a diagonal, prepend,x--,y--, continue y := x - k ans = ans.prepend(x+e.lx-1, y+e.ly-1) x-- } return ans } // start at (x,y), go up the diagonal as far as possible, // and label the result with d func (e *editGraph) lookForward(k, relx int) int { rely := relx - k x, y := relx+e.lx, rely+e.ly if x < e.ux && y < e.uy { x += e.seqs.commonPrefixLen(x, e.ux, y, e.uy) } return x } func (e *editGraph) setForward(d, k, relx int) { x := e.lookForward(k, relx) e.vf.set(d, k, x-e.lx) } func (e 
*editGraph) getForward(d, k int) int { x := e.vf.get(d, k) return x } // --- BACKWARD --- // bdone decides if the backward path has reached the lower left corner func (e *editGraph) bdone(D, k int) (bool, lcs) { // x, y, k are relative to the rectangle x := e.vb.get(D, k) y := x - (k + e.delta) if x == 0 && y == 0 { return true, e.backwardlcs(D, k) } return false, nil } // run the backward algorithm, until success or up to the limit on D. func backward(e *editGraph) lcs { e.setBackward(0, 0, e.ux) if ok, ans := e.bdone(0, 0); ok { return ans } // from D to D+1 for D := 0; D < e.limit; D++ { e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1) if ok, ans := e.bdone(D+1, -(D + 1)); ok { return ans } e.setBackward(D+1, D+1, e.getBackward(D, D)) if ok, ans := e.bdone(D+1, D+1); ok { return ans } for k := -D + 1; k <= D-1; k += 2 { // these are tricky and easy to get wrong lookv := e.lookBackward(k, e.getBackward(D, k-1)) lookh := e.lookBackward(k, e.getBackward(D, k+1)-1) if lookv < lookh { e.setBackward(D+1, k, lookv) } else { e.setBackward(D+1, k, lookh) } if ok, ans := e.bdone(D+1, k); ok { return ans } } } // D is too large // find the D path with minimal x+y inside the rectangle and // use that to compute the part of the lcs found kmax := -e.limit - 1 diagmin := 1 << 25 for k := -e.limit; k <= e.limit; k += 2 { x := e.getBackward(e.limit, k) y := x - (k + e.delta) if x+y < diagmin && x >= 0 && y >= 0 { diagmin, kmax = x+y, k } } if kmax < -e.limit { panic(fmt.Sprintf("no paths when limit=%d?", e.limit)) } return e.backwardlcs(e.limit, kmax) } // recover the lcs by backtracking func (e *editGraph) backwardlcs(D, k int) lcs { var ans lcs for x := e.getBackward(D, k); x != e.ux || x-(k+e.delta) != e.uy; { if ok(D-1, k-1) && x == e.getBackward(D-1, k-1) { // D--, k--, x unchanged D, k = D-1, k-1 continue } else if ok(D-1, k+1) && x+1 == e.getBackward(D-1, k+1) { // D--, k++, x++ D, k, x = D-1, k+1, x+1 continue } y := x - (k + e.delta) ans = ans.append(x+e.lx, y+e.ly) 
x++ } return ans } // start at (x,y), go down the diagonal as far as possible, func (e *editGraph) lookBackward(k, relx int) int { rely := relx - (k + e.delta) // forward k = k + e.delta x, y := relx+e.lx, rely+e.ly if x > 0 && y > 0 { x -= e.seqs.commonSuffixLen(0, x, 0, y) } return x } // convert to rectangle, and label the result with d func (e *editGraph) setBackward(d, k, relx int) { x := e.lookBackward(k, relx) e.vb.set(d, k, x-e.lx) } func (e *editGraph) getBackward(d, k int) int { x := e.vb.get(d, k) return x } // -- TWOSIDED --- func twosided(e *editGraph) lcs { // The termination condition could be improved, as either the forward // or backward pass could succeed before Myers' Lemma applies. // Aside from questions of efficiency (is the extra testing cost-effective) // this is more likely to matter when e.limit is reached. e.setForward(0, 0, e.lx) e.setBackward(0, 0, e.ux) // from D to D+1 for D := 0; D < e.limit; D++ { // just finished a backwards pass, so check if got, ok := e.twoDone(D, D); ok { return e.twolcs(D, D, got) } // do a forwards pass (D to D+1) e.setForward(D+1, -(D + 1), e.getForward(D, -D)) e.setForward(D+1, D+1, e.getForward(D, D)+1) for k := -D + 1; k <= D-1; k += 2 { // these are tricky and easy to get backwards lookv := e.lookForward(k, e.getForward(D, k-1)+1) lookh := e.lookForward(k, e.getForward(D, k+1)) if lookv > lookh { e.setForward(D+1, k, lookv) } else { e.setForward(D+1, k, lookh) } } // just did a forward pass, so check if got, ok := e.twoDone(D+1, D); ok { return e.twolcs(D+1, D, got) } // do a backward pass, D to D+1 e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1) e.setBackward(D+1, D+1, e.getBackward(D, D)) for k := -D + 1; k <= D-1; k += 2 { // these are tricky and easy to get wrong lookv := e.lookBackward(k, e.getBackward(D, k-1)) lookh := e.lookBackward(k, e.getBackward(D, k+1)-1) if lookv < lookh { e.setBackward(D+1, k, lookv) } else { e.setBackward(D+1, k, lookh) } } } // D too large. 
combine a forward and backward partial lcs // first, a forward one kmax := -e.limit - 1 diagmax := -1 for k := -e.limit; k <= e.limit; k += 2 { x := e.getForward(e.limit, k) y := x - k if x+y > diagmax && x <= e.ux && y <= e.uy { diagmax, kmax = x+y, k } } if kmax < -e.limit { panic(fmt.Sprintf("no forward paths when limit=%d?", e.limit)) } lcs := e.forwardlcs(e.limit, kmax) // now a backward one // find the D path with minimal x+y inside the rectangle and // use that to compute the lcs diagmin := 1 << 25 // infinity for k := -e.limit; k <= e.limit; k += 2 { x := e.getBackward(e.limit, k) y := x - (k + e.delta) if x+y < diagmin && x >= 0 && y >= 0 { diagmin, kmax = x+y, k } } if kmax < -e.limit { panic(fmt.Sprintf("no backward paths when limit=%d?", e.limit)) } lcs = append(lcs, e.backwardlcs(e.limit, kmax)...) // These may overlap (e.forwardlcs and e.backwardlcs return sorted lcs) ans := lcs.fix() return ans } // Does Myers' Lemma apply? func (e *editGraph) twoDone(df, db int) (int, bool) { if (df+db+e.delta)%2 != 0 { return 0, false // diagonals cannot overlap } kmin := -db + e.delta if -df > kmin { kmin = -df } kmax := db + e.delta if df < kmax { kmax = df } for k := kmin; k <= kmax; k += 2 { x := e.vf.get(df, k) u := e.vb.get(db, k-e.delta) if u <= x { // is it worth looking at all the other k? for l := k; l <= kmax; l += 2 { x := e.vf.get(df, l) y := x - l u := e.vb.get(db, l-e.delta) v := u - l if x == u || u == 0 || v == 0 || y == e.uy || x == e.ux { return l, true } } return k, true } } return 0, false } func (e *editGraph) twolcs(df, db, kf int) lcs { // db==df || db+1==df x := e.vf.get(df, kf) y := x - kf kb := kf - e.delta u := e.vb.get(db, kb) v := u - kf // Myers proved there is a df-path from (0,0) to (u,v) // and a db-path from (x,y) to (N,M). // In the first case the overall path is the forward path // to (u,v) followed by the backward path to (N,M). 
// In the second case the path is the backward path to (x,y) // followed by the forward path to (x,y) from (0,0). // Look for some special cases to avoid computing either of these paths. if x == u { // "babaab" "cccaba" // already patched together lcs := e.forwardlcs(df, kf) lcs = append(lcs, e.backwardlcs(db, kb)...) return lcs.sort() } // is (u-1,v) or (u,v-1) labelled df-1? // if so, that forward df-1-path plus a horizontal or vertical edge // is the df-path to (u,v), then plus the db-path to (N,M) if u > 0 && ok(df-1, u-1-v) && e.vf.get(df-1, u-1-v) == u-1 { // "aabbab" "cbcabc" lcs := e.forwardlcs(df-1, u-1-v) lcs = append(lcs, e.backwardlcs(db, kb)...) return lcs.sort() } if v > 0 && ok(df-1, (u-(v-1))) && e.vf.get(df-1, u-(v-1)) == u { // "abaabb" "bcacab" lcs := e.forwardlcs(df-1, u-(v-1)) lcs = append(lcs, e.backwardlcs(db, kb)...) return lcs.sort() } // The path can't possibly contribute to the lcs because it // is all horizontal or vertical edges if u == 0 || v == 0 || x == e.ux || y == e.uy { // "abaabb" "abaaaa" if u == 0 || v == 0 { return e.backwardlcs(db, kb) } return e.forwardlcs(df, kf) } // is (x+1,y) or (x,y+1) labelled db-1? if x+1 <= e.ux && ok(db-1, x+1-y-e.delta) && e.vb.get(db-1, x+1-y-e.delta) == x+1 { // "bababb" "baaabb" lcs := e.backwardlcs(db-1, kb+1) lcs = append(lcs, e.forwardlcs(df, kf)...) return lcs.sort() } if y+1 <= e.uy && ok(db-1, x-(y+1)-e.delta) && e.vb.get(db-1, x-(y+1)-e.delta) == x { // "abbbaa" "cabacc" lcs := e.backwardlcs(db-1, kb-1) lcs = append(lcs, e.forwardlcs(df, kf)...) return lcs.sort() } // need to compute another path // "aabbaa" "aacaba" lcs := e.backwardlcs(db, kb) oldx, oldy := e.ux, e.uy e.ux = u e.uy = v lcs = append(lcs, forward(e)...) e.ux, e.uy = oldx, oldy return lcs.sort() } golang-github-aymanbagabas-go-udiff-0.2.0/lcs/old_test.go000066400000000000000000000175201463166140100232760ustar00rootroot00000000000000// Copyright 2022 The Go Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package lcs import ( "fmt" "log" "math/rand" "os" "strings" "testing" ) func TestAlgosOld(t *testing.T) { for i, algo := range []func(*editGraph) lcs{forward, backward, twosided} { t.Run(strings.Fields("forward backward twosided")[i], func(t *testing.T) { for _, tx := range Btests { lim := len(tx.a) + len(tx.b) diffs, lcs := compute(stringSeqs{tx.a, tx.b}, algo, lim) check(t, tx.a, lcs, tx.lcs) checkDiffs(t, tx.a, diffs, tx.b) diffs, lcs = compute(stringSeqs{tx.b, tx.a}, algo, lim) check(t, tx.b, lcs, tx.lcs) checkDiffs(t, tx.b, diffs, tx.a) } }) } } func TestIntOld(t *testing.T) { // need to avoid any characters in btests lfill, rfill := "AAAAAAAAAAAA", "BBBBBBBBBBBB" for _, tx := range Btests { if len(tx.a) < 2 || len(tx.b) < 2 { continue } left := tx.a + lfill right := tx.b + rfill lim := len(tx.a) + len(tx.b) diffs, lcs := compute(stringSeqs{left, right}, twosided, lim) check(t, left, lcs, tx.lcs) checkDiffs(t, left, diffs, right) diffs, lcs = compute(stringSeqs{right, left}, twosided, lim) check(t, right, lcs, tx.lcs) checkDiffs(t, right, diffs, left) left = lfill + tx.a right = rfill + tx.b diffs, lcs = compute(stringSeqs{left, right}, twosided, lim) check(t, left, lcs, tx.lcs) checkDiffs(t, left, diffs, right) diffs, lcs = compute(stringSeqs{right, left}, twosided, lim) check(t, right, lcs, tx.lcs) checkDiffs(t, right, diffs, left) } } func TestSpecialOld(t *testing.T) { // exercises lcs.fix a := "golang.org/x/tools/intern" b := "github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/intern" diffs, lcs := compute(stringSeqs{a, b}, twosided, 4) if !lcs.valid() { t.Errorf("%d,%v", len(diffs), lcs) } } func TestRegressionOld001(t *testing.T) { a := "// Copyright 2019 The Go Authors. 
All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"github.com/aymanbagabas/go-udiff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n" b := "// Copyright 2019 The Go Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style\n// license that can be found in the LICENSE file.\n\npackage diff_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"github.com/google/safehtml/template\"\n\t\"golang.org/x/tools/gopls/internal/lsp/diff\"\n\t\"github.com/aymanbagabas/go-udiff/difftest\"\n\t\"golang.org/x/tools/gopls/internal/span\"\n)\n" for i := 1; i < len(b); i++ { diffs, lcs := compute(stringSeqs{a, b}, twosided, i) // 14 from gopls if !lcs.valid() { t.Errorf("%d,%v", len(diffs), lcs) } checkDiffs(t, a, diffs, b) } } func TestRegressionOld002(t *testing.T) { a := "n\"\n)\n" b := "n\"\n\t\"golang.org/x//nnal/stack\"\n)\n" for i := 1; i <= len(b); i++ { diffs, lcs := compute(stringSeqs{a, b}, twosided, i) if !lcs.valid() { t.Errorf("%d,%v", len(diffs), lcs) } checkDiffs(t, a, diffs, b) } } func TestRegressionOld003(t *testing.T) { a := "golang.org/x/hello v1.0.0\nrequire golang.org/x/unused v1" b := "golang.org/x/hello v1" for i := 1; i <= len(a); i++ { diffs, lcs := compute(stringSeqs{a, b}, twosided, i) if !lcs.valid() { t.Errorf("%d,%v", len(diffs), lcs) } checkDiffs(t, a, diffs, b) } } func TestRandOld(t *testing.T) { rand.Seed(1) for i := 0; i < 1000; i++ { // TODO(adonovan): use ASCII and bytesSeqs here? The use of // non-ASCII isn't relevant to the property exercised by the test. 
a := []rune(randstr("abω", 16)) b := []rune(randstr("abωc", 16)) seq := runesSeqs{a, b} const lim = 24 // large enough to get true lcs _, forw := compute(seq, forward, lim) _, back := compute(seq, backward, lim) _, two := compute(seq, twosided, lim) if lcslen(two) != lcslen(forw) || lcslen(forw) != lcslen(back) { t.Logf("\n%v\n%v\n%v", forw, back, two) t.Fatalf("%d forw:%d back:%d two:%d", i, lcslen(forw), lcslen(back), lcslen(two)) } if !two.valid() || !forw.valid() || !back.valid() { t.Errorf("check failure") } } } // TestDiffAPI tests the public API functions (Diff{Bytes,Strings,Runes}) // to ensure at least miminal parity of the three representations. func TestDiffAPI(t *testing.T) { for _, test := range []struct { a, b string wantStrings, wantBytes, wantRunes string }{ {"abcXdef", "abcxdef", "[{3 4 3 4}]", "[{3 4 3 4}]", "[{3 4 3 4}]"}, // ASCII {"abcωdef", "abcΩdef", "[{3 5 3 5}]", "[{3 5 3 5}]", "[{3 4 3 4}]"}, // non-ASCII } { gotStrings := fmt.Sprint(DiffStrings(test.a, test.b)) if gotStrings != test.wantStrings { t.Errorf("DiffStrings(%q, %q) = %v, want %v", test.a, test.b, gotStrings, test.wantStrings) } gotBytes := fmt.Sprint(DiffBytes([]byte(test.a), []byte(test.b))) if gotBytes != test.wantBytes { t.Errorf("DiffBytes(%q, %q) = %v, want %v", test.a, test.b, gotBytes, test.wantBytes) } gotRunes := fmt.Sprint(DiffRunes([]rune(test.a), []rune(test.b))) if gotRunes != test.wantRunes { t.Errorf("DiffRunes(%q, %q) = %v, want %v", test.a, test.b, gotRunes, test.wantRunes) } } } func BenchmarkTwoOld(b *testing.B) { tests := genBench("abc", 96) for i := 0; i < b.N; i++ { for _, tt := range tests { _, two := compute(stringSeqs{tt.before, tt.after}, twosided, 100) if !two.valid() { b.Error("check failed") } } } } func BenchmarkForwOld(b *testing.B) { tests := genBench("abc", 96) for i := 0; i < b.N; i++ { for _, tt := range tests { _, two := compute(stringSeqs{tt.before, tt.after}, forward, 100) if !two.valid() { b.Error("check failed") } } } } func genBench(set 
string, n int) []struct{ before, after string } { // before and after for benchmarks. 24 strings of length n with // before and after differing at least once, and about 5% rand.Seed(3) var ans []struct{ before, after string } for i := 0; i < 24; i++ { // maybe b should have an approximately known number of diffs a := randstr(set, n) cnt := 0 bb := make([]rune, 0, n) for _, r := range a { if rand.Float64() < .05 { cnt++ r = 'N' } bb = append(bb, r) } if cnt == 0 { // avoid == shortcut bb[n/2] = 'N' } ans = append(ans, struct{ before, after string }{a, string(bb)}) } return ans } // This benchmark represents a common case for a diff command: // large file with a single relatively small diff in the middle. // (It's not clear whether this is representative of gopls workloads // or whether it is important to gopls diff performance.) // // TODO(adonovan) opt: it could be much faster. For example, // comparing a file against itself is about 10x faster than with the // small deletion in the middle. Strangely, comparing a file against // itself minus the last byte is faster still; I don't know why. // There is much low-hanging fruit here for further improvement. 
func BenchmarkLargeFileSmallDiff(b *testing.B) { data, err := os.ReadFile("old.go") // large file if err != nil { log.Fatal(err) } n := len(data) src := string(data) dst := src[:n*49/100] + src[n*51/100:] // remove 2% from the middle b.Run("string", func(b *testing.B) { for i := 0; i < b.N; i++ { compute(stringSeqs{src, dst}, twosided, len(src)+len(dst)) } }) srcBytes := []byte(src) dstBytes := []byte(dst) b.Run("bytes", func(b *testing.B) { for i := 0; i < b.N; i++ { compute(bytesSeqs{srcBytes, dstBytes}, twosided, len(srcBytes)+len(dstBytes)) } }) srcRunes := []rune(src) dstRunes := []rune(dst) b.Run("runes", func(b *testing.B) { for i := 0; i < b.N; i++ { compute(runesSeqs{srcRunes, dstRunes}, twosided, len(srcRunes)+len(dstRunes)) } }) } golang-github-aymanbagabas-go-udiff-0.2.0/lcs/sequence.go000066400000000000000000000056721463166140100232760ustar00rootroot00000000000000// Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package lcs // This file defines the abstract sequence over which the LCS algorithm operates. // sequences abstracts a pair of sequences, A and B. type sequences interface { lengths() (int, int) // len(A), len(B) commonPrefixLen(ai, aj, bi, bj int) int // len(commonPrefix(A[ai:aj], B[bi:bj])) commonSuffixLen(ai, aj, bi, bj int) int // len(commonSuffix(A[ai:aj], B[bi:bj])) } type stringSeqs struct{ a, b string } func (s stringSeqs) lengths() (int, int) { return len(s.a), len(s.b) } func (s stringSeqs) commonPrefixLen(ai, aj, bi, bj int) int { return commonPrefixLenString(s.a[ai:aj], s.b[bi:bj]) } func (s stringSeqs) commonSuffixLen(ai, aj, bi, bj int) int { return commonSuffixLenString(s.a[ai:aj], s.b[bi:bj]) } // The explicit capacity in s[i:j:j] leads to more efficient code. 
type bytesSeqs struct{ a, b []byte } func (s bytesSeqs) lengths() (int, int) { return len(s.a), len(s.b) } func (s bytesSeqs) commonPrefixLen(ai, aj, bi, bj int) int { return commonPrefixLenBytes(s.a[ai:aj:aj], s.b[bi:bj:bj]) } func (s bytesSeqs) commonSuffixLen(ai, aj, bi, bj int) int { return commonSuffixLenBytes(s.a[ai:aj:aj], s.b[bi:bj:bj]) } type runesSeqs struct{ a, b []rune } func (s runesSeqs) lengths() (int, int) { return len(s.a), len(s.b) } func (s runesSeqs) commonPrefixLen(ai, aj, bi, bj int) int { return commonPrefixLenRunes(s.a[ai:aj:aj], s.b[bi:bj:bj]) } func (s runesSeqs) commonSuffixLen(ai, aj, bi, bj int) int { return commonSuffixLenRunes(s.a[ai:aj:aj], s.b[bi:bj:bj]) } // TODO(adonovan): optimize these functions using ideas from: // - https://go.dev/cl/408116 common.go // - https://go.dev/cl/421435 xor_generic.go // TODO(adonovan): factor using generics when available, // but measure performance impact. // commonPrefixLen* returns the length of the common prefix of a[ai:aj] and b[bi:bj]. func commonPrefixLenBytes(a, b []byte) int { n := min(len(a), len(b)) i := 0 for i < n && a[i] == b[i] { i++ } return i } func commonPrefixLenRunes(a, b []rune) int { n := min(len(a), len(b)) i := 0 for i < n && a[i] == b[i] { i++ } return i } func commonPrefixLenString(a, b string) int { n := min(len(a), len(b)) i := 0 for i < n && a[i] == b[i] { i++ } return i } // commonSuffixLen* returns the length of the common suffix of a[ai:aj] and b[bi:bj]. 
func commonSuffixLenBytes(a, b []byte) int { n := min(len(a), len(b)) i := 0 for i < n && a[len(a)-1-i] == b[len(b)-1-i] { i++ } return i } func commonSuffixLenRunes(a, b []rune) int { n := min(len(a), len(b)) i := 0 for i < n && a[len(a)-1-i] == b[len(b)-1-i] { i++ } return i } func commonSuffixLenString(a, b string) int { n := min(len(a), len(b)) i := 0 for i < n && a[len(a)-1-i] == b[len(b)-1-i] { i++ } return i } func min(x, y int) int { if x < y { return x } else { return y } } golang-github-aymanbagabas-go-udiff-0.2.0/myers/000077500000000000000000000000001463166140100215035ustar00rootroot00000000000000golang-github-aymanbagabas-go-udiff-0.2.0/myers/diff.go000066400000000000000000000126211463166140100227440ustar00rootroot00000000000000// Copyright 2019 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package myers implements the Myers diff algorithm. package myers import ( "strings" diff "github.com/aymanbagabas/go-udiff" ) // Sources: // https://blog.jcoglan.com/2017/02/17/the-myers-diff-algorithm-part-3/ // https://www.codeproject.com/Articles/42279/%2FArticles%2F42279%2FInvestigating-Myers-diff-algorithm-Part-1-of-2 func ComputeEdits(before, after string) []diff.Edit { beforeLines := splitLines(before) ops := operations(beforeLines, splitLines(after)) // Build a table mapping line number to offset. lineOffsets := make([]int, 0, len(beforeLines)+1) total := 0 for i := range beforeLines { lineOffsets = append(lineOffsets, total) total += len(beforeLines[i]) } lineOffsets = append(lineOffsets, total) // EOF edits := make([]diff.Edit, 0, len(ops)) for _, op := range ops { start, end := lineOffsets[op.I1], lineOffsets[op.I2] switch op.Kind { case opDelete: // Delete: before[I1:I2] is deleted. edits = append(edits, diff.Edit{Start: start, End: end}) case opInsert: // Insert: after[J1:J2] is inserted at before[I1:I1]. 
if content := strings.Join(op.Content, ""); content != "" { edits = append(edits, diff.Edit{Start: start, End: end, New: content}) } } } return edits } // opKind is used to denote the type of operation a line represents. type opKind int const ( opDelete opKind = iota // line deleted from input (-) opInsert // line inserted into output (+) opEqual // line present in input and output ) func (kind opKind) String() string { switch kind { case opDelete: return "delete" case opInsert: return "insert" case opEqual: return "equal" default: panic("unknown opKind") } } type operation struct { Kind opKind Content []string // content from b I1, I2 int // indices of the line in a J1 int // indices of the line in b, J2 implied by len(Content) } // operations returns the list of operations to convert a into b, consolidating // operations for multiple lines and not including equal lines. func operations(a, b []string) []*operation { if len(a) == 0 && len(b) == 0 { return nil } trace, offset := shortestEditSequence(a, b) snakes := backtrack(trace, len(a), len(b), offset) M, N := len(a), len(b) var i int solution := make([]*operation, len(a)+len(b)) add := func(op *operation, i2, j2 int) { if op == nil { return } op.I2 = i2 if op.Kind == opInsert { op.Content = b[op.J1:j2] } solution[i] = op i++ } x, y := 0, 0 for _, snake := range snakes { if len(snake) < 2 { continue } var op *operation // delete (horizontal) for snake[0]-snake[1] > x-y { if op == nil { op = &operation{ Kind: opDelete, I1: x, J1: y, } } x++ if x == M { break } } add(op, x, y) op = nil // insert (vertical) for snake[0]-snake[1] < x-y { if op == nil { op = &operation{ Kind: opInsert, I1: x, J1: y, } } y++ } add(op, x, y) op = nil // equal (diagonal) for x < snake[0] { x++ y++ } if x >= M && y >= N { break } } return solution[:i] } // backtrack uses the trace for the edit sequence computation and returns the // "snakes" that make up the solution. 
A "snake" is a single deletion or // insertion followed by zero or diagonals. func backtrack(trace [][]int, x, y, offset int) [][]int { snakes := make([][]int, len(trace)) d := len(trace) - 1 for ; x > 0 && y > 0 && d > 0; d-- { V := trace[d] if len(V) == 0 { continue } snakes[d] = []int{x, y} k := x - y var kPrev int if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) { kPrev = k + 1 } else { kPrev = k - 1 } x = V[kPrev+offset] y = x - kPrev } if x < 0 || y < 0 { return snakes } snakes[d] = []int{x, y} return snakes } // shortestEditSequence returns the shortest edit sequence that converts a into b. func shortestEditSequence(a, b []string) ([][]int, int) { M, N := len(a), len(b) V := make([]int, 2*(N+M)+1) offset := N + M trace := make([][]int, N+M+1) // Iterate through the maximum possible length of the SES (N+M). for d := 0; d <= N+M; d++ { copyV := make([]int, len(V)) // k lines are represented by the equation y = x - k. We move in // increments of 2 because end points for even d are on even k lines. for k := -d; k <= d; k += 2 { // At each point, we either go down or to the right. We go down if // k == -d, and we go to the right if k == d. We also prioritize // the maximum x value, because we prefer deletions to insertions. var x int if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) { x = V[k+1+offset] // down } else { x = V[k-1+offset] + 1 // right } y := x - k // Diagonal moves while we have equal contents. for x < M && y < N && a[x] == b[y] { x++ y++ } V[k+offset] = x // Return if we've exceeded the maximum values. if x == M && y == N { // Makes sure to save the state of the array before returning. copy(copyV, V) trace[d] = copyV return trace, offset } } // Save the state of the array. 
copy(copyV, V) trace[d] = copyV } return nil, 0 } func splitLines(text string) []string { lines := strings.SplitAfter(text, "\n") if lines[len(lines)-1] == "" { lines = lines[:len(lines)-1] } return lines } golang-github-aymanbagabas-go-udiff-0.2.0/myers/diff_test.go000066400000000000000000000005551463166140100240060ustar00rootroot00000000000000// Copyright 2019 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package myers_test import ( "testing" "github.com/aymanbagabas/go-udiff/difftest" "github.com/aymanbagabas/go-udiff/myers" ) func TestDiff(t *testing.T) { difftest.DiffTest(t, myers.ComputeEdits) } golang-github-aymanbagabas-go-udiff-0.2.0/ndiff.go000066400000000000000000000046741463166140100217740ustar00rootroot00000000000000// Copyright 2022 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package udiff import ( "bytes" "unicode/utf8" "github.com/aymanbagabas/go-udiff/lcs" ) // Strings computes the differences between two strings. // The resulting edits respect rune boundaries. func Strings(before, after string) []Edit { if before == after { return nil // common case } if isASCII(before) && isASCII(after) { // TODO(adonovan): opt: specialize diffASCII for strings. return diffASCII([]byte(before), []byte(after)) } return diffRunes([]rune(before), []rune(after)) } // Bytes computes the differences between two byte slices. // The resulting edits respect rune boundaries. func Bytes(before, after []byte) []Edit { if bytes.Equal(before, after) { return nil // common case } if isASCII(before) && isASCII(after) { return diffASCII(before, after) } return diffRunes(runes(before), runes(after)) } func diffASCII(before, after []byte) []Edit { diffs := lcs.DiffBytes(before, after) // Convert from LCS diffs. 
res := make([]Edit, len(diffs)) for i, d := range diffs { res[i] = Edit{d.Start, d.End, string(after[d.ReplStart:d.ReplEnd])} } return res } func diffRunes(before, after []rune) []Edit { diffs := lcs.DiffRunes(before, after) // The diffs returned by the lcs package use indexes // into whatever slice was passed in. // Convert rune offsets to byte offsets. res := make([]Edit, len(diffs)) lastEnd := 0 utf8Len := 0 for i, d := range diffs { utf8Len += runesLen(before[lastEnd:d.Start]) // text between edits start := utf8Len utf8Len += runesLen(before[d.Start:d.End]) // text deleted by this edit res[i] = Edit{start, utf8Len, string(after[d.ReplStart:d.ReplEnd])} lastEnd = d.End } return res } // runes is like []rune(string(bytes)) without the duplicate allocation. func runes(bytes []byte) []rune { n := utf8.RuneCount(bytes) runes := make([]rune, n) for i := 0; i < n; i++ { r, sz := utf8.DecodeRune(bytes) bytes = bytes[sz:] runes[i] = r } return runes } // runesLen returns the length in bytes of the UTF-8 encoding of runes. func runesLen(runes []rune) (len int) { for _, r := range runes { len += utf8.RuneLen(r) } return len } // isASCII reports whether s contains only ASCII. func isASCII[S string | []byte](s S) bool { for i := 0; i < len(s); i++ { if s[i] >= utf8.RuneSelf { return false } } return true } golang-github-aymanbagabas-go-udiff-0.2.0/unified.go000066400000000000000000000154621463166140100223260ustar00rootroot00000000000000// Copyright 2019 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package udiff import ( "fmt" "log" "strings" ) // DefaultContextLines is the number of unchanged lines of surrounding // context displayed by Unified. Use ToUnified to specify a different value. const DefaultContextLines = 3 // Unified returns a unified diff of the old and new strings. // The old and new labels are the names of the old and new files. 
// If the strings are equal, it returns the empty string. func Unified(oldLabel, newLabel, old, new string) string { edits := Strings(old, new) unified, err := ToUnified(oldLabel, newLabel, old, edits, DefaultContextLines) if err != nil { // Can't happen: edits are consistent. log.Fatalf("internal error in diff.Unified: %v", err) } return unified } // ToUnified applies the edits to content and returns a unified diff, // with contextLines lines of (unchanged) context around each diff hunk. // The old and new labels are the names of the content and result files. // It returns an error if the edits are inconsistent; see ApplyEdits. func ToUnified(oldLabel, newLabel, content string, edits []Edit, contextLines int) (string, error) { u, err := toUnified(oldLabel, newLabel, content, edits, contextLines) if err != nil { return "", err } return u.String(), nil } // unified represents a set of edits as a unified diff. type unified struct { // From is the name of the original file. From string // To is the name of the modified file. To string // Hunks is the set of edit Hunks needed to transform the file content. Hunks []*hunk } // Hunk represents a contiguous set of line edits to apply. type hunk struct { // The line in the original source where the hunk starts. FromLine int // The line in the original source where the hunk finishes. ToLine int // The set of line based edits to apply. Lines []line } // Line represents a single line operation to apply as part of a Hunk. type line struct { // Kind is the type of line this represents, deletion, insertion or copy. Kind OpKind // Content is the Content of this line. // For deletion it is the line being removed, for all others it is the line // to put in the output. Content string } // OpKind is used to denote the type of operation a line represents. type OpKind int const ( // Delete is the operation kind for a line that is present in the input // but not in the output. 
Delete OpKind = iota // Insert is the operation kind for a line that is new in the output. Insert // Equal is the operation kind for a line that is the same in the input and // output, often used to provide context around edited lines. Equal ) // String returns a human readable representation of an OpKind. It is not // intended for machine processing. func (k OpKind) String() string { switch k { case Delete: return "delete" case Insert: return "insert" case Equal: return "equal" default: panic("unknown operation kind") } } // toUnified takes a file contents and a sequence of edits, and calculates // a unified diff that represents those edits. func toUnified(fromName, toName string, content string, edits []Edit, contextLines int) (unified, error) { gap := contextLines * 2 u := unified{ From: fromName, To: toName, } if len(edits) == 0 { return u, nil } var err error edits, err = lineEdits(content, edits) // expand to whole lines if err != nil { return u, err } lines := splitLines(content) var h *hunk last := 0 toLine := 0 for _, edit := range edits { // Compute the zero-based line numbers of the edit start and end. // TODO(adonovan): opt: compute incrementally, avoid O(n^2). 
start := strings.Count(content[:edit.Start], "\n") end := strings.Count(content[:edit.End], "\n") if edit.End == len(content) && len(content) > 0 && content[len(content)-1] != '\n' { end++ // EOF counts as an implicit newline } switch { case h != nil && start == last: // direct extension case h != nil && start <= last+gap: // within range of previous lines, add the joiners addEqualLines(h, lines, last, start) default: // need to start a new hunk if h != nil { // add the edge to the previous hunk addEqualLines(h, lines, last, last+contextLines) u.Hunks = append(u.Hunks, h) } toLine += start - last h = &hunk{ FromLine: start + 1, ToLine: toLine + 1, } // add the edge to the new hunk delta := addEqualLines(h, lines, start-contextLines, start) h.FromLine -= delta h.ToLine -= delta } last = start for i := start; i < end; i++ { h.Lines = append(h.Lines, line{Kind: Delete, Content: lines[i]}) last++ } if edit.New != "" { for _, content := range splitLines(edit.New) { h.Lines = append(h.Lines, line{Kind: Insert, Content: content}) toLine++ } } } if h != nil { // add the edge to the final hunk addEqualLines(h, lines, last, last+contextLines) u.Hunks = append(u.Hunks, h) } return u, nil } func splitLines(text string) []string { lines := strings.SplitAfter(text, "\n") if lines[len(lines)-1] == "" { lines = lines[:len(lines)-1] } return lines } func addEqualLines(h *hunk, lines []string, start, end int) int { delta := 0 for i := start; i < end; i++ { if i < 0 { continue } if i >= len(lines) { return delta } h.Lines = append(h.Lines, line{Kind: Equal, Content: lines[i]}) delta++ } return delta } // String converts a unified diff to the standard textual form for that diff. // The output of this function can be passed to tools like patch. 
func (u unified) String() string { if len(u.Hunks) == 0 { return "" } b := new(strings.Builder) fmt.Fprintf(b, "--- %s\n", u.From) fmt.Fprintf(b, "+++ %s\n", u.To) for _, hunk := range u.Hunks { fromCount, toCount := 0, 0 for _, l := range hunk.Lines { switch l.Kind { case Delete: fromCount++ case Insert: toCount++ default: fromCount++ toCount++ } } fmt.Fprint(b, "@@") if fromCount > 1 { fmt.Fprintf(b, " -%d,%d", hunk.FromLine, fromCount) } else if hunk.FromLine == 1 && fromCount == 0 { // Match odd GNU diff -u behavior adding to empty file. fmt.Fprintf(b, " -0,0") } else { fmt.Fprintf(b, " -%d", hunk.FromLine) } if toCount > 1 { fmt.Fprintf(b, " +%d,%d", hunk.ToLine, toCount) } else if hunk.ToLine == 1 && toCount == 0 { // Match odd GNU diff -u behavior adding to empty file. fmt.Fprintf(b, " +0,0") } else { fmt.Fprintf(b, " +%d", hunk.ToLine) } fmt.Fprint(b, " @@\n") for _, l := range hunk.Lines { switch l.Kind { case Delete: fmt.Fprintf(b, "-%s", l.Content) case Insert: fmt.Fprintf(b, "+%s", l.Content) default: fmt.Fprintf(b, " %s", l.Content) } if !strings.HasSuffix(l.Content, "\n") { fmt.Fprintf(b, "\n\\ No newline at end of file\n") } } } return b.String() }