// Copyright 2013 Google Inc. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Package diff implements a linewise diff algorithm. package diff import ( "bytes" "fmt" "strings" ) // Chunk represents a piece of the diff. A chunk will not have both added and // deleted lines. Equal lines are always after any added or deleted lines. // A Chunk may or may not have any lines in it, especially for the first or last // chunk in a computation. type Chunk struct { Added []string Deleted []string Equal []string } func (c *Chunk) empty() bool { return len(c.Added) == 0 && len(c.Deleted) == 0 && len(c.Equal) == 0 } // Diff returns a string containing a line-by-line unified diff of the linewise // changes required to make A into B. Each line is prefixed with '+', '-', or // ' ' to indicate if it should be added, removed, or is correct respectively. func Diff(A, B string) string { aLines := strings.Split(A, "\n") bLines := strings.Split(B, "\n") chunks := DiffChunks(aLines, bLines) buf := new(bytes.Buffer) for _, c := range chunks { for _, line := range c.Added { fmt.Fprintf(buf, "+%s\n", line) } for _, line := range c.Deleted { fmt.Fprintf(buf, "-%s\n", line) } for _, line := range c.Equal { fmt.Fprintf(buf, " %s\n", line) } } return strings.TrimRight(buf.String(), "\n") } // DiffChunks uses an O(D(N+M)) shortest-edit-script algorithm // to compute the edits required from A to B and returns the // edit chunks. func DiffChunks(a, b []string) []Chunk { // algorithm: http://www.xmailserver.org/diff2.pdf // We'll need these quantities a lot. alen, blen := len(a), len(b) // M, N // At most, it will require len(a) deletions and len(b) additions // to transform a into b. maxPath := alen + blen // MAX if maxPath == 0 { // degenerate case: two empty lists are the same return nil } // Store the endpoint of the path for diagonals. // We store only the a index, because the b index on any diagonal // (which we know during the loop below) is aidx-diag. // endpoint[maxPath] represents the 0 diagonal. // // Stated differently: // endpoint[d] contains the aidx of a furthest reaching path in diagonal d endpoint := make([]int, 2*maxPath+1) // V saved := make([][]int, 0, 8) // Vs save := func() { dup := make([]int, len(endpoint)) copy(dup, endpoint) saved = append(saved, dup) } var editDistance int // D dLoop: for editDistance = 0; editDistance <= maxPath; editDistance++ { // The 0 diag(onal) represents equality of a and b. Each diagonal to // the left is numbered one lower, to the right is one higher, from // -alen to +blen. Negative diagonals favor differences from a, // positive diagonals favor differences from b. The edit distance to a // diagonal d cannot be shorter than d itself. // // The iterations of this loop cover either odds or evens, but not both, // If odd indices are inputs, even indices are outputs and vice versa. for diag := -editDistance; diag <= editDistance; diag += 2 { // k var aidx int // x switch { case diag == -editDistance: // This is a new diagonal; copy from previous iter aidx = endpoint[maxPath-editDistance+1] + 0 case diag == editDistance: // This is a new diagonal; copy from previous iter aidx = endpoint[maxPath+editDistance-1] + 1 case endpoint[maxPath+diag+1] > endpoint[maxPath+diag-1]: // diagonal d+1 was farther along, so use that aidx = endpoint[maxPath+diag+1] + 0 default: // diagonal d-1 was farther (or the same), so use that aidx = endpoint[maxPath+diag-1] + 1 } // On diagonal d, we can compute bidx from aidx. bidx := aidx - diag // y // See how far we can go on this diagonal before we find a difference. for aidx < alen && bidx < blen && a[aidx] == b[bidx] { aidx++ bidx++ } // Store the end of the current edit chain. endpoint[maxPath+diag] = aidx // If we've found the end of both inputs, we're done! if aidx >= alen && bidx >= blen { save() // save the final path break dLoop } } save() // save the current path } if editDistance == 0 { return nil } chunks := make([]Chunk, editDistance+1) x, y := alen, blen for d := editDistance; d > 0; d-- { endpoint := saved[d] diag := x - y insert := diag == -d || (diag != d && endpoint[maxPath+diag-1] < endpoint[maxPath+diag+1]) x1 := endpoint[maxPath+diag] var x0, xM, kk int if insert { kk = diag + 1 x0 = endpoint[maxPath+kk] xM = x0 } else { kk = diag - 1 x0 = endpoint[maxPath+kk] xM = x0 + 1 } y0 := x0 - kk var c Chunk if insert { c.Added = b[y0:][:1] } else { c.Deleted = a[x0:][:1] } if xM < x1 { c.Equal = a[xM:][:x1-xM] } x, y = x0, y0 chunks[d] = c } if x > 0 { chunks[0].Equal = a[:x] } if chunks[0].empty() { chunks = chunks[1:] } if len(chunks) == 0 { return nil } return chunks }