Initial revision
This commit is contained in:
parent
f900154e7a
commit
06fb77a82b
19 changed files with 3139 additions and 0 deletions
57
source/org/apache/commons/jrcs/diff/myers/DiffNode.java
Normal file
57
source/org/apache/commons/jrcs/diff/myers/DiffNode.java
Normal file
|
@ -0,0 +1,57 @@
|
|||
package org.apache.commons.jrcs.diff.myers;
|
||||
|
||||
/**
|
||||
* <p>Title: </p>
|
||||
* <p>Description: </p>
|
||||
* <p>Copyright: Copyright (c) 2002</p>
|
||||
* <p>Company: </p>
|
||||
* @author not attributable
|
||||
* @version 1.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* A diffnode in a diffpath.
|
||||
* <p>
|
||||
* A DiffNode and its previous node mark a delta between
|
||||
* two input sequences, that is, two differing subsequences
|
||||
* between (possibly zero length) matching sequences.
|
||||
*
|
||||
* {@link DiffNode DiffNodes} and {@link Snake Snakes} allow for compression
|
||||
* of diffpaths, as each snake is represented by a single {@link Snake Snake}
|
||||
* node and each contiguous series of insertions and deletions is represented
|
||||
* by a single {@link DiffNode DiffNodes}.
|
||||
*
|
||||
* @version $Revision: 1.1 $ $Date: 2003/07/14 12:22:29 $
|
||||
* @author <a href="mailto:juanco@suigeneris.org">Juanco Anez</a>
|
||||
*
|
||||
*/
|
||||
public final class DiffNode
|
||||
extends PathNode
|
||||
{
|
||||
/**
|
||||
* Constructs a DiffNode.
|
||||
* <p>
|
||||
* DiffNodes are compressed. That means that
|
||||
* the path pointed to by the <code>prev</code> parameter
|
||||
* will be followed using {@link PathNode#previousSnake}
|
||||
* until a non-diff node is found.
|
||||
*
|
||||
* @param the position in the original sequence
|
||||
* @param the position in the revised sequence
|
||||
* @param prev the previous node in the path.
|
||||
*/
|
||||
public DiffNode(int i, int j, PathNode prev)
|
||||
{
|
||||
super(i, j, (prev == null ? null : prev.previousSnake()) );
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* @return false, always
|
||||
*/
|
||||
public boolean isSnake()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
222
source/org/apache/commons/jrcs/diff/myers/MyersDiff.java
Normal file
222
source/org/apache/commons/jrcs/diff/myers/MyersDiff.java
Normal file
|
@ -0,0 +1,222 @@
|
|||
/*
|
||||
* ====================================================================
|
||||
*
|
||||
* The Apache Software License, Version 1.1
|
||||
*
|
||||
* Copyright (c) 1999-2003 The Apache Software Foundation. All rights
|
||||
* reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. The end-user documentation included with the redistribution, if
|
||||
* any, must include the following acknowlegement:
|
||||
* "This product includes software developed by the
|
||||
* Apache Software Foundation (http://www.apache.org/)."
|
||||
* Alternately, this acknowlegement may appear in the software itself,
|
||||
* if and wherever such third-party acknowlegements normally appear.
|
||||
*
|
||||
* 4. The names "The Jakarta Project", "Commons", and "Apache Software
|
||||
* Foundation" must not be used to endorse or promote products derived
|
||||
* from this software without prior written permission. For written
|
||||
* permission, please contact apache@apache.org.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "Apache"
|
||||
* nor may "Apache" appear in their names without prior written
|
||||
* permission of the Apache Group.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
*
|
||||
* This software consists of voluntary contributions made by many
|
||||
* individuals on behalf of the Apache Software Foundation. For more
|
||||
* information on the Apache Software Foundation, please see
|
||||
* <http://www.apache.org/>.
|
||||
*
|
||||
*/
|
||||
|
||||
package org.apache.commons.jrcs.diff.myers;
|
||||
|
||||
import org.apache.commons.jrcs.diff.*;
|
||||
|
||||
/**
|
||||
* A clean-room implementation of
|
||||
* <a href="http://www.cs.arizona.edu/people/gene/">
|
||||
* Eugene Myers</a> differencing algorithm.
|
||||
* <p>
|
||||
* See the paper at
|
||||
* <a href="http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps">
|
||||
* http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps</a>
|
||||
*
|
||||
* @version $Revision: 1.1 $ $Date: 2003/07/14 12:22:30 $
|
||||
* @author <a href="mailto:juanco@suigeneris.org">Juanco Anez</a>
|
||||
* @see Delta
|
||||
* @see Revision
|
||||
* @see Diff
|
||||
*/
|
||||
public class MyersDiff
|
||||
implements DiffAlgorithm
|
||||
{
|
||||
/**
|
||||
* Constructs an instance of the Myers differencing algorithm.
|
||||
*/
|
||||
public MyersDiff()
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
public Revision diff(Object[] orig, Object[] rev)
|
||||
throws DifferentiationFailedException
|
||||
{
|
||||
PathNode path = buildPath(orig, rev);
|
||||
return buildRevision(path, orig, rev);
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the minimum diffpath that expresses de differences
|
||||
* between the original and revised sequences, according
|
||||
* to Gene Myers differencing algorithm.
|
||||
*
|
||||
* @param orig The original sequence.
|
||||
* @param rev The revised sequence.
|
||||
* @return A minimum {@link PathNode Path} accross the differences graph.
|
||||
* @throws DifferentiationFailedException if a diff path could not be found.
|
||||
*/
|
||||
public static PathNode buildPath(Object[] orig, Object[] rev)
|
||||
throws DifferentiationFailedException
|
||||
{
|
||||
if (orig == null)
|
||||
throw new IllegalArgumentException("original sequence is null");
|
||||
if (rev == null)
|
||||
throw new IllegalArgumentException("revised sequence is null");
|
||||
|
||||
// these are local constants
|
||||
final int N = orig.length;
|
||||
final int M = rev.length;
|
||||
|
||||
final int MAX = N + M + 1;
|
||||
final int size = 1 + 2 * MAX;
|
||||
final int middle = (size + 1) / 2;
|
||||
final PathNode diagonal[] = new PathNode[size];
|
||||
|
||||
PathNode path = null;
|
||||
|
||||
diagonal[middle + 1] = new Snake(0, -1, null);
|
||||
for (int d = 0; d < MAX; d++)
|
||||
{
|
||||
for (int k = -d; k <= d; k += 2)
|
||||
{
|
||||
final int kmiddle = middle + k;
|
||||
final int kplus = kmiddle + 1;
|
||||
final int kminus = kmiddle - 1;
|
||||
PathNode prev = null;
|
||||
|
||||
int i;
|
||||
if ( (k == -d) ||
|
||||
(k != d && diagonal[kminus].i < diagonal[kplus].i))
|
||||
{
|
||||
i = diagonal[kplus].i;
|
||||
prev = diagonal[kplus];
|
||||
}
|
||||
else
|
||||
{
|
||||
i = diagonal[kminus].i + 1;
|
||||
prev = diagonal[kminus];
|
||||
}
|
||||
|
||||
diagonal[kminus] = null; // no longer used
|
||||
|
||||
int j = i - k;
|
||||
|
||||
PathNode node = new DiffNode(i, j, prev);
|
||||
|
||||
// orig and rev are zero-based
|
||||
// but the algorithm is one-based
|
||||
// that's why there's no +1 when indexing the sequences
|
||||
while (i < N && j < M && orig[i].equals(rev[j]))
|
||||
{
|
||||
i++;
|
||||
j++;
|
||||
}
|
||||
if (i > node.i)
|
||||
node = new Snake(i, j, node);
|
||||
|
||||
diagonal[kmiddle] = node;
|
||||
|
||||
if (i >= N && j >= M)
|
||||
{
|
||||
return diagonal[kmiddle];
|
||||
}
|
||||
}
|
||||
diagonal[middle+d-1] = null;
|
||||
|
||||
}
|
||||
// According to Myers, this cannot happen
|
||||
throw new DifferentiationFailedException("could not find a diff path");
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a {@link Revision} from a difference path.
|
||||
*
|
||||
* @param path The path.
|
||||
* @param orig The original sequence.
|
||||
* @param rev The revised sequence.
|
||||
* @return A {@link Revision} script corresponding to the path.
|
||||
* @throws DifferentiationFailedException if a {@link Revision} could
|
||||
* not be built from the given path.
|
||||
*/
|
||||
public static Revision buildRevision(PathNode path, Object[] orig, Object[] rev)
|
||||
{
|
||||
if (path == null)
|
||||
throw new IllegalArgumentException("path is null");
|
||||
if (orig == null)
|
||||
throw new IllegalArgumentException("original sequence is null");
|
||||
if (rev == null)
|
||||
throw new IllegalArgumentException("revised sequence is null");
|
||||
|
||||
Revision revision = new Revision();
|
||||
if (path.isSnake())
|
||||
path = path.prev;
|
||||
while (path != null && path.prev != null && path.prev.j >= 0)
|
||||
{
|
||||
if(path.isSnake())
|
||||
throw new IllegalStateException("bad diffpath: found snake when looking for diff");
|
||||
int i = path.i;
|
||||
int j = path.j;
|
||||
|
||||
path = path.prev;
|
||||
int ianchor = path.i;
|
||||
int janchor = path.j;
|
||||
|
||||
Delta delta = Delta.newDelta(new Chunk(orig, ianchor, i - ianchor),
|
||||
new Chunk(rev, janchor, j - janchor));
|
||||
revision.insertDelta(delta);
|
||||
if (path.isSnake())
|
||||
path = path.prev;
|
||||
}
|
||||
return revision;
|
||||
}
|
||||
|
||||
}
|
146
source/org/apache/commons/jrcs/diff/myers/PathNode.java
Normal file
146
source/org/apache/commons/jrcs/diff/myers/PathNode.java
Normal file
|
@ -0,0 +1,146 @@
|
|||
/*
|
||||
* ====================================================================
|
||||
*
|
||||
* The Apache Software License, Version 1.1
|
||||
*
|
||||
* Copyright (c) 1999-2003 The Apache Software Foundation.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. The end-user documentation included with the redistribution, if
|
||||
* any, must include the following acknowlegement:
|
||||
* "This product includes software developed by the
|
||||
* Apache Software Foundation (http://www.apache.org/)."
|
||||
* Alternately, this acknowlegement may appear in the software itself,
|
||||
* if and wherever such third-party acknowlegements normally appear.
|
||||
*
|
||||
* 4. The names "The Jakarta Project", "Commons", and "Apache Software
|
||||
* Foundation" must not be used to endorse or promote products derived
|
||||
* from this software without prior written permission. For written
|
||||
* permission, please contact apache@apache.org.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "Apache"
|
||||
* nor may "Apache" appear in their names without prior written
|
||||
* permission of the Apache Group.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
*
|
||||
* This software consists of voluntary contributions made by many
|
||||
* individuals on behalf of the Apache Software Foundation. For more
|
||||
* information on the Apache Software Foundation, please see
|
||||
* <http://www.apache.org/>.
|
||||
*
|
||||
*/
|
||||
|
||||
package org.apache.commons.jrcs.diff.myers;
|
||||
|
||||
/**
|
||||
* A node in a diffpath.
|
||||
*
|
||||
* @version $Revision: 1.1 $ $Date: 2003/07/14 12:22:30 $
|
||||
* @author <a href="mailto:juanco@suigeneris.org">Juanco Anez</a>
|
||||
*
|
||||
* @see DiffNode
|
||||
* @see Snake
|
||||
*
|
||||
*/
|
||||
public abstract class PathNode {
    /** Position in the original sequence. */
    public final int i;
    /** Position in the revised sequence. */
    public final int j;
    /** The previous node in the path. */
    public final PathNode prev;

    /**
     * Creates a node at the given coordinates and links it to an
     * existing diffpath.
     *
     * @param i The position in the original sequence for the new node.
     * @param j The position in the revised sequence for the new node.
     * @param prev The previous node in the path.
     */
    public PathNode(int i, int j, PathNode prev) {
        this.prev = prev;
        this.j = j;
        this.i = i;
    }

    /**
     * Is this node a {@link Snake Snake node}?
     *
     * @return true if this is a {@link Snake Snake node}
     */
    public abstract boolean isSnake();

    /**
     * Is this a bootstrap node?
     * <p>
     * In bootstrap nodes one of the two coordinates is
     * less than zero.
     *
     * @return true if this is a bootstrap node.
     */
    public boolean isBootstrap() {
        return !(i >= 0 && j >= 0);
    }

    /**
     * Walks back along the path, starting at this node, skipping
     * non-snake nodes until a {@link Snake}, a bootstrap node, or the
     * end of the path is reached.
     *
     * @return <code>null</code> if the walk reaches a bootstrap node;
     *         otherwise the first node encountered that either is a
     *         {@link Snake} or has no previous node.
     */
    public final PathNode previousSnake() {
        PathNode cursor = this;
        while (true) {
            if (cursor.isBootstrap()) {
                return null;
            }
            if (cursor.isSnake() || cursor.prev == null) {
                return cursor;
            }
            cursor = cursor.prev;
        }
    }

    /**
     * Renders the path from this node back to its start as a bracketed
     * list of <code>(i,j)</code> pairs, for example
     * <code>[(3,2)(2,2)(0,-1)]</code>.
     *
     * @return the textual form of the path ending at this node.
     */
    public String toString() {
        StringBuffer text = new StringBuffer("[");
        for (PathNode cursor = this; cursor != null; cursor = cursor.prev) {
            text.append("(").append(cursor.i).append(",").append(cursor.j).append(")");
        }
        return text.append("]").toString();
    }
}
|
97
source/org/apache/commons/jrcs/diff/myers/Snake.java
Normal file
97
source/org/apache/commons/jrcs/diff/myers/Snake.java
Normal file
|
@ -0,0 +1,97 @@
|
|||
/*
|
||||
* ====================================================================
|
||||
*
|
||||
* The Apache Software License, Version 1.1
|
||||
*
|
||||
* Copyright (c) 1999-2003 The Apache Software Foundation.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. The end-user documentation included with the redistribution, if
|
||||
* any, must include the following acknowlegement:
|
||||
* "This product includes software developed by the
|
||||
* Apache Software Foundation (http://www.apache.org/)."
|
||||
* Alternately, this acknowlegement may appear in the software itself,
|
||||
* if and wherever such third-party acknowlegements normally appear.
|
||||
*
|
||||
* 4. The names "The Jakarta Project", "Commons", and "Apache Software
|
||||
* Foundation" must not be used to endorse or promote products derived
|
||||
* from this software without prior written permission. For written
|
||||
* permission, please contact apache@apache.org.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "Apache"
|
||||
* nor may "Apache" appear in their names without prior written
|
||||
* permission of the Apache Group.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
*
|
||||
* This software consists of voluntary contributions made by many
|
||||
* individuals on behalf of the Apache Software Foundation. For more
|
||||
* information on the Apache Software Foundation, please see
|
||||
* <http://www.apache.org/>.
|
||||
*
|
||||
*/
|
||||
|
||||
package org.apache.commons.jrcs.diff.myers;
|
||||
|
||||
/**
|
||||
* Represents a snake in a diffpath.
|
||||
* <p>
|
||||
*
|
||||
* {@link DiffNode DiffNodes} and {@link Snake Snakes} allow for compression
|
||||
* of diffpaths, as each snake is represented by a single {@link Snake Snake}
|
||||
* node and each contiguous series of insertions and deletions is represented
|
||||
* by a single {@link DiffNode DiffNodes}.
|
||||
*
|
||||
* @version $Revision: 1.1 $ $Date: 2003/07/14 12:22:30 $
|
||||
* @author <a href="mailto:juanco@suigeneris.org">Juanco Anez</a>
|
||||
*
|
||||
*/
|
||||
public final class Snake
|
||||
extends PathNode
|
||||
{
|
||||
/**
|
||||
* Constructs a snake node.
|
||||
*
|
||||
* @param the position in the original sequence
|
||||
* @param the position in the revised sequence
|
||||
* @param prev the previous node in the path.
|
||||
*/
|
||||
public Snake(int i, int j, PathNode prev)
|
||||
{
|
||||
super(i, j, prev);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* @return true always
|
||||
*/
|
||||
public boolean isSnake()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue