Solved

# Text compare algorithm

Posted on 2008-11-11
610 Views
I'm looking for code for an ASP page that takes two strings and returns the differences between the two. The basic concept is easy enough to execute, but I need something more akin to the comparison performed on Wikipedia's history section - show what has been added, removed and modified on a line by line basis, not just comparing characters.
0
Question by:talonv

LVL 28

Expert Comment

Here is a Javascript Based Compare which does the line by line comparison.

Highlights the differences

``````<html>

<title>Document/Page Title</title>

<style type="text/css">

</style>

<!-- Do not edit these library functions -->

<script language="javascript">

function escape(s) {

var n = s;

n = n.replace(/&/g, "&amp;");

n = n.replace(/</g, "&lt;");

n = n.replace(/>/g, "&gt;");

n = n.replace(/"/g, "&quot;");

return n;

}

function diffString( o, n ) {

o = o.replace(/\s+\$/, '');

n = n.replace(/\s+\$/, '');

var out = diff(o == "" ? [] : o.split(/\s+/), n == "" ? [] : n.split(/\s+/) );

var str = "";

var oSpace = o.match(/\s+/g);

if (oSpace == null) {

oSpace = ["\n"];

} else {

oSpace.push("\n");

}

var nSpace = n.match(/\s+/g);

if (nSpace == null) {

nSpace = ["\n"];

} else {

nSpace.push("\n");

}

if (out.n.length == 0) {

for (var i = 0; i < out.o.length; i++) {

str += "<del style='color: blue;'>" + escape(out.o[i]) + oSpace[i] + "</del>";

}

} else {

if (out.n[0].text == null) {

for (n = 0; n < out.o.length && out.o[n].text == null; n++) {

str += "<del style='color: blue;'>" + escape(out.o[n]) + oSpace[n] + "</del>";

}

}

for ( var i = 0; i < out.n.length; i++ ) {

if (out.n[i].text == null) {

str += "<ins style='color: red;'>" + escape(out.n[i]) + nSpace[i] + "</ins>";

} else {

var pre = "";

for (n = out.n[i].row + 1; n < out.o.length && out.o[n].text == null; n++ ) {

pre += "<del style='color: blue;'>" + escape(out.o[n]) + oSpace[n] + "</del>";

}

str += " " + out.n[i].text + nSpace[i] + pre;

}

}

}

return str;

}

function randomColor() {

return "rgb(" + (Math.random() * 100) + "%, " +

(Math.random() * 100) + "%, " +

(Math.random() * 100) + "%)";

}

function diffString2( o, n ) {

o = o.replace(/\s+\$/, '');

n = n.replace(/\s+\$/, '');

var out = diff(o == "" ? [] : o.split(/\s+/), n == "" ? [] : n.split(/\s+/) );

var oSpace = o.match(/\s+/g);

if (oSpace == null) {

oSpace = ["\n"];

} else {

oSpace.push("\n");

}

var nSpace = n.match(/\s+/g);

if (nSpace == null) {

nSpace = ["\n"];

} else {

nSpace.push("\n");

}

var os = "";

var colors = new Array();

for (var i = 0; i < out.o.length; i++) {

colors[i] = randomColor();

if (out.o[i].text != null) {

os += '<span style="background-color: ' +colors[i]+ '">' +

escape(out.o[i].text) + oSpace[i] + "</span>";

} else {

os += "<del  style='color: blue;'>" + escape(out.o[i]) + oSpace[i] + "</del>";

}

}

var ns = "";

for (var i = 0; i < out.n.length; i++) {

if (out.n[i].text != null) {

ns += '<span style="background-color: ' +colors[out.n[i].row]+ '">' +

escape(out.n[i].text) + nSpace[i] + "</span>";

} else {

ns += "<ins style='color: red;'>" + escape(out.n[i]) + nSpace[i] + "</ins>";

}

}

return { o : os , n : ns };

}

function diff( o, n ) {

var ns = new Object();

var os = new Object();

for ( var i = 0; i < n.length; i++ ) {

if ( ns[ n[i] ] == null )

ns[ n[i] ] = { rows: new Array(), o: null };

ns[ n[i] ].rows.push( i );

}

for ( var i = 0; i < o.length; i++ ) {

if ( os[ o[i] ] == null )

os[ o[i] ] = { rows: new Array(), n: null };

os[ o[i] ].rows.push( i );

}

for ( var i in ns ) {

if ( ns[i].rows.length == 1 && typeof(os[i]) != "undefined" && os[i].rows.length == 1 ) {

n[ ns[i].rows[0] ] = { text: n[ ns[i].rows[0] ], row: os[i].rows[0] };

o[ os[i].rows[0] ] = { text: o[ os[i].rows[0] ], row: ns[i].rows[0] };

}

}

for ( var i = 0; i < n.length - 1; i++ ) {

if ( n[i].text != null && n[i+1].text == null && n[i].row + 1 < o.length && o[ n[i].row + 1 ].text == null &&

n[i+1] == o[ n[i].row + 1 ] ) {

n[i+1] = { text: n[i+1], row: n[i].row + 1 };

o[n[i].row+1] = { text: o[n[i].row+1], row: i + 1 };

}

}

for ( var i = n.length - 1; i > 0; i-- ) {

if ( n[i].text != null && n[i-1].text == null && n[i].row > 0 && o[ n[i].row - 1 ].text == null &&

n[i-1] == o[ n[i].row - 1 ] ) {

n[i-1] = { text: n[i-1], row: n[i].row - 1 };

o[n[i].row-1] = { text: o[n[i].row-1], row: i - 1 };

}

}

return { o: o, n: n };

}

</script>

<script language="javascript">

function CompareStrings (s1, s2, dObj)

{

var lc1 = s1.split(/[\n]/);

var lc2 = s2.split(/[\n]/);

var lc = lc2.length;

if (lc1.length > lc2.length) { lc = lc1.length; }

var lx =0;

str = "";

for (lx =0; lx < lc ; lx++) {

if (lx > lc1.length) { ls1 = char(13); }

else { ls1 = lc1[lx]; }

if (lx > lc2.length) { ls2 = char(13); }

else { ls2 = lc2[lx]; }

str += "L " +  lx + ": " + diffString (ls1, ls2) + "<br>";

}

document.getElementById('diff').innerHTML = str;

dObj.value = str;

}

</script>

<body>

<form name="myform">

<textarea name="t1" cols="40" wrap="hard">

This is first string.

The red brown fox jumped over the rolling log.

var total = 0;

for (ix =0; ix < 3; ix++) {

total += ix;

}

</textarea>

<textarea name="t2" cols="40" wrap="hard">

This is second string.

The brown spotted fox leaped over the rolling log

var total = 10;

for (ix =0; lx < 4; ix++) {

total += ix;

}

</textarea>

<br>

<input type="button" value="Compare" onclick="CompareStrings(this.form.t1.value, this.form.t2.value, this.form.t3);">

<br>Diff Textarea</br>

<textarea style="display:none;" name="t3" cols="40" wrap="hard">

</textarea>

</form>

<div id="diff">

</div>

</body>

</html>
``````
0

Author Comment

That's almost perfect - is there a way it could detect if lines have been added or removed? Right now, if a line is added in one and not the other, it just returns an error.
0

LVL 28

Accepted Solution

<html>
<title>Document/Page Title</title>
<style type="text/css">
</style>
<!-- Do not edit these library functions -->
<script language="javascript">
function escape(s) {
var n = s;
n = n.replace(/&/g, "&amp;");
n = n.replace(/</g, "<");
n = n.replace(/>/g, ">");
n = n.replace(/"/g, "&quot;");

return n;
}

function diffString( o, n ) {
o = o.replace(/\s+\$/, '');
n = n.replace(/\s+\$/, '');

var out = diff(o == "" ? [] : o.split(/\s+/), n == "" ? [] : n.split(/\s+/) );
var str = "";

var oSpace = o.match(/\s+/g);
if (oSpace == null) {
oSpace = ["\n"];
} else {
oSpace.push("\n");
}
var nSpace = n.match(/\s+/g);
if (nSpace == null) {
nSpace = ["\n"];
} else {
nSpace.push("\n");
}

if (out.n.length == 0) {
for (var i = 0; i < out.o.length; i++) {
str += "<del style='color: blue;'>" + escape(out.o[i]) + oSpace[i] + "</del>";
}
} else {
if (out.n[0].text == null) {
for (n = 0; n < out.o.length && out.o[n].text == null; n++) {
str += "<del style='color: blue;'>" + escape(out.o[n]) + oSpace[n] + "</del>";
}
}

for ( var i = 0; i < out.n.length; i++ ) {
if (out.n[i].text == null) {
str += "<ins style='color: red;'>" + escape(out.n[i]) + nSpace[i] + "</ins>";
} else {
var pre = "";

for (n = out.n[i].row + 1; n < out.o.length && out.o[n].text == null; n++ ) {
pre += "<del style='color: blue;'>" + escape(out.o[n]) + oSpace[n] + "</del>";
}
str += " " + out.n[i].text + nSpace[i] + pre;
}
}
}

return str;
}

function randomColor() {
return "rgb(" + (Math.random() * 100) + "%, " +
(Math.random() * 100) + "%, " +
(Math.random() * 100) + "%)";
}
function diffString2( o, n ) {
o = o.replace(/\s+\$/, '');
n = n.replace(/\s+\$/, '');

var out = diff(o == "" ? [] : o.split(/\s+/), n == "" ? [] : n.split(/\s+/) );

var oSpace = o.match(/\s+/g);
if (oSpace == null) {
oSpace = ["\n"];
} else {
oSpace.push("\n");
}
var nSpace = n.match(/\s+/g);
if (nSpace == null) {
nSpace = ["\n"];
} else {
nSpace.push("\n");
}

var os = "";
var colors = new Array();
for (var i = 0; i < out.o.length; i++) {
colors[i] = randomColor();

if (out.o[i].text != null) {
os += '<span style="background-color: ' +colors[i]+ '">' +
escape(out.o[i].text) + oSpace[i] + "</span>";
} else {
os += "<del  style='color: blue;'>" + escape(out.o[i]) + oSpace[i] + "</del>";
}
}

var ns = "";
for (var i = 0; i < out.n.length; i++) {
if (out.n[i].text != null) {
ns += '<span style="background-color: ' +colors[out.n[i].row]+ '">' +
escape(out.n[i].text) + nSpace[i] + "</span>";
} else {
ns += "<ins style='color: red;'>" + escape(out.n[i]) + nSpace[i] + "</ins>";
}
}

return { o : os , n : ns };
}

function diff( o, n ) {
var ns = new Object();
var os = new Object();

for ( var i = 0; i < n.length; i++ ) {
if ( ns[ n[i] ] == null )
ns[ n[i] ] = { rows: new Array(), o: null };
ns[ n[i] ].rows.push( i );
}

for ( var i = 0; i < o.length; i++ ) {
if ( os[ o[i] ] == null )
os[ o[i] ] = { rows: new Array(), n: null };
os[ o[i] ].rows.push( i );
}

for ( var i in ns ) {
if ( ns[i].rows.length == 1 && typeof(os[i]) != "undefined" && os[i].rows.length == 1 ) {
n[ ns[i].rows[0] ] = { text: n[ ns[i].rows[0] ], row: os[i].rows[0] };
o[ os[i].rows[0] ] = { text: o[ os[i].rows[0] ], row: ns[i].rows[0] };
}
}

for ( var i = 0; i < n.length - 1; i++ ) {
if ( n[i].text != null && n[i+1].text == null && n[i].row + 1 < o.length && o[ n[i].row + 1 ].text == null &&
n[i+1] == o[ n[i].row + 1 ] ) {
n[i+1] = { text: n[i+1], row: n[i].row + 1 };
o[n[i].row+1] = { text: o[n[i].row+1], row: i + 1 };
}
}

for ( var i = n.length - 1; i > 0; i-- ) {
if ( n[i].text != null && n[i-1].text == null && n[i].row > 0 && o[ n[i].row - 1 ].text == null &&
n[i-1] == o[ n[i].row - 1 ] ) {
n[i-1] = { text: n[i-1], row: n[i].row - 1 };
o[n[i].row-1] = { text: o[n[i].row-1], row: i - 1 };
}
}

return { o: o, n: n };
}
</script>

<script language="javascript">
function CompareStrings (s1, s2, dObj)
{
var lc1 = s1.split(/[\n]/);
var lc2 = s2.split(/[\n]/);
var lc = lc2.length;
if (lc1.length > lc2.length) { lc = lc1.length; }
var lx =0;
str = "";
for (lx =0; lx < lc ; lx++) {
if ((lx+1) > lc1.length) { ls1 = String.fromCharCode(13); }
else { ls1 = lc1[lx]; }
if ((lx+1) > lc2.length) { ls2 = String.fromCharCode(13); }
else { ls2 = lc2[lx]; }
if (ls1.length ==0) { ls1 = String.fromCharCode(13); }
if (ls2.length ==0) { ls2 = String.fromCharCode(13); }
str += "L " +  lx + ": " + diffString (ls1, ls2) + "<br>";
}

document.getElementById('diff').innerHTML = str;
dObj.value = str;
}
</script>
<body>
<form name="myform">
<textarea name="t1" cols="40" wrap="hard">
This is first string.
The red brown fox jumped over the rolling log.
var total = 0;
for (ix =0; ix < 3; ix++) {
total += ix;
}
</textarea>
<textarea name="t2" cols="40" wrap="hard">
This is second string.
The brown spotted fox leaped over the rolling log
var total = 10;
for (ix =0; lx < 4; ix++) {
total += ix;
}
</textarea>
<br>
<input type="button" value="Compare" onclick="CompareStrings(this.form.t1.value, this.form.t2.value, this.form.t3);">
<br>Diff Textarea</br>
<textarea style="display:none;" name="t3" cols="40" wrap="hard">
</textarea>
</form>
<div id="diff">
</div>
</body>
</html>
0

Author Closing Comment

Thank you! When a new line is reached, it seems to throw the index out and mark everything after that as new, though. It still fits what I asked, mind you. Thanks!
0

## Featured Post

Things That Drive Us Nuts Have you noticed the use of the reCaptcha feature at EE and other web sites?  It wants you to read and retype something that looks like this.Insanity!  It's not EE's fault - that's just the way reCaptcha works.  But it is …
This article describes how to create custom column layout styles for Bootstrap. The article uses 5 columns to illustrate the concept, but the principle can be extended to any number of columns.
The viewer will the learn the benefit of plain text editors and code an HTML5 based template for use in further tutorials.
The viewer will learn the basics of jQuery, including how to invoke it on a web page. Reference your jQuery libraries: (CODE) Include your new external js/jQuery file: (CODE) Write your first lines of code to setup your site for jQuery.: (CODE)