Text compare algorithm

I'm looking for code for an ASP page that takes two strings and returns the differences between the two. The basic concept is easy enough to execute, but I need something more akin to the comparison performed on Wikipedia's history section - show what has been added, removed and modified on a line by line basis, not just comparing characters.
talonvAsked:
Who is Participating?
I wear a lot of hats...

"The solutions and answers provided on Experts Exchange have been extremely helpful to me over the last few years. I wear a lot of hats - Developer, Database Administrator, Help Desk, etc., so I know a lot of things but not a lot about one thing. Experts Exchange gives me answers from people who do know a lot about one thing, in a easy to use platform." -Todd S.

Pravin AsarPrincipal Systems EngineerCommented:
Here is a Javascript Based Compare which does the line by line comparison.

Highlights the differences

<html>
<head>
<title>Document/Page Title</title>
<style type="text/css">
</style>
<!-- Do not edit these library functions -->
<script language="javascript">
function escape(s) {
    var n = s;
    n = n.replace(/&/g, "&amp;");
    n = n.replace(/</g, "&lt;");
    n = n.replace(/>/g, "&gt;");
    n = n.replace(/"/g, "&quot;");
 
    return n;
}
 
function diffString( o, n ) {
  o = o.replace(/\s+$/, '');
  n = n.replace(/\s+$/, '');
 
  var out = diff(o == "" ? [] : o.split(/\s+/), n == "" ? [] : n.split(/\s+/) );
  var str = "";
 
  var oSpace = o.match(/\s+/g);
  if (oSpace == null) {
    oSpace = ["\n"];
  } else {
    oSpace.push("\n");
  }
  var nSpace = n.match(/\s+/g);
  if (nSpace == null) {
    nSpace = ["\n"];
  } else {
    nSpace.push("\n");
  }
 
  if (out.n.length == 0) {
      for (var i = 0; i < out.o.length; i++) {
        str += "<del style='color: blue;'>" + escape(out.o[i]) + oSpace[i] + "</del>";
      }
  } else {
    if (out.n[0].text == null) {
      for (n = 0; n < out.o.length && out.o[n].text == null; n++) {
        str += "<del style='color: blue;'>" + escape(out.o[n]) + oSpace[n] + "</del>";
      }
    }
 
    for ( var i = 0; i < out.n.length; i++ ) {
      if (out.n[i].text == null) {
        str += "<ins style='color: red;'>" + escape(out.n[i]) + nSpace[i] + "</ins>";
      } else {
        var pre = "";
 
        for (n = out.n[i].row + 1; n < out.o.length && out.o[n].text == null; n++ ) {
          pre += "<del style='color: blue;'>" + escape(out.o[n]) + oSpace[n] + "</del>";
        }
        str += " " + out.n[i].text + nSpace[i] + pre;
      }
    }
  }
  
  return str;
}
 
function randomColor() {
    return "rgb(" + (Math.random() * 100) + "%, " + 
                    (Math.random() * 100) + "%, " + 
                    (Math.random() * 100) + "%)";
}
function diffString2( o, n ) {
  o = o.replace(/\s+$/, '');
  n = n.replace(/\s+$/, '');
 
  var out = diff(o == "" ? [] : o.split(/\s+/), n == "" ? [] : n.split(/\s+/) );
 
  var oSpace = o.match(/\s+/g);
  if (oSpace == null) {
    oSpace = ["\n"];
  } else {
    oSpace.push("\n");
  }
  var nSpace = n.match(/\s+/g);
  if (nSpace == null) {
    nSpace = ["\n"];
  } else {
    nSpace.push("\n");
  }
 
  var os = "";
  var colors = new Array();
  for (var i = 0; i < out.o.length; i++) {
      colors[i] = randomColor();
 
      if (out.o[i].text != null) {
          os += '<span style="background-color: ' +colors[i]+ '">' + 
                escape(out.o[i].text) + oSpace[i] + "</span>";
      } else {
          os += "<del  style='color: blue;'>" + escape(out.o[i]) + oSpace[i] + "</del>";
      }
  }
 
  var ns = "";
  for (var i = 0; i < out.n.length; i++) {
      if (out.n[i].text != null) {
          ns += '<span style="background-color: ' +colors[out.n[i].row]+ '">' + 
                escape(out.n[i].text) + nSpace[i] + "</span>";
      } else {
          ns += "<ins style='color: red;'>" + escape(out.n[i]) + nSpace[i] + "</ins>";
      }
  }
 
  return { o : os , n : ns };
}
 
function diff( o, n ) {
  var ns = new Object();
  var os = new Object();
  
  for ( var i = 0; i < n.length; i++ ) {
    if ( ns[ n[i] ] == null )
      ns[ n[i] ] = { rows: new Array(), o: null };
    ns[ n[i] ].rows.push( i );
  }
  
  for ( var i = 0; i < o.length; i++ ) {
    if ( os[ o[i] ] == null )
      os[ o[i] ] = { rows: new Array(), n: null };
    os[ o[i] ].rows.push( i );
  }
  
  for ( var i in ns ) {
    if ( ns[i].rows.length == 1 && typeof(os[i]) != "undefined" && os[i].rows.length == 1 ) {
      n[ ns[i].rows[0] ] = { text: n[ ns[i].rows[0] ], row: os[i].rows[0] };
      o[ os[i].rows[0] ] = { text: o[ os[i].rows[0] ], row: ns[i].rows[0] };
    }
  }
  
  for ( var i = 0; i < n.length - 1; i++ ) {
    if ( n[i].text != null && n[i+1].text == null && n[i].row + 1 < o.length && o[ n[i].row + 1 ].text == null && 
         n[i+1] == o[ n[i].row + 1 ] ) {
      n[i+1] = { text: n[i+1], row: n[i].row + 1 };
      o[n[i].row+1] = { text: o[n[i].row+1], row: i + 1 };
    }
  }
  
  for ( var i = n.length - 1; i > 0; i-- ) {
    if ( n[i].text != null && n[i-1].text == null && n[i].row > 0 && o[ n[i].row - 1 ].text == null && 
         n[i-1] == o[ n[i].row - 1 ] ) {
      n[i-1] = { text: n[i-1], row: n[i].row - 1 };
      o[n[i].row-1] = { text: o[n[i].row-1], row: i - 1 };
    }
  }
  
  return { o: o, n: n };
}
</script>
 
<script language="javascript">
function CompareStrings (s1, s2, dObj)
{
var lc1 = s1.split(/[\n]/);
var lc2 = s2.split(/[\n]/);
var lc = lc2.length;
if (lc1.length > lc2.length) { lc = lc1.length; }
var lx =0; 
str = "";
for (lx =0; lx < lc ; lx++) {
   if (lx > lc1.length) { ls1 = char(13); }
   else { ls1 = lc1[lx]; }
   if (lx > lc2.length) { ls2 = char(13); }
   else { ls2 = lc2[lx]; }
str += "L " +  lx + ": " + diffString (ls1, ls2) + "<br>";    
}
 
document.getElementById('diff').innerHTML = str; 
dObj.value = str;
}
</script>
</head>
<body>
<form name="myform">
<textarea name="t1" cols="40" wrap="hard">
This is first string.
The red brown fox jumped over the rolling log.
var total = 0;
for (ix =0; ix < 3; ix++) {
   total += ix;
}
</textarea>
<textarea name="t2" cols="40" wrap="hard">
This is second string.
The brown spotted fox leaped over the rolling log
var total = 10;
for (ix =0; lx < 4; ix++) {
   total += ix;
}
</textarea>
<br>
<input type="button" value="Compare" onclick="CompareStrings(this.form.t1.value, this.form.t2.value, this.form.t3);">
<br>Diff Textarea</br>
<textarea style="display:none;" name="t3" cols="40" wrap="hard">
</textarea>
</form>
<div id="diff">
</div>
</body>
</html>

Open in new window

0
talonvAuthor Commented:
That's almost perfect - is there a way it could detect if lines have been added or removed? Right now, if a line is added in one and not the other, it just returns an error.
0
Pravin AsarPrincipal Systems EngineerCommented:
<html>
<head>
<title>Document/Page Title</title>
<style type="text/css">
</style>
<!-- Do not edit these library functions -->
<script language="javascript">
function escape(s) {
    var n = s;
    n = n.replace(/&/g, "&amp;");
    n = n.replace(/</g, "<");
    n = n.replace(/>/g, ">");
    n = n.replace(/"/g, "&quot;");

    return n;
}

function diffString( o, n ) {
  o = o.replace(/\s+$/, '');
  n = n.replace(/\s+$/, '');

  var out = diff(o == "" ? [] : o.split(/\s+/), n == "" ? [] : n.split(/\s+/) );
  var str = "";

  var oSpace = o.match(/\s+/g);
  if (oSpace == null) {
    oSpace = ["\n"];
  } else {
    oSpace.push("\n");
  }
  var nSpace = n.match(/\s+/g);
  if (nSpace == null) {
    nSpace = ["\n"];
  } else {
    nSpace.push("\n");
  }

  if (out.n.length == 0) {
      for (var i = 0; i < out.o.length; i++) {
        str += "<del style='color: blue;'>" + escape(out.o[i]) + oSpace[i] + "</del>";
      }
  } else {
    if (out.n[0].text == null) {
      for (n = 0; n < out.o.length && out.o[n].text == null; n++) {
        str += "<del style='color: blue;'>" + escape(out.o[n]) + oSpace[n] + "</del>";
      }
    }

    for ( var i = 0; i < out.n.length; i++ ) {
      if (out.n[i].text == null) {
        str += "<ins style='color: red;'>" + escape(out.n[i]) + nSpace[i] + "</ins>";
      } else {
        var pre = "";

        for (n = out.n[i].row + 1; n < out.o.length && out.o[n].text == null; n++ ) {
          pre += "<del style='color: blue;'>" + escape(out.o[n]) + oSpace[n] + "</del>";
        }
        str += " " + out.n[i].text + nSpace[i] + pre;
      }
    }
  }
 
  return str;
}

function randomColor() {
    return "rgb(" + (Math.random() * 100) + "%, " +
                    (Math.random() * 100) + "%, " +
                    (Math.random() * 100) + "%)";
}
function diffString2( o, n ) {
  o = o.replace(/\s+$/, '');
  n = n.replace(/\s+$/, '');

  var out = diff(o == "" ? [] : o.split(/\s+/), n == "" ? [] : n.split(/\s+/) );

  var oSpace = o.match(/\s+/g);
  if (oSpace == null) {
    oSpace = ["\n"];
  } else {
    oSpace.push("\n");
  }
  var nSpace = n.match(/\s+/g);
  if (nSpace == null) {
    nSpace = ["\n"];
  } else {
    nSpace.push("\n");
  }

  var os = "";
  var colors = new Array();
  for (var i = 0; i < out.o.length; i++) {
      colors[i] = randomColor();

      if (out.o[i].text != null) {
          os += '<span style="background-color: ' +colors[i]+ '">' +
                escape(out.o[i].text) + oSpace[i] + "</span>";
      } else {
          os += "<del  style='color: blue;'>" + escape(out.o[i]) + oSpace[i] + "</del>";
      }
  }

  var ns = "";
  for (var i = 0; i < out.n.length; i++) {
      if (out.n[i].text != null) {
          ns += '<span style="background-color: ' +colors[out.n[i].row]+ '">' +
                escape(out.n[i].text) + nSpace[i] + "</span>";
      } else {
          ns += "<ins style='color: red;'>" + escape(out.n[i]) + nSpace[i] + "</ins>";
      }
  }

  return { o : os , n : ns };
}

function diff( o, n ) {
  var ns = new Object();
  var os = new Object();
 
  for ( var i = 0; i < n.length; i++ ) {
    if ( ns[ n[i] ] == null )
      ns[ n[i] ] = { rows: new Array(), o: null };
    ns[ n[i] ].rows.push( i );
  }
 
  for ( var i = 0; i < o.length; i++ ) {
    if ( os[ o[i] ] == null )
      os[ o[i] ] = { rows: new Array(), n: null };
    os[ o[i] ].rows.push( i );
  }
 
  for ( var i in ns ) {
    if ( ns[i].rows.length == 1 && typeof(os[i]) != "undefined" && os[i].rows.length == 1 ) {
      n[ ns[i].rows[0] ] = { text: n[ ns[i].rows[0] ], row: os[i].rows[0] };
      o[ os[i].rows[0] ] = { text: o[ os[i].rows[0] ], row: ns[i].rows[0] };
    }
  }
 
  for ( var i = 0; i < n.length - 1; i++ ) {
    if ( n[i].text != null && n[i+1].text == null && n[i].row + 1 < o.length && o[ n[i].row + 1 ].text == null && 
         n[i+1] == o[ n[i].row + 1 ] ) {
      n[i+1] = { text: n[i+1], row: n[i].row + 1 };
      o[n[i].row+1] = { text: o[n[i].row+1], row: i + 1 };
    }
  }
 
  for ( var i = n.length - 1; i > 0; i-- ) {
    if ( n[i].text != null && n[i-1].text == null && n[i].row > 0 && o[ n[i].row - 1 ].text == null && 
         n[i-1] == o[ n[i].row - 1 ] ) {
      n[i-1] = { text: n[i-1], row: n[i].row - 1 };
      o[n[i].row-1] = { text: o[n[i].row-1], row: i - 1 };
    }
  }
 
  return { o: o, n: n };
}
</script>

<script language="javascript">
function CompareStrings (s1, s2, dObj)
{
var lc1 = s1.split(/[\n]/);
var lc2 = s2.split(/[\n]/);
var lc = lc2.length;
if (lc1.length > lc2.length) { lc = lc1.length; }
var lx =0;
str = "";
for (lx =0; lx < lc ; lx++) {
   if ((lx+1) > lc1.length) { ls1 = String.fromCharCode(13); }
   else { ls1 = lc1[lx]; }
   if ((lx+1) > lc2.length) { ls2 = String.fromCharCode(13); }
   else { ls2 = lc2[lx]; }
      if (ls1.length ==0) { ls1 = String.fromCharCode(13); }
      if (ls2.length ==0) { ls2 = String.fromCharCode(13); }
str += "L " +  lx + ": " + diffString (ls1, ls2) + "<br>";    
}

document.getElementById('diff').innerHTML = str;
dObj.value = str;
}
</script>
</head>
<body>
<form name="myform">
<textarea name="t1" cols="40" wrap="hard">
This is first string.
The red brown fox jumped over the rolling log.
var total = 0;
for (ix =0; ix < 3; ix++) {
   total += ix;
}
</textarea>
<textarea name="t2" cols="40" wrap="hard">
This is second string.
The brown spotted fox leaped over the rolling log
var total = 10;
for (ix =0; lx < 4; ix++) {
   total += ix;
}
</textarea>
<br>
<input type="button" value="Compare" onclick="CompareStrings(this.form.t1.value, this.form.t2.value, this.form.t3);">
<br>Diff Textarea</br>
<textarea style="display:none;" name="t3" cols="40" wrap="hard">
</textarea>
</form>
<div id="diff">
</div>
</body>
</html>
0

Experts Exchange Solution brought to you by

Your issues matter to us.

Facing a tech roadblock? Get the help and guidance you need from experienced professionals who care. Ask your question anytime, anywhere, with no hassle.

Start your 7-day free trial
talonvAuthor Commented:
Thank you! When a new line is reached, it seems to throw the index out and mark everything after that as new, though. It still fits what I asked, mind you. Thanks!
0
It's more than this solution.Get answers and train to solve all your tech problems - anytime, anywhere.Try it for free Edge Out The Competitionfor your dream job with proven skills and certifications.Get started today Stand Outas the employee with proven skills.Start learning today for free Move Your Career Forwardwith certification training in the latest technologies.Start your trial today
ASP

From novice to tech pro — start learning today.

Question has a verified solution.

Are you are experiencing a similar issue? Get a personalized answer when you ask a related question.

Have a better answer? Share it in a comment.