Link to home
Start Free TrialLog in
Avatar of talonv
talonv

asked on

Text compare algorithm

I'm looking for code for an ASP page that takes two strings and returns the differences between the two. The basic concept is easy enough to execute, but I need something more akin to the comparison performed on Wikipedia's history section - show what has been added, removed and modified on a line by line basis, not just comparing characters.
Avatar of Pravin Asar
Pravin Asar
Flag of United States of America image

Here is a Javascript Based Compare which does the line by line comparison.

Highlights the differences

<html>
<head>
<title>Document/Page Title</title>
<style type="text/css">
</style>
<!-- Do not edit these library functions -->
<script language="javascript">
function escape(s) {
    var n = s;
    n = n.replace(/&/g, "&amp;");
    n = n.replace(/</g, "&lt;");
    n = n.replace(/>/g, "&gt;");
    n = n.replace(/"/g, "&quot;");
 
    return n;
}
 
function diffString( o, n ) {
  o = o.replace(/\s+$/, '');
  n = n.replace(/\s+$/, '');
 
  var out = diff(o == "" ? [] : o.split(/\s+/), n == "" ? [] : n.split(/\s+/) );
  var str = "";
 
  var oSpace = o.match(/\s+/g);
  if (oSpace == null) {
    oSpace = ["\n"];
  } else {
    oSpace.push("\n");
  }
  var nSpace = n.match(/\s+/g);
  if (nSpace == null) {
    nSpace = ["\n"];
  } else {
    nSpace.push("\n");
  }
 
  if (out.n.length == 0) {
      for (var i = 0; i < out.o.length; i++) {
        str += "<del style='color: blue;'>" + escape(out.o[i]) + oSpace[i] + "</del>";
      }
  } else {
    if (out.n[0].text == null) {
      for (n = 0; n < out.o.length && out.o[n].text == null; n++) {
        str += "<del style='color: blue;'>" + escape(out.o[n]) + oSpace[n] + "</del>";
      }
    }
 
    for ( var i = 0; i < out.n.length; i++ ) {
      if (out.n[i].text == null) {
        str += "<ins style='color: red;'>" + escape(out.n[i]) + nSpace[i] + "</ins>";
      } else {
        var pre = "";
 
        for (n = out.n[i].row + 1; n < out.o.length && out.o[n].text == null; n++ ) {
          pre += "<del style='color: blue;'>" + escape(out.o[n]) + oSpace[n] + "</del>";
        }
        str += " " + out.n[i].text + nSpace[i] + pre;
      }
    }
  }
  
  return str;
}
 
function randomColor() {
    return "rgb(" + (Math.random() * 100) + "%, " + 
                    (Math.random() * 100) + "%, " + 
                    (Math.random() * 100) + "%)";
}
function diffString2( o, n ) {
  o = o.replace(/\s+$/, '');
  n = n.replace(/\s+$/, '');
 
  var out = diff(o == "" ? [] : o.split(/\s+/), n == "" ? [] : n.split(/\s+/) );
 
  var oSpace = o.match(/\s+/g);
  if (oSpace == null) {
    oSpace = ["\n"];
  } else {
    oSpace.push("\n");
  }
  var nSpace = n.match(/\s+/g);
  if (nSpace == null) {
    nSpace = ["\n"];
  } else {
    nSpace.push("\n");
  }
 
  var os = "";
  var colors = new Array();
  for (var i = 0; i < out.o.length; i++) {
      colors[i] = randomColor();
 
      if (out.o[i].text != null) {
          os += '<span style="background-color: ' +colors[i]+ '">' + 
                escape(out.o[i].text) + oSpace[i] + "</span>";
      } else {
          os += "<del  style='color: blue;'>" + escape(out.o[i]) + oSpace[i] + "</del>";
      }
  }
 
  var ns = "";
  for (var i = 0; i < out.n.length; i++) {
      if (out.n[i].text != null) {
          ns += '<span style="background-color: ' +colors[out.n[i].row]+ '">' + 
                escape(out.n[i].text) + nSpace[i] + "</span>";
      } else {
          ns += "<ins style='color: red;'>" + escape(out.n[i]) + nSpace[i] + "</ins>";
      }
  }
 
  return { o : os , n : ns };
}
 
function diff( o, n ) {
  var ns = new Object();
  var os = new Object();
  
  for ( var i = 0; i < n.length; i++ ) {
    if ( ns[ n[i] ] == null )
      ns[ n[i] ] = { rows: new Array(), o: null };
    ns[ n[i] ].rows.push( i );
  }
  
  for ( var i = 0; i < o.length; i++ ) {
    if ( os[ o[i] ] == null )
      os[ o[i] ] = { rows: new Array(), n: null };
    os[ o[i] ].rows.push( i );
  }
  
  for ( var i in ns ) {
    if ( ns[i].rows.length == 1 && typeof(os[i]) != "undefined" && os[i].rows.length == 1 ) {
      n[ ns[i].rows[0] ] = { text: n[ ns[i].rows[0] ], row: os[i].rows[0] };
      o[ os[i].rows[0] ] = { text: o[ os[i].rows[0] ], row: ns[i].rows[0] };
    }
  }
  
  for ( var i = 0; i < n.length - 1; i++ ) {
    if ( n[i].text != null && n[i+1].text == null && n[i].row + 1 < o.length && o[ n[i].row + 1 ].text == null && 
         n[i+1] == o[ n[i].row + 1 ] ) {
      n[i+1] = { text: n[i+1], row: n[i].row + 1 };
      o[n[i].row+1] = { text: o[n[i].row+1], row: i + 1 };
    }
  }
  
  for ( var i = n.length - 1; i > 0; i-- ) {
    if ( n[i].text != null && n[i-1].text == null && n[i].row > 0 && o[ n[i].row - 1 ].text == null && 
         n[i-1] == o[ n[i].row - 1 ] ) {
      n[i-1] = { text: n[i-1], row: n[i].row - 1 };
      o[n[i].row-1] = { text: o[n[i].row-1], row: i - 1 };
    }
  }
  
  return { o: o, n: n };
}
</script>
 
<script language="javascript">
function CompareStrings (s1, s2, dObj)
{
var lc1 = s1.split(/[\n]/);
var lc2 = s2.split(/[\n]/);
var lc = lc2.length;
if (lc1.length > lc2.length) { lc = lc1.length; }
var lx =0; 
str = "";
for (lx =0; lx < lc ; lx++) {
   if (lx > lc1.length) { ls1 = char(13); }
   else { ls1 = lc1[lx]; }
   if (lx > lc2.length) { ls2 = char(13); }
   else { ls2 = lc2[lx]; }
str += "L " +  lx + ": " + diffString (ls1, ls2) + "<br>";    
}
 
document.getElementById('diff').innerHTML = str; 
dObj.value = str;
}
</script>
</head>
<body>
<form name="myform">
<textarea name="t1" cols="40" wrap="hard">
This is first string.
The red brown fox jumped over the rolling log.
var total = 0;
for (ix =0; ix < 3; ix++) {
   total += ix;
}
</textarea>
<textarea name="t2" cols="40" wrap="hard">
This is second string.
The brown spotted fox leaped over the rolling log
var total = 10;
for (ix =0; lx < 4; ix++) {
   total += ix;
}
</textarea>
<br>
<input type="button" value="Compare" onclick="CompareStrings(this.form.t1.value, this.form.t2.value, this.form.t3);">
<br>Diff Textarea</br>
<textarea style="display:none;" name="t3" cols="40" wrap="hard">
</textarea>
</form>
<div id="diff">
</div>
</body>
</html>

Open in new window

Avatar of talonv
talonv

ASKER

That's almost perfect - is there a way it could detect if lines have been added or removed? Right now, if a line is added in one and not the other, it just returns an error.
ASKER CERTIFIED SOLUTION
Avatar of Pravin Asar
Pravin Asar
Flag of United States of America image

Link to home
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Start Free Trial
Avatar of talonv

ASKER

Thank you! When a new line is reached, it seems to throw the index out and mark everything after that as new, though. It still fits what I asked, mind you. Thanks!