Want to protect your cyber security and still get fast solutions? Ask a secure question today.Go Premium

x
  • Status: Solved
  • Priority: Medium
  • Security: Public
  • Views: 619
  • Last Modified:

Text compare algorithm

I'm looking for code for an ASP page that takes two strings and returns the differences between the two. The basic concept is easy enough to execute, but I need something more akin to the comparison performed on Wikipedia's history section - show what has been added, removed and modified on a line by line basis, not just comparing characters.
0
talonv
Asked:
talonv
  • 2
  • 2
1 Solution
 
Pravin AsarPrincipal Systems EngineerCommented:
Here is a Javascript Based Compare which does the line by line comparison.

Highlights the differences

<html>
<head>
<title>Document/Page Title</title>
<style type="text/css">
</style>
<!-- Do not edit these library functions -->
<script language="javascript">
function escape(s) {
    var n = s;
    n = n.replace(/&/g, "&amp;");
    n = n.replace(/</g, "&lt;");
    n = n.replace(/>/g, "&gt;");
    n = n.replace(/"/g, "&quot;");
 
    return n;
}
 
function diffString( o, n ) {
  o = o.replace(/\s+$/, '');
  n = n.replace(/\s+$/, '');
 
  var out = diff(o == "" ? [] : o.split(/\s+/), n == "" ? [] : n.split(/\s+/) );
  var str = "";
 
  var oSpace = o.match(/\s+/g);
  if (oSpace == null) {
    oSpace = ["\n"];
  } else {
    oSpace.push("\n");
  }
  var nSpace = n.match(/\s+/g);
  if (nSpace == null) {
    nSpace = ["\n"];
  } else {
    nSpace.push("\n");
  }
 
  if (out.n.length == 0) {
      for (var i = 0; i < out.o.length; i++) {
        str += "<del style='color: blue;'>" + escape(out.o[i]) + oSpace[i] + "</del>";
      }
  } else {
    if (out.n[0].text == null) {
      for (n = 0; n < out.o.length && out.o[n].text == null; n++) {
        str += "<del style='color: blue;'>" + escape(out.o[n]) + oSpace[n] + "</del>";
      }
    }
 
    for ( var i = 0; i < out.n.length; i++ ) {
      if (out.n[i].text == null) {
        str += "<ins style='color: red;'>" + escape(out.n[i]) + nSpace[i] + "</ins>";
      } else {
        var pre = "";
 
        for (n = out.n[i].row + 1; n < out.o.length && out.o[n].text == null; n++ ) {
          pre += "<del style='color: blue;'>" + escape(out.o[n]) + oSpace[n] + "</del>";
        }
        str += " " + out.n[i].text + nSpace[i] + pre;
      }
    }
  }
  
  return str;
}
 
function randomColor() {
    return "rgb(" + (Math.random() * 100) + "%, " + 
                    (Math.random() * 100) + "%, " + 
                    (Math.random() * 100) + "%)";
}
function diffString2( o, n ) {
  o = o.replace(/\s+$/, '');
  n = n.replace(/\s+$/, '');
 
  var out = diff(o == "" ? [] : o.split(/\s+/), n == "" ? [] : n.split(/\s+/) );
 
  var oSpace = o.match(/\s+/g);
  if (oSpace == null) {
    oSpace = ["\n"];
  } else {
    oSpace.push("\n");
  }
  var nSpace = n.match(/\s+/g);
  if (nSpace == null) {
    nSpace = ["\n"];
  } else {
    nSpace.push("\n");
  }
 
  var os = "";
  var colors = new Array();
  for (var i = 0; i < out.o.length; i++) {
      colors[i] = randomColor();
 
      if (out.o[i].text != null) {
          os += '<span style="background-color: ' +colors[i]+ '">' + 
                escape(out.o[i].text) + oSpace[i] + "</span>";
      } else {
          os += "<del  style='color: blue;'>" + escape(out.o[i]) + oSpace[i] + "</del>";
      }
  }
 
  var ns = "";
  for (var i = 0; i < out.n.length; i++) {
      if (out.n[i].text != null) {
          ns += '<span style="background-color: ' +colors[out.n[i].row]+ '">' + 
                escape(out.n[i].text) + nSpace[i] + "</span>";
      } else {
          ns += "<ins style='color: red;'>" + escape(out.n[i]) + nSpace[i] + "</ins>";
      }
  }
 
  return { o : os , n : ns };
}
 
function diff( o, n ) {
  var ns = new Object();
  var os = new Object();
  
  for ( var i = 0; i < n.length; i++ ) {
    if ( ns[ n[i] ] == null )
      ns[ n[i] ] = { rows: new Array(), o: null };
    ns[ n[i] ].rows.push( i );
  }
  
  for ( var i = 0; i < o.length; i++ ) {
    if ( os[ o[i] ] == null )
      os[ o[i] ] = { rows: new Array(), n: null };
    os[ o[i] ].rows.push( i );
  }
  
  for ( var i in ns ) {
    if ( ns[i].rows.length == 1 && typeof(os[i]) != "undefined" && os[i].rows.length == 1 ) {
      n[ ns[i].rows[0] ] = { text: n[ ns[i].rows[0] ], row: os[i].rows[0] };
      o[ os[i].rows[0] ] = { text: o[ os[i].rows[0] ], row: ns[i].rows[0] };
    }
  }
  
  for ( var i = 0; i < n.length - 1; i++ ) {
    if ( n[i].text != null && n[i+1].text == null && n[i].row + 1 < o.length && o[ n[i].row + 1 ].text == null && 
         n[i+1] == o[ n[i].row + 1 ] ) {
      n[i+1] = { text: n[i+1], row: n[i].row + 1 };
      o[n[i].row+1] = { text: o[n[i].row+1], row: i + 1 };
    }
  }
  
  for ( var i = n.length - 1; i > 0; i-- ) {
    if ( n[i].text != null && n[i-1].text == null && n[i].row > 0 && o[ n[i].row - 1 ].text == null && 
         n[i-1] == o[ n[i].row - 1 ] ) {
      n[i-1] = { text: n[i-1], row: n[i].row - 1 };
      o[n[i].row-1] = { text: o[n[i].row-1], row: i - 1 };
    }
  }
  
  return { o: o, n: n };
}
</script>
 
<script language="javascript">
function CompareStrings (s1, s2, dObj)
{
var lc1 = s1.split(/[\n]/);
var lc2 = s2.split(/[\n]/);
var lc = lc2.length;
if (lc1.length > lc2.length) { lc = lc1.length; }
var lx =0; 
str = "";
for (lx =0; lx < lc ; lx++) {
   if (lx > lc1.length) { ls1 = char(13); }
   else { ls1 = lc1[lx]; }
   if (lx > lc2.length) { ls2 = char(13); }
   else { ls2 = lc2[lx]; }
str += "L " +  lx + ": " + diffString (ls1, ls2) + "<br>";    
}
 
document.getElementById('diff').innerHTML = str; 
dObj.value = str;
}
</script>
</head>
<body>
<form name="myform">
<textarea name="t1" cols="40" wrap="hard">
This is first string.
The red brown fox jumped over the rolling log.
var total = 0;
for (ix =0; ix < 3; ix++) {
   total += ix;
}
</textarea>
<textarea name="t2" cols="40" wrap="hard">
This is second string.
The brown spotted fox leaped over the rolling log
var total = 10;
for (ix =0; lx < 4; ix++) {
   total += ix;
}
</textarea>
<br>
<input type="button" value="Compare" onclick="CompareStrings(this.form.t1.value, this.form.t2.value, this.form.t3);">
<br>Diff Textarea</br>
<textarea style="display:none;" name="t3" cols="40" wrap="hard">
</textarea>
</form>
<div id="diff">
</div>
</body>
</html>

Open in new window

0
 
talonvAuthor Commented:
That's almost perfect - is there a way it could detect if lines have been added or removed? Right now, if a line is added in one and not the other, it just returns an error.
0
 
Pravin AsarPrincipal Systems EngineerCommented:
<html>
<head>
<title>Document/Page Title</title>
<style type="text/css">
</style>
<!-- Do not edit these library functions -->
<script language="javascript">
function escape(s) {
    var n = s;
    n = n.replace(/&/g, "&amp;");
    n = n.replace(/</g, "<");
    n = n.replace(/>/g, ">");
    n = n.replace(/"/g, "&quot;");

    return n;
}

function diffString( o, n ) {
  o = o.replace(/\s+$/, '');
  n = n.replace(/\s+$/, '');

  var out = diff(o == "" ? [] : o.split(/\s+/), n == "" ? [] : n.split(/\s+/) );
  var str = "";

  var oSpace = o.match(/\s+/g);
  if (oSpace == null) {
    oSpace = ["\n"];
  } else {
    oSpace.push("\n");
  }
  var nSpace = n.match(/\s+/g);
  if (nSpace == null) {
    nSpace = ["\n"];
  } else {
    nSpace.push("\n");
  }

  if (out.n.length == 0) {
      for (var i = 0; i < out.o.length; i++) {
        str += "<del style='color: blue;'>" + escape(out.o[i]) + oSpace[i] + "</del>";
      }
  } else {
    if (out.n[0].text == null) {
      for (n = 0; n < out.o.length && out.o[n].text == null; n++) {
        str += "<del style='color: blue;'>" + escape(out.o[n]) + oSpace[n] + "</del>";
      }
    }

    for ( var i = 0; i < out.n.length; i++ ) {
      if (out.n[i].text == null) {
        str += "<ins style='color: red;'>" + escape(out.n[i]) + nSpace[i] + "</ins>";
      } else {
        var pre = "";

        for (n = out.n[i].row + 1; n < out.o.length && out.o[n].text == null; n++ ) {
          pre += "<del style='color: blue;'>" + escape(out.o[n]) + oSpace[n] + "</del>";
        }
        str += " " + out.n[i].text + nSpace[i] + pre;
      }
    }
  }
 
  return str;
}

function randomColor() {
    return "rgb(" + (Math.random() * 100) + "%, " +
                    (Math.random() * 100) + "%, " +
                    (Math.random() * 100) + "%)";
}
function diffString2( o, n ) {
  o = o.replace(/\s+$/, '');
  n = n.replace(/\s+$/, '');

  var out = diff(o == "" ? [] : o.split(/\s+/), n == "" ? [] : n.split(/\s+/) );

  var oSpace = o.match(/\s+/g);
  if (oSpace == null) {
    oSpace = ["\n"];
  } else {
    oSpace.push("\n");
  }
  var nSpace = n.match(/\s+/g);
  if (nSpace == null) {
    nSpace = ["\n"];
  } else {
    nSpace.push("\n");
  }

  var os = "";
  var colors = new Array();
  for (var i = 0; i < out.o.length; i++) {
      colors[i] = randomColor();

      if (out.o[i].text != null) {
          os += '<span style="background-color: ' +colors[i]+ '">' +
                escape(out.o[i].text) + oSpace[i] + "</span>";
      } else {
          os += "<del  style='color: blue;'>" + escape(out.o[i]) + oSpace[i] + "</del>";
      }
  }

  var ns = "";
  for (var i = 0; i < out.n.length; i++) {
      if (out.n[i].text != null) {
          ns += '<span style="background-color: ' +colors[out.n[i].row]+ '">' +
                escape(out.n[i].text) + nSpace[i] + "</span>";
      } else {
          ns += "<ins style='color: red;'>" + escape(out.n[i]) + nSpace[i] + "</ins>";
      }
  }

  return { o : os , n : ns };
}

function diff( o, n ) {
  var ns = new Object();
  var os = new Object();
 
  for ( var i = 0; i < n.length; i++ ) {
    if ( ns[ n[i] ] == null )
      ns[ n[i] ] = { rows: new Array(), o: null };
    ns[ n[i] ].rows.push( i );
  }
 
  for ( var i = 0; i < o.length; i++ ) {
    if ( os[ o[i] ] == null )
      os[ o[i] ] = { rows: new Array(), n: null };
    os[ o[i] ].rows.push( i );
  }
 
  for ( var i in ns ) {
    if ( ns[i].rows.length == 1 && typeof(os[i]) != "undefined" && os[i].rows.length == 1 ) {
      n[ ns[i].rows[0] ] = { text: n[ ns[i].rows[0] ], row: os[i].rows[0] };
      o[ os[i].rows[0] ] = { text: o[ os[i].rows[0] ], row: ns[i].rows[0] };
    }
  }
 
  for ( var i = 0; i < n.length - 1; i++ ) {
    if ( n[i].text != null && n[i+1].text == null && n[i].row + 1 < o.length && o[ n[i].row + 1 ].text == null && 
         n[i+1] == o[ n[i].row + 1 ] ) {
      n[i+1] = { text: n[i+1], row: n[i].row + 1 };
      o[n[i].row+1] = { text: o[n[i].row+1], row: i + 1 };
    }
  }
 
  for ( var i = n.length - 1; i > 0; i-- ) {
    if ( n[i].text != null && n[i-1].text == null && n[i].row > 0 && o[ n[i].row - 1 ].text == null && 
         n[i-1] == o[ n[i].row - 1 ] ) {
      n[i-1] = { text: n[i-1], row: n[i].row - 1 };
      o[n[i].row-1] = { text: o[n[i].row-1], row: i - 1 };
    }
  }
 
  return { o: o, n: n };
}
</script>

<script language="javascript">
function CompareStrings (s1, s2, dObj)
{
var lc1 = s1.split(/[\n]/);
var lc2 = s2.split(/[\n]/);
var lc = lc2.length;
if (lc1.length > lc2.length) { lc = lc1.length; }
var lx =0;
str = "";
for (lx =0; lx < lc ; lx++) {
   if ((lx+1) > lc1.length) { ls1 = String.fromCharCode(13); }
   else { ls1 = lc1[lx]; }
   if ((lx+1) > lc2.length) { ls2 = String.fromCharCode(13); }
   else { ls2 = lc2[lx]; }
      if (ls1.length ==0) { ls1 = String.fromCharCode(13); }
      if (ls2.length ==0) { ls2 = String.fromCharCode(13); }
str += "L " +  lx + ": " + diffString (ls1, ls2) + "<br>";    
}

document.getElementById('diff').innerHTML = str;
dObj.value = str;
}
</script>
</head>
<body>
<form name="myform">
<textarea name="t1" cols="40" wrap="hard">
This is first string.
The red brown fox jumped over the rolling log.
var total = 0;
for (ix =0; ix < 3; ix++) {
   total += ix;
}
</textarea>
<textarea name="t2" cols="40" wrap="hard">
This is second string.
The brown spotted fox leaped over the rolling log
var total = 10;
for (ix =0; lx < 4; ix++) {
   total += ix;
}
</textarea>
<br>
<input type="button" value="Compare" onclick="CompareStrings(this.form.t1.value, this.form.t2.value, this.form.t3);">
<br>Diff Textarea</br>
<textarea style="display:none;" name="t3" cols="40" wrap="hard">
</textarea>
</form>
<div id="diff">
</div>
</body>
</html>
0
 
talonvAuthor Commented:
Thank you! When a new line is reached, it seems to throw the index out and mark everything after that as new, though. It still fits what I asked, mind you. Thanks!
0

Featured Post

Free Tool: Site Down Detector

Helpful to verify reports of your own downtime, or to double check a downed website you are trying to access.

One of a set of tools we are providing to everyone as a way of saying thank you for being a part of the community.

  • 2
  • 2
Tackle projects and never again get stuck behind a technical roadblock.
Join Now