• Status: Solved
  • Priority: Medium
  • Security: Public
  • Views: 270
  • Last Modified:

Count or sort times string is found in text

This code works fine, but I need one of two things. Either I need the total number of times each word is duplicated (the length threshold is currently set to 4), displaying only the duplicates, with the duplicate count, on a separate line above or below the list of words; or I need the duplicates listed in descending order of how many times they are repeated. I do not actually need to display any words that are not duplicated, just those that are duplicated (and the count).

Currently, it is far too difficult to make sense of a full page of text when the duplicate words are simply highlighted.

<!DOCTYPE html>
<html>
<head>
  <script type='text/javascript' src='http://code.jquery.com/jquery-1.5.js'></script>
  
  <style type='text/css'>
    span.duplicate { background: #ffdddd;
		}
  </style>

<script type='text/javascript'>//<![CDATA[ 
$(function () {
    // On DOM ready: collect every word longer than 4 characters that occurs
    // more than once in the paragraph text. Clicking the "a.words" link then
    // rewrites the paragraph with those words wrapped in a highlight span.
    var text = $('p').text(),
        words = text.split(' '),
        sortedWords = words.slice(0).sort(), // sort a copy; `words` keeps text order
        duplicateWords = [],
        a,
        current;

    // After sorting, equal words sit next to each other, so comparing each
    // word with its right-hand neighbour finds every repeat. Equal strings
    // have equal length, so one length check covers both neighbours.
    for (a = 0; a < sortedWords.length - 1; a++) {
        current = sortedWords[a];
        if (current.length > 4 &&
                current === sortedWords[a + 1] &&
                duplicateWords[duplicateWords.length - 1] !== current) {
            // The last condition records a word once even if it occurs 3+ times.
            duplicateWords.push(current);
        }
    }

    $('a.words').click(function () {
        var highlighted = $.map(words, function (word) {
            if ($.inArray(word, duplicateWords) > -1) {
                return '<span class="duplicate">' + word + '</span>';
            }
            return word;
        });
        $('p').html(highlighted.join(' '));
        return false; // stop the "#" link from navigating
    });
});//]]>

</script>
</head>
<body>

  <p>Bob is attempting to find the reason Janet is attempting to understand Bob and to reason with him.</p>

<hr />
<a class="words" href="#">Find duplicate words</a>
  
</body>
</html>

Open in new window

0
Qsorb
Asked:
Qsorb
  • 3
  • 2
  • 2
1 Solution
 
Roopesh ReddyIT AnalystCommented:
Hi,

Check this -

<!DOCTYPE html>
<html>
<head>
  <script type='text/javascript' src='http://code.jquery.com/jquery-1.5.js'></script>
   
  <style type='text/css'>
   span.duplicate { background: yellow; }
  </style>
</head>
<body>

<p>Bob is attempting to find the reason Janet is attempting to understand Bob and to reason with him.</p>
<hr />
<ul id="dupList">
    
</ul>

<script type='text/javascript'>//<![CDATA[
    // Highlight every word that occurs more than once in the paragraph and
    // list each duplicated word once in #dupList.
    //
    // Words are compared by plain string equality after dropping one trailing
    // period, so "reason." and "reason" are the same word. No regular
    // expression is built from the text, so words containing characters such
    // as "(" or "+" cannot break or distort the matching.
    var text = $('p').text(),
        words = text.split(' '),
        occurrences = {},   // normalized word -> number of times seen
        listed = {},        // normalized words already pushed to `result`
        highlighted = [],   // HTML pieces, one per original word
        result = [],        // duplicated words, in order of first appearance
        hasOwn = Object.prototype.hasOwnProperty,
        inSpan = false,     // true while a highlight <span> is still open
        i, key, piece, isDuplicate;

    // A sentence-ending period is punctuation, not part of the word.
    function normalizeWord(word) {
        return word.replace(/\.$/, '');
    }

    // The words come from .text() (already decoded), so they must be
    // re-encoded before being written back with .html().
    function escapeHtml(str) {
        return str.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
    }

    // Pass 1: count the occurrences of each normalized word.
    for (i = 0; i < words.length; i++) {
        key = normalizeWord(words[i]);
        if (key !== '') { // consecutive spaces produce empty tokens; ignore them
            occurrences[key] = hasOwn.call(occurrences, key) ? occurrences[key] + 1 : 1;
        }
    }

    // Pass 2: rebuild the paragraph. A run of adjacent duplicated words
    // shares a single span, which is opened on the first word of the run
    // and closed on the last one.
    for (i = 0; i < words.length; i++) {
        key = normalizeWord(words[i]);
        isDuplicate = hasOwn.call(occurrences, key) && occurrences[key] > 1;
        piece = escapeHtml(words[i]);

        if (isDuplicate && !inSpan) {
            piece = '<span class="duplicate">' + piece;
        } else if (!isDuplicate && inSpan) {
            highlighted[highlighted.length - 1] += '</span>';
        }
        inSpan = isDuplicate;
        highlighted.push(piece);

        if (isDuplicate && !hasOwn.call(listed, key)) {
            listed[key] = true;
            result.push(key);
        }
    }
    // Close the span when the text ends on a duplicated word.
    if (inSpan) {
        highlighted[highlighted.length - 1] += '</span>';
    }

    $('p').html(highlighted.join(' '));

    // .text() keeps the word from being parsed as markup.
    for (i = 0; i < result.length; i++) {
        $('<li></li>').text(result[i]).appendTo('#dupList');
    }

</script>
  
</body>
</html>

Open in new window


It displays the list of words duplicated.

Check this link to display the count - http://newcodeandroll.blogspot.in/2012/01/how-to-find-duplicates-in-array-in.html

Hope it helps u...
0
 
Roopesh ReddyIT AnalystCommented:
Hi,

Here is the complete sample -

<!DOCTYPE html>
<html>
<head>
  <script type='text/javascript' src='http://code.jquery.com/jquery-1.5.js'></script>
   
  <style type='text/css'>
   span.duplicate { background: yellow; }
  </style>
</head>
<body>

<p>Bob is attempting to find the reason Janet is attempting to understand Bob and to reason with him.</p>
<hr />
<ul id="dupList">
    
</ul>

<script type='text/javascript'>//<![CDATA[
    // Highlight every word that occurs more than once in the paragraph and
    // list each duplicated word in #dupList as "word, count", where count is
    // the total number of occurrences of that word.
    //
    // Words are compared by plain string equality after dropping one trailing
    // period, so "reason." and "reason" are counted together. No regular
    // expression is built from the text, so words containing characters such
    // as "(" or "+" cannot break or distort the matching.
    var text = $('p').text(),
        words = text.split(' '),
        tally = {},         // normalized word -> number of occurrences
        seen = {},          // normalized words already pushed to `result`
        highlighted = [],   // HTML pieces, one per original word
        result = [],        // duplicated words, in order of first appearance
        owns = Object.prototype.hasOwnProperty,
        spanOpen = false,   // true while a highlight <span> is still open
        idx, word, html, repeated;

    // A sentence-ending period is punctuation, not part of the word.
    function stripTrailingPeriod(token) {
        return token.replace(/\.$/, '');
    }

    // The tokens come from .text() (already decoded), so they must be
    // re-encoded before being written back with .html().
    function encodeHtml(str) {
        return str.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
    }

    // Pass 1: tally each normalized word.
    for (idx = 0; idx < words.length; idx++) {
        word = stripTrailingPeriod(words[idx]);
        if (word === '') {
            continue; // consecutive spaces produce empty tokens; ignore them
        }
        if (owns.call(tally, word)) {
            tally[word] += 1;
        } else {
            tally[word] = 1;
        }
    }

    // Pass 2: rebuild the paragraph. A run of adjacent duplicated words
    // shares a single span, opened on the first word of the run and closed
    // on the last one.
    for (idx = 0; idx < words.length; idx++) {
        word = stripTrailingPeriod(words[idx]);
        repeated = owns.call(tally, word) && tally[word] > 1;
        html = encodeHtml(words[idx]);

        if (repeated && !spanOpen) {
            html = '<span class="duplicate">' + html;
        } else if (!repeated && spanOpen) {
            highlighted[highlighted.length - 1] += '</span>';
        }
        spanOpen = repeated;
        highlighted.push(html);

        if (repeated && !owns.call(seen, word)) {
            seen[word] = true;
            result.push(word);
        }
    }
    // Close the span when the text ends on a duplicated word.
    if (spanOpen) {
        highlighted[highlighted.length - 1] += '</span>';
    }

    $('p').html(highlighted.join(' '));

    // The count comes from the same tally used for detection, so the number
    // shown always agrees with what is highlighted. .text() keeps the word
    // from being parsed as markup.
    for (idx = 0; idx < result.length; idx++) {
        $('<li></li>').text(result[idx] + ', ' + tally[result[idx]]).appendTo('#dupList');
    }

</script>
  
</body>
</html>

Open in new window


Hope it helps u...
0
 
Slick812Commented:
I'm not sure what HTML output you want, but I had a JavaScript function, getDups(txt), that finds the duplicate words in some input text and produces two lines of HTML for each word that occurs more than once in the text. I changed it to consider only words more than 4 characters long. Here is the JS I used to test it -

<script>
// Sample input for getDups(): several repeated words with punctuation attached to some of them.
var tx1 = "Bobob is attempting-to find him the reason, Janet is attempting. to understand Bobob and to! reason with him a!s Bobob in reason or reason.";

/**
 * Builds an HTML report of the words that occur more than once in `txt`.
 *
 * The characters , . ? ! " ' ; are removed and "-" and ":" are treated as
 * word separators; any other punctuation stays attached to its word, so
 * beware of text that uses it. Only words longer than 4 characters are
 * reported, and the comparison is case-sensitive.
 *
 * @param {string} txt Text to scan.
 * @returns {string} One 'word<br /> has N duplicates,<br />' fragment per
 *   repeated word, in sorted word order, where N is the total number of
 *   occurrences of that word; an empty string when nothing repeats.
 */
function getDups(txt) {
    var MIN_LENGTH = 4; // words must be longer than this to be reported

    // Split on any whitespace run so words separated by newlines or tabs are
    // not glued together. Sorting puts equal words next to each other.
    var wordary = txt
        .replace(/[,.?!"';]/g, "")
        .replace(/[-:]/g, " ")
        .split(/\s+/)
        .sort();

    var dups = {};  // word -> number of occurrences, repeated words only
    var dNum = 1;   // length of the current run of equal words

    // Compare every adjacent pair, including the last one (length - 1).
    // Equal strings have equal length, so one length check covers both.
    for (var i = 0; i < wordary.length - 1; i++) {
        if (wordary[i].length > MIN_LENGTH && wordary[i + 1] === wordary[i]) {
            dNum += 1;
            dups[wordary[i]] = dNum;
        } else {
            dNum = 1;
        }
    }

    var report = "";
    for (var key in dups) {
        if (Object.prototype.hasOwnProperty.call(dups, key)) {
            report += key + "<br /> has " + dups[key] + " duplicates,<br />";
        }
    }
    return report;
}


// Runs getDups() on the given text and writes the resulting HTML report
// into the element whose id is "de".
function showUm(text) {
    var report = getDups(text);
    var target = document.getElementById("de");
    target.innerHTML = report;
}

</script>

Open in new window


with this html -

<button onclick="showUm(tx1)"> Get Duplicates </button>
<p id="de">dups here</p>
0
Cloud Class® Course: MCSA MCSE Windows Server 2012

This course teaches how to install and configure Windows Server 2012 R2.  It is the first step on your path to becoming a Microsoft Certified Solutions Expert (MCSE).

 
QsorbAuthor Commented:
roopeshreddy:

Thanks for the attempts. Unfortunately, both suggestions gave me the same result. I used only your code but could not get it to do anything other than what is seen in the image I attached.

But the good news is that Slick812 has a suggestion that is just about usable for me. He will get all or the majority of the points, but I have one additional need to get this to work. See below.
roopeshreddy-1.gif
0
 
QsorbAuthor Commented:
Slick812:

Hey, that works! But because the text can be quite large, I need the results sorted by highest number of hits descending, on top. I really only need the top ten results but if that's a pain, then just sort to highest result first. Is this doable?
0
 
Slick812Commented:
? ?
Yes, it's doable, I would think, but I have the output string created in this loop -
for (key in dups) {
    txt+= key +"<br /> has "+dups[key]+" duplicates,<br />";
    }

 Here dups[key] holds the number of duplicates for each word.
For me to sort an object, I would have to do it by hand: copy its entries into an array, reorder that by count, and build a new object with the properties reordered.

Maybe tomorrow I'll have time. I have some code in Pascal that does that, but none in JavaScript that I remember; it's not as easy as it sounds.
0
 
QsorbAuthor Commented:
No problem. I'll make it another question because you were certainly able to do what I asked. I'll post the new question in a few minutes. Thanks so much.
0
Question has a verified solution.

Are you experiencing a similar issue? Get a personalized answer when you ask a related question.

Have a better answer? Share it in a comment.

Join & Write a Comment

Featured Post

Cloud Class® Course: SQL Server Core 2016

This course will introduce you to SQL Server Core 2016, as well as teach you about SSMS, data tools, installation, server configuration, using Management Studio, and writing and executing queries.

  • 3
  • 2
  • 2
Tackle projects and never again get stuck behind a technical roadblock.
Join Now