• Status: Solved
  • Priority: Medium
  • Security: Public
  • Views: 154
  • Last Modified:

problem related to counting occurrences of words in web documents

Hello everyone. In the following article: http://www.experts-exchange.com/Web/WebDevSoftware/Flash/Q_21771192.html#16172959, an approach is presented for counting the occurrences of a word within a specific web document. There's only one problem: when it is used on a large website (say www.cnn.com), it's too slow. As a consequence, the application can't run. Does anyone know a solution to this problem or an alternative?
Please, I would be very grateful if you could help me, as it's very important for my current work.
many thanks in advance
k
0
kugo
Asked:
kugo
  • 2
1 Solution
 
MontoyaProcess Improvement MgrCommented:
I think I'd do that with PHP and then have Flash read the value of a variable from PHP. That would be much, much faster.

0
 
Aneesh ChopraCommented:
Here is the fixed code....

This code will no longer hang, although loading the page might take some time,
depending on the size of the page.

Just put this code in the first frame of a new FLA and set the HTML page URL in the code.
---------
// URL of the HTML page whose text will be searched.
var htmlFile = "http://www.google.com";
// Word whose occurrences will be counted.
var countThisWord = "google";
// Loader that fetches the page; the raw response is delivered to onData.
var myLoadVar = new LoadVars();
myLoadVar.ignoreWhite = true;
// Receives the raw page text, or undefined when nothing could be loaded.
myLoadVar.onData = function(rawHtml)
{
      // Bail out early when the load produced no data.
      if (rawHtml == undefined)
      {
            trace("page load failed");
            return;
      }
      processHtmlData(rawHtml);
};
// Start the request; the result arrives through onData.
myLoadVar.load(htmlFile);
//
// Strips markup from a loaded HTML page and then passes the remaining text
// to processStep2 for counting.
//
// dat: raw HTML text of the page.
//
// Both the page text and the global countThisWord are lower-cased, so the
// later match is case-insensitive. The <title> element (tags and content) is
// replaced by "~" first. After that, one distinct tag is replaced by "~" on
// each frame via onEnterFrame, which spreads the work across frames instead of
// doing it in a single loop.
function processHtmlData(dat)
{
      dat = dat.toLowerCase();
      countThisWord = countThisWord.toLowerCase();
      // Remove the title element only when both of its tags are present.
      // Without this guard, indexOf returns -1 and substring would cut an
      // unrelated slice from the start of the page.
      var titleStart = dat.indexOf("<title>");
      if (titleStart != -1)
      {
            var titleEnd = dat.indexOf("</title>", titleStart);
            if (titleEnd != -1)
            {
                  // 8 is the length of "</title>".
                  var titleBlock = dat.substring(titleStart, titleEnd+8);
                  dat = dat.split(titleBlock).join("~");
            }
      }
      this.onEnterFrame = function()
      {
            var tagStart = dat.indexOf("<");
            // Search for ">" only after the "<". A stray ">" earlier in the
            // text would otherwise make substring swap its arguments, remove
            // page text instead of a tag, and keep this handler running forever.
            var tagEnd = (tagStart == -1) ? -1 : dat.indexOf(">", tagStart);
            if (tagEnd != -1)
            {
                  // split/join replaces every occurrence of this exact tag.
                  var tag = dat.substring(tagStart, tagEnd+1);
                  dat = dat.split(tag).join("~");
                  trace("processing....");

            } else
            {
                  // No complete tag is left: count the word and stop the loop.
                  processStep2(dat);
                  delete this.onEnterFrame;
            }
      };
}
// Final counting step: turns whitespace-like separators into "~" and reports
// through trace how many times the global countThisWord occurs in the text.
//
// dat: text to search.
function processStep2(dat)
{
      // Separators are replaced in this fixed order.
      var separators = ["&nbsp;", " ", "\n", "\r"];
      var text = dat;
      for (var i = 0; i < separators.length; i++)
      {
            text = text.split(separators[i]).join("~");
      }
      // Splitting on the word yields one more piece than there are matches.
      var matchCount = text.split(countThisWord).length-1;
      trace('"'+countThisWord+'" word occurance count is: '+matchCount);
}
-------------

Rgds
Aneesh
0
 
Aneesh ChopraCommented:
Ok, here is the best optimized code...

Page loading takes 5-15 seconds, and then it gives the result instantly.

=======
// URL of the HTML page whose raw text will be searched.
var htmlFile = "http://www.cnn.com";
// Word to count; it is lower-cased before matching.
var countThisWord = "Live";
// Loader that fetches the page; the raw response is delivered to onData.
var myLoadVar = new LoadVars();
myLoadVar.ignoreWhite = true;
// Counts the word directly in the raw response, with no tag stripping, so
// matches inside markup and inside longer words are included.
myLoadVar.onData = function(rawPage)
{
      // Bail out early when the load produced no data.
      if (rawPage == undefined)
      {
            trace("page load failed");
            return;
      }
      trace("loaded");
      var loweredPage = rawPage.toLowerCase();
      countThisWord = countThisWord.toLowerCase();
      // Splitting on the word yields one more piece than there are matches.
      var pieces = loweredPage.split(countThisWord);
      trace('"'+countThisWord+'" word occurance count is: '+(pieces.length-1));
};
// Start the request; the result arrives through onData.
myLoadVar.load(htmlFile);
===========


NOTE: this solution gives 99.9% accurate results.

Rgds
Aneesh
0
 
kugoAuthor Commented:
aneesh, thanks thanks thanks a lot!!
i am extremely grateful to you!
k
0

Featured Post

Keep up with what's happening at Experts Exchange!

Sign up to receive Decoded, a new monthly digest with product updates, feature release info, continuing education opportunities, and more.

  • 2
Tackle projects and never again get stuck behind a technical roadblock.
Join Now