[Webinar] Streamline your web hosting managementRegister Today

x
  • Status: Solved
  • Priority: Medium
  • Security: Public
  • Views: 479
  • Last Modified:

google scraper code

I'm using a Google scraper script (included below) to pull the top 50 results for a specific search term.
It dumps the information into a Google Docs spreadsheet.
How can I adjust the code so that, instead of giving me the top 50 results in one list, it splits them into results 1-10, then 11-20, then 21-30, etc.?
That way I will be able to track exactly which page each result came from.
/* ---------------------------------------------------------------------------
 * google_scraper.js
 * https://github.com/chrisle/google_scraper.js
 *
 * @desc    Google Scraper for Google Docs Spreadsheet.
 * @author  Chris Le - @djchrisle - chrisl at seerinteractive.com
 * @license MIT (see: http://www.opensource.org/licenses/mit-license.php)
 * @version 1.0.1
 * -------------------------------------------------------------------------*/

var SeerJs_GoogleScraper = (function() {

  // Set by fetch(): true when the most recent request threw, so that get()
  // knows the value it received is the caught error rather than page HTML.
  var errorOccurred;

  /**
   * Gets the text between the first occurrence of `start` and the first
   * occurrence of `end` that comes after it.
   * @param  {string} haystack String to look into
   * @param  {string} start Starting tag
   * @param  {string} end Ending tag
   * @return {string} Text between the two tags, or '' when either tag is
   *                  missing
   */
  function getInside(haystack, start, end) {
    var startPos = haystack.indexOf(start);
    if (startPos === -1) { return ''; }
    var contentStart = startPos + start.length;
    // Search for the end tag only AFTER the start tag; an earlier occurrence
    // of `end` must not cut the result short (or make it empty).
    var contentEnd = haystack.indexOf(end, contentStart);
    if (contentEnd === -1) { return ''; }
    return haystack.substring(contentStart, contentEnd);
  }

  /**
   * Fetches a Google results page for a keyword.  On failure the caught
   * error is returned (not thrown) and `errorOccurred` is set to true.
   * @param {string} kw Keyword; it is URL-encoded here, so pass it raw
   * @param {number} optResults Number of results to request (default 10)
   * @param {number} optStart Zero-based offset of the first result, sent as
   *                          the `start` query parameter (default 0, in which
   *                          case the parameter is omitted).  For example
   *                          optResults = 10 with optStart = 10 requests
   *                          results 11-20.
   * @return {string|Error} Page HTML, or the caught error
   */
  function fetch(kw, optResults, optStart) {
    errorOccurred = false;
    var count = optResults || 10;
    var offset = optStart || 0;
    try {
      // Encode the keyword so spaces, '&', '#', etc. cannot break the query.
      var url = 'http://www.google.com/search?q=' + encodeURIComponent(kw) +
                '&num=' + count;
      if (offset > 0) {
        url += '&start=' + offset;
      }
      return UrlFetchApp.fetch(url).getContentText();
    } catch(e) {
      errorOccurred = true;
      return e;
    }
  }

  /**
   * Extracts the URL from an organic result. Returns false if nothing is found.
   * NOTE(review): the value is returned exactly as it appears in the href
   * (i.e. not URL-decoded) -- confirm whether callers want it decoded.
   * @param {string} result HTML fragment of a single result
   * @return {string|boolean} The target URL, or false
   */
  function extractUrl(result) {
    if (result.indexOf('/url?q=') === -1) {
      return false;
    }
    // Anchor on the full "/url?q=" marker so that an unrelated "?q=" earlier
    // in the fragment (e.g. a "/search?q=..." link) is not picked up instead.
    var url = getInside(result, '/url?q=', '&amp');
    return (url !== '') ? url : false;
  }

  /**
   * Extracts the organic result URLs from the page, in page order.
   * Only URLs that start with "http" are kept.
   * @param {string} html Raw HTML of the results page
   * @return {array} Array of URL strings; empty when the page contains no
   *                 <li class="g"> items
   */
  function extractOrganic(html) {
    html = html.replace(/\n|\r/g, '');
    var matched = html.match(/<li class=\"g\">(.*)<\/li>/gi);
    if (!matched) {
      // No organic results (or unexpected markup): nothing to extract.
      return [];
    }
    var results = matched.toString().split("<li class=\"g\">"),
        organicData = [],
        url;
    for (var i = 0; i < results.length; i++) {
      url = extractUrl(results[i]);
      if (url && url.indexOf('http') === 0) {
        organicData.push(url);
      }
    }
    return organicData;
  }

  /**
   * Turns a flat array into a single-column 2-D array (one element per row),
   * e.g. [a, b] becomes [[a], [b]].
   * @param {array} ary Flat array
   * @return {array} Array of one-element arrays
   */
  function transpose(ary) {
    var ret = [];
    for (var i = 0; i < ary.length; i++) {
      ret.push([ary[i]]);
    }
    return ret;
  }

  //--------------------------------------------------------------------------

  return {
    /**
     * Returns Google SERPs for a given keyword
     * @param  {string} kw Keyword
     * @param  {number} optResults Number of results to request (default 10)
     * @param  {number} optStart Zero-based offset of the first result
     *                           (default 0); use it to pull one page at a time
     * @return {array|Error} One-column 2-D array of result URLs, or the
     *                       caught error when the request failed
     */
    get: function(kw, optResults, optStart) {
      var result = fetch(kw, optResults, optStart);
      if (errorOccurred) { return result; }
      return transpose(extractOrganic(result));
    }
  };

})();

/**
 * Top-level wrapper around SeerJs_GoogleScraper.get.
 * @param  {string} keyword Keyword to search for
 * @param  {number} optResults Optional number of results to request
 * @return {*} Whatever SeerJs_GoogleScraper.get returns for these arguments
 */
function googleScraper(keyword, optResults) {
  var serps = SeerJs_GoogleScraper.get(keyword, optResults);
  return serps;
}

/**
 * Manual smoke test: calls googleScraper once with an explicit result count
 * and once relying on the default.  The results are discarded; run it from
 * the script editor (e.g. under the debugger) to inspect the calls.
 * @return {number} Always 0
 */
function test() {
  // With an explicit result count.
  googleScraper("seer interactive", 20);
  // Without one.
  googleScraper("seer interactive");
  return 0;
}

Open in new window

0
rivkamak
Asked:
rivkamak
  • 2
1 Solution
 
Tony McCreathTechnical SEO ConsultantCommented:
If it goes to Excel, can't you just add a column that works out the page number from the position?
0
 
rivkamakAuthor Commented:
That’s the least of my problems. The main thing is to get the top 50 search results in a spreadsheet broken up by page.
0
 
Tony McCreathTechnical SEO ConsultantCommented:
I proposed it as an answer to your problems.

Instead of hacking into this code, why not solve it in the spreadsheet? I'm sure it's a lot easier to create a column whose value is based on the row number divided by 10.
0

Featured Post

Never miss a deadline with monday.com

The revolutionary project management tool is here!   Plan visually with a single glance and make sure your projects get done.

  • 2
Tackle projects and never again get stuck behind a technical roadblock.
Join Now