Link to home
Start Free TrialLog in
Avatar of Member_2_5230414
Member_2_5230414

asked on

PHP screen scraper running slow - how can I speed it up?

Hi,

I am currently running a web scraper to pull race results, but it's taking so long that it can time out.

Is there any way I can speed up the process by making the code run faster - cutting out some of the searching, etc.?

I have seen some posts about multi curl - would that help?
 

    // Scrape one day's race results: fetch the day page, follow every
    // "raceresult" link found in the GB/IE section, and extract the race-level
    // and per-runner fields, escaped for use in SQL through the mysqli link $db.
    // NOTE(review): curl(), GetBetween(), miletofurlong() and $db are defined
    // outside this snippet; their exact behaviour is assumed - confirm.
    $url  = "http://form.timeform.betfair.com/daypage?date=20150516"; // TODO: build the date dynamically; the URL takes it as YYYYMMDD
    $html = curl($url);
    $dom  = new DOMDocument();
    @$dom->loadHTML($html);
    // NOTE(review): this is set after loadHTML(), so it does not alter the
    // already-parsed tree. The hard-coded childNodes indices used further down
    // appear to rely on whitespace text nodes being present - confirm before
    // moving this line above the load.
    $dom->preserveWhiteSpace = false;
    $xpath                   = new DOMXPath($dom);
    //pull the individual cards for the day
    $getdropdown             = '//div[contains(@data-location, "RACING_COUNTRY_GB_IE")]//div[contains(@class, "course")]';
    $getdropdown2            = $xpath->query($getdropdown);
    //loop through each individual card
    foreach($getdropdown2 as $dropresults) {
        //loop through and get all the a tags
        $arr = $dropresults->getElementsByTagName("a");
        foreach($arr as $item) {
            $href = $item->getAttribute('href');
            //only grab the links which point to the results page
            if(strpos($href, 'raceresult') !== false) {
                //the race id is whatever follows the last "=" in the link
                $code = explode("=", $href);
                $code = end($code);
                $url  = "http://form.timeform.betfair.com/raceresult?raceId=" . $code;
                $html = curl($url);
                $dom  = new DOMDocument();
                @$dom->loadHTML($html);
                $dom->preserveWhiteSpace = false;
                $xpath                   = new DOMXPath($dom);
                $spanTexts               = array();

                //reset the race-level fields so nothing leaks in from the
                //previous race when this page is missing an element
                $venue       = '';
                $Dateandtime = '';
                $racename    = '';
                $going       = '';
                $distance    = '';
                $age         = '';
                $prizemoney  = '';
                $runners     = '';
                $racetype    = '';

                //get the place name (the last matching span wins)
                $getplacename  = '//span[contains(@class, "locality")]';
                $getplacename2 = $xpath->query($getplacename);
                foreach($getplacename2 as $getplacename22) {
                    $venue = $getplacename22->textContent;
                }

                //get the Date and the Time
                $gettime  = '//abbr [contains(@class, "dtstart")]';
                $gettime2 = $xpath->query($gettime);
                foreach($gettime2 as $gettime22) {
                    // NOTE(review): date() receives the title attribute as its
                    // *format* string and "now + 5 hours" as the timestamp, so
                    // any letters in the title are expanded as format codes.
                    // Left unchanged because the intended value cannot be
                    // determined from this snippet - confirm.
                    $Dateandtime = date(trim($gettime22->getAttribute('title')), strtotime('+5 hours'));
                }

                //get the race name (apostrophes stripped, last match wins)
                $getracename  = '//div[contains(@class, "content")]//p[contains(@class, "race-description")]';
                $getracename2 = $xpath->query($getracename);
                foreach($getracename2 as $racenamenode) {
                    $racename = trim(str_replace("'", "", $racenamenode->textContent));
                }

                //pull the data for the race e.g. going, prize money etc.
                //collect the text of every <span> that is a direct child of a content <p>
                $getdropdown22  = '//div[contains(@class, "content")]/p';
                $getdropdown222 = $xpath->query($getdropdown22);
                foreach($getdropdown222 as $dropresults2) {
                    foreach($dropresults2->childNodes as $node) {
                        if($node->nodeType === XML_ELEMENT_NODE && strtolower($node->tagName) === 'span') {
                            $spanTexts[] = (string) $node->textContent;
                        }
                    }
                }
                //the first six spans carry the race details; parse them once
                //instead of re-parsing the same six values for every <p>
                if(count($spanTexts) >= 6) {
                    list($going, $distance, $age, $prizemoney, $runners, $racetype) = $spanTexts;
                    $going      = str_replace(array(
                        'Â',
                        'Going:',
                        '|'
                    ), '', $going);
                    $distance   = miletofurlong(trim(GetBetween($distance, ':', 'Â')));
                    $age        = trim(GetBetween($age, ':', 'Â'));
                    $prizemoney = trim(GetBetween($prizemoney, '£', 'Â'));
                    $runners    = trim(GetBetween($runners, ':', 'Â'));
                    $racetype   = trim(GetBetween($racetype, ':', 'Â'));
                }

                //escape the race-level values ONCE per race. Doing this inside
                //the runner loop re-escaped the already-escaped strings for
                //every runner after the first.
                $venue       = mysqli_real_escape_string($db, $venue);
                $Dateandtime = mysqli_real_escape_string($db, $Dateandtime);
                $going       = mysqli_real_escape_string($db, $going);
                $distance    = mysqli_real_escape_string($db, $distance);
                $age         = mysqli_real_escape_string($db, $age);
                $prizemoney  = mysqli_real_escape_string($db, $prizemoney);
                $runners     = mysqli_real_escape_string($db, $runners);
                $racetype    = mysqli_real_escape_string($db, $racetype);
                $racename    = mysqli_real_escape_string($db, $racename);

                //pull the individual horse data
                $getdropdown  = '//div[contains(@class, "table-container")]//tbody//tr';
                $getdropdown2 = $xpath->query($getdropdown);
                //loop through each runner row
                foreach($getdropdown2 as $dropresults) {
                    $cells = $dropresults->childNodes;
                    //the highest index read below is 18; a shorter row cannot be
                    //parsed and would otherwise fail on a null child lookup
                    if($cells->length < 19) {
                        continue;
                    }
                    $position   = $cells->item(0)->childNodes->item(1)->textContent;
                    $draw       = str_replace(array('(',')'), '', $cells->item(0)->childNodes->item(3)->textContent);
                    //strip the stray 'Â' first, then turn the fraction glyphs into decimals
                    $losingdist = str_replace('Â', '', trim($cells->item(2)->textContent));
                    $losingdist = str_replace(array('¾', '½', '¼'), array('.75', '.5', '.25'), $losingdist);
                    //horse name without apostrophes or any "(...)" suffix
                    $horse      = trim(preg_replace("/\([^\)]+\)/","",str_replace("'","",trim($cells->item(4)->textContent))));
                    $horseage   = trim($cells->item(6)->textContent);
                    $weight     = trim($cells->item(8)->childNodes->item(1)->textContent);
                    $or         = str_replace(array('(',')'), '', trim($cells->item(8)->childNodes->item(3)->textContent));
                    //the original discarded the str_replace() result, so '-' was never removed
                    $eq         = str_replace('-', '', trim($cells->item(10)->textContent));
                    $jockey     = trim($cells->item(12)->childNodes->item(1)->textContent);
                    $trainer    = trim($cells->item(12)->childNodes->item(4)->textContent);
                    //in-running odds arrive as "high/low"; pad so a missing "/" yields '' instead of an undefined offset
                    $highandlowinrunning = trim($cells->item(14)->childNodes->item(1)->textContent);
                    $highandlow          = array_pad(explode("/", $highandlowinrunning), 2, '');
                    $lowodds    = str_replace('-', '', trim($highandlow[1]));
                    $highodds   = str_replace('-', '', trim($highandlow[0]));
                    $bfsp       = trim($cells->item(16)->childNodes->item(1)->textContent);
                    $isp        = trim(str_replace('/', '', $cells->item(16)->childNodes->item(3)->textContent));
                    $placeodds  = trim($cells->item(18)->textContent);

                    //escape the per-runner values ($horseage was previously
                    //skipped while $age was escaped twice)
                    $position   = mysqli_real_escape_string($db, $position);
                    $draw       = mysqli_real_escape_string($db, $draw);
                    $losingdist = mysqli_real_escape_string($db, $losingdist);
                    $horse      = mysqli_real_escape_string($db, $horse);
                    $horseage   = mysqli_real_escape_string($db, $horseage);
                    $weight     = mysqli_real_escape_string($db, $weight);
                    $or         = mysqli_real_escape_string($db, $or);
                    $eq         = mysqli_real_escape_string($db, $eq);
                    $jockey     = mysqli_real_escape_string($db, $jockey);
                    $trainer    = mysqli_real_escape_string($db, $trainer);
                    $lowodds    = mysqli_real_escape_string($db, $lowodds);
                    $highodds   = mysqli_real_escape_string($db, $highodds);
                    $bfsp       = mysqli_real_escape_string($db, $bfsp);
                    $isp        = mysqli_real_escape_string($db, $isp);
                    $placeodds  = mysqli_real_escape_string($db, $placeodds);

Open in new window


Any advice would be great
ASKER CERTIFIED SOLUTION
Avatar of Ray Paseur
Ray Paseur
Flag of United States of America image

Link to home
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
Start Free Trial