Member_2_5230414
asked on
PHP screen scraper running slow - how can i speed it up?
Hi,
I am currently running a web scraper to pull race results but its taking so long that it can time out.
Is there any way I can speed up the process by making the code run faster - cutting out some of the searching ect?
I have seen some posts about multi curl??? would that help???
Any advice would be great
I am currently running a web scraper to pull race results but its taking so long that it can time out.
Is there any way I can speed up the process by making the code run faster - cutting out some of the searching ect?
I have seen some posts about multi curl??? would that help???
$url = "http://form.timeform.betfair.com/daypage?date=20150516"; //WILL NEED TO PULL TOMORROWS DATE AS DD-MM-YYY
$html = curl($url);
$dom = new DOMDocument();
@$dom->loadHTML($html);
$dom->preserveWhiteSpace = false;
$xpath = new DOMXPath($dom);
//pull the individual cards for the day
//li class="rac-cardsclass="ix ixc"
$getdropdown = '//div[contains(@data-location, "RACING_COUNTRY_GB_IE")]//div[contains(@class, "course")]';
$getdropdown2 = $xpath->query($getdropdown);
//loop through each individual card
foreach($getdropdown2 as $dropresults) {
//loop through and get all the a tags
$arr = $dropresults->getElementsByTagName("a");
foreach($arr as $item) {
//only grab the links which point to the results page
if(strpos($item->getAttribute('href'), 'raceresult') !== false) {
//grab the code
$code = explode("=", $item->getAttribute('href'));
$code = end($code);
$url = "http://form.timeform.betfair.com/raceresult?raceId=" . $code; //WILL NEED TO PULL TOMORROWS DATE AS DD-MM-YYY
$html = curl($url);
$dom = new DOMDocument();
@$dom->loadHTML($html);
$dom->preserveWhiteSpace = false;
$xpath = new DOMXPath($dom);
$spanTexts = array();
//get the place name
$getplacename = '//span[contains(@class, "locality")]';
$getplacename2 = $xpath->query($getplacename);
//loop through each individual card
foreach($getplacename2 as $getplacename22) {
$venue = $getplacename22->textContent;
} //$getplacename2 as $getplacename22
$gettime = '//abbr [contains(@class, "dtstart")]';
//get the Date and the Time
$gettime2 = $xpath->query($gettime);
foreach($gettime2 as $gettime22) {
$Dateandtime = date(trim($gettime22->getAttribute('title')), strtotime('+5 hours'));
} //$gettime2 as $gettime22
//pull the data for the race e.g going money ect
$getdropdown22 = '//div[contains(@class, "content")]/p';
$getdropdown222 = $xpath->query($getdropdown22);
$getracename = '//div[contains(@class, "content")]//p[contains(@class, "race-description")]';
$getracename2 = $xpath->query($getracename);
$racename = "";
foreach($getracename2 as $getracename) {
$racename = trim(str_replace("'", "", $getracename->textContent));
}
foreach($getdropdown222 as $dropresults2) {
foreach($dropresults2->childNodes as $node) {
if(is_object($node) && $node->nodeType === XML_ELEMENT_NODE && strtolower($node->tagName) === 'span') {
$spanTexts[] = (string) $node->textContent;
}
}
if(count($spanTexts) < 6)
continue;
list($going, $distance, $age, $prizemoney, $runners, $racetype) = $spanTexts;
$going = str_replace(array(
'Â',
'Going:',
'|'
), '', $going);
$distance = miletofurlong($distance = trim(GetBetween($distance, ':', 'Â')));
$age = trim(GetBetween($age, ':', 'Â'));
$prizemoney = trim(GetBetween($prizemoney, '£', 'Â'));
$runners = trim(GetBetween($runners, ':', 'Â'));
$racetype = trim(GetBetween($racetype, ':', 'Â'));
}
//pull the individual horse data
$getdropdown = '//div[contains(@class, "table-container")]//tbody//tr';
$getdropdown2 = $xpath->query($getdropdown);
//loop through each individual card
foreach($getdropdown2 as $dropresults) {
$position = $dropresults->childNodes->item(0)->childNodes->item(1)->textContent;
$draw = str_replace(array('(',')'), '', $dropresults->childNodes->item(0)->childNodes->item(3)->textContent);
$losingdist = str_replace('Â', '', trim($dropresults->childNodes->item(2)->textContent));
if(strpos($losingdist, '¾') !== false) {
$losingdist = str_replace('¾', '.75', $losingdist);
}
if(strpos($losingdist, '½') !== false) {
$losingdist = str_replace('½', '.5', $losingdist);
}
if(strpos($losingdist, '¼') !== false) {
$losingdist = str_replace('¼', '.25', $losingdist);
}
$losingdist;
$horse = trim(preg_replace("/\([^\)]+\)/","",str_replace("'","",trim($dropresults->childNodes->item(4)->textContent))));
$horseage = trim($dropresults->childNodes->item(6)->textContent);
$weight = trim($dropresults->childNodes->item(8)->childNodes->item(1)->textContent);
$or = str_replace(array('(',')'), '', trim($dropresults->childNodes->item(8)->childNodes->item(3)->textContent));
str_replace('-', '', $eq = trim($dropresults->childNodes->item(10)->textContent));
$jockey = trim($dropresults->childNodes->item(12)->childNodes->item(1)->textContent);
$trainer = trim($dropresults->childNodes->item(12)->childNodes->item(4)->textContent);
$highandlowinrunning = trim($dropresults->childNodes->item(14)->childNodes->item(1)->textContent);
$highandlow = explode("/", $highandlowinrunning);
str_replace('-', '', $lowodds = trim($highandlow['1']));
str_replace('-', '', $highodds = trim($highandlow['0']));
$bfsp = trim($dropresults->childNodes->item(16)->childNodes->item(1)->textContent);
$isp = trim(str_replace('/', '', $dropresults->childNodes->item(16)->childNodes->item(3)->textContent));
$placeodds = trim($dropresults->childNodes->item(18)->textContent);
$venue = mysqli_real_escape_string($db, $venue);
$Dateandtime = mysqli_real_escape_string($db,$Dateandtime);
$going = mysqli_real_escape_string($db, $going);
$distance = mysqli_real_escape_string($db,$distance);
$age = mysqli_real_escape_string($db,$age);
$prizemoney = mysqli_real_escape_string($db,$prizemoney);
$runners = mysqli_real_escape_string($db,$runners );
$racetype = mysqli_real_escape_string($db,$racetype);
$position = mysqli_real_escape_string($db,$position );
$draw = mysqli_real_escape_string($db,$draw);
$losingdist = mysqli_real_escape_string($db,$losingdist);
$horse = mysqli_real_escape_string($db,$horse );
$age = mysqli_real_escape_string($db,$age);
$weight = mysqli_real_escape_string($db,$weight);
$or = mysqli_real_escape_string($db,$or );
$eq = mysqli_real_escape_string($db,$eq );
$jockey = mysqli_real_escape_string($db,$jockey);
$trainer = mysqli_real_escape_string($db,$trainer);
$lowodds = mysqli_real_escape_string($db,$lowodds);
$highodds = mysqli_real_escape_string($db,$highodds);
$bfsp = mysqli_real_escape_string($db,$bfsp);
$isp = mysqli_real_escape_string($db,$isp);
$placeodds = mysqli_real_escape_string($db,$placeodds);
$racename = mysqli_real_escape_string($db,$racename);
Any advice would be great
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.