https://en.wikipedia.org/wiki/List_of_Iranian_cities_by_population
http://localhost/city.php
Tehran
Mashhad
Isfahan
Karaj
Tabriz
Shiraz
Ahvaz
Qom
Kermanshah
Orumieh
Rasht
Zahedan
Kerman
Arak
Hamedan
Yazd
Ardabil
Bandar Abbas
Eslamshahr
Qazvin
Zanjan
Khorramabad
Sanandaj
Malard
Shahr-e Qods
Kashan
Gorgan
Golestan
Sari
Shahriar
Dezful
Khomeinishahr
Borujerd
Nishapur
Sabzevar
Najafabad
Amol
Babol
Varamin
Abadan
Pakdasht
Khoy
Saveh
Bojnourd
Qa'em Shahr
Bushehr
Gharchak
Sirjan
Birjand
Ilam
Bukan
Maragheh
Malayer
Shahrekord
Nasimshahr
Mahshahr
Semnan
Rafsanjan
Mahabad
Gonbad-e Qabus
Shahinshahr
Shahrood
Saqqez
Marvdasht
Zabol
Torbat-e Heydarieh
Khorramshahr
Andimeshk
Marand
Shahreza
Miandoab
Izeh
Bandar-e Anzali
Jahrom
Jiroft
Marivan
Kamal Shahr
Yasuj
Nazarabad
Behbahan
Bam
Shush
Fasa
Quchan
Masjed Soleyman
Mohammadshahr
Dorud
<?php
// Reports whether newfile.txt exists. The name is relative, so it is resolved
// against the script's current working directory.
$filename = 'newfile.txt';
$state = file_exists($filename) ? 'exists' : 'does not exist';
// <br> is a void element: the original "</br>" closing form is not valid HTML.
echo 'The file ' . $filename . ' ' . $state . '.<br>';
?>
/var/www/html# cat city.php
<?php
/**
 * Downloads the Wikipedia "List of Iranian cities by population" page and
 * writes the text of the second <td> of every row of the first table that
 * carries a class attribute to newfile.txt, one value per line.
 *
 * Terminates with a message when the page cannot be fetched or the output
 * file cannot be opened. Prints "Done" on completion.
 */

// Row lists of every table whose class list contains "wikitable".
// NOTE(review): collected but not read anywhere in this script.
$matches = array();

// Fetch the page. CURLOPT_RETURNTRANSFER makes curl_exec() return the body
// as a string instead of printing it; on failure it returns false.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "https://en.wikipedia.org/wiki/List_of_Iranian_cities_by_population");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
$html = curl_exec($ch);
if ($html === false || $html === '') {
    // Read the error text before the handle is closed.
    $fetchError = curl_error($ch);
    curl_close($ch);
    die("Unable to fetch page: " . $fetchError);
}
curl_close($ch);

// libxml's HTML parser reports modern HTML5 markup as errors; buffer them
// internally instead of emitting one PHP warning per unknown tag, then
// restore the previous setting.
$previousLibxmlSetting = libxml_use_internal_errors(true);
$dom = new DOMDocument;
$dom->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

// Counts only the tables that have a class attribute.
$tableNumber = 0;
foreach ($dom->getElementsByTagName('table') as $table) {
    if (!$table->hasAttribute('class')) {
        continue;
    }
    $tableNumber = $tableNumber + 1;

    $class = explode(' ', $table->getAttribute('class'));
    if (in_array('wikitable', $class, true)) {
        $matches[] = $table->getElementsByTagName('tr');
    }

    if ($tableNumber == 1) {
        // The relative path is resolved against the working directory; fopen()
        // returns false when that directory is not writable by the PHP process.
        $myfile = fopen("newfile.txt", "w") or die("Unable to open file!");
        foreach ($table->getElementsByTagName('tr') as $tr) {
            $cell = $tr->getElementsByTagName('td')->item(1);
            if ($cell === null) {
                // Header rows hold <th> cells only, so there is no second <td>.
                continue;
            }
            fwrite($myfile, $cell->nodeValue . "\n");
        }
        fclose($myfile);
    }
}
echo "Done";
?>
root@kali:/var/www/html#
/var/www/html# ls -l
total 36
-rw-r--r-- 1 root root 22 Apr 11 22:57 check.php
-rwxrwxrwx 1 root root 1019 Apr 11 22:15 city.php
-rw-r--r-- 1 root root 75 Mar 30 15:41 composer.json
-rw-r--r-- 1 root root 11971 Mar 30 15:41 composer.lock
drwxrwxrwx 3 root root 4096 Mar 30 16:32 inst
drwxr-xr-x 3 root root 4096 Apr 11 13:52 login
drwxrwxrwx 8 root root 4096 Mar 30 15:58 vendor
Unable to open file!
/var/www/html# ls -al
total 44
drwxr-xr-x 5 root root 4096 Apr 11 22:57 .
drwxr-xr-x 4 root root 4096 Feb 23 15:38 ..
-rw-r--r-- 1 root root 22 Apr 11 22:57 check.php
-rwxrwxrwx 1 root root 1019 Apr 11 22:15 city.php
-rw-r--r-- 1 root root 75 Mar 30 15:41 composer.json
-rw-r--r-- 1 root root 11971 Mar 30 15:41 composer.lock
drwxrwxrwx 3 root root 4096 Mar 30 16:32 inst
drwxr-xr-x 3 root root 4096 Apr 11 13:52 login
drwxrwxrwx 8 root root 4096 Mar 30 15:58 vendor
<?php // demo/temp_mehran.php
/**
 * Demo script: reads the Wikipedia list of Iranian cities by population,
 * extracts one record per table row and prints the collected records.
 *
 * Related discussion:
 * https://www.experts-exchange.com/questions/29015495/curl-parse-data-from-site.html
 */
// Report every category of PHP error while the script runs.
error_reporting(E_ALL);
// Open a <pre> element so the print_r() dump at the end of the script keeps
// its line breaks in a browser.
// NOTE(review): no matching </pre> is emitted in the visible script.
echo '<pre>';
// A CLASS TO REPRESENT INFORMATION ABOUT A CITY
/**
 * Plain data holder for one row of the city table.
 *
 * Every property starts out as null and is assigned by the scraping loop
 * from the corresponding table cell, in declaration order.
 */
class City
{
    /** Text of the first table cell (rank). */
    public $rank;

    /** Text of the second table cell (city name). */
    public $name;

    /** Text of the third table cell (province). */
    public $province;

    /** First four characters of the fourth table cell (founding year). */
    public $founded;

    /** Population text taken from the fifth table cell. */
    public $pop2011;
}
// A COLLECTION OF EXTRACTED DATA
$cities = [];

// READ FROM THE WIKIPEDIA
$url = 'https://en.wikipedia.org/wiki/List_of_Iranian_cities_by_population';
$doc = file_get_contents($url);
if (!$doc) {
    trigger_error("Unable to read $url", E_USER_ERROR);
}

// ACTIVATE THIS TO SEE THE HTML DOCUMENT
// echo htmlentities($doc);

// TRIM THE DOCUMENT TO ISOLATE THE INFORMATION WE WANT
// Keep only the markup between the first sortable wikitable's opening tag
// and the next closing </table>.
$sig = '<table class="wikitable sortable">';
$doc = explode($sig, $doc);
if (!isset($doc[1])) {
    // The exact opening tag was not found (for instance because the page
    // markup changed); stop here instead of reading an undefined offset.
    trigger_error("Unable to find the city table in $url", E_USER_ERROR);
}
$sig = '</table>';
$doc = explode($sig, $doc[1]);
$doc = $doc[0];

// TIDY UP SOME UNRULY TAGS
// Normalise centred cells so every cell starts with a plain <td>.
$doc = str_replace('<td align="center">', '<td>', $doc);

// Split into rows; element 0 is the text before the first <tr> and
// element 1 is the header row, so both are discarded.
$sig = '<tr>';
$trs = explode($sig, $doc);
unset($trs[0], $trs[1]);

// PROCESS THE DATA IN EACH TABLE ROW
$sig = '<td>';
foreach ($trs as $tr) {
    $tds = explode($sig, $tr);

    // A usable row splits into the text before the first <td> plus at least
    // five cells; skip anything shorter rather than index missing cells.
    if (count($tds) < 6) {
        continue;
    }

    $city = new City;
    $city->rank     = trim(strip_tags($tds[1]));
    $city->name     = trim(strip_tags($tds[2]));
    $city->province = trim(strip_tags($tds[3]));

    // Only the first four characters of the cell are kept.
    $city->founded  = trim(strip_tags($tds[4]));
    $city->founded  = substr($city->founded, 0, 4);

    // The value used is the text that follows the first </span> in the cell.
    // When the cell has no span, fall back to the whole cell.
    $pop2011 = explode('</span>', $tds[5]);
    $popText = isset($pop2011[1]) ? $pop2011[1] : $pop2011[0];
    $city->pop2011 = trim(strip_tags($popText));

    $cities[] = $city;
}

// SHOW THE WORK PRODUCT
print_r($cities);
# ls -al
total 44
drwxrwxrwx 5 root root 4096 Apr 11 22:57 .
drwxr-xr-x 4 root root 4096 Feb 23 15:38 ..
-rwxrwxrwx 1 root root 22 Apr 11 22:57 check.php
-rwxrwxrwx 1 root root 1019 Apr 11 22:15 city.php
-rwxrwxrwx 1 root root 75 Mar 30 15:41 composer.json
-rwxrwxrwx 1 root root 11971 Mar 30 15:41 composer.lock
drwxrwxrwx 3 root root 4096 Mar 30 16:32 inst
drwxrwxrwx 3 root root 4096 Apr 11 13:52 login
drwxrwxrwx 8 root root 4096 Mar 30 15:58 vendor
https://iconoun.com/demo/temp_mehran.php
Open in new window