One-time geocode of 300k street, city addresses

DanielAttard
DanielAttard used Ask the Experts™
on
I have a list of about 300,000 addresses consisting of:

- street address
- municipality
- province

I would like to find out the latitude and longitude for each of these addresses.  I also want to do this in advance for all addresses, rather than do it as-needed.  

Any ideas for the best way to accomplish this?  

p.s.  This large number of addresses to be geocoded will be a one-time occurrence.  I won't have to do a large number again, but I will have a need to geocode single addresses one at a time as they are added in future.
Comment
Watch Question

Do more with

Expert Office
EXPERT OFFICE® is a registered trademark of EXPERTS EXCHANGE®
Yahoo PlaceFinder
http://developer.yahoo.com/geo/placefinder/

Google Geocoding
https://developers.google.com/maps/documentation/geocoding/

Take notice of the different daily limits
Most Valuable Expert 2011
Top Expert 2016
Commented:
Here is my example for a Geocoder script.  In addition to daily limits, you may want to check the terms of use carefully before you extract and store Yahoo or Google data.  You may require a commercial license if you are going to use the data for anything that is not free and open to the public.

Best of luck with it, ~Ray
<?php // RAY_class_SimpleGeoCoder.php
error_reporting(E_ALL);


// DEMONSTRATE HOW TO CALL THE YAHOO! AND GOOGLE GEOCODERS


// A NOMINAL DATA CLEAN-UP FUNCTION
function get_clean_text_string($s)
{
    return preg_replace('/[^A-Z 0-9_\.,\-#]/i', NULL, $s);
}


// A FREEFORM LOCATION STATEMENT IS OPTIONAL (BUT NOT USED HERE)
$location = $a = $c = $s = $z = NULL;

// THESE ARE THE GET-FORM VARIABLES
$vars = array( 'a', 'c', 's', 'z' );

// FILTER THE ARGUMENTS
if (!empty($_GET))
{
    foreach ($_GET as $k => $v)
    {
        if (!in_array($k, $vars)) continue;
        $$k = get_clean_text_string($v);
    }
    if ($a != NULL) { $location .= $a . ' '; }
    if ($c != NULL) { $location .= $c . ' '; }
    if ($s != NULL) { $location .= $s . ' '; }
    if ($z != NULL) { $location .= $z . ' '; }
    $location = trim($location);
}

// PREPARE THE FORM
$form = <<<ENDFORM
<doctype html>
<head>
<title>$a $c $s $z Yahoo/Google Geocoder Query</title>
</head>
<body>
<form method="get">
Addr: <input type="text" name="a" autocomplete="off" value="$a" />
City: <input type="text" name="c" autocomplete="off" value="$c" />
ST:   <input type="text" name="s" autocomplete="off" value="$s" size="2" />
Zip:  <input type="text" name="z" autocomplete="off" value="$z" size="8" />
<input type="submit" value="Go" />
</form>
</body>
</html>
ENDFORM;

if (!$location)
{
    echo $form;
    die();
}


// PREPARE THE GEOCODER
$y_demo          = new SimpleGeoCoder;
$y_demo->address = $a;
$y_demo->city    = $c;
$y_demo->state   = $s;
$y_demo->zip     = $z;

// TEST THE YAHOO! GEOCODER
$y_demo->geocodeYahoo();
echo "<pre>";
echo PHP_EOL;
echo "YAHOO! DATA FOR <strong>$location</strong>";
echo PHP_EOL;
print_r($y_demo);
echo "</pre>";



// PREPARE THE GEOCODER
$g_demo           = new SimpleGeoCoder;
$g_demo->address  = $a;
$g_demo->city     = $c;
$g_demo->state    = $s;
$g_demo->zip      = $z;

// TEST THE GOOGLE GEOCODER
$g_demo->geocodeGoogle();
echo "<pre>";
echo PHP_EOL;
echo "GOOGLE DATA FOR <strong>$location</strong>";
echo PHP_EOL;
print_r($g_demo);
echo "</pre>";


// ALL DONE
echo $form;
die();



// SIMPLE GEOCODER CLASS
class SimpleGeoCoder
{

    // DECLARE THE INPUT DATA
    public $location; // USE THIS FOR A FREEFORM QUERY, OR USE THESE PARTS
    public $address;
    public $city;
    public $state;
    public $zip;

    // DECLARE THE OUTPUT DATA
    public $latitude;
    public $longitude;
    public $precision;
    public $warning;
    public $geocoder;

    // DECLARE THE CONSTRUCTOR
    public function __construct()
    {
        $this->latitude  = 0.0;
        $this->longitude = 0.0;
        $this->precision = 0;   // HIGHER IS BETTER, BUT Y AND G USE DIFFERENT SCALES
        $this->warning   = '';
        $this->geocoder  = '';
    }

    // DECLARE THE YAHOO! VERSION OF THE WORKHORSE
    public function geocodeYahoo()
    {
        $this->geocoder = 'Yahoo!';
        $yahooUrl       = "http://where.yahooapis.com/geocode?gflags=Q&appid=";
        if (defined("YAHOO_API"))
        {
            $yahooUrl .= YAHOO_API;
        }

        // YOU CAN ASK FOR A FREEFORM QUERY
        if ($this->location != '')
        {
            $yahooUrl .= "&location=" . urlencode($this->location);
        }

        // YOU CAN ASK FOR INDIVIDUAL PIECES OF AN ADDRESS
        else
        {
            $loc = urlencode
            ( trim($this->address)
            . ', '
            . trim($this->city)
            . ', '
            . trim($this->state)
            . ' '
            . trim($this->zip)
            )
            ;
            $yahooUrl .= "&location=" . $loc;
        }

        // EXECUTE YAHOO GEOCODER QUERY SEE http://developer.yahoo.com/geo/placefinder/guide/requests.html
        // NOTE - USE ERROR SUPPRESSION OR IT WILL BARK OUT THE YAHOO API KEY - ON FAILURE RETURNS HTTP 400 BAD REQUEST
        if ($yfp = @fopen($yahooUrl, 'r'))
        {
            $yahooResponse = '';
            while (!feof($yfp)) { $yahooResponse .= fgets($yfp); }
            fclose($yfp);
        }
        else
        {
            return FALSE;
        }

        // EXAMINE THE RESULT
        if ($yahooResponse != '') // NOT EMPTY, WE GOT DATA
        {
            $ydata    = new SimpleXMLElement($yahooResponse);
            // echo "<pre>";var_dump($ydata);

            // CHECK FOR ANY ERROR MESSAGE, IF NONE, EXTRACT THE DATA POINTS
            $y_err    = (string)$ydata->Error;
            if ($y_err)
            {
                $this->warning   = $ydata->ErrorMessage;
                return FALSE;
            }
            else
            {
                $this->precision = (string)$ydata->Result->quality;
                $this->latitude  = (string)$ydata->Result->latitude;
                $this->longitude = (string)$ydata->Result->longitude;
                $this->address   = (string)$ydata->Result->line1;
                $this->city      = (string)$ydata->Result->city;
                $this->state     = (string)$ydata->Result->statecode;
                $this->zip       = (string)$ydata->Result->postal;

                $this->location
                = (string)$ydata->Result->line1
                . ', '
                . (string)$ydata->Result->line2
                . ' '
                . (string)$ydata->Result->line3
                ;
            }
        }

        // NO RESULT - SOMETHING IS SICK AT YAHOO
        else
        {
            return FALSE;
        }
        return TRUE;
    } // END function geocodeYahoo



    // DECLARE THE GOOGLE VERSION OF THE WORKHORSE
    public function geocodeGoogle()
    {
        $this->geocoder = 'Google';
        $googleUrl      = "http://maps.google.com/maps/geo?output=csv";
        $googleUrl      = "http://maps.googleapis.com/maps/api/geocode/xml?sensor=false";
        if (defined("GOOGLE_API"))
        {
            $googleUrl .= "&key=" . GOOGLE_API;
        }

        // YOU CAN ASK FOR A FREEFORM QUERY
        if ($this->location != '')
        {
            $googleUrl .= "&address=" . urlencode(trim($this->location));
        }

        // YOU CAN ASK FOR INDIVIDUAL PIECES OF AN ADDRESS
        else
        {
            $loc = urlencode
            ( trim($this->address)
            . ', '
            . trim($this->city)
            . ', '
            . trim($this->state)
            . ' '
            . trim($this->zip)
            )
            ;
            $googleUrl .= "&address=$loc";
        }

        // EXECUTE GOOGLE GEOCODER QUERY SEE http://code.google.com/apis/maps/documentation/geocoding/
        if ($gfp = @fopen($googleUrl, 'r'))
        {
            $googleResponse = '';
            while (!feof($gfp)) { $googleResponse .= fgets($gfp); }
            fclose($gfp);
        }
        else
        {
            return FALSE;
        }

        // EXTRACT THE DATA
        $gdata    = new SimpleXMLElement($googleResponse);

        // ON FAILURE
        if ($gdata->status != 'OK') return FALSE;

        // GEOCODE ACCURACY IS A TEXT STRING NOW
        $this->location  = (string)$gdata->result->formatted_address;
        $this->precision = (string)$gdata->result->type;
        $this->latitude  = (string)$gdata->result->geometry->location->lat;
        $this->longitude = (string)$gdata->result->geometry->location->lng;
        return TRUE;
    } // END function geocodeGoogle

} // END class SimpleGeocoder

Open in new window

@ray wow

Author

Commented:
Absolutely, @ray wow!!

Do more with

Expert Office
Submit tech questions to Ask the Experts™ at any time to receive solutions, advice, and new ideas from leading industry professionals.

Start 7-Day Free Trial