<?php
error_reporting(E_ALL);
$ga = new GeoAverage;
$ga->addGeoCode('38.9097,-77.0231');
$ga->addGeoCode('38.9376,-77.0928');
$ga->addGeoCode('39.0807,-77.1302');
$ga->geoRedact();
$avgGeoCode = $ga->avgGeoCode;
In the end, the algorithm we used is shown below. Here is an explanation of the programming.
<?php // RAY_EE_average_location.php
error_reporting(E_ALL);
echo "<pre>";
// AN OBJECT TO HOLD GEOCODES AND DISTANCES FROM THE CENTER
Class GeoObject
{
public function __construct($geocode)
{
$this->geocode = $geocode;
$arr = explode(',', $geocode);
$this->lat = $arr[0];
$this->lon = $arr[1];
}
public function setCenter($center)
{
$this->center = $center;
$arr = explode(',', $center);
$lat = $arr[0];
$lon = $arr[1];
$this->distance = $this->compute_distance($this->lat, $this->lon, $lat, $lon, 'MILES');
return $this->distance;
}
public function getGeocode()
{
return $this->geocode;
}
public function getDistance()
{
return $this->distance;
}
// MAN PAGE: http://en.wikipedia.org/wiki/Haversine_formula
protected function compute_distance($from_lat, $from_lon, $to_lat, $to_lon, $units='KM')
{
// CHOOSE A UNIT OF MEASURE BY THE FIRST CHARACTER
$units = strtoupper(substr(trim($units),0,1));
// ENSURE THAT ALL COORDINATES ARE FLOATING POINT VALUES
$from_lat = floatval($from_lat);
$from_lon = floatval($from_lon);
$to_lat = floatval($to_lat);
$to_lon = floatval($to_lon);
// IF THE SAME POINT THE DISTANCE IS ZERO AND HAVERSINE IS NOT NEEDED
if ( ($from_lat == $to_lat) && ($from_lon == $to_lon) )
{
return 0.0;
}
// COMPUTE THE DISTANCE WITH THE HAVERSINE FORMULA
$distance
= acos
( sin(deg2rad($from_lat))
* sin(deg2rad($to_lat))
+ cos(deg2rad($from_lat))
* cos(deg2rad($to_lat))
* cos(deg2rad($from_lon - $to_lon))
)
;
$distance = rad2deg($distance);
// DISTANCE IN MILES AND KM - ADD OTHER MEASURES IF NEEDED
$miles = (float) $distance * 69.0;
$km = (float) $miles * 1.61;
// RETURN MILES
if ($units == 'M') return round($miles,1);
// RETURN KILOMETERS
if ($units == 'K') return round($km,2);
// UNITS NOT UNDERSTOOD
return('INVALID FIFTH ARGUMENT - USE MILES OR KM');
}
}
// USER SORTS FOR AN ARRAY OF GEOOBJECTS
function geoSort_Ascending($a, $b)
{
if ($a->getDistance() == $b->getDistance()) return 0;
return ($a->getDistance() < $b->getDistance()) ? -1 : 1;
}
function geoSort_Descending($a, $b)
{
if ($a->getDistance() == $b->getDistance()) return 0;
return ($a->getDistance() > $b->getDistance()) ? -1 : 1;
}
// A CLASS TO FIND THE AVERAGE LOCATION FROM A SET OF GEOCODES
Class GeoAverage
{
// GROUP OF THESE DEFINITIONS MAKES var_dump() EASIER TO READ
private $percent = 1.0;
public $pctRequired = 0;
public $acceptRadius = 25;
public $pctRetained = 0;
public $maxDistance = -1.0;
public $avgDistance = -1.0;
public $numGeoCodes = -1.0;
public $numRetained = -1.0;
public $numDiscarded = -1.0;
public $avgLat = 0.0;
public $avgLon = 0.0;
public $avgGeoCode = '0.0,0.0';
public $result = 'Unfinished';
public $geoLocale = array();
// CONSTRUCTOR SETS DEFAULT VALUES FOR RESULTS
public function __construct()
{
$this->setAcceptRadius();
$this->setPercent();
$this->setResult();
}
// THE MAXIMUM ACCEPTABLE RANGE FROM THE CENTER POINT
public function setAcceptRadius($radius=25)
{
$this->acceptRadius = $radius;
$this->setResult();
}
// THE MINIMUM ACCEPTABLE PERCENT OF GEOCODES THAT MUST BE KEPT IN THE FINAL DETERMINATION OF THE LOCALE
public function setPercent($limit=100)
{
// IF EXPRESSED IN FRACTIONAL TERMS
if ($limit <= 1.0)
{
$this->percent = $limit;
}
// IF EXPRESSED IN WHOLE NUMBER PERCENT
else
{
$limit = preg_replace('#[^0-9\.]#', NULL, $limit);
$this->percent = $limit / 100.0;
}
if (!$this->percent) $this->percent = 0.01;
if ($this->percent < 0.01) $this->percent = 0.01;
if ($this->percent > 1.00) $this->percent = 1.00;
$this->setResult();
}
// THE STATUS OF THE COMPUTATIONS
private function setResult($x='Unfinished')
{
$this->result = $x;
if ($x == 'Unfinished')
{
$this->avgGeoCode = '0.0,0.0';
$this->geoLocale = array();
}
}
public function addGeoCode($geocode)
{
$this->geoObjects[] = new GeoObject($geocode);
$this->setResult();
}
public function remGeoCode($geocode)
{
foreach ($this->geoObjects as $key => $geo)
{
$gc = (string)$geo->getGeoCode();
if ($gc == $geocode)
{
unset($this->geoObjects[$key]);
$this->setResult();
return TRUE;
}
}
return FALSE;
}
protected function setCenters($center)
{
foreach ($this->geoObjects as $key => $geo)
{
$this->geoObjects[$key]->setCenter($center);
}
}
protected function avgGeoCodes($objs)
{
$lats = array();
$lons = array();
foreach ($objs as $obj)
{
$lats[] = $obj->lat;
$lons[] = $obj->lon;
}
$this->avgLat = array_sum($lats) / count($lats);
$this->avgLon = array_sum($lons) / count($lons);
$this->setCenters( "$this->avgLat,$this->avgLon" );
// SORT THE GEOCODES
usort($objs, "geoSort_Ascending");
return $objs;
}
protected function compute_average_distances($objs)
{
$objs = $this->avgGeoCodes($objs);
$dist = 0.0;
foreach ($objs as $obj)
{
$dist += $obj->getDistance();
}
$dist = number_format( ($dist / count($objs)), 1);
return $dist;
}
// A FUNCTION TO TRY TO COALESCE THE GEOCODES UNTIL GOOD
public function geoRedact()
{
// COPY THE ARRAY OF GEO-OBJECT POINTS
$this->geoLocale = $this->geoObjects;
// THE NUMBER OF ITERATIONS WE CAN MAKE
$num = count($this->geoLocale) - 1;
while ($num)
{
// GET A NEW AVERAGE GEOCODE
$this->geoLocale = $this->avgGeoCodes($this->geoLocale);
$away = array_pop($this->geoLocale);
// COMPARE MAXIMUM DISTANCE TO ACCEPTABLE DISTANCE
$distance = $away->getDistance();
if ($distance <= $this->acceptRadius)
{
// RESTORE THIS ELEMENT BECAUSE IT IS INSIDE THE ACCEPTABLE DISTANCE
$this->geoLocale[] = $away;
$this->setGeoData();
return TRUE;
}
// REPEAT UNTIL IT COALESCES OR WE RUN OUT OF DATA
$num--;
}
// NEVER COALESCED ADEQUATELY -- RETURN THE REMAINING POINTS
$this->setGeoData();
return FALSE;
}
// A FUNCTION TO SET THE FINAL INFORMATION ABOUT THE ATTEMPT TO COALESCE
protected function setGeoData()
{
// MAXIMUM DISTANCE FROM THE RESOLVED CENTER
$away = end($this->geoLocale);
$maxDistance = $away->getDistance();
// AVERAGE DISTANCE FROM THE RESOLVED CENTER
$avgDistance = $this->compute_average_distances($this->geoLocale);
// NUMBER OF TOTAL AND DISCARDED GEOCODES
$numGeoCodes = count($this->geoObjects);
$numRetained = count($this->geoLocale);
$numDiscarded = $numGeoCodes - $numRetained;
$pctRetained = ($numRetained / $numGeoCodes) * 100;
// TIDY UP SOME STATISTICS
$this->maxDistance = number_format($maxDistance, 1);
$this->avgDistance = number_format($avgDistance, 1);
$this->numGeoCodes = number_format($numGeoCodes);
$this->numRetained = number_format($numRetained);
$this->numDiscarded = number_format($numDiscarded);
$this->pctRetained = number_format($pctRetained) . '%';
$this->pctRequired = number_format($this->percent * 100) . '%';
$this->avgLat = number_format($this->avgLat, 6);
$this->avgLon = number_format($this->avgLon, 6);
$this->avgGeoCode = "$this->avgLat,$this->avgLon";
// THE NUMBER OF ELEMENTS WE CAN REMOVE BEFORE DECLARING FAILURE
$lim = count($this->geoLocale) * (1.0 - $this->percent);
$lim = round($lim);
// SET SIGNAL OF SUCCESS OR FAILURE BASED ON AMOUNT OF RETAINED DATA
$this->setResult("Failure");
if ( $numDiscarded <= $lim) $this->setResult("Success");
// SORT THE ORIGINAL BY GREATEST DISTANCE FROM THE FINAL LOCATION
usort($this->geoObjects, "geoSort_Descending");
}
}
// TESTING OUR SCRIPT
$ga = new GeoAverage;
// LOAD UP SOME TEST DATA THAT IS CLOSE TOGETHER
$ga->addGeoCode('38.9097,-77.0231');
$ga->addGeoCode('38.9376,-77.0928');
// SET A TIGHT GEOGRAPHICAL REQUIREMENT
$ga->setAcceptRadius(10);
echo "<h2>Unfinished Object</h2>";
print_r($ga);
// PERFORM THE CALCULATIONS
$ga->geoRedact();
echo "<h2>Finished Object with Success - required 100% / 10 miles</h2>";
print_r($ga);
// ADD AN OUTLIER AND SHOW THE UNFINISHED OBJECT
$ga->addGeoCode('29.7523,-95.3670');
echo "<h2>Unfinished Object (Newly Added GeoCode)</h2>";
print_r($ga);
// RECOMPUTE AND SHOW THE FINISHED OBJECT
$ga->geoRedact();
echo "<h2>Finished Object with Failure - required 100% / 10 miles, got less than 100%</h2>";
print_r($ga);
// LOWER OUR STANDARDS AND RECOMPUTE
$ga->setPercent(0.6);
$ga->geoRedact();
echo "<h2>Finished Object with Success - required 60% / 10 miles, got more than 60%</h2>";
print_r($ga);
// ADD SOME MORE TEST DATA
$ga->addGeoCode('38.9097,-77.0231');
$ga->addGeoCode('38.9376,-77.0928');
$ga->addGeoCode('39.0807,-77.1302');
$ga->addGeoCode('39.0032,-77.1602');
$ga->addGeoCode('38.8747,-77.1130');
$ga->addGeoCode('38.6508,-77.0817');
$ga->addGeoCode('39.0274,-77.2365');
$ga->addGeoCode('38.9878,-77.1262');
$ga->addGeoCode('38.9842,-77.0842');
$ga->addGeoCode('38.9543,-77.1159');
$ga->addGeoCode('38.9599,-76.9269');
$ga->addGeoCode('38.9120,-77.2716');
$ga->addGeoCode('39.6416,-77.6780');
// CONSTRAIN THE DISTANCE AND PERCENT REQUIREMENT
$ga->setAcceptRadius(20);
$ga->setPercent('70%');
// PERFORM SOME CALCULATIONS WITH THE NEW DATA AND NEW CONSTRAINTS
$ga->geoRedact();
echo "<h2>Finished Object with Success - required 70% / 20 miles, got more than 70%</h2>";
print_r($ga);
// RAISE THE ACCEPTABLE DISTANCE
$ga->setAcceptRadius(80);
// PERFORM SOME CALCULATIONS
$ga->geoRedact();
echo "<h2>Finished Object with Success - required 70% / 80 miles, got more than 70%</h2>";
print_r($ga);
// ADD SOME OUTLIERS
$ga->addGeoCode('29.7523,-95.3670');
$ga->addGeoCode('37.3974,-122.0732');
$ga->addGeoCode('37.4192,-122.0574');
// PERFORM SOME CALCULATIONS
$ga->geoRedact();
echo "<h2>Finished Object with Success - required 70% / 80 miles, got more than 70% (some outliers)</h2>";
print_r($ga);
// ADD EVEN MORE OUTLIERS
$ga->addGeoCode('37.3974,-122.0732');
$ga->addGeoCode('37.3974,-122.0732');
$ga->addGeoCode('40.0000,-122.0574');
// RAISE OUR STANDARDS AND NARROW OUR LOCUS OF ACCEPTABLE GEOCODES
$ga->setPercent('75%');
$ga->setAcceptRadius(70);
// PERFORM SOME CALCULATIONS
$ga->geoRedact();
echo "<h2>Finished Object with Failure - required 75% / 70 miles, got less than 75% (more outliers)</h2>";
print_r($ga);
// REMOVE SOME OF THE OUTLIER GEOCODES
if ($ga->remGeoCode('37.3974,-122.0732')) echo PHP_EOL . "REMOVED '37.3974,-122.0732'";
if ($ga->remGeoCode('37.3974,-122.0732')) echo PHP_EOL . "REMOVED '37.3974,-122.0732'";
if ($ga->remGeoCode('37.3974,-122.0732')) echo PHP_EOL . "REMOVED '37.3974,-122.0732'";
if ($ga->remGeoCode('37.3974,-122.0732')) echo PHP_EOL . "REMOVED '37.3974,-122.0732'";
if ($ga->remGeoCode('37.3974,-122.0732')) echo PHP_EOL . "REMOVED '37.3974,-122.0732'";
if ($ga->remGeoCode('37.3974,-122.0732')) echo PHP_EOL . "REMOVED '37.3974,-122.0732'";
// RECALCULATE
$ga->geoRedact();
echo "<h2>Finished Object with Success - required 75% / 70 miles, got more than 75% (fewer outliers)</h2>";
print_r($ga);
Conclusion, or What's This Good For?
Have a question about something in this article? You can receive help directly from the article author. Sign up for a free trial to get started.
Comments (0)