CREATE TABLE `locations` (
`location_id` INT UNSIGNED NOT NULL AUTO_INCREMENT,
`latitude` DECIMAL(10,8),
`longitude` DECIMAL(11,8),
PRIMARY KEY (`location_id`)
);
For the purposes of this article, I used a script to put a million random entries into that locations table. If you want to follow along, here's a simple PHP script to do this fairly quickly:
<?php
$db = new mysqli("DB HOST HERE","USER NAME HERE","PASSWORD HERE","DATABASE NAME HERE");
$db->autocommit(false);
$stmt = $db->prepare("INSERT INTO locations (latitude, longitude) VALUES (?, ?)");
$stmt->bind_param('ss', $latitude, $longitude);
for($i = 0; $i < 1000000; $i++)
{
$latitude = rand(-90,90) + (rand(0,59) / 60) + (rand(0,59) / 3600);
$longitude = rand(-179,180) + (rand(0,59) / 60) + (rand(0,59) / 3600);
$stmt->execute();
}
$db->commit();
Next thing is to find your own GPS coordinates (several sites offer this, one of them is https://www.maps.ie/coordinates.html). For demo purposes, I'm going to assume my latitude is 45.12345678 and my longitude is -111.12345678.
SELECT *, ROUND(
3959 * ACOS(
COS(RADIANS(45.12345678)) *
COS(RADIANS(latitude)) *
COS(RADIANS(longitude) - RADIANS(-111.12345678)) +
SIN(RADIANS(45.12345678)) * SIN(RADIANS(latitude))
)
,1) AS `miles_away`
FROM `locations`
HAVING `miles_away` <= 50
ORDER BY `miles_away`
The above will return all results within a 50-mile radius (the "3959" near the beginning of the query refers to the earth's radius of 6,371,000 meters, converted to miles)
EXPLAIN SELECT *, ROUND .... ORDER BY `miles_away`
You can see under the "rows" column that it looked at ALL 1,000,000 rows! In other words, it had to calculate the distance between my GPS coordinates and all 1,000,000 rows before it could figure out which ones were within a 50-mile radius.
ALTER TABLE `locations`
ADD INDEX `longitude` (`longitude`),
ADD INDEX `latitude` (`latitude`);
Now, every degree of latitude is just over 69 miles "tall" (so 45°N to 46°N is about 69 miles) and every degree of longitude is almost 49 miles (-111°W to -112°W is about 48.9 miles). So if we added a degree or two to each side of our current coordinates, we can create a very general "bounding box" can exclude MOST of the records in our locations table that are definitely WAY outside the 50-mile radius:
SELECT *, ROUND(
3959 * ACOS(
COS(RADIANS(45.12345678)) *
COS(RADIANS(latitude)) *
COS(RADIANS(longitude) - RADIANS(-111.12345678)) +
SIN(RADIANS(45.12345678)) * SIN(RADIANS(latitude))
)
,1) AS `miles_away`
FROM `locations`
WHERE
`latitude` BETWEEN 44.12345678 AND 46.12345678
AND `longitude` BETWEEN -113.12345678 AND -109.12345678
HAVING `miles_away` <= 50
ORDER BY `miles_away`
With that simple change, suddenly the query starts coming back in 0.05 seconds instead of 0.8 seconds!
DEGREES(ASIN(
SIN(RADIANS(<starting latitude>)) * COS(<miles>/3959) +
COS(RADIANS(<starting latitude>)) * SIN(<miles>/3959) * COS(RADIANS(<bearing>))
))
The same function, except for longitude:
DEGREES(RADIANS(<starting longitude>) + ATAN2(
SIN(RADIANS(<bearing>)) * SIN(<miles>/3959) * COS(RADIANS(<starting latitude>)),
COS(<miles>/3959) - SIN(RADIANS(<starting latitude>)) * SIN(RADIANS(<starting latitude>))
))
The bearing is in degrees, like a clock, where 0 degrees is directly north, 45 degrees is northeast, 90 degrees is directly east, 180 degrees is directly south, and 270 degrees is directly west.
DEGREES(ASIN(
SIN(RADIANS(45.12345678)) * COS(50/3959) +
COS(RADIANS(45.12345678)) * SIN(50/3959) * COS(RADIANS(0))
))
Longitude for 50 miles directly east of -111.12345678 (with the latitude of 45.12345678 and bearing of 90 degrees):
DEGREES(RADIANS(-111.12345678) + ATAN2(
SIN(RADIANS(90)) * SIN(50/3959) * COS(RADIANS(45.12345678)),
COS(50/3959) - SIN(RADIANS(45.12345678)) * SIN(RADIANS(45.12345678))
))
With these formulas, we can calculate the edges of our bounding box exactly 50 miles out from the center:
SELECT *, ROUND(
3959 * ACOS(
COS(RADIANS(45.12345678)) *
COS(RADIANS(latitude)) *
COS(RADIANS(longitude) - RADIANS(-111.12345678)) +
SIN(RADIANS(45.12345678)) * SIN(RADIANS(latitude))
)
,1) AS `miles_away`
FROM `locations`
WHERE
`latitude` BETWEEN
DEGREES(ASIN(SIN(RADIANS(45.12345678)) * COS(50/3959) + COS(RADIANS(45.12345678)) * SIN(50/3959) * COS(RADIANS(180))))
AND
DEGREES(ASIN(SIN(RADIANS(45.12345678)) * COS(50/3959) + COS(RADIANS(45.12345678)) * SIN(50/3959) * COS(RADIANS(0))))
AND
`longitude` BETWEEN
DEGREES(RADIANS(-111.12345678) + ATAN2(SIN(RADIANS(270)) * SIN(50/3959) * COS(RADIANS(45.12345678)), COS(50/3959) - SIN(RADIANS(45.12345678)) * SIN(RADIANS(45.12345678))))
AND
DEGREES(RADIANS(-111.12345678) + ATAN2(SIN(RADIANS(90)) * SIN(50/3959) * COS(RADIANS(45.12345678)), COS(50/3959) - SIN(RADIANS(45.12345678)) * SIN(RADIANS(45.12345678))))
HAVING `miles_away` <= 50
ORDER BY `miles_away`
The result is an instant improvement from 0.5 seconds to about 0.25 seconds! If we EXPLAIN the latest query, we now see that the # of rows being examined/calculated has dropped from 8,635 down to 5,264!
SELECT SQL_NO_CACHE *, ROUND ....etc....
Why? Because the query cache is used to hold commonly-executed queries. More often than not, you're using different starting coordinates every single time (different people checking different distances). There's very little point in filling up the query cache with query results that will probably not be seen or used again. It's better to leave that query cache memory available to other queries that truly deserve to be cached.
DELIMITER $$
CREATE PROCEDURE GetLocationsWithinXMiles(IN param_Latitude DECIMAL(10,8), IN param_Longitude DECIMAL(11,8), IN param_WithinXMiles DECIMAL(8,4))
BEGIN
SELECT SQL_NO_CACHE *, ROUND(
3959 * ACOS(
COS(RADIANS(param_Latitude)) *
COS(RADIANS(latitude)) *
COS(RADIANS(longitude) - RADIANS(param_Longitude)) +
SIN(RADIANS(param_Latitude)) * SIN(RADIANS(latitude))
)
,1) AS `miles_away`
FROM `locations`
WHERE
`latitude` BETWEEN
DEGREES(ASIN(SIN(RADIANS(param_Latitude)) * COS(param_WithinXMiles/3959) + COS(RADIANS(param_Latitude)) * SIN(param_WithinXMiles/3959) * COS(RADIANS(180))))
AND
DEGREES(ASIN(SIN(RADIANS(param_Latitude)) * COS(param_WithinXMiles/3959) + COS(RADIANS(param_Latitude)) * SIN(param_WithinXMiles/3959) * COS(RADIANS(0))))
AND
`longitude` BETWEEN
DEGREES(RADIANS(param_Longitude) + ATAN2(SIN(RADIANS(270)) * SIN(param_WithinXMiles/3959) * COS(RADIANS(param_Latitude)), COS(param_WithinXMiles/3959) - SIN(RADIANS(param_Latitude)) * SIN(RADIANS(param_Latitude))))
AND
DEGREES(RADIANS(param_Longitude) + ATAN2(SIN(RADIANS(90)) * SIN(param_WithinXMiles/3959) * COS(RADIANS(param_Latitude)), COS(param_WithinXMiles/3959) - SIN(RADIANS(param_Latitude)) * SIN(RADIANS(param_Latitude))))
HAVING `miles_away` <= param_WithinXMiles
ORDER BY `miles_away`;
END $$
DELIMITER ;
Once the stored procedure is created, you can just execute it with a very easy-to-understand CALL query:
CALL GetLocationsWithinXMiles(45.12345678, -111.12345678, 50);
Couldn't I Do This Client-Side?SELECT SQL_NO_CACHE *
FROM `locations`
WHERE
`latitude` BETWEEN DEGREES(...blah blah...) AND DEGREES(...blah blah...)
AND
`longitude` BETWEEN DEGREES(...blah blah...) AND DEGREES(...blah blah...)
However, is that truly better? Probably not. People who know me know that I advocate for doing as much processing as possible on the client-side rather than using DB resources. However, in this case, let's think about what happens and do some tests.
Have a question about something in this article? You can receive help directly from the article author. Sign up for a free trial to get started.
Comments (0)