SET DATEFORMAT YMD
GO
IF OBJECT_ID('policy') IS NOT NULL
DROP TABLE policy
CREATE TABLE policy (
id int identity(1,1) PRIMARY KEY,
policy_number int,
dt_start date,
dt_end date,
annual_premium money)
INSERT INTO policy (policy_number, dt_start, dt_end, annual_premium)
VALUES
(1, '2000-01-01', '2005-12-31', 100),
(1, '2006-01-01', '2010-12-31', 110),
(1, '1-1-2011', NULL, 100), -- 1 Good
(2, '2010-01-01', NULL, 145), -- 2 Good
(3, '2010-01-01', '2005-12-31', 110), -- 3 Good, even through the rows are entered out of order
(4, '2000-01-01', '2005-12-31', 100),
(4, '2006-01-01', '2007-09-05', 110), -- 4 Good, ended 9-5-2007
(5, '2000-01-01', '2005-12-31', 100),
(5, '2006-01-05', '2010-12-31', 120), -- 5 GAP from 12-31-2005 to 1-5-2006
(6, '2000-01-01', '2005-12-31', 100), -- 6 Good
(7, '2000-01-01', '2005-12-31', 100),
(7, '2005-12-25', '2010-12-31', 110), -- 7 OVERLAP from 12-31-2005 to 12-31-2005
(8, '2000-01-01', '2005-12-31', 100),
(8, '2006-01-01', '2006-01-01', 110), -- 8 Good row, but to make life easier we'll throw this one out.
(9, '2006-01-01', '2010-01-01', 110), -- 9 Good
(10, '2006-01-01', '2027-12-31', 110), -- 10 Good, even though it has a future end date.
(11, '2006-01-01', '2004-12-31', 110), -- 11 Bad, START DATE = END DATE.
(12, NULL, '2004-12-31', 110), -- 12 Bad, START DATE IS NULL
(13, '2011-01-01', NULL, 110),
(13, '2006-01-01', '2010-12-31', 100), -- 13 Good, even though the rows are entered out of order
(3, '2006-01-01', NULL, 110),
(14, '2000-01-01', '2005-12-31', 100),
(14, '2000-01-01', '2005-12-31', 100), -- 14 one good, one bad, duplicate.
(15, '2000-01-01', '2005-12-31', 100),
(15, '2000-01-01', '2005-12-31', 100),
(15, '2000-01-01', '2005-12-31', 100) -- 15 one good, two bad, duplicate
IF OBJECT_ID('tempdb..#policy_time_series') IS NOT NULL
DROP TABLE #policy_time_series
CREATE TABLE #policy_time_series (
id int, -- Not identity
policy_number int,
dt_start date,
dt_end date,
time_variance_days bigint,
error_type varchar(50));
with
-- Create separate sets for all the single-row errors
-- Identify start date > end date
bad_rows_bad_order as (SELECT id, policy_number, dt_start, dt_end, 'Start date after end date' as error_type FROM policy WHERE dt_end < dt_start)
-- Identify Start dates that are NULL
, bad_rows_nulls as (SELECT id, policy_number, dt_start, dt_end, 'Start date IS NULL' as error_type FROM policy WHERE dt_start IS NULL)
-- Identify Start dates = End Dates
, bad_rows_same_date as (SELECT id, policy_number, dt_start, dt_end, 'Start date = End Date' as error_type FROM policy WHERE dt_start = dt_end)
-- Identify duplicates
, bad_rows_duplicate as (
SELECT id, policy_number, dt_start, dt_end, 'Duplicate Row' as error_type
FROM (
SELECT id, policy_number, dt_start, dt_end, ROW_NUMBER() OVER (PARTITION BY policy_number, dt_start, dt_end ORDER BY (SELECT 0)) AS DuplicateRowNumber
FROM policy ) a
WHERE a.DuplicateRowNumber > 1)
-- Perform the time series for all the remaining rows
, date_sort_order as
(
SELECT id, policy_number, dt_start, dt_end, annual_premium,
ROW_NUMBER() OVER (PARTITION BY policy_number ORDER BY dt_start, dt_end) as date_sort_order
FROM policy
WHERE
id NOT IN (
SELECT id FROM bad_rows_bad_order UNION ALL
SELECT id FROM bad_rows_nulls UNION ALL
SELECT id FROM bad_rows_same_date UNION ALL
SELECT id FROM bad_rows_duplicate)
)
-- Calculate the difference in days between remaining rows, then add back the errors.
, date_sort_order_with_differences as
(
-- Good records: Calculate the difference in days between a row and the next one in line
SELECT base.id, base.policy_number, base.dt_start, base.dt_end,
compare.dt_start as dt_start_next,
DATEDIFF(DAY, base.dt_end, compare.dt_start) as time_variance_days,
NULL as error_type
FROM date_sort_order base
-- Compare dates with previous row
LEFT JOIN date_sort_order compare ON base.policy_number = compare.policy_number AND base.date_sort_order = (compare.date_sort_order - 1)
UNION ALL
SELECT id, policy_number, dt_start, dt_end, NULL as dt_start_next, NULL as time_variance_days, error_type
FROM bad_rows_bad_order
UNION ALL
SELECT id, policy_number, dt_start, dt_end, NULL as dt_start_next, NULL as time_variance_days, error_type
FROM bad_rows_nulls
UNION ALL
SELECT id, policy_number, dt_start, dt_end, NULL as dt_start_next, NULL as time_variance_days, error_type
FROM bad_rows_same_date
UNION ALL
SELECT id, policy_number, dt_start, dt_end, NULL as dt_start_next, NULL as time_variance_days, error_type
FROM bad_rows_duplicate
)
-- Now insert all of these rows into a temp table
INSERT INTO #policy_time_series (id, policy_number, dt_start, dt_end,time_variance_days, error_type)
SELECT id, policy_number, dt_start, dt_end, time_variance_days, error_type
from date_sort_order_with_differences
-- Mark error_type for gaps and overlaps
UPDATE #policy_time_series
SET error_type =
CASE WHEN time_variance_days > 1 THEN 'Gaps' WHEN time_variance_days < 1 THEN 'Overlap' END
WHERE error_type IS NULL AND time_variance_days IS NOT NULL
-- Show the results
SELECT 'All rows' as row_status, * FROM #policy_time_series ORDER BY id
SELECT 'Good rows'as row_status, * FROM #policy_time_series WHERE error_type IS NULL AND COALESCE(time_variance_days,1) = 1 ORDER BY id
SELECT 'Bad rows' as row_status, * FROM #policy_time_series WHERE error_type IS NOT NULL ORDER BY id
-- Compare dates with previous row
LEFT JOIN date_sort_order compare ON base.policy_number = compare.policy_number AND base.date_sort_order = (compare.date_sort_order - 1)
Have a question about something in this article? You can receive help directly from the article author. Sign up for a free trial to get started.
Comments (1)
Commented: