Oracle correlated subquery w/COUNT(DISTINCT) help

Jim Horn
Jim Horn used Ask the Experts™
on
Hi All

I have two tables

Name      Columns and data types
t1              groupid varchar2 (16), patientid varchar2 (255), date_range_end date
t2              groupid varchar2 (16), patientid varchar2 (255), evidence_number varchar2 (6), evidence_date date

t1 sample data
H123456, 123456789, 02-SEP-15
H123456, 123456789, 15-OCT-15
H123456, 123456789, 02-DEC-15

t2 sample data
H123456, 123456789, 2, 02-SEP-15
H123456, 123456789, 4, 02-OCT-15
H123456, 123456789, 6, 02-NOV-15
H123456, 123456789, 8, 02-NOV-15

I need to pull off a correlated subquery between these two tables where the return set has COUNT(DISTINCT t2.evidence_number) values WHERE t2.evidence_date <= t1.date_range_end.  So the return set I'm looking for is...

t1.groupid, t1.patientid, t1.date_range_end, the count(DISTINCT evidence_number WHERE t2.evidence_date <= t1.date_range_end)  
H123456, 123456789, 02-SEP-15, 1    --  2 has a date equal to 02-SEP-15
H123456, 123456789, 15-OCT-15, 2   -- 2 and 4 have dates before 15-OCT-15
H123456, 123456789, 02-DEC-15, 4   -- 2, 4, 6, and 8 are all before 02-DEC-15

Thanks in advance.
Jim
Comment
Watch Question

Do more with

Expert Office
EXPERT OFFICE® is a registered trademark of EXPERTS EXCHANGE®
Most Valuable Expert 2012
Distinguished Expert 2018

Commented:
Do your dates have the time portion?

select to_char(date_range_end,'mm/dd/yyyy hh24:mi:ss') from t1 where rownum<11;
select to_char(evidence_date,'mm/dd/yyyy hh24:mi:ss') from t2 where rownum<11;

I'll work on the query while I wait.
Jim HornSQL Server Data Dude
Most Valuable Expert 2013
Author of the Year 2015

Author

Commented:
No, just dates.
Most Valuable Expert 2012
Distinguished Expert 2018
Commented:
Don't know why you need a correlated subquery.

Try this:
/*
drop table t1 purge;
create table t1 (groupid varchar2 (16), patientid varchar2 (255), date_range_end date);

drop table t2 purge;
create table t2(groupid varchar2 (16), patientid varchar2 (255), evidence_number varchar2 (6), evidence_date date);


--t1 sample data
insert into t1 values('H123456', '123456789', to_date('02-SEP-15','DD-MON-YY'));
insert into t1 values('H123456', '123456789', to_date('15-OCT-15','DD-MON-YY'));
insert into t1 values('H123456', '123456789', to_date('02-DEC-15','DD-MON-YY'));

--t2 sample data
insert into t2 values('H123456', '123456789', '2', to_date('02-SEP-15','DD-MON-YY'));
insert into t2 values('H123456', '123456789', '4', to_date('02-OCT-15','DD-MON-YY'));
insert into t2 values('H123456', '123456789', '6', to_date('02-NOV-15','DD-MON-YY'));
insert into t2 values('H123456', '123456789', '8', to_date('02-NOV-15','DD-MON-YY'));
commit;
*/



select t1.groupid,
	t1.patientid,
	t1.date_range_end,
	sum(case when t2.evidence_date <= t1.date_range_end then 1 end) total  
from t1 join t2 on t1.groupid=t2.groupid and t1.patientid=t2.patientid
group by t1.groupid,
	t1.patientid,
	t1.date_range_end
order by t1.date_range_end
/

Open in new window

Ensure you’re charging the right price for your IT

Do you wonder if your IT business is truly profitable or if you should raise your prices? Learn how to calculate your overhead burden using our free interactive tool and use it to determine the right price for your IT services. Start calculating Now!

Jim HornSQL Server Data Dude
Most Valuable Expert 2013
Author of the Year 2015

Author

Commented:
>Don't know why you need a correlated subquery.
Because I'm relatively new to Oracle SQL?  

Thanks.
Jim
PortletPaulEE Topic Advisor
Most Valuable Expert 2014
Awarded 2013

Commented:
I would not use, or recommend, a correlated subquery. This is how I would approach it
SELECT
      t1.GROUPID
    , t2.GROUPID
    , t1.DATE_RANGE_END
    , COUNT(DISTINCT t2.evidence_number) AS x
FROM t1
      INNER JOIN t2 ON t1.GROUPID = t2.GROUPID
                  AND t1.PATIENTID = t2.PATIENTID
                  AND t2.evidence_date <= t1.DATE_RANGE_END
GROUP BY
      t1.GROUPID
    , t2.GROUPID
    , t1.DATE_RANGE_END
;

Open in new window

Based on your sample data this is the result of that query:
| GROUPID | GROUPID |              DATE_RANGE_END | X |
|---------|---------|-----------------------------|---|
| H123456 | H123456 |  December, 02 2015 00:00:00 | 4 |
| H123456 | H123456 |   October, 15 2015 00:00:00 | 2 |
| H123456 | H123456 | September, 02 2015 00:00:00 | 1 |

Open in new window

and here is the sample data as inserts
CREATE TABLE T1
    (GROUPId varchar2(16), PATIENTID int, DATE_RANGE_END date)
;

INSERT ALL 
    INTO T1 (GROUPID,PATIENTID,DATE_RANGE_END)
         VALUES ('H123456', 123456789, to_date('02-09-2015','dd-mm-yyyy'))
    INTO T1 (GROUPID,PATIENTID,DATE_RANGE_END)
         VALUES ('H123456', 123456789, to_date('15-10-2015','dd-mm-yyyy'))
    INTO T1 (GROUPID,PATIENTID,DATE_RANGE_END)
         VALUES ('H123456', 123456789, to_date('02-12-2015','dd-mm-yyyy'))
SELECT * FROM dual
;

CREATE TABLE T2
    (GROUPID varchar2(16), PATIENTID int, "EVIDENCE_NUMBER" INT, "EVIDENCE_DATE" date)
;

INSERT ALL 
    INTO T2 (GROUPID,PATIENTID, EVIDENCE_NUMBER, EVIDENCE_DATE)
         VALUES ('H123456', 123456789, 2, to_date('02-09-2015','dd-mm-yyyy'))
    INTO T2 (GROUPID,PATIENTID, EVIDENCE_NUMBER, EVIDENCE_DATE)
         VALUES ('H123456', 123456789, 4, to_date('02-10-2015','dd-mm-yyyy'))
    INTO T2 (GROUPID,PATIENTID, EVIDENCE_NUMBER, EVIDENCE_DATE)
         VALUES ('H123456', 123456789, 6, to_date('02-11-2015','dd-mm-yyyy'))
    INTO T2 (GROUPID,PATIENTID, EVIDENCE_NUMBER, EVIDENCE_DATE)
         VALUES ('H123456', 123456789, 8, to_date('02-11-2015','dd-mm-yyyy'))
SELECT * FROM dual
;

Open in new window

see this as a working demo here: http://sqlfiddle.com/#!4/c1d82/1
PortletPaulEE Topic Advisor
Most Valuable Expert 2014
Awarded 2013

Commented:
oh sorry, just hid the explain plan comment, my bad
Most Valuable Expert 2012
Distinguished Expert 2018

Commented:
OK, I scaled it up (50,000 extra rows in each table) and I see no real differences between the two.

If indexes are involved, it might make a difference.

/*
drop table t1 purge;
create table t1 (groupid varchar2 (16), patientid varchar2 (255), date_range_end date);

drop table t2 purge;
create table t2(groupid varchar2 (16), patientid varchar2 (255), evidence_number varchar2 (6), evidence_date date);


--t1 sample data
insert into t1 values('H123456', '123456789', to_date('02-SEP-15','DD-MON-YY'));
insert into t1 values('H123456', '123456789', to_date('15-OCT-15','DD-MON-YY'));
insert into t1 values('H123456', '123456789', to_date('02-DEC-15','DD-MON-YY'));

--t2 sample data
insert into t2 values('H123456', '123456789', '2', to_date('02-SEP-15','DD-MON-YY'));
insert into t2 values('H123456', '123456789', '4', to_date('02-OCT-15','DD-MON-YY'));
insert into t2 values('H123456', '123456789', '6', to_date('02-NOV-15','DD-MON-YY'));
insert into t2 values('H123456', '123456789', '8', to_date('02-NOV-15','DD-MON-YY'));
commit;

insert into t1 select dbms_random.string('a',16), '123456789', to_date('02-DEC-15','DD-MON-YY') from dual
connect by level<=50000;
insert into t2 select dbms_random.string('a',16), '123456789', '2', to_date('02-SEP-15','DD-MON-YY') from dual
connect by level<=50000;
*/

set autotrace traceonly
select t1.groupid,
	t1.patientid,
	t1.date_range_end,
	sum(case when t2.evidence_date <= t1.date_range_end then 1 end) total  
from t1 join t2 on t1.groupid=t2.groupid and t1.patientid=t2.patientid
group by t1.groupid,
	t1.patientid,
	t1.date_range_end
order by t1.date_range_end
/


SELECT
      t1.GROUPID
    , t2.GROUPID
    , t1.DATE_RANGE_END
    , COUNT(t2.evidence_number) AS x
FROM t1
      INNER JOIN t2 ON t1.GROUPID = t2.GROUPID
                  AND t1.PATIENTID = t2.PATIENTID
                  AND t2.evidence_date <= t1.DATE_RANGE_END
GROUP BY
      t1.GROUPID
    , t2.GROUPID
    , t1.DATE_RANGE_END
order by t1.date_range_end
;

set autotrace off

Open in new window

Do more with

Expert Office
Submit tech questions to Ask the Experts™ at any time to receive solutions, advice, and new ideas from leading industry professionals.

Start 7-Day Free Trial