Grayson Hampton
asked on
Find Duplicate Values in SQL
I have the following query and I need to alter it so it only returns the USERIDs" that appear more than once. I would like to return a Count along with the max record by date.
The examples in red are duplicates that would appear in the new query.
SELECT userID, firstName, placementID, dateBegin,
status, Office,
clientCorporationID
FROM (SELECT Candidate.userID, Candidate.firstName, Placement_1.placementID, Placement_1.dateBegin, Placement_1.dateEnd, Placement_1.payRate, Placement_1.status,
Placement_1.customInt3,
(SELECT CASE WHEN primaryDeptName = 'HIM - HOU' THEN 'NATIONAL' ELSE CorporateUser_1.office END AS Expr1
FROM CorporateUser
WHERE (userID = CorporateUser_1.userID)) AS Office, JobOrder_4.clientCorporationID
FROM Placement AS Placement_1 INNER JOIN
JobOrder AS JobOrder_4 ON JobOrder_4.jobOrderID = Placement_1.jobOrderID INNER JOIN
Candidate ON Placement_1.candidateID = Candidate.userID INNER JOIN
vw1stCommissionsRecruiter ON Placement_1.placementID = vw1stCommissionsRecruiter.placementID INNER JOIN
PAM00701 AS PAM00701 ON CAST(vw1stCommissionsRecruiter.userID AS varchar(20)) = PAM00701.Consultant_ID INNER JOIN
PAM00302 AS PAM00302 ON PAM00701.SBU = PAM00302.SBU INNER JOIN
PAM00301 AS PAM00301 ON PAM00701.Office = PAM00301.Office_ID INNER JOIN
CorporateUser AS CorporateUser_1 ON vw1stCommissionsRecruiter.userID = CorporateUser_1.userID
WHERE (CONVERT(DATE, Placement_1.dateBegin, 102) BETWEEN '01/01/2018' AND '05/31/2020')
AND (NOT (Placement_1.isDeleted = 1))) AS report
WHERE (Office IN ('Chicago'))
ORDER BY userID
The examples in red are duplicates that would appear in the new query.
|
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
It seems the previous solution can be simpler:
;WITH cte AS (
SELECT userID, firstName, placementID, dateBegin, status, Office, clientCorporationID
FROM (SELECT Candidate.userID, Candidate.firstName, Placement_1.placementID, Placement_1.dateBegin, Placement_1.dateEnd, Placement_1.payRate, Placement_1.status, Placement_1.customInt3, (SELECT CASE WHEN primaryDeptName = 'HIM - HOU' THEN 'NATIONAL' ELSE CorporateUser_1.office END AS Expr1
FROM CorporateUser
WHERE (userID = CorporateUser_1.userID)) AS Office, JobOrder_4.clientCorporationID
FROM Placement AS Placement_1 INNER JOIN
JobOrder AS JobOrder_4 ON JobOrder_4.jobOrderID = Placement_1.jobOrderID INNER JOIN
Candidate ON Placement_1.candidateID = Candidate.userID INNER JOIN
vw1stCommissionsRecruiter ON Placement_1.placementID = vw1stCommissionsRecruiter.placementID INNER JOIN
PAM00701 AS PAM00701 ON CAST(vw1stCommissionsRecruiter.userID AS varchar(20)) = PAM00701.Consultant_ID INNER JOIN
PAM00302 AS PAM00302 ON PAM00701.SBU = PAM00302.SBU INNER JOIN
PAM00301 AS PAM00301 ON PAM00701.Office = PAM00301.Office_ID INNER JOIN
CorporateUser AS CorporateUser_1 ON vw1stCommissionsRecruiter.userID = CorporateUser_1.userID
WHERE (CONVERT(DATE, Placement_1.dateBegin, 102) BETWEEN '01/01/2018' AND '05/31/2020') AND (NOT (Placement_1.isDeleted = 1))) AS report
WHERE (Office IN ('Chicago'))
)
SELECT userID, COUNT(*) cnt, MAX(dateBegin) maxDate FROM cte GROUP BY userID HAVING COUNT(*) > 1
ORDER BY cte.user
Important here is the fact you still may use your original query as a base for cte.
Although I'm not sure where the reference to the row_number() should go, you might try -
SELECT userID, firstName, placementID, dateBegin,
status, Office,
clientCorporationID
FROM (SELECT Candidate.userID, Candidate.firstName, Placement_1.placementID, Placement_1.dateBegin, Placement_1.dateEnd, Placement_1.payRate, Placement_1.status,
Placement_1.customInt3,
row_number() over (partition by userID, firstName order by Placement_1.dateBegin desc) rn,
count(*) over (partition by userID, firstName order by Placement_1.dateBegin desc) cnt,
(SELECT CASE WHEN primaryDeptName = 'HIM - HOU' THEN 'NATIONAL' ELSE CorporateUser_1.office END AS Expr1
FROM CorporateUser
WHERE (userID = CorporateUser_1.userID)) AS Office,
JobOrder_4.clientCorporationID
FROM Placement AS Placement_1 INNER JOIN
JobOrder AS JobOrder_4 ON JobOrder_4.jobOrderID = Placement_1.jobOrderID INNER JOIN
Candidate ON Placement_1.candidateID = Candidate.userID INNER JOIN
vw1stCommissionsRecruiter ON Placement_1.placementID = vw1stCommissionsRecruiter.placementID INNER JOIN
PAM00701 AS PAM00701 ON CAST(vw1stCommissionsRecruiter.userID AS varchar(20)) = PAM00701.Consultant_ID INNER JOIN
PAM00302 AS PAM00302 ON PAM00701.SBU = PAM00302.SBU INNER JOIN
PAM00301 AS PAM00301 ON PAM00701.Office = PAM00301.Office_ID INNER JOIN
CorporateUser AS CorporateUser_1 ON vw1stCommissionsRecruiter.userID = CorporateUser_1.userID
WHERE (CONVERT(DATE, Placement_1.dateBegin, 102) BETWEEN '01/01/2018' AND '05/31/2020')
AND (NOT (Placement_1.isDeleted = 1))
AND rn = 1) AS report
WHERE (Office IN ('Chicago'))
ORDER BY userID
Sorry, I left out the count -
SELECT userID, firstName, placementID, dateBegin,
status, Office,
clientCorporationID, cnt
FROM (SELECT ...
SELECT userID, firstName, placementID, dateBegin,
status, Office,
clientCorporationID, cnt
FROM (SELECT ...
Another perhaps simpler query would be
with cte as
(your original query),
cte2 as
(select cte.*, count(*) over (partition by userid order by office) as dupe_count,
row_number() over (partition by userid order by date_begin desc) as rn
from cte)
SELECT userID, firstName, placementID, dateBegin, status, Office, clientCorporationID, dupe_count
from cte2 where rn = 1;
with cte as
(your original query),
cte2 as
(select cte.*, count(*) over (partition by userid order by office) as dupe_count,
row_number() over (partition by userid order by date_begin desc) as rn
from cte)
SELECT userID, firstName, placementID, dateBegin, status, Office, clientCorporationID, dupe_count
from cte2 where rn = 1;
Open in new window