dbaSQL
asked on
tSQL reporting problem (grouping vs detailed)
I have 2 reports that I run on a daily basis. One is a detailed report of 'self trades'. The other is a summation of the same 'self trades'. so if I have 100 self trades for this trader on this endpoint and symbol in the detailed, I would have one record in the summation with a total selftrade count of 100. The detailed rpt runs, loading current day to working tables, then the summation runs off the same working tables. It has come to my attention that the detailed and the summation are not equating.
Here is a specific example:
from the summation:
endpoint/trader/symbol/tot alvol/tota lTrades/Se lfTradeVol /TotalSelf Trade/Self TradeVolRa tio/SelfTr adeRatio
CBOT abc ZTU8 5738 1782 99 50 0.017253398 0.028058361
CBOT def ZTU8 6216 2166 58 38 0.009330759 0.01754386
yet in the detailed report, there are 19 trades for trader abc with a volume of 63, and 15 trades for the trader def, with a volume of 32. The two together, a total volume of 95. see the selftrade count and volume for each id --
the summation:
count volume
abc 50 99
def 38 58
the detailed
count volume
abc 19 63
def 15 32
- my summation says i have 50 trades by abc on that symbol and endpoint. my detailed has only 19.
- my summation says i have 38 trades by def on that symbol and endpoint. my detailed has only 15.
this is my detailed logic:
CREATE proc detailedProc
AS
SET NOCOUNT ON
DELETE workingDB.dbo.epSelfTrade
INSERT INTO workingDB.dbo.epSelfTrade SELECT DISTINCT EndPoint FROM SERVER1.database1.dbo.EPGr oups WHERE EndPoint NOT IN (
'....a list of endpoints we ignore for this report....')
delete workingDB.dbo.SelfTrades
/* only non-spread trades */
insert workingDB.dbo.SelfTrades
select trader,acct,prevorderno,en dpoint,sym bol,side,q uantity,pr ice,tradet ime,cleari ngfee,lfla g,client, orderno,origorderno,basket id,basketn umber,quan tity,execu tedquantit y,leavesqu antity,ord ertype, limitprice,avgfillprice,or derstatus, stopprice, timeinforc e,cntrpart yID,msgID, hostname,o rdermarkin g,
inserttime,source
from database.dbo.ordertable a with (nolock)
WHERE tradetime >= LEFT(GETDATE(),11)
AND EXISTS (SELECT 1 FROM workingDB.dbo.epSelfTrade WHERE endpoint = a.endpoint)
AND symbol NOT LIKE '%-%'
AND NOT EXISTS (select 1 orderno from database.dbo.ordertable b with (nolock) where a.origorderno = b.origorderno
AND tradetime >= LEFT(GETDATE(),11)
AND EXISTS (SELECT 1 FROM workingDB.dbo.epSelfTrade WHERE endpoint = b.endpoint)
AND symbol LIKE '%-%' )
/* only spread trades */
insert workingDB.dbo.SelfTrades
SELECT trader,acct,prevorderno,en dpoint,sym bol,side,q uantity,pr ice,tradet ime,cleari ngfee,lfla g,client, orderno,origorderno,basket id,basketn umber,quan tity,execu tedquantit y,leavesqu antity,ord ertype, limitprice,avgfillprice,or derstatus, stopprice, timeinforc e,cntrpart yID,msgID, hostname,o rdermarkin g,
inserttime,source
FROM database.dbo.ordertable t1 WITH (NOLOCK)
WHERE tradetime >= LEFT(GETDATE(),11)
AND EXISTS (SELECT 1 FROM workingDB.dbo.epSelfTrade WHERE endpoint = t1.endpoint)
AND symbol LIKE '%-%'
SELECT DISTINCT b.endpoint,b.trader AS Btrader, b.acct AS BAcctID,b.Client AS BClientID, b.OrigOrderNo AS BOrigOrderNo,s.trader AS Strader,s.acct AS SAcctID,s.Client AS SClientID, s.OrigOrderNo AS SOrigOrderNo,b.Symbol, b.Quantity, b.Price,
CONVERT(VARCHAR(10),b.trad etime,101) +' '+SUBSTRING(CONVERT(VARCHA R(20),b.tr adetime,10 0),13,7) AS tradetime
FROM workingDB.dbo.SelfTrades b WITH (NOLOCK)
INNER JOIN workingDB.dbo.SelfTrades s WITH (NOLOCK)
ON s.endpoint = b.endpoint
AND s.Quantity = b.Quantity
AND s.Price = b.Price
AND s.tradetime = b.tradetime
AND s.side = 'S'
AND b.side = 'B'
ORDER BY EndPoint,Btrader,b.Symbol
SET NOCOUNT OFF
GO
--summation procedure
CREATE proc dbo.summationProc
AS
SET NOCOUNT ON
SELECT q1.EndPoint,q1.Trader,q1.S ymbol,q1.T otalVol,q1 .TotalTrad es,q2.Self TradeVol,q 2.TotalSel fTrades,
CAST(q2.SelfTradeVol AS DECIMAL(15,3))/CAST(q1.Tot alVol AS DECIMAL(15,3)) AS SelfTradeVolRatio,
CAST(q2.TotalSelfTrades AS DECIMAL(15,3))/CAST(q1.Tot alTrades AS DECIMAL(15,3)) AS TotalSelfTradeRatio
FROM (
SELECT a.EndPoint,a.Trader,a.Symb ol,SUM(qua ntity) AS TotalVol,COUNT(*) AS TotalTrades
FROM database.dbo.ordertable a WITH (NOLOCK)
WHERE tradetime >= LEFT(GETDATE(),11)
AND EXISTS (SELECT 1 FROM workingDB.dbo.epSelfTrade WHERE endpoint = a.EndPoint)
GROUP BY a.EndPoint,a.Trader,a.Symb ol ) AS q1
INNER JOIN
(SELECT b.EndPoint,b.Trader,b.Symb ol,SUM(b.q uantity)AS SelfTradeVol,COUNT(*) AS TotalSelfTrades
FROM workingDB.dbo.SelfTrades b WITH (NOLOCK)
INNER JOIN workingDB.dbo.SelfTrades s WITH (NOLOCK)
ON s.EndPoint = b.EndPoint
AND s.Quantity = b.Quantity
AND s.Price = b.Price
AND s.tradetime = b.tradetime
AND s.Side = 'S'
AND b.Side = 'B'
GROUP BY b.EndPoint,b.Trader,b.Symb ol ) AS q2
ON q1.Trader = q2.Trader
AND q1.symbol = q2.symbol
ORDER BY q1.EndPoint,q1.Trader,q1.S ymbol
SET NOCOUNT OFF
GO
I know it's kind of lengthy. I apologize for that. Does anybody see what may be causing this variance?
Here is a specific example:
from the summation:
endpoint/trader/symbol/tot
CBOT abc ZTU8 5738 1782 99 50 0.017253398 0.028058361
CBOT def ZTU8 6216 2166 58 38 0.009330759 0.01754386
yet in the detailed report, there are 19 trades for trader abc with a volume of 63, and 15 trades for the trader def, with a volume of 32. The two together, a total volume of 95. see the selftrade count and volume for each id --
the summation:
count volume
abc 50 99
def 38 58
the detailed
count volume
abc 19 63
def 15 32
- my summation says i have 50 trades by abc on that symbol and endpoint. my detailed has only 19.
- my summation says i have 38 trades by def on that symbol and endpoint. my detailed has only 15.
this is my detailed logic:
CREATE proc detailedProc
AS
SET NOCOUNT ON
DELETE workingDB.dbo.epSelfTrade
INSERT INTO workingDB.dbo.epSelfTrade SELECT DISTINCT EndPoint FROM SERVER1.database1.dbo.EPGr
'....a list of endpoints we ignore for this report....')
delete workingDB.dbo.SelfTrades
/* only non-spread trades */
insert workingDB.dbo.SelfTrades
select trader,acct,prevorderno,en
inserttime,source
from database.dbo.ordertable a with (nolock)
WHERE tradetime >= LEFT(GETDATE(),11)
AND EXISTS (SELECT 1 FROM workingDB.dbo.epSelfTrade WHERE endpoint = a.endpoint)
AND symbol NOT LIKE '%-%'
AND NOT EXISTS (select 1 orderno from database.dbo.ordertable b with (nolock) where a.origorderno = b.origorderno
AND tradetime >= LEFT(GETDATE(),11)
AND EXISTS (SELECT 1 FROM workingDB.dbo.epSelfTrade WHERE endpoint = b.endpoint)
AND symbol LIKE '%-%' )
/* only spread trades */
insert workingDB.dbo.SelfTrades
SELECT trader,acct,prevorderno,en
inserttime,source
FROM database.dbo.ordertable t1 WITH (NOLOCK)
WHERE tradetime >= LEFT(GETDATE(),11)
AND EXISTS (SELECT 1 FROM workingDB.dbo.epSelfTrade WHERE endpoint = t1.endpoint)
AND symbol LIKE '%-%'
SELECT DISTINCT b.endpoint,b.trader AS Btrader, b.acct AS BAcctID,b.Client AS BClientID, b.OrigOrderNo AS BOrigOrderNo,s.trader AS Strader,s.acct AS SAcctID,s.Client AS SClientID, s.OrigOrderNo AS SOrigOrderNo,b.Symbol, b.Quantity, b.Price,
CONVERT(VARCHAR(10),b.trad
FROM workingDB.dbo.SelfTrades b WITH (NOLOCK)
INNER JOIN workingDB.dbo.SelfTrades s WITH (NOLOCK)
ON s.endpoint = b.endpoint
AND s.Quantity = b.Quantity
AND s.Price = b.Price
AND s.tradetime = b.tradetime
AND s.side = 'S'
AND b.side = 'B'
ORDER BY EndPoint,Btrader,b.Symbol
SET NOCOUNT OFF
GO
--summation procedure
CREATE proc dbo.summationProc
AS
SET NOCOUNT ON
SELECT q1.EndPoint,q1.Trader,q1.S
CAST(q2.SelfTradeVol AS DECIMAL(15,3))/CAST(q1.Tot
CAST(q2.TotalSelfTrades AS DECIMAL(15,3))/CAST(q1.Tot
FROM (
SELECT a.EndPoint,a.Trader,a.Symb
FROM database.dbo.ordertable a WITH (NOLOCK)
WHERE tradetime >= LEFT(GETDATE(),11)
AND EXISTS (SELECT 1 FROM workingDB.dbo.epSelfTrade WHERE endpoint = a.EndPoint)
GROUP BY a.EndPoint,a.Trader,a.Symb
INNER JOIN
(SELECT b.EndPoint,b.Trader,b.Symb
FROM workingDB.dbo.SelfTrades b WITH (NOLOCK)
INNER JOIN workingDB.dbo.SelfTrades s WITH (NOLOCK)
ON s.EndPoint = b.EndPoint
AND s.Quantity = b.Quantity
AND s.Price = b.Price
AND s.tradetime = b.tradetime
AND s.Side = 'S'
AND b.Side = 'B'
GROUP BY b.EndPoint,b.Trader,b.Symb
ON q1.Trader = q2.Trader
AND q1.symbol = q2.symbol
ORDER BY q1.EndPoint,q1.Trader,q1.S
SET NOCOUNT OFF
GO
I know it's kind of lengthy. I apologize for that. Does anybody see what may be causing this variance?
Yeah it is a bit lengthy. Can you publish some create tables and insert statements to run your procedures? How are you comparing two results?
ASKER
the reports are sent out via email, as .csv's. the comparison is simply walking thru one, then looking at the other to ensure it all adds up. i've got both reports in front of me now, the example i gave is legit. it doesn't add up.
Filter on SelfTrades table is the same in both cases - so it is not the problem.
The detailed SQL is using distinct clause. Why? Can you run the query without DISTINCT?
The detailed SQL is using distinct clause. Why? Can you run the query without DISTINCT?
ASKER
yes, let me try this
ASKER
well, without the distinct, i may be getting the data, BUT now i'm getting a lot of dupes in the detailed report, so it's hard to tell.
for example, this one is in there 12 times:
EndPoint/BTrader/BAcctID/B ClientID/B OrigOrderN o/STrader/ SAcctID/SC lientID/SO rigOrderNo /Symbol/Qu antity/Pri ce/tradeTi me
CBOT abc 8135 zt-01 05Mtgprap abc 8135 zt-01 05Mtgprat ZTU8 1 105.64 06/05/2008 10:24AM
this one is in there twice:
CBOT jkl 8135 zn-02 05Mtgv2js jkl 8135 zn-03 05Mth8ykt ZNU8 1 113.17 06/05/2008 11:01AM
that's not going to work
any thoughts what may be doing this? no changes other than commenting out DISTINCT
for example, this one is in there 12 times:
EndPoint/BTrader/BAcctID/B
CBOT abc 8135 zt-01 05Mtgprap abc 8135 zt-01 05Mtgprat ZTU8 1 105.64 06/05/2008 10:24AM
this one is in there twice:
CBOT jkl 8135 zn-02 05Mtgv2js jkl 8135 zn-03 05Mth8ykt ZNU8 1 113.17 06/05/2008 11:01AM
that's not going to work
any thoughts what may be doing this? no changes other than commenting out DISTINCT
You hit the problem. Your summary query does "group by" and publishes aggregation of details. I assume you need another aggregation but on the more detailed level then your summary.
You need to rewrite the detailed query to have sum or avg on quantity, price and group by on endPoint, Btrader etc.
You need to rewrite the detailed query to have sum or avg on quantity, price and group by on endPoint, Btrader etc.
ASKER
I'm not sure I follow what you are suggesting, srnar.
Sorry for confusing. I'm not able to help you more with the problem without looking into data. I try to find some more time and generate some fake data.
Otherwise we are only spending our time.
Otherwise we are only spending our time.
ASKER
not confusing, really, i'm just not certain what you are suggesting. i'd be happy to show you some data, i'm just not certain how to do so. i don't think a cut/paste of a ton of records (detailed/summary) will go well here.
3 things will be welcome:
1. create script of SelfTrades table - can be scripted from Management studio
2. create script of ordertable - can be also scripted from Management studio
3. select top 100 * from ordertable - can be exported into Excel via clipboard from Management studio
1. create script of SelfTrades table - can be scripted from Management studio
2. create script of ordertable - can be also scripted from Management studio
3. select top 100 * from ordertable - can be exported into Excel via clipboard from Management studio
ASKER
yes, srnar, i know how to script the tables. and, i fully intended to do so, along with a capture of both the detailed data and summary data, on a specific trader/endpoint/symbol. thus, we've got one example to work with in both detailed and summary
but. i'd like to avoid posting it up here, if possible.
i can genericize the table def, and attach those, but what of the data?
but. i'd like to avoid posting it up here, if possible.
i can genericize the table def, and attach those, but what of the data?
Sorry to underestimate you, dbaSQL. It is 100 records - you can change the sensitive data (names, quantities) to stay anonymous - can be done in 10 minutes. You can send it to my mail srnar at volny.cz
ASKER
oh no, you didn't underestimate. i just wanted you to know i was pretty comfy w/the table def and data and alll, i just didn't want to post up here. doing this now.
ASKER CERTIFIED SOLUTION
membership
This solution is only available to members.
To access this solution, you must be a member of Experts Exchange.
ASKER
>>Also joining on quantity or price columns is not good practice.
This particular report is of 'selftrades', which are not the norm, or standard, trading practice. It must be joined on quantity and price in order to identify the trades we are watching for.
This particular report is of 'selftrades', which are not the norm, or standard, trading practice. It must be joined on quantity and price in order to identify the trades we are watching for.
ASKER
still having problems w/this. are you avail?
Not returning good data?
ASKER
well, i'm not sure. they are questioning this:
SUM(b.Quantity) as newQuantity,
COUNT(*) AS newTrades
and as i go in to check the data, i've got to question it, too. given that it is in the detailed report, not the summation, is this actually just a count of the selftrades?
SUM(b.Quantity) as newQuantity,
COUNT(*) AS newTrades
and as i go in to check the data, i've got to question it, too. given that it is in the detailed report, not the summation, is this actually just a count of the selftrades?
Yes - newTrades = count of trades that have this same values:
b.endpoint,b.trader, b.acct,b.Client, b.OrigOrderNo,s.trader,s.a cct,s.Clie nt, s.OrigOrderNo,b.Symbol, b.Price, b.tradetime
which is more detailed than set (b.EndPoint,b.Trader,b.Sym bol) used in your sum proc
b.endpoint,b.trader, b.acct,b.Client, b.OrigOrderNo,s.trader,s.a
which is more detailed than set (b.EndPoint,b.Trader,b.Sym
ASKER
>>which is more detailed than set (b.EndPoint,b.Trader,b.Sym bol) used in your sum proc
can you elaborate for me, please, srnar?
can you elaborate for me, please, srnar?
see 5th row of the following statement
....SELECT a.EndPoint,a.Trader,a.Symb ol,SUM(qua ntity) AS TotalVol,COUNT(*) AS TotalTrades
FROM database.dbo.ordertable a WITH (NOLOCK)
WHERE tradetime >= LEFT(GETDATE(),11)
AND EXISTS (SELECT 1 FROM workingDB.dbo.epSelfTrade WHERE endpoint = a.EndPoint)
GROUP BY a.EndPoint,a.Trader,a.Symb ol ) AS q1
INNER JOIN
(SELECT b.EndPoint,b.Trader,b.Symb ol,SUM(b.q uantity)AS SelfTradeVol,COUNT(*) AS TotalSelfTrades
FROM workingDB.dbo.SelfTrades b WITH (NOLOCK)
INNER JOIN workingDB.dbo.SelfTrades s WITH (NOLOCK)
ON s.EndPoint = b.EndPoint
AND s.Quantity = b.Quantity
AND s.Price = b.Price
AND s.tradetime = b.tradetime
AND s.Side = 'S'
AND b.Side = 'B'
GROUP BY b.EndPoint,b.Trader,b.Symb ol ) AS q2
ON q1.Trader = q2.Trader
AND q1.symbol = q2.symbol
ORDER BY q1.EndPoint,q1.Trader,q1.S ymbol
....SELECT a.EndPoint,a.Trader,a.Symb
FROM database.dbo.ordertable a WITH (NOLOCK)
WHERE tradetime >= LEFT(GETDATE(),11)
AND EXISTS (SELECT 1 FROM workingDB.dbo.epSelfTrade WHERE endpoint = a.EndPoint)
GROUP BY a.EndPoint,a.Trader,a.Symb
INNER JOIN
(SELECT b.EndPoint,b.Trader,b.Symb
FROM workingDB.dbo.SelfTrades b WITH (NOLOCK)
INNER JOIN workingDB.dbo.SelfTrades s WITH (NOLOCK)
ON s.EndPoint = b.EndPoint
AND s.Quantity = b.Quantity
AND s.Price = b.Price
AND s.tradetime = b.tradetime
AND s.Side = 'S'
AND b.Side = 'B'
GROUP BY b.EndPoint,b.Trader,b.Symb
ON q1.Trader = q2.Trader
AND q1.symbol = q2.symbol
ORDER BY q1.EndPoint,q1.Trader,q1.S
ASKER
yes, i can see the differences between the two, i'm just looking for a better understanding of the change
ASKER
I still don't have a thorough understanding of the change. Please advise if you are able.
dbaSQL, simply to avoid duplicity you have to group rows on your detail report - values than can be aggregated (used SUM, COUNT function) -
SUM(b.Quantity) as newQuantity,
COUNT(*) AS newTrades
for more you can visit:
http://msdn.microsoft.com/en-us/library/ms173454.aspx
SUM(b.Quantity) as newQuantity,
COUNT(*) AS newTrades
for more you can visit:
http://msdn.microsoft.com/en-us/library/ms173454.aspx
ASKER
Thank you.
ASKER