We help IT Professionals succeed at work.

Need to Sort and Compare Columns from two datasources

I am attempting to join sales data from source A with sales data from source D by Day, Store and Category. In some cases, I have a store/category match for both sources. In some cases, I have different categories for the same store on the same day. In some cases, I have no data for one source, but do from another.

This is what the desired result set should look like:

Desired Resultset
Here is the create/insert script.

-- drop table #ASales
CREATE TABLE #ASales(
	[Store] [varchar](50) NULL,
	[SaleDate] [datetime] NULL,
	[ACategory] [varchar](20) NULL,
	[ATotalGallons] [numeric](38, 4) NULL
);

-- drop table #DSales
CREATE TABLE #DSales (
	[Store] [varchar](20) NULL,
	[SaleDate] [date] NULL,
	[DCategory] [varchar](40) NULL,
	[DTotalGallons] [decimal](38, 3) NULL
);

INSERT #ASales ([Store], [SaleDate], [ACategory], [ATotalGallons]) VALUES (N'0009', CAST(N'2020-01-16T00:00:00.000' AS DateTime), N'D', CAST(643.0040 AS Numeric(38, 4)))
INSERT #ASales ([Store], [SaleDate], [ACategory], [ATotalGallons]) VALUES (N'0009', CAST(N'2020-01-16T00:00:00.000' AS DateTime), N'G', CAST(4534.1350 AS Numeric(38, 4)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'10', CAST(N'2020-01-16' AS Date), N'G', CAST(1444.512 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'12', CAST(N'2020-01-16' AS Date), N'G', CAST(1417.343 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'9', CAST(N'2020-01-16' AS Date), N'G', CAST(4506.082 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'9', CAST(N'2020-01-16' AS Date), N'O', CAST(6.000 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'12', CAST(N'2020-01-16' AS Date), N'O', CAST(7.000 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'10', CAST(N'2020-01-16' AS Date), N'AD', CAST(471.373 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'12', CAST(N'2020-01-16' AS Date), N'AD', CAST(230.340 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'9', CAST(N'2020-01-16' AS Date), N'AD', CAST(617.909 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'10', CAST(N'2020-01-16' AS Date), N'O', CAST(1.000 AS Decimal(38, 3)))

SELECT Store,
       SaleDate,
	   ACategory,
	   ATotalGallons
 FROM #ASales
  Order by SaleDate,cast(Store as int), ACategory;

SELECT  Store,
       SaleDate,
	   DCategory,
	   DTotalGallons
 FROM #DSales 
  Order by SaleDate, cast(Store as int), DCategory;

Open in new window


I haven't been able to retrieve the desired resultset. Please advise.
Comment
Watch Question

You need a full outer join, example below:
 drop table IF EXISTS #ASales
CREATE TABLE #ASales(
	[Store] [varchar](50) NULL,
	[SaleDate] [datetime] NULL,
	[ACategory] [varchar](20) NULL,
	[ATotalGallons] [numeric](38, 4) NULL
);

drop TABLE IF exists #DSales
CREATE TABLE #DSales (
	[Store] [varchar](20) NULL,
	[SaleDate] [date] NULL,
	[DCategory] [varchar](40) NULL,
	[DTotalGallons] [decimal](38, 3) NULL
);

INSERT #ASales ([Store], [SaleDate], [ACategory], [ATotalGallons]) VALUES (N'0009', CAST(N'2020-01-16T00:00:00.000' AS DateTime), N'D', CAST(643.0040 AS Numeric(38, 4)))
INSERT #ASales ([Store], [SaleDate], [ACategory], [ATotalGallons]) VALUES (N'0009', CAST(N'2020-01-16T00:00:00.000' AS DateTime), N'G', CAST(4534.1350 AS Numeric(38, 4)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'10', CAST(N'2020-01-16' AS Date), N'G', CAST(1444.512 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'12', CAST(N'2020-01-16' AS Date), N'G', CAST(1417.343 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'9', CAST(N'2020-01-16' AS Date), N'G', CAST(4506.082 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'9', CAST(N'2020-01-16' AS Date), N'O', CAST(6.000 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'12', CAST(N'2020-01-16' AS Date), N'O', CAST(7.000 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'10', CAST(N'2020-01-16' AS Date), N'AD', CAST(471.373 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'12', CAST(N'2020-01-16' AS Date), N'AD', CAST(230.340 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'9', CAST(N'2020-01-16' AS Date), N'AD', CAST(617.909 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'10', CAST(N'2020-01-16' AS Date), N'O', CAST(1.000 AS Decimal(38, 3)))
go
SELECT COALESCE(a.Store,d.Store) AS store,
       COALESCE(A.SaleDate,d.SaleDate) AS saledate,
	   A.ACategory,
	   A.ATotalGallons,
	   D.DCategory,
	   D.DTotalGallons
 FROM #ASales A
 FULL OUTER JOIN #DSales D ON D.Store = A.Store AND D.SaleDate = A.SaleDate
  Order by COALESCE(A.SaleDate,d.SaleDate),COALESCE(a.Store,d.Store), a.ACategory,D.DCategory;
  /*
SELECT  Store,
       SaleDate,
	   DCategory,
	   DTotalGallons
 FROM #DSales 
  Order by SaleDate, cast(Store as int), DCategory;*/

Open in new window

Paula DiTalloIntegration developer

Author

Commented:
Anders,
Very close to what I produced--however--A sales and D sales aren't showing as sequential block from both sources ordered by categories.  When there's an interesect (on category G), the results aren't connected.

Here's what I see:
CloseResults.PNG
Here's what I am attempting to produce:

DesiredResultsHighlightStore9.PNG
Brian CroweDatabase Engineer
Top Expert 2005

Commented:
You have a formatting issue when joining StoreA.Store to StoreD.Store "0009" <> "9" so you need to strip off the leading zeroes from one, add them to the other, or if possible cast them to INTs.  In the example below I've chosen the "Cast to INT" method but this may not work outside the sample data provided so adjust if needed.

SELECT COALESCE(A.Store, D.Store) AS Store,
	COALESCE(A.SaleDate, D.SaleDate) AS SaleDate,
	COALESCE(A.ACategory, D.DCategory) AS FuelCategory,
	A.ATotalGallons,
	D.DTotalGallons
FROM #ASales AS A
FULL OUTER JOIN #DSales AS D
	ON CAST(A.Store AS INT) = CAST(D.Store AS INT)
	AND A.SaleDate = D.SaleDate
	AND A.ACategory = D.DCategory
ORDER BY COALESCE(A.Store, D.Store),
	COALESCE(A.SaleDate, D.SaleDate),
	COALESCE(A.ACategory, D.DCategory)

Open in new window

Paula DiTalloIntegration developer

Author

Commented:
Brian,
So close--you've been able to join the the G category all the way across for both source a sales and source d sales--which is AMAZING-- but the result set still orders sales source a stores, then orders source d stores so that there are separate sets for a given store where that store exists in both sources.  Here are the results:

CloserResults.PNG
This query seems to solve the separate store ordering for each data source, but does not capture the fuel category intesection between the same fuel cagegory (G) which you were able to achieve.

SELECT SUBSTRING(COALESCE(a.Store,d.Store), PATINDEX('%[^0]%', COALESCE(a.Store,d.Store)+'.'), LEN(COALESCE(a.Store,d.Store))) AS store,
   COALESCE(A.SaleDate,d.SaleDate) AS saledate,
   A.AFuelCategory,
   A.ATotalGallons,
   D.DFuelCategory,
   D.DTotalGallons
 FROM #ASales A
  FULL OUTER JOIN #DSales D 
    ON D.Store = A.Store 
   AND D.SaleDate = A.SaleDate
   Order by cast(SUBSTRING(COALESCE(a.Store,d.Store), PATINDEX('%[^0]%', COALESCE(a.Store,d.Store)+'.'), LEN(COALESCE(a.Store,d.Store))) as int)
           ,COALESCE(A.SaleDate,d.SaleDate)
           , a.AFuelCategory
           , D.DFuelCategory;

Open in new window

Is this what you are looking for (I just edited my reply):
  drop table IF EXISTS #ASales
CREATE TABLE #ASales(
	[Store] [varchar](50) NULL,
	[SaleDate] [datetime] NULL,
	[ACategory] [varchar](20) NULL,
	[ATotalGallons] [numeric](38, 4) NULL
);

drop TABLE IF exists #DSales
CREATE TABLE #DSales (
	[Store] [varchar](20) NULL,
	[SaleDate] [date] NULL,
	[DCategory] [varchar](40) NULL,
	[DTotalGallons] [decimal](38, 3) NULL
);

INSERT #ASales ([Store], [SaleDate], [ACategory], [ATotalGallons]) VALUES (N'0009', CAST(N'2020-01-16T00:00:00.000' AS DateTime), N'D', CAST(643.0040 AS Numeric(38, 4)))
INSERT #ASales ([Store], [SaleDate], [ACategory], [ATotalGallons]) VALUES (N'0009', CAST(N'2020-01-16T00:00:00.000' AS DateTime), N'G', CAST(4534.1350 AS Numeric(38, 4)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'10', CAST(N'2020-01-16' AS Date), N'G', CAST(1444.512 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'12', CAST(N'2020-01-16' AS Date), N'G', CAST(1417.343 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'9', CAST(N'2020-01-16' AS Date), N'G', CAST(4506.082 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'9', CAST(N'2020-01-16' AS Date), N'O', CAST(6.000 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'12', CAST(N'2020-01-16' AS Date), N'O', CAST(7.000 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'10', CAST(N'2020-01-16' AS Date), N'AD', CAST(471.373 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'12', CAST(N'2020-01-16' AS Date), N'AD', CAST(230.340 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'9', CAST(N'2020-01-16' AS Date), N'AD', CAST(617.909 AS Decimal(38, 3)))
INSERT #DSales ([Store], [SaleDate], [DCategory], [DTotalGallons]) VALUES (N'10', CAST(N'2020-01-16' AS Date), N'O', CAST(1.000 AS Decimal(38, 3)))
go
SELECT CAST(COALESCE(a.Store,d.Store) AS INT) AS store,
       COALESCE(A.SaleDate,d.SaleDate) AS saledate,
	   A.ACategory,
	   A.ATotalGallons,
	   D.DCategory,
	   D.DTotalGallons
 FROM #ASales A
 FULL OUTER JOIN #DSales D ON CAST(D.Store AS INT) = CAST(A.Store AS INT) AND D.SaleDate = A.SaleDate AND a.ACategory=d.DCategory
  Order by COALESCE(A.SaleDate,d.SaleDate),CAST(COALESCE(a.Store,d.Store) AS INT), a.ACategory,D.DCategory;
  /*
SELECT  Store,
       SaleDate,
	   DCategory,
	   DTotalGallons
 FROM #DSales 
  Order by SaleDate, cast(Store as int), DCategory;*/

Open in new window


The output comes out as:
Post_EE.png
Brian CroweDatabase Engineer
Top Expert 2005

Commented:
SELECT CAST(COALESCE(A.Store, D.Store) AS INT) AS Store,
      COALESCE(A.SaleDate, D.SaleDate) AS SaleDate,
      COALESCE(A.ACategory, D.DCategory) AS FuelCategory,
      A.ATotalGallons,
      D.DTotalGallons
FROM #ASales AS A
FULL OUTER JOIN #DSales AS D
      ON CAST(A.Store AS INT) = CAST(D.Store AS INT)
      AND A.SaleDate = D.SaleDate
      AND A.ACategory = D.DCategory
ORDER BY CAST(COALESCE(A.Store, D.Store) AS INT),
      COALESCE(A.SaleDate, D.SaleDate),
      COALESCE(A.ACategory, D.DCategory)
Paula DiTalloIntegration developer

Author

Commented:
Anders and Brian,
Thank you both for spending your time to work on my SQL statement.  Brian, I ultimately assigned Anders the solution since all subsequent work was based on his original answer.  You however, solved an important piece--which was assuring that the intersecting fuel category for a given store remained on the same line.  I think you and Anders within minutes of each other submitted the exact same total solution! Again, brilliantly done -- both of you! :)