|
Execute the following
Microsoft SQL Server T-SQL scripts in Management Studio Query Editor to demonstrate how to use over partition by with ranking and aggregate window functions.
-- QUICK SYNTAX: RANK and DENSE RANK products with OVER PARTITION BY Color
SELECT ProductNumber, Color, ProductSubcategoryID,
Ranking = RANK() OVER (PARTITION BY Color ORDER BY ProductSubcategoryID),
DenseRanking = DENSE_RANK() OVER (PARTITION BY Color ORDER BY ProductSubcategoryID)
FROM AdventureWorks2008.Production.Product
WHERE Color is not null AND ProductSubcategoryID is not null
ORDER BY Color, Ranking, ProductNumber
/*
ProductNumber Color ProductSubcategoryID Ranking DenseRanking
...
VE-C304-S Blue 25 23 3
HL-U509-B Blue 31 26 4
PA-T100 Grey 35 1 1
SB-M891-L Multi 18 1 1
SB-M891-M Multi 18 1 1
SB-M891-S Multi 18 1 1
CA-1098 Multi 19 4 2
LJ-0192-L Multi 21 5 3
LJ-0192-M Multi 21 5 3
LJ-0192-S Multi 21 5 3
LJ-0192-X Multi 21 5 3
BK-R50R-44 Red 2 1 1
BK-R50R-48 Red 2 1 1
BK-R50R-52 Red 2 1 1
....
*/
------------
-- Finding last order date with GROUP BY
SELECT CustomerID, LastOrder=MAX(OrderDate), OrderCount=COUNT(*)
FROM AdventureWorks2008.Sales.SalesOrderHeader
GROUP BY CustomerID
ORDER BY OrderCount DESC;
-- Finding last order date with OVER PARTITION BY
;WITH CTE AS (SELECT CustomerID, OrderDate,
OrderCount = COUNT(*) OVER ( PARTITION BY CustomerID),
RN = ROW_NUMBER() OVER ( PARTITION BY CustomerID
ORDER BY OrderDate DESC)
FROM AdventureWorks2008.Sales.SalesOrderHeader)
SELECT * FROM CTE WHERE RN = 1
ORDER BY OrderCount DESC
------------
-- SQL over partiton by - QUICK SYNTAX - row_number over partition by - OVER clause
-- Find TOP selling salesstaff by city -- RANK() is used for ranking salesstaff
USE AdventureWorks2008;
WITH cteTopSalesStaffByCity
AS (SELECT p.FirstName + ' ' + p.LastName AS SalesStaff,
RANK()
OVER(PARTITION BY City ORDER BY SalesYTD DESC) AS RankNo
,
'$' + convert(VARCHAR,sp.SalesYTD,1) AS SalesYTD,
a.City,
a.PostalCode
FROM Sales.SalesPerson sp
INNER JOIN Person.Person p
ON sp.BusinessEntityID = p.BusinessEntityID
INNER JOIN Person.Address a
ON a.AddressID = p.BusinessEntityID
WHERE TerritoryID IS NOT NULL)
SELECT *
FROM cteTopSalesStaffByCity
WHERE RankNo = 1
ORDER BY City
GO
/*
SalesStaff RankNo SalesYTD City PostalCode
Linda Mitchell 1 $5,200,475.23 Issaquah 98027
Jae Pak 1 $5,015,682.38 Renton 98055
*/ ------------ -- Calculate aggregates over partition by PurchaseOrderID ------------ USE AdventureWorks; GO -- SQL Server over partition by - using partition by with aggregate functions -- SQL Server windowing functions - partition by sql server - t sql partition by SELECT PO = PurchaseOrderID, ProductID, OrderQty, SUM(OrderQty) OVER(PARTITION BY PurchaseOrderID ) AS 'TOTAL', AVG(OrderQty) OVER(PARTITION BY PurchaseOrderID ) AS 'Avg', COUNT(OrderQty) OVER(PARTITION BY PurchaseOrderID ) AS 'Count', MIN(OrderQty) OVER(PARTITION BY PurchaseOrderID ) AS 'Min', MAX(OrderQty) OVER(PARTITION BY PurchaseOrderID ) AS 'Max' FROM Purchasing.PurchaseOrderDetail WHERE PurchaseOrderID BETWEEN 10 AND 20 ORDER BY PurchaseOrderID, ProductID;
GO /* Partial results PO ProductID OrderQty TOTAL Avg Count Min Max 10 320 3 66 22 3 3 60 10 321 3 66 22 3 3 60 10 322 60 66 22 3 3 60 11 438 3 12 3 4 3 3 11 439 3 12 3 4 3 3 11 440 3 12 3 4 3 3 11 441 3 12 3 4 3 3
*/ ------------ -- SQL generate sequence number for partitioned data ------------ -- SQL Server row_number - using partition by with row_number() -- SQL over partition by order by - t sql ranking functions -- SQL Server COUNT aggregate function over partition by -- MSSQL windowing functions USE AdventureWorks; SELECT PO = PurchaseOrderID, VendorID, ShipDate = convert(VARCHAR,ShipDate,111), DailyPOCount = COUNT(*) OVER(PARTITION BY convert(VARCHAR,ShipDate,111) ), SeqNo = Row_Number() OVER(PARTITION BY convert(VARCHAR,ShipDate,111) ORDER BY PurchaseOrderID) FROM Purchasing.PurchaseOrderHeader WHERE YEAR(OrderDate) = 2003 AND MONTH(OrderDate) = 5 ORDER BY PurchaseOrderID GO /* Partial results PO VendorID ShipDate DailyPOCount SeqNo 297 3 2003/05/16 4 1 298 54 2003/05/16 4 2 299 104 2003/05/16 4 3 300 25 2003/05/16 4 4 301 90 2003/05/23 4 1 302 33 2003/05/23 4 2 303 41 2003/05/23 4 3 304 55 2003/05/23 4 4 305 70 2003/06/04 16 1 306 94 2003/06/04 16 2 307 15 2003/06/04 16 3 308 62 2003/06/04 16 4 309 98 2003/06/04 16 5 310 72 2003/06/04 16 6
*/
------------ -- SQL ranking functions over SalesOrderID by OrderQty ------------ -- SQL Server row_number - using the OVER clause with ranking functions -- SQL over partition by SalesOrderID - sql server windowing functions SELECT sod.SalesOrderID AS SO, OrderQty, ROW_NUMBER() OVER(PARTITION BY sod.SalesOrderID ORDER BY OrderQty DESC) AS [SeqNo], RANK() OVER(PARTITION BY sod.SalesOrderID ORDER BY OrderQty DESC) AS [Rank], DENSE_RANK() OVER(PARTITION BY sod.SalesOrderID ORDER BY OrderQty DESC) AS [DenseRank] FROM AdventureWorks.Sales.SalesOrderDetail sod INNER JOIN AdventureWorks.Sales.SalesOrderHeader soh ON soh.SalesOrderID = sod.SalesOrderID WHERE OrderDate = '2004-02-01' AND OrderQty > 1 ORDER BY SalesOrderID, SeqNo GO /* Partial results SO OrderQty SeqNo Rank DenseRank 63293 6 1 1 1 63293 4 2 2 2 63293 3 3 3 3 63293 3 4 3 3 63293 3 5 3 3 63293 3 6 3 3 63293 2 7 7 4 */
------------ -- SQL calculate minimum and maximum salaries by department ------------ -- SQL over partition by - aggregate functions: MIN, MAX USE AdventureWorks; WITH cteLastRaiseDate(EmployeeID,LastChangeDate) AS (SELECT EmployeeID, MAX(RateChangeDate) FROM HumanResources.EmployeePayHistory GROUP BY EmployeeID), ctePayRate(EmployeeID,Rate,Department) AS (SELECT eph.EmployeeID, eph.Rate, d.Name FROM HumanResources.EmployeePayHistory eph INNER JOIN cteLastRaiseDate lrd ON eph.EmployeeID = lrd.EmployeeID AND eph.RateChangeDate = lrd.LastChangeDate INNER JOIN HumanResources.EmployeeDepartmentHistory edh ON eph.EmployeeID = edh.EmployeeID INNER JOIN HumanResources.Department d ON edh.DepartmentID = d.DepartmentID WHERE edh.EndDate IS NULL) SELECT DISTINCT Department, MaxSalary = MAX(Rate) OVER(PARTITION BY Department ), MinSalary = MIN(Rate) OVER(PARTITION BY Department ) FROM ctePayRate ORDER BY Department GO /* Results Department MaxSalary MinSalary Document Control 17.7885 10.25 Engineering 63.4615 32.6923 Executive 125.50 60.0962 Facilities and Maintenance 24.0385 9.25 Finance 43.2692 13.4615 Human Resources 27.1394 13.9423 Information Services 50.4808 27.4038 Marketing 37.50 13.4615 Production 84.1346 9.50 Production Control 24.5192 16.00 Purchasing 30.00 12.75 Quality Assurance 28.8462 10.5769 Research and Development 50.4808 40.8654 Sales 72.1154 23.0769 Shipping and Receiving 19.2308 9.00 Tool Design 29.8462 25.00 */ ------------ -- SQL calculate aggregates by SalesOrderID ------------ -- SQL over partition by USE AdventureWorks; GO SELECT DISTINCT d.SalesOrderID, [Total Quantity] = SUM(OrderQty) OVER(PARTITION BY d.SalesOrderID ), [Average Quantity] = convert(VARCHAR,convert(MONEY,AVG(1.0 * OrderQty) OVER(PARTITION BY d.SalesOrderID ), 1)), [Total Order Count] = COUNT(OrderQty) OVER(PARTITION BY d.SalesOrderID ), [Minimum Order Count] = MIN(OrderQty) OVER(PARTITION BY d.SalesOrderID ), [Maximum Order Count] = MAX(OrderQty) OVER(PARTITION BY d.SalesOrderID ), [Average Amount] = convert(VARCHAR,convert(MONEY,AVG(LineTotal) OVER(PARTITION BY d.SalesOrderID ), 1)), [Total Amount] = convert(VARCHAR,convert(MONEY,SUM(LineTotal) OVER(PARTITION BY d.SalesOrderID ), 1)) FROM Sales.SalesOrderDetail d INNER JOIN Sales.SalesOrderHeader h ON h.SalesOrderID = d.SalesOrderID INNER JOIN Production.Product p ON d.ProductID = p.ProductID WHERE CustomerID = 100 ORDER BY SalesOrderID GO /* Partial results SalesOrderID Total Quantity Average Quantity Total Order Count 51818 81 2.61 31 57188 68 2.19 31 63290 68 2.96 23 69560 79 2.63 30 */
------------
-- Percent on base calculation
-- First & Second COUNT(*) refer to GROUP BY - Third COUNT(*) refers to OVER()
------------
SELECT Color,
COUNT(*) AS ColorFrequency,
((COUNT(*)*100.0) / (SUM(COUNT(*))
OVER())) AS PctColor
FROM Production.Product
GROUP BY Color
ORDER BY Color
/* Color ColorFrequency PctColor
NULL 248 49.206349206349
Black 93 18.452380952380
Blue 26 5.158730158730
Grey 1 0.198412698412
Multi 8 1.587301587301
Red 38 7.539682539682
Silver 43 8.531746031746
Silver/Black 7 1.388888888888
White 4 0.793650793650
Yellow 36 7.142857142857 */
------------
------------
-- SQL calculate daily average traffic batch size
------------
-- SQL over partition by dates
USE tempdb
GO
CREATE TABLE TollgateVehicleTraffic (
DateOfTollCollection DATETIME NOT NULL,
Tolls INT NOT NULL);
GO
-- Populate table with daily batches
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 10,1260);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 9,2160);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 8,2265);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 7,1625);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 6,1550);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 5,1590);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 4,1200);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 4,2200);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 3,2150);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 2,2265);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 3,1465);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 1,1550);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 10,160);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 10,260);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 9,260);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 8,265);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 7,125);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 6,150);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 5,160);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 5,260);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 4,265);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 4,125);
INSERT INTO TollgateVehicleTraffic VALUES (getdate() - 4,150);
GO
SELECT *
FROM TollgateVehicleTraffic
ORDER BY DateOfTollCollection;
GO
/* Partial results
DateOfTollCollection Tolls
2009-02-11 19:53:55.873 1260
2009-02-11 19:53:55.890 160
2009-02-11 19:53:55.890 260
2009-02-12 19:53:55.873 2160
2009-02-12 19:53:55.890 260
2009-02-13 19:53:55.890 265
2009-02-13 19:53:55.890 2265
*/
SELECT DateOfTollCollection = convert(CHAR(10),DateOfTollCollection,112),
Tolls,
FLOOR(AVG(Tolls)
OVER()) AS AvgVehicleTrafficBatchSize
FROM TollgateVehicleTraffic
ORDER BY convert(CHAR(10),DateOfTollCollection,112);
GO
/* Partial results
DateOfTollCollection Tolls AvgVehicleTrafficBatchSize
20090211 1260 1020
20090211 160 1020
20090211 260 1020
*/
-- SQL over partition by - AVG aggregate function
SELECT DISTINCT DateOfTollCollection = convert(CHAR(10),DateOfTollCollection,112),
FLOOR(AVG(Tolls)
OVER(PARTITION BY convert(CHAR(10),DateOfTollCollection,112) ))
AS AvgDailyVehicleTrafficBatchSize
FROM TollgateVehicleTraffic
ORDER BY convert(CHAR(10),DateOfTollCollection,112);
GO
/* Results
DateOfTollCollection AvgDailyVehicleTrafficBatchSize
20090211 560
20090212 1210
20090213 1265
20090214 875
20090215 850
20090216 670
20090217 788
20090218 1807
20090219 2265
20090220 1550
*/
-- Cleanup
DROP TABLE TollgateVehicleTraffic
------------
Related articles:
OVER Clause (Transact-SQL)
SQL Server 2005: Using OVER() with Aggregate Functions
T-SQL Tuesday #016: Aggregates and Windowing Functions and Ranking! Yum!
|