Execute the following Microsoft SQL Server T-SQL sample scripts in Management Studio Query Editor to delete/remove duplicate rows/records from a table and find the dups in a table with composite key which was not enforced as unique:
-- SQL delete / remove duplicates - Quick Syntax
-- T-SQL remove duplicates on ProductID column
WITH cteDupProduct
AS (SELECT DupID = ROW_NUMBER()
OVER(PARTITION BY ProductID ORDER BY (SELECT 1))
FROM AdventureWorks2008.Production.Product)
DELETE FROM cteDupProduct
WHERE DupID > 1
GO
-- T-SQL find duplicates on ProductID column
WITH cteDupProduct
AS (SELECT DupID = ROW_NUMBER()
OVER(PARTITION BY ProductID ORDER BY (SELECT 1))
FROM AdventureWorks2008.Production.Product)
SELECT * FROM cteDupProduct
WHERE DupID > 1
GO
-- SQL find the number of duplicate rows / records in a table
USE AdventureWorks2008;
SELECT DuplicateRows =
-- Count all rows in a table
(SELECT COUNT(* ) FROM Purchasing.PurchaseOrderDetail) -
-- -- Count distinct rows in a table
(SELECT DistinctRows = COUNT(* )
FROM (SELECT DISTINCT *
FROM Purchasing.PurchaseOrderDetail) pod)
GO
/* DuplicateRows
0 */
-- SQL dedupe - the fastest and simplest for entire row duplicates
SELECT DISTINCT *
INTO tempdb.dbo.pod
FROM AdventureWorks2008.Purchasing.PurchaseOrderDetail
GO
-- (8845 row(s) affected) ----------
-- Find SalesOrderID duplicates in SalesOrderDetail table
-- Business meaning: multiple items ordered on the same Sales Order
SELECT SalesOrderID, COUNT(SalesOrderDetailID) AS Items
FROM AdventureWorks2008.Sales.SalesOrderDetail
GROUP BY SalesOrderID
HAVING COUNT(SalesOrderDetailID) > 1
ORDER BY SalesOrderID ;
/*
SalesOrderID Items
43659 12
43660 2
43661 15
.... */
-- SQL delete duplicate records from a table - sql delete duplicate rows in a table
-- SQL remove duplicates from a table - delete duplicate rows from a table
-- SQL select into table create for demo
USE tempdb;
SELECT ProductID = CONVERT(SMALLINT,ProductID),
Product = Name,
StandardCost,
ListPrice,
Color,
Size
INTO Product
FROM AdventureWorks2008.Production.Product;
GO
-- (504 row(s) affected)
-- SQL insert duplicate records for dedup
INSERT Product
SELECT TOP 200 ProductID,
Name,
StandardCost,
ListPrice,
Color,
Size
FROM AdventureWorks2008.Production.Product
ORDER BY NEWID();
-- (200 row(s) affected)
-- Sequence ID numbering the duplicate ProductIDs with ROW_NUMBER
WITH cteDupProduct
AS (SELECT DupID = ROW_NUMBER()
OVER(PARTITION BY ProductID ORDER BY (select 1)),
*
FROM Product)
SELECT *
FROM cteDupProduct
ORDER BY ProductID
GO
/*
DupID ProductID Product StandardCost
1 1 Adjustable Race 0.00
1 2 Bearing Ball 0.00
1 3 BB Ball Bearing 0.00
1 4 Headset Ball Bearings 0.00
2 4 Headset Ball Bearings 0.00
1 316 Blade 0.00
2 316 Blade 0.00
...
*/
-- T-SQL DELETE records where DupID > 1
WITH cteDupProduct
AS (SELECT DupID = ROW_NUMBER()
OVER(PARTITION BY ProductID ORDER BY (SELECT 1)),
*
FROM Product)
DELETE FROM cteDupProduct
WHERE DupID > 1
GO
-- (200 row(s) affected)
-- Verifying dedup results
SELECT DuplicateRows =
(SELECT COUNT(* ) FROM Product) -
(SELECT DistinctRows = COUNT(* )
FROM (SELECT DISTINCT * FROM Product) p)
GO
-- 0
;WITH cteDupProduct
AS (SELECT DupID = ROW_NUMBER()
OVER(PARTITION BY ProductID ORDER BY (SELECT 1)),
*
FROM Product)
SELECT *
FROM cteDupProduct
ORDER BY ProductID
GO
/*
DupID ProductID Product StandardCost
1 1 Adjustable Race 0.00
1 2 Bearing Ball 0.00
1 3 BB Ball Bearing 0.00
1 4 Headset Ball Bearings 0.00
1 316 Blade 0.00
...
*/
DROP TABLE tempdb.dbo.Product
GO
------------
------------
-- SQL find duplicate rows in a table with 2-part keys
------------
-- Find duplicate rows in database tables
USE DatabaseZ;
SELECT t.* FROM SchemaY.TableX as t
JOIN
( SELECT KeyPart1ID , KeyPart2ID
FROM SchemaY.TableX
GROUP BY KeyPart1ID , KeyPart2ID,
ColumnA, ColumnB, ColumnC
HAVING COUNT(*) > 1 ) as d
ON t.KeyPart1ID = d.KeyPart1ID
AND t.KeyPart2ID = d.KeyPart2ID
------------ |