|
The following
sample SQL Server T-SQL script demonstrates the deletion of duplicates on columnA using
a derived table (b) w:
DELETE a
FROM TableX a
INNER JOIN (SELECT columnA,
minID = min(TableXID)
FROM TableX
GROUP BY columnA) b
ON a.columnA = b.columnA
AND a.TableXID > b.minID
Starting with SQL Server 2005, CTE can be used to delete duplicates.
-- T-SQL using ranking function ROW_NUMBER to delete duplicates
USE tempdb;
SELECT EmployeeId = CONVERT(INT,EmployeeID), ManagerID
INTO Employee
FROM AdventureWorks.HumanResources.Employee
GO
INSERT Employee VALUES (100, 109)
INSERT Employee VALUES (150, 109)
INSERT Employee VALUES (190, 109)
GO
;WITH CTE AS
(SELECT rn=ROW_NUMBER() OVER( PARTITION BY EmployeeID ORDER BY ManagerID), *
FROM Employee)
DELETE CTE
WHERE rn > 1
GO
-- (3 row(s) affected)
DROP TABLE Employee
------------
-- SQL eliminate duplicates with DISTINCT
------------
USE tempdb;
GO
-- Create test data
SELECT ProductID=CONVERT(int, ProductID), ProductName=Name,
ListPrice, Color
INTO Product
FROM AdventureWorks2008.Production.Product
GO
-- (504 row(s) affected)
-- Create duplicates
INSERT Product SELECT * FROM Product
GO
-- (504 row(s) affected)
-- Eliminate duplicates with DISTINCT
SELECT DISTINCT * INTO newProduct FROM Product
-- (504 row(s) affected)
SELECT COUNT(*) FROM Product -- 1008
SELECT COUNT(*) FROM newProduct -- 504
GO
DROP TABLE tempdb.dbo.Product
DROP TABLE tempdb.dbo.newProduct
GO
------------
Related KB article: How to remove duplicate rows from a table in SQL Server
|