Script on how partitioning can improve performance

By | December 23, 2019
USE MASTER
GO

-- THIS SCRIPT WILL TAKE TIME TO RUN , PLAN ACCORDINGLY.
-- CUP OF COFFEE IS OPTIONAL BUT HIGHLY RECOMMENDED.
---CREATE A DATABASE TO PLAY AROUND WITH
-- MAKE SURE THE HARD DISK HAS SUFFICIENT SPACE -- TYPICALLY 20GB
CREATE DATABASE partitions_demo

-- TO PREVENT TO LOG FROM BLOWING UP. MAKE SURE YOU TAKE A BACKUP TO INITIALIZE
ALTER DATABASE partitions_demo

SET RECOVERY SIMPLE
GO

-- CREATE SINGLE FILEROUP TO HOLD MULTIPLE FILES
ALTER DATABASE partitions_demo ADD FILEGROUP [secondary]
GO

-- ADD MULTIPLE FILES TO THE NEWLY CREATED FILEGROUP
-- PLEASE PAY ATTENTION TO THE FILE PATHS
-- NOT ALL DRIVES ARE CREATED EQUAL.
-- CHANGE AS NEEDED.
ALTER DATABASE partitions_demo ADD FILE (
	name = one
	,filename = 'G:\SQL\Data\one.ndf'
	) TO filegroup [secondary]

ALTER DATABASE partitions_demo ADD FILE (
	name = two
	,filename = 'G:\SQL\Data\two.ndf'
	) TO filegroup [secondary]

ALTER DATABASE partitions_demo ADD FILE (
	name = three
	,filename = 'G:\SQL\Data\three.ndf'
	) TO filegroup [secondary]

ALTER DATABASE partitions_demo ADD FILE (
	name = four
	,filename = 'G:\SQL\Data\four.ndf'
	) TO filegroup [secondary]

ALTER DATABASE partitions_demo ADD FILE (
	name = five
	,filename = 'G:\SQL\Data\five.ndf'
	) TO filegroup [secondary]

ALTER DATABASE partitions_demo ADD FILE (
	name = six
	,filename = 'G:\SQL\Data\six.ndf'
	) TO filegroup [secondary]

ALTER DATABASE partitions_demo ADD FILE (
	name = seven
	,filename = 'G:\SQL\Data\seven.ndf'
	) TO filegroup [secondary]
GO

-- CREATE MULTIPLE FILEGROUPS TO IMPLEMENT PARTITIONING
ALTER DATABASE partitions_demo ADD filegroup [secondaryP1]

ALTER DATABASE partitions_demo ADD filegroup [secondaryP2]

ALTER DATABASE partitions_demo ADD filegroup [secondaryP3]

ALTER DATABASE partitions_demo ADD filegroup [secondaryP4]

ALTER DATABASE partitions_demo ADD filegroup [secondaryP5]

ALTER DATABASE partitions_demo ADD filegroup [secondaryP6]

ALTER DATABASE partitions_demo ADD filegroup [secondaryP7]

--- ADD ONE FILE TO EACH OF THE ABOVE FILEGROUPS 
ALTER DATABASE partitions_demo ADD FILE (
	name = Pone
	,filename = 'G:\SQL\Data\Pone.ndf'
	) TO filegroup [secondaryP1]

ALTER DATABASE partitions_demo ADD FILE (
	name = Ptwo
	,filename = 'G:\SQL\Data\Ptwo.ndf'
	) TO filegroup [secondaryP2]

ALTER DATABASE partitions_demo ADD FILE (
	name = Pthree
	,filename = 'G:\SQL\Data\Pthree.ndf'
	) TO filegroup [secondaryP3]

ALTER DATABASE partitions_demo ADD FILE (
	name = Pfour
	,filename = 'G:\SQL\Data\Pfour.ndf'
	) TO filegroup [secondaryP4]

ALTER DATABASE partitions_demo ADD FILE (
	name = Pfive
	,filename = 'G:\SQL\Data\Pfive.ndf'
	) TO filegroup [secondaryP5]

ALTER DATABASE partitions_demo ADD FILE (
	name = Psix
	,filename = 'G:\SQL\Data\Psix.ndf'
	) TO filegroup [secondaryP6]

ALTER DATABASE partitions_demo ADD FILE (
	name = Pseven
	,filename = 'G:\SQL\Data\Pseven.ndf'
	) TO filegroup [secondaryP7]

-- SWITCH DATABASE CONTEXT
USE partitions_demo
GO

-- CREATE A TABLE TO HOLD DATA IN PRIMARY FILEGROUP AND SINGLE FILE -- CASE 1
CREATE TABLE [dbo].[SingleFile] (
	id INT identity(1, 1) PRIMARY KEY CLUSTERED
	,[object_name] [varchar](128) NOT NULL
	,[counter_name] [varchar](128) NOT NULL
	,[instance_name] [varchar](128) NULL
	,[cntr_value] [bigint] NOT NULL
	,[cntr_type] [int] NOT NULL
	) ON [PRIMARY]
GO

-- CREATE A TABLE TO HOLD DATA IN SECONDARY FILEGROUP AND MULTIPLE FILES -- CASE 2
CREATE TABLE [dbo].[MultipleFiles] (
	id INT identity(1, 1) PRIMARY KEY CLUSTERED
	,[object_name] [varchar](128) NOT NULL
	,[counter_name] [varchar](128) NOT NULL
	,[instance_name] [varchar](128) NULL
	,[cntr_value] [bigint] NOT NULL
	,[cntr_type] [int] NOT NULL
	) ON [Secondary]
GO

-- INSERT DUMMY DATA INTO SINGLE FILE TABLE SO THAT WE CAN TEST
-- CHANGE BATCH COUNT TO INCREASE OR DECREASE THE NUMBER OF ROWS.
SET NOCOUNT ON

INSERT INTO [SingleFile]
SELECT TOP 100000 p.*
FROM sys.dm_os_performance_counters p
CROSS APPLY sys.sysobjects a GO 50

-- MODIFY DATA IN CNTR_TYPE COLUMN SO THAT WE CAN ALSO SCAN 
--ACROSS RANGES OF DATA FOR OTHER EXAMPLES AND TESTING 
-- THIS STEP IS OPTIONAL
UPDATE d
SET cntr_type = ranks
FROM (
	SELECT *
		,ntile(16) OVER (
			ORDER BY id
			) AS ranks
	FROM SingleFile
	) d
GO

--- INSERT DATA INTO TABLE THAT WILL BE PARTITIONED 
SELECT *
INTO Partitioned
FROM [dbo].[SingleFile]
GO

-- START PARTITIONING THE ABOVE TABLE IN 7 GROUPS BECAUSE WE HAVE 7 FILES
DECLARE @partitions INT = (
		SELECT count(*)
		FROM SingleFile
		)

-- CREATE EQUAL NUMBER OF ROWS IN EACH PARTITION
SET @partitions = round(@partitions / 7, 0)

DECLARE @SQL NVARCHAR(4000) = '
CREATE PARTITION FUNCTION [pfunction](int) AS RANGE RIGHT FOR VALUES (' + cast(@partitions AS VARCHAR(10)) + ',' + cast(@partitions * 2 AS VARCHAR(10)) + ',' + cast(@partitions * 3 AS VARCHAR(10)) + ',' + cast(@partitions * 4 AS VARCHAR(10)) + ',' + cast(@partitions * 5 AS VARCHAR(10)) + ',' + cast(@partitions * 6 AS VARCHAR(10)) + ',' + cast(@partitions * 7 AS VARCHAR(10)) + ')'

EXEC sp_executesql @sql
GO

-- MAP EACH PARTITION WITH A FILEGROUP CREATED EARLIER.
CREATE PARTITION SCHEME [pscheme] AS PARTITION [pfunction] TO (
	[secondaryP1]
	,[secondaryP2]
	,[secondaryP3]
	,[secondaryP4]
	,[secondaryP5]
	,[secondaryP6]
	,[secondaryP7]
	,[secondary]
	)
GO

-- REARRANGE THE DATA IN THE TABLE INTO THE NEW PARTITIONS BY CREATING A CLUSTERED INDEX
CREATE CLUSTERED INDEX [CIX_id] ON [dbo].[Partitioned] ([id] ASC)
	WITH (
			PAD_INDEX = OFF
			,STATISTICS_NORECOMPUTE = OFF
			,SORT_IN_TEMPDB = OFF
			,DROP_EXISTING = OFF
			,ONLINE = OFF
			,ALLOW_ROW_LOCKS = ON
			,ALLOW_PAGE_LOCKS = ON
			) ON [pscheme]([id])
GO

-- INSERT DATA INTO MULTIPLE FILE TABLE AS WELL
SET IDENTITY_INSERT MultipleFiles ON

INSERT INTO MultipleFiles (
	[id]
	,[object_name]
	,[counter_name]
	,[instance_name]
	,[cntr_value]
	,[cntr_type]
	)
SELECT [id]
	,[object_name]
	,[counter_name]
	,[instance_name]
	,[cntr_value]
	,[cntr_type]
FROM SingleFile

SET IDENTITY_INSERT MultipleFiles OFF
GO

--- TEST TABLE PERFORMANCE AND NOTICE HOW PARTITIONING 
-- DOESNT OR ONLY MARGINALLY IMPROVES PERFORMANCE
DECLARE @table TABLE (
	cntr_types VARCHAR(100)
	,counts INT
	,source VARCHAR(100)
	)
DECLARE @int INT = round(rand() * 1000000, 0)
DECLARE @int2 INT = @int + round(rand() * 10000, 0)
DECLARE @starttime DATETIME2 = getdate()

INSERT INTO @table
SELECT cntr_type
	,count(*)
	,'Part'
FROM Partitioned
WHERE id BETWEEN @int
		AND @int2
GROUP BY cntr_type

SELECT datediff(ms, @starttime, getdate()) AS 'Partitions'

PRINT '*******************************Without Partitioning****************************'

DECLARE @starttime2 DATETIME2 = getdate()

INSERT INTO @table
SELECT cntr_type
	,count(*)
	,'Single'
FROM MultipleFiles
WHERE id BETWEEN @int
		AND @int2
GROUP BY cntr_type

SELECT datediff(ms, @starttime2, getdate()) AS 'Single File'

PRINT '******************************* After Single File****************************'

DECLARE @starttime3 DATETIME2 = getdate()

INSERT INTO @table
SELECT cntr_type
	,count(*)
	,'Multiple'
FROM SingleFile
WHERE id BETWEEN @int
		AND @int2
GROUP BY cntr_type

SELECT datediff(ms, @starttime3, getdate()) AS 'Multiple Files'

SELECT *
FROM @table

USE [partitions_demo]
GO

---- CREATE THE RIGHT INDEXES TO SUPPORT THE ABOVE QUERIES
CREATE NONCLUSTERED INDEX [NCIX_cntr_type] ON [dbo].[Partitioned] ([cntr_type] ASC)
GO

CREATE NONCLUSTERED INDEX [NCIX_cntr_type] ON [dbo].[SingleFile] ([cntr_type] ASC)
GO

CREATE NONCLUSTERED INDEX [NCIX_cntr_type] ON [dbo].[MultipleFiles] ([cntr_type] ASC)
GO

--- RERUN THE QUERIES AGAIN
--- TEST PERFORMANCE AND NOTICE HOW PERFORMANCE HAS INCREASED NOW THAT PARTITION
-- ELIMINATION HAS COME INTO PLAY
DECLARE @table TABLE (
	cntr_types VARCHAR(100)
	,counts INT
	,source VARCHAR(100)
	)
DECLARE @int INT = round(rand() * 1000000, 0)
DECLARE @int2 INT = @int + round(rand() * 10000, 0)
DECLARE @starttime DATETIME2 = getdate()

INSERT INTO @table
SELECT cntr_type
	,count(*)
	,'Part'
FROM Partitioned
WHERE id BETWEEN @int
		AND @int2
GROUP BY cntr_type

SELECT datediff(ms, @starttime, getdate()) AS 'Partitions'

PRINT '*******************************Without Partitioning****************************'

DECLARE @starttime2 DATETIME2 = getdate()

INSERT INTO @table
SELECT cntr_type
	,count(*)
	,'Single'
FROM MultipleFiles
WHERE id BETWEEN @int
		AND @int2
GROUP BY cntr_type

SELECT datediff(ms, @starttime2, getdate()) AS 'Single File'

PRINT '******************************* After Single File****************************'

DECLARE @starttime3 DATETIME2 = getdate()

INSERT INTO @table
SELECT cntr_type
	,count(*)
	,'Multiple'
FROM SingleFile
WHERE id BETWEEN @int
		AND @int2
GROUP BY cntr_type

SELECT datediff(ms, @starttime3, getdate()) AS 'Multiple Files'

SELECT *
FROM @table
GO

--- REPEAT ON SERVER WITH MULTIPLE DRIVES.
-- CHECK LOGICAL AND PHYSICAL READS
-- CHECK THE EXECUTION PLAN
-- BE NICE TO YOUR DBA

In this post I have pasted the script used in the Youtube video on how Partitioning can improve query performance.

[codesyntax lang=”tsql”] USE MASTER GO — THIS SCRIPT WILL TAKE TIME TO RUN , PLAN ACCORDINGLY. — CUP OF COFFEE IS OPTIONAL BUT HIGHLY RECOMMENDED. —CREATE A DATABASE TO PLAY AROUND WITH — MAKE SURE THE HARD DISK HAS SUFFICENT SPACE — TYPICALLY 20GB CREATE database partitions_demo — TO PREVENT TO LOG FROM BLOWING UP. MAKE SURE YOU TAKE A BACKUP TO INITIALIZE ALTER DATABASE partitions_demo SET RECOVERY SIMPLE GO — CREATE SINGLE FILEROUP TO HOLD MULTIPLE FILES ALTER DATABASE partitions_demo ADD FILEGROUP [secondary] GO — ADD MULTIPLE FILES TO THE NEWLY CREATED FILEGROUP — PLEASE PAY ATTENTION TO THE FILE PATHS — NOT ALL DRIVES ARE CREATED EQUAL. — CHANGE AS NEEDED. alter database partitions_demo add file ( name = one , filename=’G:\SQL\Data\one.ndf’) to filegroup [secondary] alter database partitions_demo add file ( name = two , filename=’G:\SQL\Data\two.ndf’) to filegroup [secondary] alter database partitions_demo add file ( name = three , filename=’G:\SQL\Data\three.ndf’) to filegroup [secondary] alter database partitions_demo add file ( name =four , filename=’G:\SQL\Data\four.ndf’) to filegroup [secondary] alter database partitions_demo add file ( name = five , filename=’G:\SQL\Data\five.ndf’) to filegroup [secondary] alter database partitions_demo add file ( name = six , filename=’G:\SQL\Data\six.ndf’) to filegroup [secondary] alter database partitions_demo add file ( name = seven , filename=’G:\SQL\Data\seven.ndf’) to filegroup [secondary] GO — CREATE MULTIPLE FILEGROUPS TO IMPLEMENT PARTITIONING alter database partitions_demo add filegroup [secondaryP1] alter database partitions_demo add filegroup [secondaryP2] alter database partitions_demo add filegroup [secondaryP3] alter database partitions_demo add filegroup [secondaryP4] alter database partitions_demo add filegroup [secondaryP5] alter database partitions_demo add filegroup [secondaryP6] alter database partitions_demo add filegroup [secondaryP7] — ADD ONE FILE TO EACH OF THE ABOVE FILEGROUPS alter database partitions_demo add file ( name = Pone , filename=’G:\SQL\Data\Pone.ndf’) to filegroup [secondaryP1] alter database partitions_demo add file ( name = Ptwo , filename=’G:\SQL\Data\Ptwo.ndf’) to filegroup [secondaryP2] alter database partitions_demo add file ( name = Pthree , filename=’G:\SQL\Data\Pthree.ndf’) to filegroup [secondaryP3] alter database partitions_demo add file ( name =Pfour , filename=’G:\SQL\Data\Pfour.ndf’) to filegroup [secondaryP4] alter database partitions_demo add file ( name = Pfive , filename=’G:\SQL\Data\Pfive.ndf’) to filegroup [secondaryP5] alter database partitions_demo add file ( name = Psix , filename=’G:\SQL\Data\Psix.ndf’) to filegroup [secondaryP6] alter database partitions_demo add file ( name = Pseven , filename=’G:\SQL\Data\Pseven.ndf’) to filegroup [secondaryP7] — SWITCH DATABASE CONTEXT USE partitions_demo GO — CREATE A TABLE TO HOLD DATA IN PRIMARY FILEGROUP AND SINGLE FILE — CASE 1 CREATE TABLE [dbo].[SingleFile]( id int identity(1,1) primary key clustered, [object_name] [varchar](128) NOT NULL, [counter_name] [varchar](128) NOT NULL, [instance_name] [varchar](128) NULL, [cntr_value] [bigint] NOT NULL, [cntr_type] [int] NOT NULL ) ON [PRIMARY] GO — CREATE A TABLE TO HOLD DATA IN SECONDARY FILEGROUP AND MULTIPLE FILES — CASE 2 CREATE TABLE [dbo].[MultipleFiles]( id int identity(1,1) primary key clustered, [object_name] [varchar](128) NOT NULL, [counter_name] [varchar](128) NOT NULL, [instance_name] [varchar](128) NULL, [cntr_value] [bigint] NOT NULL, [cntr_type] [int] NOT NULL ) ON [Secondary] GO — INSERT DUMMY DATA INTO SINGLE FILE TABLE SO THAT WE CAN TEST — CHANGE BATCH COUNT TO INCREASE OR DECREASE THE NUMBER OF ROWS. SET NOCOUNT ON INSERT INTO [SingleFile] SELECT top 100000 p.* FROM sys.dm_os_performance_counters p CROSS APPLY sys.sysobjects a GO 50 — MODIFY DATA IN CNTR_TYPE COLUMN SO THAT WE CAN ALSO SCAN –ACROSS RANGES OF DATA FOR OTHER EXAMPLES AND TESTING — THIS STEP IS OPTIONAL update d set cntr_type = ranks from ( select * , ntile(16) over ( order by id) as ranks from SingleFile ) d GO — INSERT DATA INTO TABLE THAT WILL BE PARTITIONED SELECT * INTO Partitioned FROM [dbo].[SingleFile] GO — START PARTITIONING THE ABOVE TABLE IN 7 GROUPS BECAUSE WE HAVE 7 FILES DECLARE @partitions int =(SELECT count(*) FROM SingleFile) — CREATE EQUAL NUMBER OF ROWS IN EACH PARTITION SET @partitions = round(@partitions/7,0) DECLARE @SQL NVARCHAR(4000)=’ CREATE PARTITION FUNCTION [pfunction](int) AS RANGE RIGHT FOR VALUES (‘+cast(@partitions as varchar(10))+’,’+cast(@partitions*2 as varchar(10))+’,’+cast(@partitions*3 as varchar(10))+’,’+cast(@partitions*4 as varchar(10))+’,’+cast(@partitions*5 as varchar(10))+’,’+cast(@partitions*6 as varchar(10))+’,’+cast(@partitions*7 as varchar(10))+’)’ EXEC sp_executesql @sql GO — MAP EACH PARTITION WITH A FILEGROUP CREATED EARLIER. CREATE PARTITION SCHEME [pscheme] AS PARTITION [pfunction] TO ([secondaryP1], [secondaryP2], [secondaryP3], [secondaryP4], [secondaryP5], [secondaryP6], [secondaryP7], [secondary]) GO — REARRANGE THE DATA IN THE TABLE INTO THE NEW PARTITIONS BY CREATING A CLUSTERED INDEX CREATE CLUSTERED INDEX [CIX_id] ON [dbo].[Partitioned] ( [id] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [pscheme]([id]) GO — INSERT DATA INTO MULTIPLE FILE TABLE AS WELL SET IDENTITY_INSERT MultipleFiles on INSERT INTO MultipleFiles ([id], [object_name], [counter_name], [instance_name], [cntr_value], [cntr_type]) SELECT [id], [object_name], [counter_name], [instance_name], [cntr_value], [cntr_type] FROM SingleFile SET IDENTITY_INSERT MultipleFiles OFF GO — TEST TABLE PERFORMANCE AND NOTICE HOW PARTITIONING — DOESNT OR ONLY MARGINALLY IMPROVES PERFORMANCE DECLARE @table TABLE ( cntr_types VARCHAR(100) , counts INT , source VARCHAR(100)) DECLARE @int INT = round(rand()*1000000,0) DECLARE @int2 INT = @int + round(rand()*10000,0) declare @starttime datetime2 = getdate() insert into @table select cntr_type , count(*) , ‘Part’ from Partitioned where id between @int and @int2 group by cntr_type select datediff(ms , @starttime , getdate()) as ‘Partitions’ PRINT ‘*******************************Without Partitioning****************************’ declare @starttime2 datetime2 = getdate() insert into @table select cntr_type , count(*) , ‘Single’ from MultipleFiles where id between @int and @int2 group by cntr_type select datediff(ms , @starttime2 , getdate()) as ‘Single File’ PRINT ‘******************************* After Single File****************************’ declare @starttime3 datetime2 = getdate() insert into @table select cntr_type , count(*), ‘Multiple’ from SingleFile where id between @int and @int2 group by cntr_type select datediff(ms , @starttime3 , getdate()) as ‘Multiple Files’ select * from @table USE [partitions_demo] GO —- CREATE THE RIGHT INDEXES TO SUPPORT THE ABOVE QUERIES CREATE NONCLUSTERED INDEX [NCIX_cntr_type] ON [dbo].[Partitioned] ( [cntr_type] ASC ) GO CREATE NONCLUSTERED INDEX [NCIX_cntr_type] ON [dbo].[SingleFile] ( [cntr_type] ASC ) GO CREATE NONCLUSTERED INDEX [NCIX_cntr_type] ON [dbo].[MultipleFiles] ( [cntr_type] ASC ) GO — RERUN THE QUERIES AGAIN — TEST PERFORMANCE AND NOTICE HOW PERFORMANCE HAS INCREASED NOW THAT PARTITION — ELIMINATION HAS COME INTO PLAY DECLARE @table TABLE ( cntr_types VARCHAR(100) , counts INT , source VARCHAR(100)) DECLARE @int INT = round(rand()*1000000,0) DECLARE @int2 INT = @int + round(rand()*10000,0) declare @starttime datetime2 = getdate() insert into @table select cntr_type , count(*) , ‘Part’ from Partitioned where id between @int and @int2 group by cntr_type select datediff(ms , @starttime , getdate()) as ‘Partitions’ PRINT ‘*******************************Without Partitioning****************************’ declare @starttime2 datetime2 = getdate() insert into @table select cntr_type , count(*) , ‘Single’ from MultipleFiles where id between @int and @int2 group by cntr_type select datediff(ms , @starttime2 , getdate()) as ‘Single File’ PRINT ‘******************************* After Single File****************************’ declare @starttime3 datetime2 = getdate() insert into @table select cntr_type , count(*), ‘Multiple’ from SingleFile where id between @int and @int2 group by cntr_type select datediff(ms , @starttime3 , getdate()) as ‘Multiple Files’ select * from @table GO — REPEAT ON SERVER WITH MULTIPLE DRIVES. — CHECK LOGICAL AND PHYSICAL READS — CHECK THE EXECUTION PLAN — BE NICE TO YOUR DBA [/codesyntax]