SQL Server 每日自动表分区
SQL Server automated daily table partitioning
我需要重新设计一个用于保存每日日志的表,该表有数亿行。设计思路是只保留一个月的数据。该表将按天分区,并使用日期时间字段作为分区键。例如,对于 3 月,我需要 31 个文件组和 31 个分区。一旦进入 4 月,该过程就必须添加 4 月 1 日的分区,并删除 3 月 1 日的数据和文件组。
我对表、分区和文件组的创建没有问题,正在寻找有关如何创建自动删除过程的建议。也许可以用一个 SQL 作业,通过 `DATEADD(m, -1, GETDATE())` 来删除分区?还有什么我遗漏的吗?
附加问题:我将创建截至 2020 年 12 月 31 日的 year_month_day 分区及相应的文件组。有什么方法可以自动创建它们,还是必须手动运行脚本来创建?
滑动窗口分区维护通常通过计划执行的脚本或存储过程(SQL Server 代理作业或其他调度系统)来完成。应合理规划维护,避免在 `SPLIT` 和 `MERGE` 期间发生代价高昂的数据移动,因为这类数据移动产生的日志量大约是普通 DML 操作的 4 倍。为此,请确保在 `MERGE` 之前包含该边界值的分区为空,并且在 `SPLIT` 时没有大于指定边界值的行。我建议额外创建几个未来分区作为缓冲,这样即使维护没有按计划运行,也能避免数据移动。
下面是一个每日滑动窗口维护脚本示例。由于您使用的是 SQL Server 2005,而分区级 `TRUNCATE` 是在 SQL Server 2016 中才引入的,因此本脚本使用一个分区方式相同的暂存表(staging table)进行清除。请注意,SQL Server 2005 已不再受支持。
我从您的评论中了解到,您认为为每个分区单独使用一个文件组/文件有助于删除分区,但事实并非如此。此示例对所有分区使用同一个文件组。
--example setup
--Creates the daily partition function/scheme, the log table and the staging
--table used by the maintenance script to purge partitions via SWITCH.
--Written without inline DECLARE initializers (SQL Server 2008+) and without the
--inline INDEX column clause (SQL Server 2014+) so it also runs on older versions.
CREATE PARTITION FUNCTION PF_Date (datetime) AS
    RANGE RIGHT FOR VALUES();

--all partitions are mapped to a single filegroup
CREATE PARTITION SCHEME PS_LogTable AS
    PARTITION PF_Date ALL TO ([PRIMARY]);

--first boundary is midnight 31 days ago ('' converts to the datetime base date,
--so the DATEDIFF/DATEADD pair strips the time portion from GETDATE())
DECLARE @PartitionBoundaryDate datetime;
SET @PartitionBoundaryDate = DATEADD(day, -31, DATEADD(day, DATEDIFF(day, '', GETDATE()), ''));

--add one boundary per day while the boundary is earlier than 24 hours from now
WHILE @PartitionBoundaryDate < DATEADD(day, 1, GETDATE())
BEGIN
    ALTER PARTITION SCHEME PS_LogTable NEXT USED [PRIMARY];
    ALTER PARTITION FUNCTION PF_Date() SPLIT RANGE(@PartitionBoundaryDate);
    SET @PartitionBoundaryDate = DATEADD(day, 1, @PartitionBoundaryDate);
END;

--NOTE(review): DateColumn is declared without NOT NULL. Rows with a NULL
--partitioning value are presumably mapped to the leftmost partition, which the
--maintenance script purges on every run - confirm NULLs cannot be inserted.
CREATE TABLE dbo.LogTable(DateColumn datetime) ON PS_LogTable(DateColumn);
CREATE CLUSTERED INDEX cdx ON dbo.LogTable(DateColumn) ON PS_LogTable(DateColumn);

--staging table must match the main table's structure, index and partitioning
--so that ALTER TABLE ... SWITCH PARTITION can move partitions into it
CREATE TABLE dbo.LogTable_Staging(DateColumn datetime) ON PS_LogTable(DateColumn);
CREATE CLUSTERED INDEX cdx ON dbo.LogTable_Staging(DateColumn) ON PS_LogTable(DateColumn);
GO
--example partition maintenance scheduled nightly after midnight
--
--Sliding-window maintenance for dbo.LogTable (partition function PF_Date,
--scheme PS_LogTable):
--  1. purges partition 1 (rows older than the first boundary),
--  2. purges and merges away every boundary older than @RetentionDays,
--  3. creates missing daily boundaries up to @FutureDays ahead.
--Purging is done by switching a partition into dbo.LogTable_Staging and
--truncating the staging table. All work runs in one transaction under an
--exclusive table lock; on error the transaction is rolled back and the error
--is re-raised with its original severity and state.
--
--NOTE(review): if the highest existing boundary is itself expired (the job has
--not run for more than @RetentionDays + @FutureDays days), the partition
--switched out for that boundary is the open-ended last partition and would
--include rows still inside the retention window - confirm this cannot happen
--or add a guard.
BEGIN TRY
    SET NOCOUNT ON;
    SET XACT_ABORT ON;

    --DECLARE and SET are kept separate so the script also runs on versions
    --that do not support inline variable initialization
    DECLARE @RetentionDays int;
    DECLARE @FutureDays int;
    DECLARE @OldestRetainedDate datetime;
    DECLARE @LatestRetainedDate datetime;
    DECLARE @LatestFutureBoundaryDate datetime;
    DECLARE @PartitionBoundaryDate datetime;
    DECLARE @Message nvarchar(2048);

    SET @RetentionDays = 31;
    SET @FutureDays = 7;
    --midnight of the current day; every other date is derived from this single
    --value so all boundaries agree even if the run straddles midnight
    SET @LatestRetainedDate = DATEADD(day, DATEDIFF(day, '', GETDATE()), '');
    SET @OldestRetainedDate = DATEADD(day, -@RetentionDays, @LatestRetainedDate);
    SET @LatestFutureBoundaryDate = DATEADD(day, @FutureDays, @LatestRetainedDate);

    --make sure staging table is empty
    TRUNCATE TABLE dbo.LogTable_Staging;

    BEGIN TRAN;

    --acquire exclusive table lock to avoid deadlocking during maintenance
    --(TOP(0) reads no rows; the statement exists only for the TABLOCKX hint)
    SELECT TOP(0) @PartitionBoundaryDate = DateColumn FROM dbo.LogTable WITH(TABLOCKX);

    --purge partition 1 in case data older than the first boundary was inserted
    SET @Message = 'Purging partition 1';
    PRINT @Message;
    ALTER TABLE dbo.LogTable SWITCH
        PARTITION 1 TO
        dbo.LogTable_Staging PARTITION 1;
    TRUNCATE TABLE dbo.LogTable_Staging;

    --purge and remove expired partitions
    DECLARE @PartitionBoundaries TABLE(PartitionBoundaryDate datetime NOT NULL PRIMARY KEY);
    INSERT INTO @PartitionBoundaries(PartitionBoundaryDate)
    SELECT CAST(prv.value AS datetime)
    FROM sys.partition_functions AS pf
    JOIN sys.partition_range_values AS prv ON prv.function_id = pf.function_id
    WHERE
        pf.name = N'PF_Date'
        AND CAST(prv.value AS datetime) < @OldestRetainedDate;

    --process boundaries oldest-first: the partition to the left of the boundary
    --being merged has then always been purged already, so MERGE combines two
    --empty partitions and moves no data
    DECLARE ExpiredPartitionBoundaries CURSOR LOCAL FAST_FORWARD FOR
        SELECT PartitionBoundaryDate
        FROM @PartitionBoundaries
        ORDER BY PartitionBoundaryDate;
    OPEN ExpiredPartitionBoundaries;
    WHILE 1 = 1
    BEGIN
        FETCH NEXT FROM ExpiredPartitionBoundaries INTO @PartitionBoundaryDate;
        --stop on any non-success status so the loop can never spin forever
        IF @@FETCH_STATUS <> 0 BREAK;

        SET @Message = 'Purging data for ' + CONVERT(char(10), @PartitionBoundaryDate, 120);
        PRINT @Message;
        --$PARTITION resolves the current partition number on each iteration,
        --because partition numbers shift after every MERGE
        ALTER TABLE dbo.LogTable SWITCH
            PARTITION $PARTITION.PF_Date(@PartitionBoundaryDate) TO
            dbo.LogTable_Staging PARTITION $PARTITION.PF_Date(@PartitionBoundaryDate);
        TRUNCATE TABLE dbo.LogTable_Staging;
        ALTER PARTITION FUNCTION PF_Date() MERGE RANGE(@PartitionBoundaryDate);
    END;
    CLOSE ExpiredPartitionBoundaries;
    DEALLOCATE ExpiredPartitionBoundaries;

    --create partitions for future days
    SET @PartitionBoundaryDate = DATEADD(day, 1, @LatestRetainedDate);
    WHILE @PartitionBoundaryDate <= @LatestFutureBoundaryDate
    BEGIN
        --only split when the boundary does not exist yet, so reruns are harmless
        IF NOT EXISTS(SELECT 1
            FROM sys.partition_functions AS pf
            JOIN sys.partition_range_values AS prv ON prv.function_id = pf.function_id
            WHERE
                pf.name = N'PF_Date'
                AND CAST(prv.value AS datetime) = @PartitionBoundaryDate
            )
        BEGIN
            SET @Message = 'Creating partition for ' + CONVERT(char(10), @PartitionBoundaryDate, 120);
            PRINT @Message;
            ALTER PARTITION SCHEME PS_LogTable NEXT USED [PRIMARY];
            ALTER PARTITION FUNCTION PF_Date() SPLIT RANGE(@PartitionBoundaryDate);
        END;
        SET @PartitionBoundaryDate = DATEADD(day, 1, @PartitionBoundaryDate);
    END;

    COMMIT;
END TRY
BEGIN CATCH
    IF @@TRANCOUNT > 0 ROLLBACK;

    --better to use THROW in SQL 2012 and later
    DECLARE
         @ErrorNumber int
        ,@ErrorMessage nvarchar(2048)
        ,@ErrorSeverity int
        ,@ErrorState int
        ,@ErrorLine int;
    SELECT
         @ErrorNumber = ERROR_NUMBER()
        ,@ErrorMessage = ERROR_MESSAGE()
        ,@ErrorSeverity = ERROR_SEVERITY()
        ,@ErrorState = ERROR_STATE()
        ,@ErrorLine = ERROR_LINE();
    --re-raise with the original severity/state; number, line and text are
    --substituted into the message template
    RAISERROR('Error %d caught at line %d: %s'
        ,@ErrorSeverity
        ,@ErrorState
        ,@ErrorNumber
        ,@ErrorLine
        ,@ErrorMessage);
END CATCH;
GO
我需要重新设计一个用于保存每日日志的表,该表有数亿行。设计思路是只保留一个月的数据。该表将按天分区,并使用日期时间字段作为分区键。例如,对于 3 月,我需要 31 个文件组和 31 个分区。一旦进入 4 月,该过程就必须添加 4 月 1 日的分区,并删除 3 月 1 日的数据和文件组。
我对表、分区和文件组的创建没有问题,正在寻找有关如何创建自动删除过程的建议。也许可以用一个 SQL 作业,通过 `DATEADD(m, -1, GETDATE())` 来删除分区?还有什么我遗漏的吗?
附加问题:我将创建截至 2020 年 12 月 31 日的 year_month_day 分区及相应的文件组。有什么方法可以自动创建它们,还是必须手动运行脚本来创建?
滑动窗口分区维护通常通过计划执行的脚本或存储过程(SQL Server 代理作业或其他调度系统)来完成。应合理规划维护,避免在 `SPLIT` 和 `MERGE` 期间发生代价高昂的数据移动,因为这类数据移动产生的日志量大约是普通 DML 操作的 4 倍。为此,请确保在 `MERGE` 之前包含该边界值的分区为空,并且在 `SPLIT` 时没有大于指定边界值的行。我建议额外创建几个未来分区作为缓冲,这样即使维护没有按计划运行,也能避免数据移动。
下面是一个每日滑动窗口维护脚本示例。由于您使用的是 SQL Server 2005,而分区级 `TRUNCATE` 是在 SQL Server 2016 中才引入的,因此本脚本使用一个分区方式相同的暂存表(staging table)进行清除。请注意,SQL Server 2005 已不再受支持。
我从您的评论中了解到,您认为为每个分区单独使用一个文件组/文件有助于删除分区,但事实并非如此。此示例对所有分区使用同一个文件组。
--example setup
--Creates the daily partition function/scheme, the log table and the staging
--table used by the maintenance script to purge partitions via SWITCH.
--Written without inline DECLARE initializers (SQL Server 2008+) and without the
--inline INDEX column clause (SQL Server 2014+) so it also runs on older versions.
CREATE PARTITION FUNCTION PF_Date (datetime) AS
    RANGE RIGHT FOR VALUES();

--all partitions are mapped to a single filegroup
CREATE PARTITION SCHEME PS_LogTable AS
    PARTITION PF_Date ALL TO ([PRIMARY]);

--first boundary is midnight 31 days ago ('' converts to the datetime base date,
--so the DATEDIFF/DATEADD pair strips the time portion from GETDATE())
DECLARE @PartitionBoundaryDate datetime;
SET @PartitionBoundaryDate = DATEADD(day, -31, DATEADD(day, DATEDIFF(day, '', GETDATE()), ''));

--add one boundary per day while the boundary is earlier than 24 hours from now
WHILE @PartitionBoundaryDate < DATEADD(day, 1, GETDATE())
BEGIN
    ALTER PARTITION SCHEME PS_LogTable NEXT USED [PRIMARY];
    ALTER PARTITION FUNCTION PF_Date() SPLIT RANGE(@PartitionBoundaryDate);
    SET @PartitionBoundaryDate = DATEADD(day, 1, @PartitionBoundaryDate);
END;

--NOTE(review): DateColumn is declared without NOT NULL. Rows with a NULL
--partitioning value are presumably mapped to the leftmost partition, which the
--maintenance script purges on every run - confirm NULLs cannot be inserted.
CREATE TABLE dbo.LogTable(DateColumn datetime) ON PS_LogTable(DateColumn);
CREATE CLUSTERED INDEX cdx ON dbo.LogTable(DateColumn) ON PS_LogTable(DateColumn);

--staging table must match the main table's structure, index and partitioning
--so that ALTER TABLE ... SWITCH PARTITION can move partitions into it
CREATE TABLE dbo.LogTable_Staging(DateColumn datetime) ON PS_LogTable(DateColumn);
CREATE CLUSTERED INDEX cdx ON dbo.LogTable_Staging(DateColumn) ON PS_LogTable(DateColumn);
GO
--example partition maintenance scheduled nightly after midnight
--
--Sliding-window maintenance for dbo.LogTable (partition function PF_Date,
--scheme PS_LogTable):
--  1. purges partition 1 (rows older than the first boundary),
--  2. purges and merges away every boundary older than @RetentionDays,
--  3. creates missing daily boundaries up to @FutureDays ahead.
--Purging is done by switching a partition into dbo.LogTable_Staging and
--truncating the staging table. All work runs in one transaction under an
--exclusive table lock; on error the transaction is rolled back and the error
--is re-raised with its original severity and state.
--
--NOTE(review): if the highest existing boundary is itself expired (the job has
--not run for more than @RetentionDays + @FutureDays days), the partition
--switched out for that boundary is the open-ended last partition and would
--include rows still inside the retention window - confirm this cannot happen
--or add a guard.
BEGIN TRY
    SET NOCOUNT ON;
    SET XACT_ABORT ON;

    --DECLARE and SET are kept separate so the script also runs on versions
    --that do not support inline variable initialization
    DECLARE @RetentionDays int;
    DECLARE @FutureDays int;
    DECLARE @OldestRetainedDate datetime;
    DECLARE @LatestRetainedDate datetime;
    DECLARE @LatestFutureBoundaryDate datetime;
    DECLARE @PartitionBoundaryDate datetime;
    DECLARE @Message nvarchar(2048);

    SET @RetentionDays = 31;
    SET @FutureDays = 7;
    --midnight of the current day; every other date is derived from this single
    --value so all boundaries agree even if the run straddles midnight
    SET @LatestRetainedDate = DATEADD(day, DATEDIFF(day, '', GETDATE()), '');
    SET @OldestRetainedDate = DATEADD(day, -@RetentionDays, @LatestRetainedDate);
    SET @LatestFutureBoundaryDate = DATEADD(day, @FutureDays, @LatestRetainedDate);

    --make sure staging table is empty
    TRUNCATE TABLE dbo.LogTable_Staging;

    BEGIN TRAN;

    --acquire exclusive table lock to avoid deadlocking during maintenance
    --(TOP(0) reads no rows; the statement exists only for the TABLOCKX hint)
    SELECT TOP(0) @PartitionBoundaryDate = DateColumn FROM dbo.LogTable WITH(TABLOCKX);

    --purge partition 1 in case data older than the first boundary was inserted
    SET @Message = 'Purging partition 1';
    PRINT @Message;
    ALTER TABLE dbo.LogTable SWITCH
        PARTITION 1 TO
        dbo.LogTable_Staging PARTITION 1;
    TRUNCATE TABLE dbo.LogTable_Staging;

    --purge and remove expired partitions
    DECLARE @PartitionBoundaries TABLE(PartitionBoundaryDate datetime NOT NULL PRIMARY KEY);
    INSERT INTO @PartitionBoundaries(PartitionBoundaryDate)
    SELECT CAST(prv.value AS datetime)
    FROM sys.partition_functions AS pf
    JOIN sys.partition_range_values AS prv ON prv.function_id = pf.function_id
    WHERE
        pf.name = N'PF_Date'
        AND CAST(prv.value AS datetime) < @OldestRetainedDate;

    --process boundaries oldest-first: the partition to the left of the boundary
    --being merged has then always been purged already, so MERGE combines two
    --empty partitions and moves no data
    DECLARE ExpiredPartitionBoundaries CURSOR LOCAL FAST_FORWARD FOR
        SELECT PartitionBoundaryDate
        FROM @PartitionBoundaries
        ORDER BY PartitionBoundaryDate;
    OPEN ExpiredPartitionBoundaries;
    WHILE 1 = 1
    BEGIN
        FETCH NEXT FROM ExpiredPartitionBoundaries INTO @PartitionBoundaryDate;
        --stop on any non-success status so the loop can never spin forever
        IF @@FETCH_STATUS <> 0 BREAK;

        SET @Message = 'Purging data for ' + CONVERT(char(10), @PartitionBoundaryDate, 120);
        PRINT @Message;
        --$PARTITION resolves the current partition number on each iteration,
        --because partition numbers shift after every MERGE
        ALTER TABLE dbo.LogTable SWITCH
            PARTITION $PARTITION.PF_Date(@PartitionBoundaryDate) TO
            dbo.LogTable_Staging PARTITION $PARTITION.PF_Date(@PartitionBoundaryDate);
        TRUNCATE TABLE dbo.LogTable_Staging;
        ALTER PARTITION FUNCTION PF_Date() MERGE RANGE(@PartitionBoundaryDate);
    END;
    CLOSE ExpiredPartitionBoundaries;
    DEALLOCATE ExpiredPartitionBoundaries;

    --create partitions for future days
    SET @PartitionBoundaryDate = DATEADD(day, 1, @LatestRetainedDate);
    WHILE @PartitionBoundaryDate <= @LatestFutureBoundaryDate
    BEGIN
        --only split when the boundary does not exist yet, so reruns are harmless
        IF NOT EXISTS(SELECT 1
            FROM sys.partition_functions AS pf
            JOIN sys.partition_range_values AS prv ON prv.function_id = pf.function_id
            WHERE
                pf.name = N'PF_Date'
                AND CAST(prv.value AS datetime) = @PartitionBoundaryDate
            )
        BEGIN
            SET @Message = 'Creating partition for ' + CONVERT(char(10), @PartitionBoundaryDate, 120);
            PRINT @Message;
            ALTER PARTITION SCHEME PS_LogTable NEXT USED [PRIMARY];
            ALTER PARTITION FUNCTION PF_Date() SPLIT RANGE(@PartitionBoundaryDate);
        END;
        SET @PartitionBoundaryDate = DATEADD(day, 1, @PartitionBoundaryDate);
    END;

    COMMIT;
END TRY
BEGIN CATCH
    IF @@TRANCOUNT > 0 ROLLBACK;

    --better to use THROW in SQL 2012 and later
    DECLARE
         @ErrorNumber int
        ,@ErrorMessage nvarchar(2048)
        ,@ErrorSeverity int
        ,@ErrorState int
        ,@ErrorLine int;
    SELECT
         @ErrorNumber = ERROR_NUMBER()
        ,@ErrorMessage = ERROR_MESSAGE()
        ,@ErrorSeverity = ERROR_SEVERITY()
        ,@ErrorState = ERROR_STATE()
        ,@ErrorLine = ERROR_LINE();
    --re-raise with the original severity/state; number, line and text are
    --substituted into the message template
    RAISERROR('Error %d caught at line %d: %s'
        ,@ErrorSeverity
        ,@ErrorState
        ,@ErrorNumber
        ,@ErrorLine
        ,@ErrorMessage);
END CATCH;
GO