SQL Server 每日自动表分区

sql server automated daily table partitioning

我需要重新设计一个用于保存每日日志的表,它有数亿行。设计思路是只保留一个月的数据。该表将按天分区,使用日期时间字段作为分区键。例如,对于 3 月,我需要有 31 个文件组和 31 个分区。一旦进入 4 月,该过程必须新增 4 月 1 日的分区,并删除 3 月 1 日的数据和文件组。

创建分区表和文件组对我来说没有问题,我想请教的是如何创建自动清除过程。也许可以用一个 SQL 作业,通过 dateadd(m,-1,getdate()) 来删除分区?我还遗漏了什么吗?

附加问题:我将创建截至 2020 年 12 月 31 日的 year_month_day_partitions 以及相应的文件组。有没有办法自动生成这些脚本,还是必须手动编写并运行这些脚本?

滑动窗口(sliding window)分区维护通常通过计划执行的脚本或存储过程(SQL Server 代理作业或其他调度系统)来完成。维护操作应事先规划,以避免在 SPLIT 和 MERGE 期间发生代价高昂的数据移动,因为这种数据移动产生的日志量大约是正常 DML 操作的 4 倍。为此,请确保在 MERGE 之前,包含该边界值的分区是空的;并且在 SPLIT 时,没有任何行大于指定的边界值。我建议额外创建几个未来的分区作为缓冲,以防维护未按计划运行时发生数据移动。

下面是一个每日滑动窗口维护脚本的示例。由于您使用的是 SQL Server 2005,而分区级别的 TRUNCATE 是在 SQL Server 2016 中才引入的,因此本示例使用一个分区方式相同的暂存表(staging table)来清除数据。请注意,SQL Server 2005 已不再受支持。

我从您的评论中了解到,您认为为每个分区使用单独的文件组/文件可能有助于删除分区,但事实并非如此。此示例对所有分区使用同一个文件组。

--example setup
-- Builds the demo objects used by the maintenance script:
--   * PF_Date     - datetime partition function, RANGE RIGHT, initially with no boundaries
--   * PS_LogTable - partition scheme mapping every partition of PF_Date to [PRIMARY]
--   * dbo.LogTable / dbo.LogTable_Staging - two identically partitioned tables
CREATE PARTITION FUNCTION PF_Date (datetime)
AS RANGE RIGHT FOR VALUES ();

CREATE PARTITION SCHEME PS_LogTable
AS PARTITION PF_Date ALL TO ([PRIMARY]);

-- DATEADD/DATEDIFF against '' strips the time portion of GETDATE().
DECLARE @TodayMidnight datetime = DATEADD(day, DATEDIFF(day, '', GETDATE()), '');

-- First boundary to create: midnight 31 days before today.
DECLARE @NextBoundary datetime = DATEADD(day, -31, @TodayMidnight);

-- Add one boundary per day while the boundary is earlier than "now + 1 day".
WHILE @NextBoundary < DATEADD(day, 1, GETDATE())
BEGIN
    -- A filegroup must be marked NEXT USED before each SPLIT.
    ALTER PARTITION SCHEME PS_LogTable
        NEXT USED [PRIMARY];

    ALTER PARTITION FUNCTION PF_Date()
        SPLIT RANGE (@NextBoundary);

    SET @NextBoundary = DATEADD(day, 1, @NextBoundary);
END;

-- NOTE(review): the inline INDEX clause needs SQL Server 2014 or later - confirm target version.
CREATE TABLE dbo.LogTable
(
    DateColumn datetime INDEX cdx CLUSTERED
) ON PS_LogTable(DateColumn);

-- Same definition and partition scheme as dbo.LogTable; used as the SWITCH target when purging.
CREATE TABLE dbo.LogTable_Staging
(
    DateColumn datetime INDEX cdx CLUSTERED
) ON PS_LogTable(DateColumn);
GO

--example partition maintenance scheduled nightly after midnight
-- Sliding-window maintenance for dbo.LogTable (partition function PF_Date):
--   1. empties partition 1 (rows older than the first boundary),
--   2. purges and merges away every boundary older than @RetentionDays days,
--   3. splits in any missing daily boundary from tomorrow through @FutureDays days ahead.
-- Purging is done by switching a partition into dbo.LogTable_Staging and truncating
-- the staging table. Steps 1-3 run inside one transaction; on any error the
-- transaction is rolled back and the error is re-raised with RAISERROR.
BEGIN TRY
    SET NOCOUNT ON;
    SET XACT_ABORT ON;
    DECLARE @RetentionDays int = 31;
    DECLARE @FutureDays int = 7;
    -- DATEADD/DATEDIFF against '' strips the time portion of GETDATE().
    DECLARE @OldestRetainedDate datetime = DATEADD(day, -@RetentionDays, DATEADD(day, DATEDIFF(day, '', GETDATE()), ''));
    DECLARE @LatestRetainedDate datetime = DATEADD(day, DATEDIFF(day, '', GETDATE()), '');
    DECLARE @LatestFutureBoundaryDate datetime = DATEADD(day, @FutureDays, @LatestRetainedDate);
    DECLARE @PartitionBoundaryDate datetime;
    DECLARE @Message nvarchar(2048);

    --make sure staging table is empty
    TRUNCATE TABLE dbo.LogTable_Staging;

    BEGIN TRAN;
    --acquire exclusive table lock to avoid deadlocking during maintenance
    -- TOP(0) returns no rows; the statement exists only to take the TABLOCKX lock.
    SELECT TOP(0) @PartitionBoundaryDate = DateColumn FROM dbo.LogTable WITH(TABLOCKX);

    --purge partition 1 in case data older than the first boundary was inserted
    SET @Message = 'Purging partition 1';
    PRINT @Message;
    ALTER TABLE dbo.LogTable SWITCH
        PARTITION 1 TO
        dbo.LogTable_Staging PARTITION 1;
    TRUNCATE TABLE dbo.LogTable_Staging;

    --purge and remove expired partitions
    -- Snapshot the expired boundaries first: the loop below changes the
    -- partition function's boundary list while iterating.
    DECLARE @PartitionBoundaries TABLE(PartitionBoundaryDate datetime NOT NULL PRIMARY KEY);
    INSERT INTO @PartitionBoundaries(PartitionBoundaryDate)
        SELECT CAST(prv.value AS datetime)
        FROM sys.partition_functions AS pf
        JOIN sys.partition_range_values AS prv ON prv.function_id = pf.function_id
        WHERE
            pf.name = N'PF_Date'
            AND CAST(prv.value AS datetime) < @OldestRetainedDate;
    -- ORDER BY makes the purge run oldest-first deterministically instead of
    -- relying on whatever order the table variable happens to be scanned in.
    DECLARE ExpiredPartitionBoundaries CURSOR LOCAL FAST_FORWARD FOR
        SELECT PartitionBoundaryDate
        FROM @PartitionBoundaries
        ORDER BY PartitionBoundaryDate;

    OPEN ExpiredPartitionBoundaries;
    WHILE 1 = 1
    BEGIN
        FETCH NEXT FROM ExpiredPartitionBoundaries INTO @PartitionBoundaryDate;
        -- Stop on any unsuccessful fetch, not just end-of-set (-1), so a stale
        -- @PartitionBoundaryDate value is never processed a second time.
        IF @@FETCH_STATUS <> 0 BREAK;
        SET @Message = 'Purging data for ' + CONVERT(char(10), @PartitionBoundaryDate, 120);
        PRINT @Message;
        -- Empty the partition holding this boundary value before the MERGE.
        ALTER TABLE dbo.LogTable SWITCH
            PARTITION $PARTITION.PF_Date(@PartitionBoundaryDate) TO
            dbo.LogTable_Staging PARTITION $PARTITION.PF_Date(@PartitionBoundaryDate);
        TRUNCATE TABLE dbo.LogTable_Staging;
        ALTER PARTITION FUNCTION PF_Date() MERGE RANGE(@PartitionBoundaryDate);
    END;
    CLOSE ExpiredPartitionBoundaries;
    DEALLOCATE ExpiredPartitionBoundaries;

    --create partitions for future days
    -- Walk from tomorrow through @LatestFutureBoundaryDate and add only the
    -- boundaries that do not exist yet.
    SET @PartitionBoundaryDate = DATEADD(day, 1, @LatestRetainedDate);
    WHILE @PartitionBoundaryDate <= @LatestFutureBoundaryDate
    BEGIN
        IF NOT EXISTS(SELECT 1
            FROM sys.partition_functions AS pf
            JOIN sys.partition_range_values AS prv ON prv.function_id = pf.function_id
            WHERE
                pf.name = N'PF_Date'
                AND CAST(prv.value AS datetime) = @PartitionBoundaryDate
        )
        BEGIN
            SET @Message = 'Creating partition for ' + CONVERT(char(10), @PartitionBoundaryDate, 120);
            PRINT @Message;
            -- A filegroup must be marked NEXT USED before each SPLIT.
            ALTER PARTITION SCHEME PS_LogTable NEXT USED [PRIMARY];
            ALTER PARTITION FUNCTION PF_Date() SPLIT RANGE(@PartitionBoundaryDate);
        END;
        SET @PartitionBoundaryDate = DATEADD(day, 1, @PartitionBoundaryDate);
    END;

    COMMIT;
END TRY
BEGIN CATCH

    -- Undo any partially applied maintenance before reporting the error.
    IF @@TRANCOUNT > 0 ROLLBACK;

    --better to use THROW in SQL 2012 and later
    DECLARE
         @ErrorNumber int
        ,@ErrorMessage nvarchar(2048)
        ,@ErrorSeverity int
        ,@ErrorState int
        ,@ErrorLine int;

    SELECT
        @ErrorNumber = ERROR_NUMBER()
        ,@ErrorMessage = ERROR_MESSAGE()
        ,@ErrorSeverity = ERROR_SEVERITY()
        ,@ErrorState = ERROR_STATE()
        ,@ErrorLine = ERROR_LINE();

    -- Re-raise with the original severity and state; the original error number,
    -- line and message are substituted into the format string.
    RAISERROR('Error %d caught at line %d: %s'
        ,@ErrorSeverity
        ,@ErrorState
        ,@ErrorNumber
        ,@ErrorLine
        ,@ErrorMessage);

END CATCH;
GO