如何在 T-SQL 中按分组展示层次结构

How to Expose a Hierarchy in Groups in T-SQL

我正在尝试找出一种方法来处理这种层次结构,但我不确定具体如何操作。为了简单起见,我制作了这个示例数据:

-- Sample data: a file-system tree stored as an adjacency list.
-- Every row is either a 'Folder' or a 'File'; PARENT_ID holds the ID of the
-- containing folder. The root row ('/Home') uses PARENT_ID = 0, which matches
-- no ID in the table.
CREATE TABLE #EXAMPLE
(
    ID        INT,
    PARENT_ID INT,
    [PATH]    VARCHAR(1000),
    [TYPE]    VARCHAR(10)
);

INSERT INTO #EXAMPLE (ID, PARENT_ID, [PATH], [TYPE])
VALUES
    (63812, 0,     '/Home', 'Folder'),
    (55225, 63812, '/Home/User1', 'Folder'),
    (92901, 63812, '/Home/User2', 'Folder'),
    (40353, 63812, '/Home/User3', 'Folder'),
    (96959, 55225, '/Home/User1/Item1', 'File'),
    (97231, 55225, '/Home/User1/Item2', 'File'),
    (53339, 92901, '/Home/User2/Item1', 'File'),
    (58034, 92901, '/Home/User2/Music', 'Folder'),
    (65023, 58034, '/Home/User2/Music/Item1', 'File'),
    (72657, 58034, '/Home/User2/Music/Item2', 'File'),
    (19406, 58034, '/Home/User2/Music/Item3', 'File'),
    (56515, 58034, '/Home/User2/Music/Item4', 'File'),
    (68394, 58034, '/Home/User2/Music/Item5', 'File'),
    (42813, 92901, '/Home/User2/Movies', 'Folder'),
    (32781, 42813, '/Home/User2/Movies/Item1', 'File'),
    (96579, 40353, '/Home/User3/Scripts', 'Folder'),
    (36300, 96579, '/Home/User3/Scripts/Item1', 'File'),
    (59930, 96579, '/Home/User3/Scripts/SQL', 'Folder'),
    (83700, 59930, '/Home/User3/Scripts/SQL/Item1', 'File'),
    (66753, 59930, '/Home/User3/Scripts/SQL/Item2', 'File'),
    (34377, 96579, '/Home/User3/Scripts/Other', 'Folder'),
    (20666, 34377, '/Home/User3/Scripts/Other/Item1', 'File'),
    (23786, 34377, '/Home/User3/Scripts/Other/Item2', 'File'),
    (87107, 34377, '/Home/User3/Scripts/Other/Item3', 'File'),
    (29557, 34377, '/Home/User3/Scripts/Other/Item4', 'File');

-- Show the raw rows (all four columns, in table order).
SELECT
    E.ID,
    E.PARENT_ID,
    E.[PATH],
    E.[TYPE]
FROM #EXAMPLE AS E;

-- Clean up so the script can be re-run in the same session.
DROP TABLE #EXAMPLE;

这是一个文件路径层次结构,我想做的是最终做一个报告,例如,“/Home”下有16个文件,然后你可以向下钻取到下一个层级并查看“/Home/User1”有 2 个文件,“/Home/User2”有 7 个文件,依此类推。

我可能想多了……但要做到这一点,我认为必须把不同文件路径之间共有的部分(即公共的上级路径)显示出来。起初,我尝试了下面的写法(放在建表语句之后):

-- Walk the tree top-down from the root folder (ID 63812), tagging every node
-- with its depth (LVL): the root is 0 and each step to a child adds 1.
-- The leading semicolon terminates whatever statement precedes this one.
;WITH E_CTE (ID, PARENT_ID, [PATH], [TYPE], LVL) AS
(
    -- Anchor member: the root folder itself.
    SELECT
        ROOT.ID,
        ROOT.PARENT_ID,
        ROOT.[PATH],
        ROOT.[TYPE],
        0
    FROM #EXAMPLE AS ROOT
    WHERE ROOT.ID = 63812

    UNION ALL

    -- Recursive member: direct children of the rows found so far.
    SELECT
        CHILD.ID,
        CHILD.PARENT_ID,
        CHILD.[PATH],
        CHILD.[TYPE],
        PRNT.LVL + 1
    FROM E_CTE AS PRNT
        INNER JOIN #EXAMPLE AS CHILD
            ON PRNT.ID = CHILD.PARENT_ID
)
-- Pair each node with its immediate parent's path; the root has no parent
-- row, so the LEFT JOIN yields NULL there and ISNULL turns it into ''.
SELECT
    NODE.PARENT_ID,
    ISNULL(PARENT_ROW.[PATH], '') AS PARENT_PATH,
    NODE.ID,
    NODE.[PATH],
    NODE.[TYPE],
    NODE.LVL
FROM E_CTE AS NODE
    LEFT JOIN #EXAMPLE AS PARENT_ROW
        ON NODE.PARENT_ID = PARENT_ROW.ID
ORDER BY
    NODE.LVL,
    NODE.[PATH];

DROP TABLE #EXAMPLE;

这为我提供了一个数据集,我可以在其中看到给定记录的直接父元素,但看不到每一级父元素的分解。PARENT_PATH 和 PATH 的层次结构使它看起来像有 9 个父组,而我希望它显示为:顶层有一个组“/Home”,第二级有三个组“/Home/User1”、“/Home/User2”和“/Home/User3”,依此类推。我得出的结论是我需要这样的数据集:

-- Hand-built illustration of the desired output shape: each row carries its
-- own PATH plus one LEVEL_n column per depth holding the ancestor path at
-- that depth (and the row's own path at its own depth). Deeper levels the row
-- does not reach are NULL.
CREATE TABLE #EXAMPLE
(
    ID        INT,
    PARENT_ID INT,
    LEVEL_0   VARCHAR(1000),
    LEVEL_1   VARCHAR(1000),
    LEVEL_2   VARCHAR(1000),
    LEVEL_3   VARCHAR(1000),
    LEVEL_4   VARCHAR(1000),
    [PATH]    VARCHAR(1000),
    [TYPE]    VARCHAR(10)
);

INSERT INTO #EXAMPLE
    (ID, PARENT_ID, LEVEL_0, LEVEL_1, LEVEL_2, LEVEL_3, LEVEL_4, [PATH], [TYPE])
VALUES
    (63812, 0, '/Home', NULL, NULL, NULL, NULL, '/Home', 'Folder'),
    (55225, 63812, '/Home', '/Home/User1', NULL, NULL, NULL, '/Home/User1', 'Folder'),
    (92901, 63812, '/Home', '/Home/User2', NULL, NULL, NULL, '/Home/User2', 'Folder'),
    (40353, 63812, '/Home', '/Home/User3', NULL, NULL, NULL, '/Home/User3', 'Folder'),
    (96959, 55225, '/Home', '/Home/User1', '/Home/User1/Item1', NULL, NULL, '/Home/User1/Item1', 'File'),
    (97231, 55225, '/Home', '/Home/User1', '/Home/User1/Item2', NULL, NULL, '/Home/User1/Item2', 'File'),
    (53339, 92901, '/Home', '/Home/User2', '/Home/User2/Item1', NULL, NULL, '/Home/User2/Item1', 'File'),
    (58034, 92901, '/Home', '/Home/User2', '/Home/User2/Music', NULL, NULL, '/Home/User2/Music', 'Folder'),
    (65023, 58034, '/Home', '/Home/User2', '/Home/User2/Music', '/Home/User2/Music/Item1', NULL, '/Home/User2/Music/Item1', 'File'),
    (72657, 58034, '/Home', '/Home/User2', '/Home/User2/Music', '/Home/User2/Music/Item2', NULL, '/Home/User2/Music/Item2', 'File'),
    (19406, 58034, '/Home', '/Home/User2', '/Home/User2/Music', '/Home/User2/Music/Item3', NULL, '/Home/User2/Music/Item3', 'File'),
    (56515, 58034, '/Home', '/Home/User2', '/Home/User2/Music', '/Home/User2/Music/Item4', NULL, '/Home/User2/Music/Item4', 'File'),
    (68394, 58034, '/Home', '/Home/User2', '/Home/User2/Music', '/Home/User2/Music/Item5', NULL, '/Home/User2/Music/Item5', 'File'),
    (42813, 92901, '/Home', '/Home/User2', '/Home/User2/Movies', NULL, NULL, '/Home/User2/Movies', 'Folder'),
    (32781, 42813, '/Home', '/Home/User2', '/Home/User2/Movies', '/Home/User2/Movies/Item1', NULL, '/Home/User2/Movies/Item1', 'File'),
    (96579, 40353, '/Home', '/Home/User3', '/Home/User3/Scripts', NULL, NULL, '/Home/User3/Scripts', 'Folder'),
    (36300, 96579, '/Home', '/Home/User3', '/Home/User3/Scripts', '/Home/User3/Scripts/Item1', NULL, '/Home/User3/Scripts/Item1', 'File'),
    (59930, 96579, '/Home', '/Home/User3', '/Home/User3/Scripts', '/Home/User3/Scripts/SQL', NULL, '/Home/User3/Scripts/SQL', 'Folder'),
    (83700, 59930, '/Home', '/Home/User3', '/Home/User3/Scripts', '/Home/User3/Scripts/SQL', '/Home/User3/Scripts/SQL/Item1', '/Home/User3/Scripts/SQL/Item1', 'File'),
    (66753, 59930, '/Home', '/Home/User3', '/Home/User3/Scripts', '/Home/User3/Scripts/SQL', '/Home/User3/Scripts/SQL/Item2', '/Home/User3/Scripts/SQL/Item2', 'File'),
    (34377, 96579, '/Home', '/Home/User3', '/Home/User3/Scripts', '/Home/User3/Scripts/Other', NULL, '/Home/User3/Scripts/Other', 'Folder'),
    (20666, 34377, '/Home', '/Home/User3', '/Home/User3/Scripts', '/Home/User3/Scripts/Other', '/Home/User3/Scripts/Other/Item1', '/Home/User3/Scripts/Other/Item1', 'File'),
    (23786, 34377, '/Home', '/Home/User3', '/Home/User3/Scripts', '/Home/User3/Scripts/Other', '/Home/User3/Scripts/Other/Item2', '/Home/User3/Scripts/Other/Item2', 'File'),
    (87107, 34377, '/Home', '/Home/User3', '/Home/User3/Scripts', '/Home/User3/Scripts/Other', '/Home/User3/Scripts/Other/Item3', '/Home/User3/Scripts/Other/Item3', 'File'),
    (29557, 34377, '/Home', '/Home/User3', '/Home/User3/Scripts', '/Home/User3/Scripts/Other', '/Home/User3/Scripts/Other/Item4', '/Home/User3/Scripts/Other/Item4', 'File');

-- Return every column in table order.
SELECT
    ID,
    PARENT_ID,
    LEVEL_0,
    LEVEL_1,
    LEVEL_2,
    LEVEL_3,
    LEVEL_4,
    [PATH],
    [TYPE]
FROM #EXAMPLE;

DROP TABLE #EXAMPLE;

此数据集向我显示了每个行级项目(文件夹或文件)、它的路径,以及层次结构每个级别上完整的“共享路径”。我的问题是:如何使用 T-SQL 从我的第一个数据集得到这个数据集?(也就是说,如何动态地实现这一点?)这会涉及数量不固定的列,具体取决于层级的数量(在生产数据中,大约有七到八个层级)。

第二个问题是:是否有其他方式来排列、制表或组织这些数据,以便在 SSRS 或 Tableau 等报表工具中,按照我上面描述的方式从顶层向下钻取到底层?

感谢所有花时间看这个问题的人。

通过一些调查,我弄清楚了我真正需要的东西,它与我最初问的并不一样(一开始我缺乏相应的术语,不知道究竟该怎么问)。在看了 Wise Owl 的视频 (https://www.youtube.com/watch?v=CHbqIsw5X30&list=PLNIs-AWhQzcmEFHyxCRwA_gb29WOz5SJU&index=28) 之后,我又参考了 Itzik Ben-Gan 的《T-SQL Querying》(2015) 第 778-786 页,并提出了这个嵌套集解决方案:

-- ID of the node whose subtree will be converted to nested sets
-- (63812 is the '/Home' row inserted below).
DECLARE @root AS INT = 63812;

-- Adjacency-list source data: each row is a 'Folder' or 'File', and
-- PARENT_ID holds the ID of the containing folder (0 for the root row).
CREATE TABLE #EXAMPLE
(
    ID        INT,
    PARENT_ID INT,
    [PATH]    VARCHAR(1000),
    [TYPE]    VARCHAR(10)
);

INSERT INTO #EXAMPLE (ID, PARENT_ID, [PATH], [TYPE])
VALUES
    (63812, 0,     '/Home', 'Folder'),
    (55225, 63812, '/Home/User1', 'Folder'),
    (92901, 63812, '/Home/User2', 'Folder'),
    (40353, 63812, '/Home/User3', 'Folder'),
    (96959, 55225, '/Home/User1/Item1', 'File'),
    (97231, 55225, '/Home/User1/Item2', 'File'),
    (53339, 92901, '/Home/User2/Item1', 'File'),
    (58034, 92901, '/Home/User2/Music', 'Folder'),
    (65023, 58034, '/Home/User2/Music/Item1', 'File'),
    (72657, 58034, '/Home/User2/Music/Item2', 'File'),
    (19406, 58034, '/Home/User2/Music/Item3', 'File'),
    (56515, 58034, '/Home/User2/Music/Item4', 'File'),
    (68394, 58034, '/Home/User2/Music/Item5', 'File'),
    (42813, 92901, '/Home/User2/Movies', 'Folder'),
    (32781, 42813, '/Home/User2/Movies/Item1', 'File'),
    (96579, 40353, '/Home/User3/Scripts', 'Folder'),
    (36300, 96579, '/Home/User3/Scripts/Item1', 'File'),
    (59930, 96579, '/Home/User3/Scripts/SQL', 'Folder'),
    (83700, 59930, '/Home/User3/Scripts/SQL/Item1', 'File'),
    (66753, 59930, '/Home/User3/Scripts/SQL/Item2', 'File'),
    (34377, 96579, '/Home/User3/Scripts/Other', 'Folder'),
    (20666, 34377, '/Home/User3/Scripts/Other/Item1', 'File'),
    (23786, 34377, '/Home/User3/Scripts/Other/Item2', 'File'),
    (87107, 34377, '/Home/User3/Scripts/Other/Item3', 'File'),
    (29557, 34377, '/Home/User3/Scripts/Other/Item4', 'File');


-- Convert the adjacency list in #EXAMPLE into nested-set (LFT/RGT) intervals
-- for the subtree rooted at @root, and store the result in #StructureNS.
--
-- Approach: every node is emitted twice (n = 1 and n = 2). Each copy gets a
-- binary SORT_PATH made of 4-byte segments, one per level. Numbering all
-- copies by SORT_PATH and taking the smallest/largest number per node gives
-- that node's LFT and RGT.
WITH TwoNums (n) AS
(
    -- Two-row helper used to duplicate every node.
    SELECT D.n
    FROM (VALUES (1), (2)) AS D (n)
),
SortPath (ID, [PATH], [TYPE], n, LVL, SORT_PATH) AS
(
    -- Anchor member: both copies of the root; the path is just n as 4 bytes.
    SELECT
        ROOT.ID,
        ROOT.[PATH],
        ROOT.[TYPE],
        TN.n,
        0,
        CAST(CAST(TN.n AS BINARY(4)) AS VARBINARY(MAX))
    FROM #EXAMPLE AS ROOT
        CROSS JOIN TwoNums AS TN
    WHERE ROOT.ID = @root

    UNION ALL

    -- Recursive member: children of the n = 1 copy of each parent only.
    -- Siblings are ordered by PATH; because each child appears twice, the
    -- k-th child holds row numbers 2k-1 and 2k, so (rn - 1) / 2 * 2 is
    -- 2(k-1) for both copies and adding n yields 2k-1 (n = 1) and 2k (n = 2).
    SELECT
        CHILD.ID,
        CHILD.[PATH],
        CHILD.[TYPE],
        TNS.n,
        PRNT.LVL + 1,
        PRNT.SORT_PATH
            + CAST(
                (-1 + ROW_NUMBER() OVER (
                          PARTITION BY CHILD.PARENT_ID
                          ORDER BY CHILD.[PATH]
                      )) / 2 * 2 + TNS.n
                AS BINARY(4)
              )
    FROM SortPath AS PRNT
        INNER JOIN #EXAMPLE AS CHILD
            ON PRNT.n = 1
            AND CHILD.PARENT_ID = PRNT.ID
        CROSS JOIN TwoNums AS TNS
),
Sort (ID, [PATH], [TYPE], SortVal) AS
(
    -- Number every copy by its binary sort path.
    SELECT
        SP.ID,
        SP.[PATH],
        SP.[TYPE],
        ROW_NUMBER() OVER (ORDER BY SP.SORT_PATH)
    FROM SortPath AS SP
),
NestedSets (ID, [PATH], [TYPE], LFT, RGT) AS
(
    -- Collapse the two copies of each node into one (LFT, RGT) pair.
    SELECT
        SRT.ID,
        SRT.[PATH],
        SRT.[TYPE],
        MIN(SRT.SortVal),
        MAX(SRT.SortVal)
    FROM Sort AS SRT
    GROUP BY
        SRT.ID,
        SRT.[PATH],
        SRT.[TYPE]
)
-- Persist the intervals together with each node's PARENT_ID.
SELECT
    NS.ID,
    NS.[PATH],
    NS.[TYPE],
    NS.LFT,
    NS.RGT,
    SRC.PARENT_ID
INTO #StructureNS
FROM NestedSets AS NS
    INNER JOIN #EXAMPLE AS SRC
        ON NS.ID = SRC.ID;


-- Report: one row per node with CNT_FILES, the number of files located
-- anywhere beneath it in the hierarchy (0 for rows that are not folders).
--
-- A row CHLD lies inside node NS when CHLD.LFT falls within [NS.LFT, NS.RGT].
-- Only rows of type 'File' whose interval has width 1 (RGT - LFT = 1, i.e.
-- no rows nested inside them) are counted.
--
-- The subquery correlates directly on the outer row instead of re-joining
-- #StructureNS to look the same row up again by ID, so a plain COUNT(*) is
-- sufficient (assumes ID is unique in #StructureNS, as it is for the rows
-- loaded by this script).
--
-- ORDER BY LFT makes the output order deterministic: every folder is
-- followed by its contents, in the sibling order used to assign LFT.
SELECT
    NS.PARENT_ID,
    NS.ID,
    NS.[PATH],
    NS.[TYPE],
    (
        SELECT COUNT(*)
        FROM #StructureNS AS CHLD
        WHERE NS.[TYPE] = 'Folder'
            AND CHLD.[TYPE] = 'File'
            AND CHLD.RGT - CHLD.LFT = 1
            AND CHLD.LFT BETWEEN NS.LFT AND NS.RGT
    ) AS CNT_FILES
FROM #StructureNS AS NS
ORDER BY NS.LFT;

-- Clean up both temp tables so the script can be re-run in the same session.
DROP TABLE #EXAMPLE, #StructureNS;

我真正想要的是:如何在给定递归层次结构的每个级别上执行各种聚合(例如文件计数)。上面的 T-SQL 代码很好地做到了这一点,不过同样的效果似乎也可以通过 SSRS 的“递归”(Recursive)参数来实现。无论采用哪种方式,SSRS 的递归高级设置都能让我以期望的方式显示数据,即从根节点开始向下钻取所有节点。