如何在SQL中的arcs/edges集合中识别groups/clusters?
How to identify groups/clusters in set of arcs/edges in SQL?
我现有的 arcs/edges 数据如下:
Node1 Node2
A B
B C
D E
这里 A 连接 B,B 连接 C,D 连接 E,也就是说共有 2 个 groups/clusters,期望得到的结果如下:
Node1 Node2 Cluster
A B 1
B C 1
D E 2
我可以用SQL来识别这些groups/clusters吗?我想这涉及到自连接,但我看不出如何写这个 SQL。任何反馈将不胜感激。谢谢!
根据所提供的示例数据,假设数据中不存在 A->B->C->A 这样的环,以下查询可以从 nodes 表中返回所需的输出。
-- Assign a cluster number to every edge in "nodes" by walking each chain
-- forward from its root edge(s).
-- Assumes the edges contain no cycles (e.g. A->B->C->A): nothing in this
-- query stops the recursion from revisiting an edge.
WITH RECURSIVE NodeCluster (node1, node2, Cluster1) AS
(
    -- Seed: edges whose start node never appears as the end node of another
    -- edge, i.e. the roots. DENSE_RANK keeps cluster numbers consecutive even
    -- when one root has several outgoing edges (RANK would leave gaps).
    SELECT n1.node1,
           n1.node2,
           DENSE_RANK() OVER (ORDER BY n1.node1)
    FROM nodes AS n1
    WHERE NOT EXISTS
          (SELECT *
           FROM nodes AS n2
           WHERE n1.node1 = n2.node2)
    UNION ALL
    -- Step: follow every edge that starts where an already-numbered edge
    -- ends, carrying the cluster number along.
    SELECT n1.node1,
           n1.node2,
           NodeCluster.Cluster1
    FROM nodes AS n1
    INNER JOIN NodeCluster
        ON NodeCluster.node2 = n1.node1
)
SELECT node1,
       node2,
       Cluster1
FROM NodeCluster
ORDER BY Cluster1,
         node1,
         node2;
种子查询选出所有起始节点,并按升序对它们排名,从而为数据分配簇编号。
根据问题提供的数据,输出如下。
Node1 | Node2 | Cluster1
-------------------------
A B 1
B C 1
D E 2
为了进一步验证,在示例数据中添加了更多数据,如下所示。
Node1 | Node2
-------------
A B
B C
D E
E F
F G
H I
I J
J K
L M
查询结果如下。
Node1 | Node2 | Cluster1
-------------------------
A B 1
B C 1
D E 2
E F 2
F G 2
H I 3
I J 3
J K 3
L M 4
该查询已在 Teradata 模式和 ANSI 模式下,使用 Teradata SQL Assistant 和 bteq 测试通过。
希望这会有所帮助。
试试这个:
-- Sample edge list, held in a table variable for the query that follows.
DECLARE @Edges TABLE (
    ID    INT,      -- declared but not filled by the INSERT below, so it stays NULL
    Node1 CHAR(1),  -- one end of the edge
    Node2 CHAR(1)   -- the other end of the edge
);

INSERT INTO @Edges (Node1, Node2)
VALUES
    ('A', 'B'),
    ('B', 'C'),
    ('D', 'E');
-- Number every edge in @Edges by the connected component (cluster) it
-- belongs to, treating edges as undirected.
WITH CTE_Nodes AS (
    -- Every distinct node that appears at either end of an edge.
    SELECT Node1 AS Node
    FROM @Edges
    UNION
    SELECT Node2 AS Node
    FROM @Edges
),
CTE_Pairs AS (
    -- Each edge in both directions so it can be walked either way.
    -- Self-loops (Node1 = Node2) are left out.
    SELECT Node1,
           Node2
    FROM @Edges
    WHERE Node1 <> Node2
    UNION
    SELECT Node2 AS Node1,
           Node1 AS Node2
    FROM @Edges
    WHERE Node1 <> Node2
),
CTE_Recursive AS (
    -- Anchor: one row per directed pair, tagged with its start node.
    -- NodePath is the comma-delimited list of nodes visited so far.
    SELECT CAST(CTE_Nodes.Node AS VARCHAR(8000)) AS AnchorNode,
           CTE_Pairs.Node1,
           CTE_Pairs.Node2,
           CAST(',' + CTE_Pairs.Node1 + ',' + CTE_Pairs.Node2 + ',' AS VARCHAR(8000)) AS NodePath
    FROM CTE_Pairs
    INNER JOIN CTE_Nodes
        ON CTE_Nodes.Node = CTE_Pairs.Node1
    UNION ALL
    -- Step: extend the walk by one pair, skipping any node already on the
    -- path so that cycles terminate. CHARINDEX is used instead of LIKE so
    -- node names such as '%', '_' or '[' are not treated as wildcards.
    SELECT CTE_Recursive.AnchorNode,
           CTE_Pairs.Node1,
           CTE_Pairs.Node2,
           CAST(CTE_Recursive.NodePath + CTE_Pairs.Node2 + ',' AS VARCHAR(8000)) AS NodePath
    FROM CTE_Pairs
    INNER JOIN CTE_Recursive
        ON CTE_Recursive.Node2 = CTE_Pairs.Node1
    WHERE CHARINDEX(',' + CTE_Pairs.Node2 + ',', CTE_Recursive.NodePath) = 0
),
CTE_CleanResult AS (
    -- Distinct (start node, reachable node) pairs.
    SELECT AnchorNode,
           Node1 AS Node
    FROM CTE_Recursive
    UNION
    SELECT AnchorNode,
           Node2 AS Node
    FROM CTE_Recursive
)
SELECT Edges.Node1,
       Edges.Node2,
       -- The sorted, comma-joined list of reachable nodes is the same for
       -- every node of a component, so ranking by it numbers the clusters.
       -- If no list was produced for the start node, fall back to the node
       -- itself.
       DENSE_RANK() OVER (ORDER BY COALESCE(CA_Data.XML_Value, Edges.Node1)) AS Cluster
FROM @Edges AS Edges
-- Concatenate the nodes reachable from this edge's start node, in order.
CROSS APPLY (
    SELECT CTE_CleanResult.Node + ','
    FROM CTE_CleanResult
    WHERE CTE_CleanResult.AnchorNode = Edges.Node1
    ORDER BY CTE_CleanResult.Node
    FOR XML PATH(''), TYPE
) AS CA_XML (XML_Value)
-- Convert the XML fragment back to plain text.
CROSS APPLY (
    SELECT CA_XML.XML_Value.value('.', 'NVARCHAR(MAX)')
) AS CA_Data (XML_Value)
-- Lift the default limit of 100 recursion levels; the visited-node check in
-- CTE_Recursive already guarantees termination.
OPTION (MAXRECURSION 0);
我现有的 arcs/edges 数据如下:
Node1 Node2
A B
B C
D E
这里 A 连接 B,B 连接 C,D 连接 E,也就是说共有 2 个 groups/clusters,期望得到的结果如下:
Node1 Node2 Cluster
A B 1
B C 1
D E 2
我可以用SQL来识别这些groups/clusters吗?我想这涉及到自连接,但我看不出如何写这个 SQL。任何反馈将不胜感激。谢谢!
根据所提供的示例数据,假设数据中不存在 A->B->C->A 这样的环,以下查询可以从 nodes 表中返回所需的输出。
-- Assign a cluster number to every edge in "nodes" by walking each chain
-- forward from its root edge(s).
-- Assumes the edges contain no cycles (e.g. A->B->C->A): nothing in this
-- query stops the recursion from revisiting an edge.
WITH RECURSIVE NodeCluster (node1, node2, Cluster1) AS
(
    -- Seed: edges whose start node never appears as the end node of another
    -- edge, i.e. the roots. DENSE_RANK keeps cluster numbers consecutive even
    -- when one root has several outgoing edges (RANK would leave gaps).
    SELECT n1.node1,
           n1.node2,
           DENSE_RANK() OVER (ORDER BY n1.node1)
    FROM nodes AS n1
    WHERE NOT EXISTS
          (SELECT *
           FROM nodes AS n2
           WHERE n1.node1 = n2.node2)
    UNION ALL
    -- Step: follow every edge that starts where an already-numbered edge
    -- ends, carrying the cluster number along.
    SELECT n1.node1,
           n1.node2,
           NodeCluster.Cluster1
    FROM nodes AS n1
    INNER JOIN NodeCluster
        ON NodeCluster.node2 = n1.node1
)
SELECT node1,
       node2,
       Cluster1
FROM NodeCluster
ORDER BY Cluster1,
         node1,
         node2;
种子查询选出所有起始节点,并按升序对它们排名,从而为数据分配簇编号。
根据问题提供的数据,输出如下。
Node1 | Node2 | Cluster1
-------------------------
A B 1
B C 1
D E 2
为了进一步验证,在示例数据中添加了更多数据,如下所示。
Node1 | Node2
-------------
A B
B C
D E
E F
F G
H I
I J
J K
L M
查询结果如下。
Node1 | Node2 | Cluster1
-------------------------
A B 1
B C 1
D E 2
E F 2
F G 2
H I 3
I J 3
J K 3
L M 4
该查询已在 Teradata 模式和 ANSI 模式下,使用 Teradata SQL Assistant 和 bteq 测试通过。
希望这会有所帮助。
试试这个:
-- Sample edge list, held in a table variable for the query that follows.
DECLARE @Edges TABLE (
    ID    INT,      -- declared but not filled by the INSERT below, so it stays NULL
    Node1 CHAR(1),  -- one end of the edge
    Node2 CHAR(1)   -- the other end of the edge
);

INSERT INTO @Edges (Node1, Node2)
VALUES
    ('A', 'B'),
    ('B', 'C'),
    ('D', 'E');
-- Number every edge in @Edges by the connected component (cluster) it
-- belongs to, treating edges as undirected.
WITH CTE_Nodes AS (
    -- Every distinct node that appears at either end of an edge.
    SELECT Node1 AS Node
    FROM @Edges
    UNION
    SELECT Node2 AS Node
    FROM @Edges
),
CTE_Pairs AS (
    -- Each edge in both directions so it can be walked either way.
    -- Self-loops (Node1 = Node2) are left out.
    SELECT Node1,
           Node2
    FROM @Edges
    WHERE Node1 <> Node2
    UNION
    SELECT Node2 AS Node1,
           Node1 AS Node2
    FROM @Edges
    WHERE Node1 <> Node2
),
CTE_Recursive AS (
    -- Anchor: one row per directed pair, tagged with its start node.
    -- NodePath is the comma-delimited list of nodes visited so far.
    SELECT CAST(CTE_Nodes.Node AS VARCHAR(8000)) AS AnchorNode,
           CTE_Pairs.Node1,
           CTE_Pairs.Node2,
           CAST(',' + CTE_Pairs.Node1 + ',' + CTE_Pairs.Node2 + ',' AS VARCHAR(8000)) AS NodePath
    FROM CTE_Pairs
    INNER JOIN CTE_Nodes
        ON CTE_Nodes.Node = CTE_Pairs.Node1
    UNION ALL
    -- Step: extend the walk by one pair, skipping any node already on the
    -- path so that cycles terminate. CHARINDEX is used instead of LIKE so
    -- node names such as '%', '_' or '[' are not treated as wildcards.
    SELECT CTE_Recursive.AnchorNode,
           CTE_Pairs.Node1,
           CTE_Pairs.Node2,
           CAST(CTE_Recursive.NodePath + CTE_Pairs.Node2 + ',' AS VARCHAR(8000)) AS NodePath
    FROM CTE_Pairs
    INNER JOIN CTE_Recursive
        ON CTE_Recursive.Node2 = CTE_Pairs.Node1
    WHERE CHARINDEX(',' + CTE_Pairs.Node2 + ',', CTE_Recursive.NodePath) = 0
),
CTE_CleanResult AS (
    -- Distinct (start node, reachable node) pairs.
    SELECT AnchorNode,
           Node1 AS Node
    FROM CTE_Recursive
    UNION
    SELECT AnchorNode,
           Node2 AS Node
    FROM CTE_Recursive
)
SELECT Edges.Node1,
       Edges.Node2,
       -- The sorted, comma-joined list of reachable nodes is the same for
       -- every node of a component, so ranking by it numbers the clusters.
       -- If no list was produced for the start node, fall back to the node
       -- itself.
       DENSE_RANK() OVER (ORDER BY COALESCE(CA_Data.XML_Value, Edges.Node1)) AS Cluster
FROM @Edges AS Edges
-- Concatenate the nodes reachable from this edge's start node, in order.
CROSS APPLY (
    SELECT CTE_CleanResult.Node + ','
    FROM CTE_CleanResult
    WHERE CTE_CleanResult.AnchorNode = Edges.Node1
    ORDER BY CTE_CleanResult.Node
    FOR XML PATH(''), TYPE
) AS CA_XML (XML_Value)
-- Convert the XML fragment back to plain text.
CROSS APPLY (
    SELECT CA_XML.XML_Value.value('.', 'NVARCHAR(MAX)')
) AS CA_Data (XML_Value)
-- Lift the default limit of 100 recursion levels; the visited-node check in
-- CTE_Recursive already guarantees termination.
OPTION (MAXRECURSION 0);