使用关联子查询选择最早条目的 ID
correlated sub query to select ids of earliest entry
我目前正在使用这个:
-- Rebuild the #Temp fixture: six (SomeId, UtcDateTime) sample rows used by the queries below.
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL
    DROP TABLE #Temp;

CREATE TABLE #Temp
(
    SomeId      INT,
    UtcDateTime DATETIME2
);

-- Explicit column list plus one VALUES constructor: the insert no longer depends on
-- column order, and it skips the duplicate-removal pass that UNION performs implicitly.
INSERT INTO #Temp (SomeId, UtcDateTime)
VALUES
    (1, DATETIME2FROMPARTS(2015, 1,  1, 1, 1, 1, 0, 0)),
    (1, DATETIME2FROMPARTS(2015, 1,  1, 2, 1, 1, 0, 0)),
    (2, DATETIME2FROMPARTS(2015, 1,  1, 3, 1, 1, 0, 0)),
    (2, DATETIME2FROMPARTS(2015, 1, 12, 4, 1, 1, 0, 0)),
    (2, DATETIME2FROMPARTS(2015, 1, 12, 5, 1, 1, 0, 0)),
    (3, DATETIME2FROMPARTS(2015, 1, 12, 5, 1, 1, 0, 0));

-- SomeId breaks the tie between the two rows stamped 2015-01-12 05:01:01,
-- so the listing comes back in the same order on every run.
SELECT SomeId, UtcDateTime
FROM #Temp
ORDER BY UtcDateTime ASC, SomeId ASC;
-- Earliest row per SomeId, found with two correlated scalar subqueries.
SELECT
    O1.SomeId,
    O1.UtcDateTime
FROM #Temp AS O1
WHERE O1.UtcDateTime = (
          -- smallest timestamp recorded for this row's SomeId
          SELECT MIN(Earliest.UtcDateTime)
          FROM #Temp AS Earliest
          WHERE Earliest.SomeId = O1.SomeId
      )
  AND O1.SomeId = (
          -- highest SomeId among rows sharing this row's SomeId and timestamp
          SELECT MAX(Peer.SomeId)
          FROM #Temp AS Peer
          WHERE Peer.SomeId = O1.SomeId
            AND Peer.UtcDateTime = O1.UtcDateTime
      );
目的是根据 UtcDateTime 选出每个不同 SomeId 第一次出现的条目。换句话说,我想要得到这些行:
SomeId UtcDateTime
1 2015-01-01 01:01:01.0000000
2 2015-01-01 03:01:01.0000000
3 2015-01-12 05:01:01.0000000
上述关联子查询的做法是否正确?
选择这样的行:不存在具有相同 ID 且时间戳更早的其他行:
-- Return every row for which no other row with the same SomeId carries an earlier
-- UtcDateTime, i.e. the earliest row(s) of each SomeId.
SELECT
    T1.SomeId,
    T1.UtcDateTime
FROM #Temp AS T1              -- the fixture is the temp table #Temp, not a permanent table "Temp"
WHERE NOT EXISTS (            -- the T-SQL operator is EXISTS; "NOT EXIST" does not parse
    SELECT 1
    FROM #Temp AS T2
    WHERE T2.SomeId = T1.SomeId
      AND T2.UtcDateTime < T1.UtcDateTime
);
两次自连接(同一张表被引用了 3 次)——在我看来太多了
1)
-- Keep each row that has no same-SomeId row with a strictly earlier UtcDateTime.
SELECT
    Cur.SomeId,
    Cur.UtcDateTime
FROM #Temp AS Cur
WHERE NOT EXISTS (
    SELECT 1
    FROM #Temp AS Prior
    WHERE Prior.SomeId = Cur.SomeId
      AND Prior.UtcDateTime < Cur.UtcDateTime
);
2)
-- For every row, look up the earliest UtcDateTime within its SomeId (TOP 1 by ascending
-- time) and keep the row only when its own timestamp equals that value.
SELECT
    Cur.SomeId,
    Cur.UtcDateTime,
    Earliest.UtcDateTime      -- the applied column is part of the output as well
FROM #Temp AS Cur
CROSS APPLY (
    SELECT TOP (1) Probe.UtcDateTime
    FROM #Temp AS Probe
    WHERE Probe.SomeId = Cur.SomeId
    ORDER BY Probe.UtcDateTime ASC
) AS Earliest
WHERE Earliest.UtcDateTime = Cur.UtcDateTime;
2.1) 带 group by、min 和 join 的子查询(apply 很可能会得到使用嵌套循环连接的执行计划,而先 group by 再 join 则很可能得到使用哈希连接的执行计划)
3) 针对您的示例情况
-- Earliest UtcDateTime per SomeId via plain aggregation.
-- #Temp has no "id" column; the grouping key is SomeId.
SELECT
    O1.SomeId,
    MIN(O1.UtcDateTime) AS UtcDateTime   -- alias so the result column is named
FROM #Temp AS O1
GROUP BY O1.SomeId;
4) 适用于小表;请注意,对 row_number 的过滤无法利用索引查找(不是 SARGable 条件),因此相当于“禁用”了所有索引,并且需要额外的内存来填充新列
-- Rank the rows inside each SomeId by ascending UtcDateTime, then keep rank 1.
SELECT
    Ranked.SomeId,
    Ranked.UtcDateTime,
    Ranked.row_no
FROM (
    SELECT
        Src.SomeId,
        Src.UtcDateTime,
        ROW_NUMBER() OVER (
            PARTITION BY Src.SomeId
            ORDER BY Src.UtcDateTime
        ) AS row_no
    FROM #Temp AS Src
) AS Ranked
WHERE Ranked.row_no = 1;
您还可以使用 ROW_NUMBER 按 [UtcDateTime] 排序并按 [SomeId] 分区以获得第一次出现。
-- First occurrence per SomeId: number each partition by UtcDateTime and keep row 1.
SELECT
    Numbered.SomeId,
    Numbered.UtcDateTime,
    Numbered.RN
FROM (
    SELECT
        SomeId,
        UtcDateTime,
        ROW_NUMBER() OVER (PARTITION BY SomeId ORDER BY UtcDateTime) AS RN
    FROM #Temp
) AS Numbered
WHERE Numbered.RN = 1;
你的没有错,但你可以试试这个(更简单的方法):
-- One row per SomeId carrying its earliest UtcDateTime.
SELECT
    SomeId,
    MIN(UTCDateTime) AS UTCDateTime
FROM #Temp
GROUP BY SomeId;
这将为每个 'SomeId' 返回其第一次出现的记录。
我目前正在使用这个:
-- Rebuild the #Temp fixture: six (SomeId, UtcDateTime) sample rows used by the queries below.
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL
    DROP TABLE #Temp;

CREATE TABLE #Temp
(
    SomeId      INT,
    UtcDateTime DATETIME2
);

-- Explicit column list plus one VALUES constructor: the insert no longer depends on
-- column order, and it skips the duplicate-removal pass that UNION performs implicitly.
INSERT INTO #Temp (SomeId, UtcDateTime)
VALUES
    (1, DATETIME2FROMPARTS(2015, 1,  1, 1, 1, 1, 0, 0)),
    (1, DATETIME2FROMPARTS(2015, 1,  1, 2, 1, 1, 0, 0)),
    (2, DATETIME2FROMPARTS(2015, 1,  1, 3, 1, 1, 0, 0)),
    (2, DATETIME2FROMPARTS(2015, 1, 12, 4, 1, 1, 0, 0)),
    (2, DATETIME2FROMPARTS(2015, 1, 12, 5, 1, 1, 0, 0)),
    (3, DATETIME2FROMPARTS(2015, 1, 12, 5, 1, 1, 0, 0));

-- SomeId breaks the tie between the two rows stamped 2015-01-12 05:01:01,
-- so the listing comes back in the same order on every run.
SELECT SomeId, UtcDateTime
FROM #Temp
ORDER BY UtcDateTime ASC, SomeId ASC;
-- Earliest row per SomeId, found with two correlated scalar subqueries.
SELECT
    O1.SomeId,
    O1.UtcDateTime
FROM #Temp AS O1
WHERE O1.UtcDateTime = (
          -- smallest timestamp recorded for this row's SomeId
          SELECT MIN(Earliest.UtcDateTime)
          FROM #Temp AS Earliest
          WHERE Earliest.SomeId = O1.SomeId
      )
  AND O1.SomeId = (
          -- highest SomeId among rows sharing this row's SomeId and timestamp
          SELECT MAX(Peer.SomeId)
          FROM #Temp AS Peer
          WHERE Peer.SomeId = O1.SomeId
            AND Peer.UtcDateTime = O1.UtcDateTime
      );
目的是根据 UtcDateTime 选出每个不同 SomeId 第一次出现的条目。换句话说,我想要得到这些行:
SomeId UtcDateTime
1 2015-01-01 01:01:01.0000000
2 2015-01-01 03:01:01.0000000
3 2015-01-12 05:01:01.0000000
上述关联子查询的做法是否正确?
选择这样的行:不存在具有相同 ID 且时间戳更早的其他行:
-- Return every row for which no other row with the same SomeId carries an earlier
-- UtcDateTime, i.e. the earliest row(s) of each SomeId.
SELECT
    T1.SomeId,
    T1.UtcDateTime
FROM #Temp AS T1              -- the fixture is the temp table #Temp, not a permanent table "Temp"
WHERE NOT EXISTS (            -- the T-SQL operator is EXISTS; "NOT EXIST" does not parse
    SELECT 1
    FROM #Temp AS T2
    WHERE T2.SomeId = T1.SomeId
      AND T2.UtcDateTime < T1.UtcDateTime
);
两次自连接(同一张表被引用了 3 次)——在我看来太多了
1)
-- Keep each row that has no same-SomeId row with a strictly earlier UtcDateTime.
SELECT
    Cur.SomeId,
    Cur.UtcDateTime
FROM #Temp AS Cur
WHERE NOT EXISTS (
    SELECT 1
    FROM #Temp AS Prior
    WHERE Prior.SomeId = Cur.SomeId
      AND Prior.UtcDateTime < Cur.UtcDateTime
);
2)
-- For every row, look up the earliest UtcDateTime within its SomeId (TOP 1 by ascending
-- time) and keep the row only when its own timestamp equals that value.
SELECT
    Cur.SomeId,
    Cur.UtcDateTime,
    Earliest.UtcDateTime      -- the applied column is part of the output as well
FROM #Temp AS Cur
CROSS APPLY (
    SELECT TOP (1) Probe.UtcDateTime
    FROM #Temp AS Probe
    WHERE Probe.SomeId = Cur.SomeId
    ORDER BY Probe.UtcDateTime ASC
) AS Earliest
WHERE Earliest.UtcDateTime = Cur.UtcDateTime;
2.1) 带 group by、min 和 join 的子查询(apply 很可能会得到使用嵌套循环连接的执行计划,而先 group by 再 join 则很可能得到使用哈希连接的执行计划)
3) 针对您的示例情况
-- Earliest UtcDateTime per SomeId via plain aggregation.
-- #Temp has no "id" column; the grouping key is SomeId.
SELECT
    O1.SomeId,
    MIN(O1.UtcDateTime) AS UtcDateTime   -- alias so the result column is named
FROM #Temp AS O1
GROUP BY O1.SomeId;
4) 适用于小表;请注意,对 row_number 的过滤无法利用索引查找(不是 SARGable 条件),因此相当于“禁用”了所有索引,并且需要额外的内存来填充新列
-- Rank the rows inside each SomeId by ascending UtcDateTime, then keep rank 1.
SELECT
    Ranked.SomeId,
    Ranked.UtcDateTime,
    Ranked.row_no
FROM (
    SELECT
        Src.SomeId,
        Src.UtcDateTime,
        ROW_NUMBER() OVER (
            PARTITION BY Src.SomeId
            ORDER BY Src.UtcDateTime
        ) AS row_no
    FROM #Temp AS Src
) AS Ranked
WHERE Ranked.row_no = 1;
您还可以使用 ROW_NUMBER 按 [UtcDateTime] 排序并按 [SomeId] 分区以获得第一次出现。
-- First occurrence per SomeId: number each partition by UtcDateTime and keep row 1.
SELECT
    Numbered.SomeId,
    Numbered.UtcDateTime,
    Numbered.RN
FROM (
    SELECT
        SomeId,
        UtcDateTime,
        ROW_NUMBER() OVER (PARTITION BY SomeId ORDER BY UtcDateTime) AS RN
    FROM #Temp
) AS Numbered
WHERE Numbered.RN = 1;
你的没有错,但你可以试试这个(更简单的方法):
-- One row per SomeId carrying its earliest UtcDateTime.
SELECT
    SomeId,
    MIN(UTCDateTime) AS UTCDateTime
FROM #Temp
GROUP BY SomeId;
这将为每个 'SomeId' 返回其第一次出现的记录。