需要一个开销更低的查询
Need a less expensive query
我有三张表:persons、email 和 personemail。personemail 表保存指向人员和电子邮件的外键,因此一个人可以关联多个电子邮件地址。email 表还有一个名为 primaryemail 的字段,取值为 1 或 0。主要电子邮件标志用于在报表、发票等场景中提取电子邮件地址。
UI 中曾存在一个逻辑缺陷,允许用户不为客户设置主要电子邮件地址。我已经修复了这个缺陷,但还需要一个脚本,为所有尚未设置主要电子邮件地址的客户强制设置一个。我们决定把 emailid(email 表的主键)最小的那个地址设为主要电子邮件地址。下面是已经写好的脚本,它可以工作,但运行开销非常大,而且运行期间可能导致最终用户被锁。该软件部署在多个时区,因此即使我们在使用量最低的时段运行,也需要它尽可能快地执行完。
这是当前的脚本。它用了临时表和一个 while 循环,所以你可以看出它确实有很大的改进空间。我的 SQL 水平还有待提高,所以把它贴在这里征求建议。
-- Backfill script: for every person that has no primary e-mail, mark the
-- e-mail with the lowest EmailID linked to that person as primary (primaryEmail = 1).
-- Implemented row by row with two temp tables and a WHILE loop.
-- #TEMP: candidate persons; FLAG marks rows the loop has already handled (0 = pending, 1 = done).
CREATE TABLE #TEMP(PERSONID INT, PRIMARYEMAIL INT,FLAG INT)
CREATE INDEX IDX_TEMP_PERSONID ON #TEMP(PERSONID)
-- #TEMP2: persons that already own at least one primary e-mail.
CREATE TABLE #TEMP2(PERSONID INT,PRIMARYEMAIL INT)
CREATE INDEX IDX_TEMP2_PERSONID ON #TEMP2(PERSONID)
--Grab all the person IDs that have at least one email address that is not primary in the db, and initialise the loop flag to 0.
--NOTE(review): one row is inserted per non-primary email, so a person with several such emails appears several times.
--NOTE(review): the WHERE filter on E.primaryEmail discards unmatched rows, so this LEFT OUTER JOIN behaves like an inner join.
INSERT INTO #TEMP
SELECT PE.PersonID, E.primaryEmail ,0
FROM Account.tbPersonEmail PE WITH (NOLOCK)
LEFT OUTER JOIN Account.tbEmail E ON E.EmailID=PE.EmailID
WHERE E.primaryEmail=0
--Grab all person IDs that have at least one email address that is primary.
INSERT INTO #TEMP2
SELECT PE.PersonID, E.primaryEmail
FROM Account.tbPersonEmail PE WITH (NOLOCK)
LEFT OUTER JOIN Account.tbEmail E ON E.EmailID=PE.EmailID
WHERE E.primaryEmail=1
--SELECT * FROM #TEMP2
--Remove any customers that already have a primary email set, leaving only persons without one.
DELETE FROM #TEMP WHERE PERSONID IN (SELECT DISTINCT PERSONID FROM #TEMP2)
--Debug line to see how many customers are affected.
--SELECT * FROM #TEMP
--Perform a while loop to update the min email ID to primary.
--@INTFLAG is the remaining iteration count; @PERSONID is the person handled in the current iteration.
DECLARE @INTFLAG INT
DECLARE @PERSONID INT
--The loop runs once per ROW in #TEMP, not once per distinct person.
SET @INTFLAG = (SELECT COUNT(*) FROM #TEMP)
--SELECT @INTFLAG
WHILE (@INTFLAG > 0)
BEGIN
--Pick any person that has not been processed yet.
--NOTE(review): once every row is flagged this subquery returns no row and @PERSONID becomes NULL,
--so the extra iterations caused by duplicate PERSONID rows update nothing.
SET @PERSONID =(SELECT TOP(1) PERSONID FROM #TEMP WHERE FLAG=0)
--Promote the lowest EmailID linked to this person to primary.
UPDATE Account.tbEmail SET primaryEmail=1 WHERE EmailID=(SELECT MIN(EMAILID) FROM Account.tbPersonEmail where PersonID=@PERSONID)
--Update the flag on the #temp table to grab the next ID (flags every row of this person at once).
UPDATE #TEMP SET FLAG=1 WHERE PERSONID=@PERSONID
--Reduce the intflag variable that the loop is running off of.
SET @INTFLAG=@INTFLAG-1
END
--Clean up the work tables.
DROP TABLE #TEMP
DROP TABLE #TEMP2
创建临时表的开销可能很大,而在 SQL 中使用循环是个坏主意:循环很慢,因为优化器无法对其进行优化。典型的做法是使用子查询。首先,试试这样做:
-- Build the work list for the WHILE loop in a single statement:
-- one row per non-primary e-mail link whose owner has no primary e-mail at all.
-- FLAG starts at 0 (= not yet processed by the loop).
CREATE TABLE #TEMP(PERSONID INT, PRIMARYEMAIL INT, FLAG INT)
CREATE INDEX IDX_TEMP_PERSONID ON #TEMP(PERSONID)
INSERT INTO #TEMP (PERSONID, PRIMARYEMAIL, FLAG)
SELECT PE.PersonID, E.primaryEmail, 0
FROM Account.tbPersonEmail PE WITH (NOLOCK)
-- The filter on E.primaryEmail discards unmatched rows anyway, so an inner join states the intent.
INNER JOIN Account.tbEmail E ON E.EmailID = PE.EmailID
WHERE E.primaryEmail = 0
  -- NOT EXISTS instead of NOT IN: a NULL PersonID in the subquery can no longer empty the result.
  AND NOT EXISTS (SELECT 1
                  FROM Account.tbPersonEmail PE2 WITH (NOLOCK)
                  -- E2 must be joined to PE2 on its own EmailID; joining on the outer alias E
                  -- would pair every PE2 row with every e-mail row in the table.
                  INNER JOIN Account.tbEmail E2 ON E2.EmailID = PE2.EmailID
                  WHERE PE2.PersonID = PE.PersonID
                    AND E2.primaryEmail = 1)
然后运行你的 while 循环。这应该会有一些帮助。你可以通过检查 #TEMP 的内容是否与之前的版本一致来验证它是否正确。
要进一步优化,你可能需要把整个更新过程重写为单个查询。你可能还想看看这个问题:How can I optimize this SQL query (Using Indexes)?
下面这个单一查询会把每个人的第一个电子邮件地址(EmailID 最小的那个)设置为 primaryEmail=1,但已经有主要电子邮件地址的人除外:
-- Single set-based statement: for every person without a primary e-mail,
-- flag the e-mail with the lowest EmailID as primary.
-- T-SQL does not accept an alias directly after UPDATE <table>; the alias is
-- declared in the FROM clause and used as the update target instead.
UPDATE E
SET primaryEmail = 1
FROM Account.tbEmail AS E
WHERE E.EmailID IN (
    -- lowest EmailID per person ...
    SELECT MIN(PE.EmailID)
    FROM Account.tbPersonEmail AS PE
    -- ... skipping persons who already have a primary e-mail.
    -- NOT EXISTS avoids the NOT IN trap where one NULL PersonID empties the result.
    WHERE NOT EXISTS (
        SELECT 1
        FROM Account.tbPersonEmail AS PE1
        INNER JOIN Account.tbEmail AS E1 ON E1.EmailID = PE1.EmailID
        WHERE PE1.PersonID = PE.PersonID
          AND E1.primaryEmail = 1
    )
    GROUP BY PE.PersonID
)
你设置主要电子邮件的逻辑不对,而且在 varchar 列上使用聚合函数或排名函数会更糟糕。
我们还需要了解表中的其他列。
我喜欢 @David 的建议,但不喜欢那个脚本。
请试试我的脚本并做充分的测试,然后也请回来反馈结果。
-- Rank each person's e-mails by EmailID and promote the first one to primary,
-- restricted to persons that have no primary e-mail yet.
;WITH CTE AS
(
    SELECT PE.PersonID, E.primaryEmail, E.EmailID,
           -- PARTITION BY restarts the numbering for every person, so rn = 1 is that
           -- person's lowest EmailID; without it only one row in the whole table gets rn = 1.
           ROW_NUMBER() OVER (PARTITION BY PE.PersonID ORDER BY PE.EmailID) AS rn
    FROM Account.tbPersonEmail PE WITH (NOLOCK)
    -- Inner join: the primaryEmail filter below would discard unmatched rows of a left join anyway.
    INNER JOIN Account.tbEmail E ON E.EmailID = PE.EmailID
    WHERE E.primaryEmail = 0
      -- Skip persons who already own a primary e-mail, otherwise they would end up with two.
      AND NOT EXISTS (SELECT 1
                      FROM Account.tbPersonEmail PE2 WITH (NOLOCK)
                      INNER JOIN Account.tbEmail E2 ON E2.EmailID = PE2.EmailID
                      WHERE PE2.PersonID = PE.PersonID
                        AND E2.primaryEmail = 1)
)
-- The CTE now yields only e-mails of persons that still need a primary e-mail.
UPDATE A
SET primaryEmail = 1
FROM Account.tbEmail A
INNER JOIN CTE B ON A.EmailID = B.EmailID
WHERE B.rn = 1
最终采用了下面这个方案:
-- Final version: for every person that has no primary e-mail, set the lowest
-- non-primary EmailID linked to that person as primary, in one set-based UPDATE.
UPDATE Account.tbEmail
SET primaryEmail = 1
WHERE EmailID IN (
    -- GROUP BY already yields one row per person, so no DISTINCT or extra nesting is needed.
    SELECT MIN(PE.EmailID)
    FROM Account.tbPersonEmail PE WITH (NOLOCK)
    -- Inner join: the primaryEmail filter would discard unmatched rows of a left join anyway.
    INNER JOIN Account.tbEmail E ON E.EmailID = PE.EmailID
    WHERE E.primaryEmail = 0
      -- NOT EXISTS instead of NOT IN: a NULL PersonID in the subquery can no longer empty the result.
      AND NOT EXISTS (
          SELECT 1
          FROM Account.tbPersonEmail PE2 WITH (NOLOCK)
          INNER JOIN Account.tbEmail E2 ON E2.EmailID = PE2.EmailID
          WHERE PE2.PersonID = PE.PersonID
            AND E2.primaryEmail = 1
      )
    GROUP BY PE.PersonID
)
我有三张表:persons、email 和 personemail。personemail 表保存指向人员和电子邮件的外键,因此一个人可以关联多个电子邮件地址。email 表还有一个名为 primaryemail 的字段,取值为 1 或 0。主要电子邮件标志用于在报表、发票等场景中提取电子邮件地址。
UI 中曾存在一个逻辑缺陷,允许用户不为客户设置主要电子邮件地址。我已经修复了这个缺陷,但还需要一个脚本,为所有尚未设置主要电子邮件地址的客户强制设置一个。我们决定把 emailid(email 表的主键)最小的那个地址设为主要电子邮件地址。下面是已经写好的脚本,它可以工作,但运行开销非常大,而且运行期间可能导致最终用户被锁。该软件部署在多个时区,因此即使我们在使用量最低的时段运行,也需要它尽可能快地执行完。
这是当前的脚本。它用了临时表和一个 while 循环,所以你可以看出它确实有很大的改进空间。我的 SQL 水平还有待提高,所以把它贴在这里征求建议。
-- Backfill script: for every person that has no primary e-mail, mark the
-- e-mail with the lowest EmailID linked to that person as primary (primaryEmail = 1).
-- Implemented row by row with two temp tables and a WHILE loop.
-- #TEMP: candidate persons; FLAG marks rows the loop has already handled (0 = pending, 1 = done).
CREATE TABLE #TEMP(PERSONID INT, PRIMARYEMAIL INT,FLAG INT)
CREATE INDEX IDX_TEMP_PERSONID ON #TEMP(PERSONID)
-- #TEMP2: persons that already own at least one primary e-mail.
CREATE TABLE #TEMP2(PERSONID INT,PRIMARYEMAIL INT)
CREATE INDEX IDX_TEMP2_PERSONID ON #TEMP2(PERSONID)
--Grab all the person IDs that have at least one email address that is not primary in the db, and initialise the loop flag to 0.
--NOTE(review): one row is inserted per non-primary email, so a person with several such emails appears several times.
--NOTE(review): the WHERE filter on E.primaryEmail discards unmatched rows, so this LEFT OUTER JOIN behaves like an inner join.
INSERT INTO #TEMP
SELECT PE.PersonID, E.primaryEmail ,0
FROM Account.tbPersonEmail PE WITH (NOLOCK)
LEFT OUTER JOIN Account.tbEmail E ON E.EmailID=PE.EmailID
WHERE E.primaryEmail=0
--Grab all person IDs that have at least one email address that is primary.
INSERT INTO #TEMP2
SELECT PE.PersonID, E.primaryEmail
FROM Account.tbPersonEmail PE WITH (NOLOCK)
LEFT OUTER JOIN Account.tbEmail E ON E.EmailID=PE.EmailID
WHERE E.primaryEmail=1
--SELECT * FROM #TEMP2
--Remove any customers that already have a primary email set, leaving only persons without one.
DELETE FROM #TEMP WHERE PERSONID IN (SELECT DISTINCT PERSONID FROM #TEMP2)
--Debug line to see how many customers are affected.
--SELECT * FROM #TEMP
--Perform a while loop to update the min email ID to primary.
--@INTFLAG is the remaining iteration count; @PERSONID is the person handled in the current iteration.
DECLARE @INTFLAG INT
DECLARE @PERSONID INT
--The loop runs once per ROW in #TEMP, not once per distinct person.
SET @INTFLAG = (SELECT COUNT(*) FROM #TEMP)
--SELECT @INTFLAG
WHILE (@INTFLAG > 0)
BEGIN
--Pick any person that has not been processed yet.
--NOTE(review): once every row is flagged this subquery returns no row and @PERSONID becomes NULL,
--so the extra iterations caused by duplicate PERSONID rows update nothing.
SET @PERSONID =(SELECT TOP(1) PERSONID FROM #TEMP WHERE FLAG=0)
--Promote the lowest EmailID linked to this person to primary.
UPDATE Account.tbEmail SET primaryEmail=1 WHERE EmailID=(SELECT MIN(EMAILID) FROM Account.tbPersonEmail where PersonID=@PERSONID)
--Update the flag on the #temp table to grab the next ID (flags every row of this person at once).
UPDATE #TEMP SET FLAG=1 WHERE PERSONID=@PERSONID
--Reduce the intflag variable that the loop is running off of.
SET @INTFLAG=@INTFLAG-1
END
--Clean up the work tables.
DROP TABLE #TEMP
DROP TABLE #TEMP2
创建临时表的开销可能很大,而在 SQL 中使用循环是个坏主意:循环很慢,因为优化器无法对其进行优化。典型的做法是使用子查询。首先,试试这样做:
-- Build the work list for the WHILE loop in a single statement:
-- one row per non-primary e-mail link whose owner has no primary e-mail at all.
-- FLAG starts at 0 (= not yet processed by the loop).
CREATE TABLE #TEMP(PERSONID INT, PRIMARYEMAIL INT, FLAG INT)
CREATE INDEX IDX_TEMP_PERSONID ON #TEMP(PERSONID)
INSERT INTO #TEMP (PERSONID, PRIMARYEMAIL, FLAG)
SELECT PE.PersonID, E.primaryEmail, 0
FROM Account.tbPersonEmail PE WITH (NOLOCK)
-- The filter on E.primaryEmail discards unmatched rows anyway, so an inner join states the intent.
INNER JOIN Account.tbEmail E ON E.EmailID = PE.EmailID
WHERE E.primaryEmail = 0
  -- NOT EXISTS instead of NOT IN: a NULL PersonID in the subquery can no longer empty the result.
  AND NOT EXISTS (SELECT 1
                  FROM Account.tbPersonEmail PE2 WITH (NOLOCK)
                  -- E2 must be joined to PE2 on its own EmailID; joining on the outer alias E
                  -- would pair every PE2 row with every e-mail row in the table.
                  INNER JOIN Account.tbEmail E2 ON E2.EmailID = PE2.EmailID
                  WHERE PE2.PersonID = PE.PersonID
                    AND E2.primaryEmail = 1)
然后运行你的 while 循环。这应该会有一些帮助。你可以通过检查 #TEMP 的内容是否与之前的版本一致来验证它是否正确。
要进一步优化,你可能需要把整个更新过程重写为单个查询。你可能还想看看这个问题:How can I optimize this SQL query (Using Indexes)?
下面这个单一查询会把每个人的第一个电子邮件地址(EmailID 最小的那个)设置为 primaryEmail=1,但已经有主要电子邮件地址的人除外:
-- Single set-based statement: for every person without a primary e-mail,
-- flag the e-mail with the lowest EmailID as primary.
-- T-SQL does not accept an alias directly after UPDATE <table>; the alias is
-- declared in the FROM clause and used as the update target instead.
UPDATE E
SET primaryEmail = 1
FROM Account.tbEmail AS E
WHERE E.EmailID IN (
    -- lowest EmailID per person ...
    SELECT MIN(PE.EmailID)
    FROM Account.tbPersonEmail AS PE
    -- ... skipping persons who already have a primary e-mail.
    -- NOT EXISTS avoids the NOT IN trap where one NULL PersonID empties the result.
    WHERE NOT EXISTS (
        SELECT 1
        FROM Account.tbPersonEmail AS PE1
        INNER JOIN Account.tbEmail AS E1 ON E1.EmailID = PE1.EmailID
        WHERE PE1.PersonID = PE.PersonID
          AND E1.primaryEmail = 1
    )
    GROUP BY PE.PersonID
)
你设置主要电子邮件的逻辑不对,而且在 varchar 列上使用聚合函数或排名函数会更糟糕。我们还需要了解表中的其他列。
我喜欢 @David 的建议,但不喜欢那个脚本。请试试我的脚本并做充分的测试,然后也请回来反馈结果。
-- Rank each person's e-mails by EmailID and promote the first one to primary,
-- restricted to persons that have no primary e-mail yet.
;WITH CTE AS
(
    SELECT PE.PersonID, E.primaryEmail, E.EmailID,
           -- PARTITION BY restarts the numbering for every person, so rn = 1 is that
           -- person's lowest EmailID; without it only one row in the whole table gets rn = 1.
           ROW_NUMBER() OVER (PARTITION BY PE.PersonID ORDER BY PE.EmailID) AS rn
    FROM Account.tbPersonEmail PE WITH (NOLOCK)
    -- Inner join: the primaryEmail filter below would discard unmatched rows of a left join anyway.
    INNER JOIN Account.tbEmail E ON E.EmailID = PE.EmailID
    WHERE E.primaryEmail = 0
      -- Skip persons who already own a primary e-mail, otherwise they would end up with two.
      AND NOT EXISTS (SELECT 1
                      FROM Account.tbPersonEmail PE2 WITH (NOLOCK)
                      INNER JOIN Account.tbEmail E2 ON E2.EmailID = PE2.EmailID
                      WHERE PE2.PersonID = PE.PersonID
                        AND E2.primaryEmail = 1)
)
-- The CTE now yields only e-mails of persons that still need a primary e-mail.
UPDATE A
SET primaryEmail = 1
FROM Account.tbEmail A
INNER JOIN CTE B ON A.EmailID = B.EmailID
WHERE B.rn = 1
最终采用了下面这个方案:
-- Final version: for every person that has no primary e-mail, set the lowest
-- non-primary EmailID linked to that person as primary, in one set-based UPDATE.
UPDATE Account.tbEmail
SET primaryEmail = 1
WHERE EmailID IN (
    -- GROUP BY already yields one row per person, so no DISTINCT or extra nesting is needed.
    SELECT MIN(PE.EmailID)
    FROM Account.tbPersonEmail PE WITH (NOLOCK)
    -- Inner join: the primaryEmail filter would discard unmatched rows of a left join anyway.
    INNER JOIN Account.tbEmail E ON E.EmailID = PE.EmailID
    WHERE E.primaryEmail = 0
      -- NOT EXISTS instead of NOT IN: a NULL PersonID in the subquery can no longer empty the result.
      AND NOT EXISTS (
          SELECT 1
          FROM Account.tbPersonEmail PE2 WITH (NOLOCK)
          INNER JOIN Account.tbEmail E2 ON E2.EmailID = PE2.EmailID
          WHERE PE2.PersonID = PE.PersonID
            AND E2.primaryEmail = 1
      )
    GROUP BY PE.PersonID
)