psycopg2:用一个查询更新多行
psycopg2: update multiple rows with one query
我尝试通过实现以下函数用单个查询更新多行(大约 350000):
def update_items(rows_to_update):
sql_query = """UPDATE contact as t SET
name = e.name
FROM (VALUES %s) AS e(id, name)
WHERE e.id = t.id;"""
conn = get_db_connection()
cur = conn.cursor()
psycopg2.extras.execute_values (
cur, sql_query, rows_to_update, template=None, page_size=100
)
尝试运行上述功能时,仅更新了 31 条记录。然后,我尝试使用以下函数逐行更新:
def update_items_row_by_row(rows_to_update):
sql_query = """UPDATE contact SET name = %s WHERE id = %s"""
conn = get_db_connection()
with tqdm(total=len(rows_to_update)) as pbar:
for id, name in rows_to_update:
cur = conn.cursor()
# execute the UPDATE statement
cur.execute(sql_query, (name, id))
# get the number of updated rows
# Commit the changes to the database
conn.commit()
cur.close()
pbar.update(1)
后者已经更新了目前所有的记录,但是速度很慢(估计9小时后结束)。
有谁知道更新多条记录的有效方法是什么?
通过将列表分成大小等于 page_size 的块,效果很好:
def update_items(rows_to_update):
sql_query = """UPDATE contact as t SET
name = data.name
FROM (VALUES %s) AS data (id, name)
WHERE t.id = data.id"""
conn = get_db_connection()
cur = conn.cursor()
n = 100
with tqdm(total=len(rows_to_update)) as pbar:
for i in range(0, len(rows_to_update), n):
psycopg2.extras.execute_values (
cur, sql_query, rows_to_update[i:i + n], template=None, page_size=n
)
conn.commit()
pbar.update(cur.rowcount)
cur.close()
conn.close()
我尝试通过实现以下函数用单个查询更新多行(大约 350000):
def update_items(rows_to_update):
sql_query = """UPDATE contact as t SET
name = e.name
FROM (VALUES %s) AS e(id, name)
WHERE e.id = t.id;"""
conn = get_db_connection()
cur = conn.cursor()
psycopg2.extras.execute_values (
cur, sql_query, rows_to_update, template=None, page_size=100
)
尝试运行上述功能时,仅更新了 31 条记录。然后,我尝试使用以下函数逐行更新:
def update_items_row_by_row(rows_to_update):
sql_query = """UPDATE contact SET name = %s WHERE id = %s"""
conn = get_db_connection()
with tqdm(total=len(rows_to_update)) as pbar:
for id, name in rows_to_update:
cur = conn.cursor()
# execute the UPDATE statement
cur.execute(sql_query, (name, id))
# get the number of updated rows
# Commit the changes to the database
conn.commit()
cur.close()
pbar.update(1)
后者已经更新了目前所有的记录,但是速度很慢(估计9小时后结束)。 有谁知道更新多条记录的有效方法是什么?
通过将列表分成大小等于 page_size 的块,效果很好:
def update_items(rows_to_update):
sql_query = """UPDATE contact as t SET
name = data.name
FROM (VALUES %s) AS data (id, name)
WHERE t.id = data.id"""
conn = get_db_connection()
cur = conn.cursor()
n = 100
with tqdm(total=len(rows_to_update)) as pbar:
for i in range(0, len(rows_to_update), n):
psycopg2.extras.execute_values (
cur, sql_query, rows_to_update[i:i + n], template=None, page_size=n
)
conn.commit()
pbar.update(cur.rowcount)
cur.close()
conn.close()