Psycopg2 copy_from 用于 csv 到 postgres
Psycopg2 copy_from for csv to postgres
我有一个 csv 文件,已将其读入 pandas,需要把它插入到 postgres 中。该文件的某些字段中包含带有反斜杠“\”字符的字符串。这会导致问题,因为 copy_from 函数会将其读取为转义字符。我该如何让它忽略“\”并将其作为字符串的一部分保留?我已经尝试了许多不同的编码格式,但仍然收到“无法解码字符”错误。问题是我不能替换该字符,它在字符串中很重要。
def load_into_db(cur, con, file, table_name):
    """Bulk-load a pipe-delimited CSV file into ``table_name`` using COPY.

    The data is streamed with ``COPY ... FROM STDIN`` in CSV format. In CSV
    format a backslash is ordinary data, whereas COPY's default text format
    (the one ``cursor.copy_from`` uses) treats it as an escape character and
    can corrupt or reject values that contain backslashes.

    Args:
        cur: psycopg2 cursor used to run the COPY.
        con: connection owning ``cur``; committed on success and rolled back
            on failure.
        file: path of the UTF-8 encoded, ``|``-delimited CSV file.
        table_name: name of the destination table. It is interpolated into
            the SQL text as-is, so it must come from a trusted source.

    Returns:
        None. Load errors are reported on stdout and are not re-raised; an
        error while opening ``file`` propagates to the caller.
    """
    # An unquoted "nan" field is loaded as NULL, matching the null marker that
    # was previously passed to copy_from.
    copy_sql = (
        "COPY {} FROM STDIN WITH (FORMAT csv, DELIMITER '|', NULL 'nan')"
    ).format(table_name)

    # The context manager closes the file even when the COPY fails.
    with open(file, mode="r", encoding="utf-8") as f:
        try:
            cur.copy_expert(copy_sql, f)
            con.commit()
        except Exception as e:
            # A failed COPY leaves the transaction aborted; roll back so the
            # connection can be used again.
            con.rollback()
            print(e)
            print("something went wrong")
导致问题的行示例
| name | age | attribute |
|---|---|---|
| name1 | 23 | example/1/test |
| name2 | 26 | example/2/test |
错误:编码“UTF8”的字节序列无效:0xa2
import io
import csv
def df2db(df_a, table_name, engine):
    """Append the rows of ``df_a`` to an existing table using COPY.

    The DataFrame is serialised to an in-memory, tab-separated buffer without
    index or header and streamed to PostgreSQL with ``cursor.copy_from``.
    The table's columns are expected to line up with the DataFrame's columns.

    Args:
        df_a: pandas DataFrame to insert.
        table_name: name of the destination table (must already exist).
        engine: SQLAlchemy engine; a raw DBAPI connection is taken from it.

    Raises:
        Any exception raised by the driver propagates to the caller after the
        cursor and connection have been closed.
    """
    output = io.StringIO()
    # No quoting: special characters are backslash-escaped instead, which is
    # the escaping convention of COPY's text format. NOTE(review): whether a
    # literal backslash in the data is itself doubled depends on the csv
    # module of the running Python version - confirm on the target runtime.
    df_a.to_csv(
        output,
        sep='\t',
        index=False,
        header=False,
        quoting=csv.QUOTE_NONE,
        escapechar='\\',
    )
    # Rewind so copy_from reads the buffer from the beginning.
    output.seek(0)

    # engine <--- from sqlalchemy import create_engine
    connection = engine.raw_connection()
    try:
        cursor = connection.cursor()
        try:
            # Empty fields are loaded as NULL.
            cursor.copy_from(output, table_name, null='')
            connection.commit()
        finally:
            cursor.close()
    finally:
        # Close the raw connection so it is released even when COPY fails.
        connection.close()
使用函数 df2db 将 DataFrame 插入到已存在的表中;表的列应与 df 的列相同。
import pandas as pd
from sqlalchemy import create_engine
# Build a SQLAlchemy engine for a PostgreSQL database through the psycopg2
# driver. NOTE(review): the user, password, host and database name in the URL
# look like placeholders - replace them with real connection settings.
engine = create_engine('postgresql+psycopg2://user:psw@localhost:5432/dbname')
# Read the CSV into a DataFrame. `file` is not defined in this snippet and
# must be set to the CSV path beforehand.
df = pd.read_csv(file)
# Copy the DataFrame into the table. `table_name` is likewise not defined in
# this snippet and must be provided by the caller.
df2db(df, table_name, engine)
我有一个 csv 文件,已将其读入 pandas,需要把它插入到 postgres 中。该文件的某些字段中包含带有反斜杠“\”字符的字符串。这会导致问题,因为 copy_from 函数会将其读取为转义字符。我该如何让它忽略“\”并将其作为字符串的一部分保留?我已经尝试了许多不同的编码格式,但仍然收到“无法解码字符”错误。问题是我不能替换该字符,它在字符串中很重要。
def load_into_db(cur, con, file, table_name):
    """Bulk-load a pipe-delimited CSV file into ``table_name`` using COPY.

    The data is streamed with ``COPY ... FROM STDIN`` in CSV format. In CSV
    format a backslash is ordinary data, whereas COPY's default text format
    (the one ``cursor.copy_from`` uses) treats it as an escape character and
    can corrupt or reject values that contain backslashes.

    Args:
        cur: psycopg2 cursor used to run the COPY.
        con: connection owning ``cur``; committed on success and rolled back
            on failure.
        file: path of the UTF-8 encoded, ``|``-delimited CSV file.
        table_name: name of the destination table. It is interpolated into
            the SQL text as-is, so it must come from a trusted source.

    Returns:
        None. Load errors are reported on stdout and are not re-raised; an
        error while opening ``file`` propagates to the caller.
    """
    # An unquoted "nan" field is loaded as NULL, matching the null marker that
    # was previously passed to copy_from.
    copy_sql = (
        "COPY {} FROM STDIN WITH (FORMAT csv, DELIMITER '|', NULL 'nan')"
    ).format(table_name)

    # The context manager closes the file even when the COPY fails.
    with open(file, mode="r", encoding="utf-8") as f:
        try:
            cur.copy_expert(copy_sql, f)
            con.commit()
        except Exception as e:
            # A failed COPY leaves the transaction aborted; roll back so the
            # connection can be used again.
            con.rollback()
            print(e)
            print("something went wrong")
导致问题的行示例
name | age | attribute |
---|---|---|
name1 | 23 | example/1/test |
name2 | 26 | example/2/test |
错误:编码“UTF8”的字节序列无效:0xa2
import io
import csv
def df2db(df_a, table_name, engine):
    """Append the rows of ``df_a`` to an existing table using COPY.

    The DataFrame is serialised to an in-memory, tab-separated buffer without
    index or header and streamed to PostgreSQL with ``cursor.copy_from``.
    The table's columns are expected to line up with the DataFrame's columns.

    Args:
        df_a: pandas DataFrame to insert.
        table_name: name of the destination table (must already exist).
        engine: SQLAlchemy engine; a raw DBAPI connection is taken from it.

    Raises:
        Any exception raised by the driver propagates to the caller after the
        cursor and connection have been closed.
    """
    output = io.StringIO()
    # No quoting: special characters are backslash-escaped instead, which is
    # the escaping convention of COPY's text format. NOTE(review): whether a
    # literal backslash in the data is itself doubled depends on the csv
    # module of the running Python version - confirm on the target runtime.
    df_a.to_csv(
        output,
        sep='\t',
        index=False,
        header=False,
        quoting=csv.QUOTE_NONE,
        escapechar='\\',
    )
    # Rewind so copy_from reads the buffer from the beginning.
    output.seek(0)

    # engine <--- from sqlalchemy import create_engine
    connection = engine.raw_connection()
    try:
        cursor = connection.cursor()
        try:
            # Empty fields are loaded as NULL.
            cursor.copy_from(output, table_name, null='')
            connection.commit()
        finally:
            cursor.close()
    finally:
        # Close the raw connection so it is released even when COPY fails.
        connection.close()
使用函数 df2db 将 DataFrame 插入到已存在的表中;表的列应与 df 的列相同。
import pandas as pd
from sqlalchemy import create_engine
# Build a SQLAlchemy engine for a PostgreSQL database through the psycopg2
# driver. NOTE(review): the user, password, host and database name in the URL
# look like placeholders - replace them with real connection settings.
engine = create_engine('postgresql+psycopg2://user:psw@localhost:5432/dbname')
# Read the CSV into a DataFrame. `file` is not defined in this snippet and
# must be set to the CSV path beforehand.
df = pd.read_csv(file)
# Copy the DataFrame into the table. `table_name` is likewise not defined in
# this snippet and must be provided by the caller.
df2db(df, table_name, engine)