Redshift:只复制新记录并忽略现有记录
Redshift: Copy only new records and ignore the existing ones
我正在使用外部表(external table)从 S3 读取 JSON 文件。
我使用下面的查询将数据从外部表传输到内部表:
-- Append every entry/category row produced by the nested external table
-- to jatinanalysis. Nothing here filters against rows already present in
-- the target, so each run appends the full source result again.
INSERT INTO jatinanalysis (
    title,
    url,
    author,
    published_date,
    category
)
SELECT
    entry_row.title,
    entry_row.link,
    entry_row.author,
    entry_row.published_date,
    category
FROM
    jatinspectrum.extable src,
    src.enteries entry_row,
    entry_row.category category
但每次运行这个查询都会产生重复记录;我只需要插入新的记录,已存在的记录应当被忽略。
更新:我尝试了下面的写法,但没有成功:
-- Attempted fix (reported as not working): insert only the source rows for
-- which no row with the same five column values exists in jatinanalysis.
insert into jatinanalysis (title,url,author,published_date,category)
select distinct b.title,b.link,b.author,b.published_date,category
FROM jatinspectrum.extable a, a.enteries b,b.category category
WHERE NOT EXISTS(SELECT *
FROM jatinanalysis
-- NOTE(review): the predicates below use jatinspectrum.extable.* paths rather
-- than the aliases b / category declared in the outer FROM - likely one reason
-- this fails; confirm against the engine's error message.
WHERE (jatinspectrum.extable.b.title=jatinanalysis.title and
jatinspectrum.extable.b.link=jatinanalysis.url and
jatinspectrum.extable.b.author=jatinanalysis.author and
-- NOTE(review): "b.b.published_date" repeats the alias; the select list uses b.published_date.
jatinspectrum.extable.b.b.published_date=jatinanalysis.published_date and
-- NOTE(review): parentheses are unbalanced - "NOT EXISTS(" and "WHERE (" open
-- two groups, but only one ")" closes them.
jatinspectrum.extable.category=jatinanalysis.category)
下面的写法对我有效:
-- Load only rows that are not already in jatinanalysis: EXCEPT keeps the
-- rows of the first query that do not appear in the second, so rows that
-- were loaded by an earlier run are skipped.
INSERT INTO jatinanalysis (
    title,
    url,
    author,
    published_date,
    category
)
SELECT
    entry_row.title,
    entry_row.link,
    entry_row.author,
    entry_row.published_date,
    category
FROM
    jatinspectrum.extable src,
    src.enteries entry_row,
    entry_row.category category
EXCEPT
SELECT
    title,
    url,
    author,
    published_date,
    category
FROM jatinanalysis;
我正在使用外部表(external table)从 S3 读取 JSON 文件。我使用下面的查询
将数据从外部 table 传输到内部 table
-- Append every entry/category row produced by the nested external table
-- to jatinanalysis. Nothing here filters against rows already present in
-- the target, so each run appends the full source result again.
INSERT INTO jatinanalysis (
    title,
    url,
    author,
    published_date,
    category
)
SELECT
    entry_row.title,
    entry_row.link,
    entry_row.author,
    entry_row.published_date,
    category
FROM
    jatinspectrum.extable src,
    src.enteries entry_row,
    entry_row.category category
但每次运行这个查询都会产生重复记录;我只需要插入新的记录,已存在的记录应当被忽略。
更新:我尝试了下面的写法,但没有成功:
-- Attempted fix (reported as not working): insert only the source rows for
-- which no row with the same five column values exists in jatinanalysis.
insert into jatinanalysis (title,url,author,published_date,category)
select distinct b.title,b.link,b.author,b.published_date,category
FROM jatinspectrum.extable a, a.enteries b,b.category category
WHERE NOT EXISTS(SELECT *
FROM jatinanalysis
-- NOTE(review): the predicates below use jatinspectrum.extable.* paths rather
-- than the aliases b / category declared in the outer FROM - likely one reason
-- this fails; confirm against the engine's error message.
WHERE (jatinspectrum.extable.b.title=jatinanalysis.title and
jatinspectrum.extable.b.link=jatinanalysis.url and
jatinspectrum.extable.b.author=jatinanalysis.author and
-- NOTE(review): "b.b.published_date" repeats the alias; the select list uses b.published_date.
jatinspectrum.extable.b.b.published_date=jatinanalysis.published_date and
-- NOTE(review): parentheses are unbalanced - "NOT EXISTS(" and "WHERE (" open
-- two groups, but only one ")" closes them.
jatinspectrum.extable.category=jatinanalysis.category)
下面的写法对我有效:
-- Load only rows that are not already in jatinanalysis: EXCEPT keeps the
-- rows of the first query that do not appear in the second, so rows that
-- were loaded by an earlier run are skipped.
INSERT INTO jatinanalysis (
    title,
    url,
    author,
    published_date,
    category
)
SELECT
    entry_row.title,
    entry_row.link,
    entry_row.author,
    entry_row.published_date,
    category
FROM
    jatinspectrum.extable src,
    src.enteries entry_row,
    entry_row.category category
EXCEPT
SELECT
    title,
    url,
    author,
    published_date,
    category
FROM jatinanalysis;