如何使用 Azure 数据工厂将数据从 excel 加载到 SQL 数据库
How to load data from excel to SQL DB using Azure Data Factory
我有一个来源 Excel。如何使用 Azure 数据工厂将数据从 excel 加载到 SQL 数据库?
有很多方法可以做到这一点,但我将向您展示我认为最简单的方法。
- 在 Azure 数据工厂中设置“复制数据”活动(Copy Data activity),将数据从 Excel 复制到 Azure SQL DB 的暂存表。
- 在 Azure SQLDB 中创建一个存储过程,它将从暂存 table 插入最终输出 table
- 在 ADF 中将存储过程活动连接到“复制数据”活动之后,并让它引用您刚刚在数据库中创建的存储过程,这样 ADF 会在暂存表加载完成后立即运行该存储过程。
- 您可以让 command/procedure 在加载最终输出 table 之后或在 ADF 中复制 activity 之前清除暂存 table。
请参阅下面的屏幕截图和说明以了解完整步骤(如果不借助 Azure Functions 或其他工具,ADF 目前无法原生获取 sheet 名称等信息,但我已尽可能涵盖了所有内容。如果 Excel 文件的名称与 sheet 的名称相同,则可以改用文件名,这是一种可选方案,也是我在本例中采用的做法):
总体管道视图
第一步:截断(TRUNCATE)暂存表,以确保它在加载之前是空的
第二步:用输入文件加载暂存表
最后一步是运行存储过程
下面是我编写的存储过程代码,以及 ADF 存储过程活动的截图:
-- Copies the rows staged from one Excel sheet (stag.ttt_test_stage) into
-- stag.ttt_test_final, tagging each row with a category, a month-end date
-- and the supplied sheet name.
--
-- Parameters:
--   @ExcelFileName  value written to the SheetName output column of every row.
--
-- Row classification (RowId = position of the row in the staging table):
--   * the last row whose Details = '2.Class' is the section marker; it is skipped;
--   * rows strictly between row 1 and the marker are loaded as 'name';
--   * rows after the marker are loaded as 'class';
--   * row 1 is skipped whenever a marker exists (presumably a header row - confirm).
--
-- NOTE(review): RowId is derived from %%physloc%%, an undocumented virtual
--   column. This presumably reproduces the Excel row order only while the
--   staging table is a freshly truncated-and-loaded heap - confirm, or add an
--   explicit ordering column to the staging table.
-- NOTE(review): when no '2.Class' row exists @ClassStartRow is 0 and every
--   staged row (including row 1) is loaded as 'class'; when staging is empty
--   it is NULL and nothing is loaded. Confirm this is the intended behaviour.
-- NOTE(review): the INSERT has no column list, so it assumes the column order
--   of stag.ttt_test_final is (Date, Category, Type, value, SheetName) - verify
--   against the table definition and add an explicit list.
CREATE PROCEDURE stag.ttt_test (@ExcelFileName varchar(100))
AS
BEGIN
    -- Evaluate the year once so every row is stamped against the same year.
    DECLARE @CurrentYear int = YEAR(GETDATE());

    -- Position of the last '2.Class' marker row.
    DECLARE @ClassStartRow int;

    SELECT @ClassStartRow = MAX(sub.RowId)
    FROM (
        SELECT
            CASE
                WHEN st.Details = '2.Class' THEN ROW_NUMBER() OVER (ORDER BY %%physloc%%)
                ELSE 0
            END AS RowId
        FROM stag.ttt_test_stage AS st
    ) AS sub;

    -- Number the staged rows once and load both sections in a single,
    -- atomic INSERT so a failure cannot leave only one section loaded.
    WITH numbered AS (
        SELECT
            ROW_NUMBER() OVER (ORDER BY %%physloc%%) AS RowId,
            st.Details,
            st.January,
            st.February,
            st.March,
            st.April,
            st.May,
            st.June,
            st.July,
            st.August,
            st.September,
            st.October,
            st.November,
            st.December
        FROM stag.ttt_test_stage AS st
    )
    INSERT INTO stag.ttt_test_final
    SELECT
        -- Last day of the first month that has a value. Built from integer
        -- parts so the result does not depend on the session DATEFORMAT or
        -- language; yields NULL when every month column is NULL.
        EOMONTH(
            DATEFROMPARTS(
                @CurrentYear,
                CASE
                    WHEN n.January   IS NOT NULL THEN 1
                    WHEN n.February  IS NOT NULL THEN 2
                    WHEN n.March     IS NOT NULL THEN 3
                    WHEN n.April     IS NOT NULL THEN 4
                    WHEN n.May       IS NOT NULL THEN 5
                    WHEN n.June      IS NOT NULL THEN 6
                    WHEN n.July      IS NOT NULL THEN 7
                    WHEN n.August    IS NOT NULL THEN 8
                    WHEN n.September IS NOT NULL THEN 9
                    WHEN n.October   IS NOT NULL THEN 10
                    WHEN n.November  IS NOT NULL THEN 11
                    WHEN n.December  IS NOT NULL THEN 12
                END,
                1
            )
        ) AS [Date],
        -- Rows before the marker are 'name'; everything kept after it is 'class'.
        CASE
            WHEN n.RowId < @ClassStartRow THEN 'name'
            ELSE 'class'
        END AS Category,
        n.Details AS [Type],
        COALESCE(
            n.January, n.February, n.March, n.April, n.May, n.June,
            n.July, n.August, n.September, n.October, n.November, n.December
        ) AS [value],
        @ExcelFileName AS SheetName
    FROM numbered AS n
    WHERE (n.RowId > 1 AND n.RowId < @ClassStartRow)
       OR n.RowId > @ClassStartRow;
END;
如果这对您有用,请采纳为答案,如果您有任何其他意见或问题,请告诉我。谢谢
我有一个来源 Excel。如何使用 Azure 数据工厂将数据从 excel 加载到 SQL 数据库?
有很多方法可以做到这一点,但我将向您展示我认为最简单的方法。
- 在 Azure 数据工厂中设置复制数据 Activity 以复制数据 从 excel 到 Azure SQLDB 暂存 table.
- 在 Azure SQLDB 中创建一个存储过程,它将从暂存 table 插入最终输出 table
- 在 ADF 中将存储过程活动连接到“复制数据”活动之后,并让它引用您刚刚在数据库中创建的存储过程,这样 ADF 会在暂存表加载完成后立即运行该存储过程。
- 您可以让 command/procedure 在加载最终输出 table 之后或在 ADF 中复制 activity 之前清除暂存 table。
请参阅下面的屏幕截图和说明以了解完整步骤(如果不借助 Azure Functions 或其他工具,ADF 目前无法原生获取 sheet 名称等信息,但我已尽可能涵盖了所有内容。如果 Excel 文件的名称与 sheet 的名称相同,则可以改用文件名,这是一种可选方案,也是我在本例中采用的做法):
总体管道视图
-- Copies the rows staged from one Excel sheet (stag.ttt_test_stage) into
-- stag.ttt_test_final, tagging each row with a category, a month-end date
-- and the supplied sheet name.
--
-- Parameters:
--   @ExcelFileName  value written to the SheetName output column of every row.
--
-- Row classification (RowId = position of the row in the staging table):
--   * the last row whose Details = '2.Class' is the section marker; it is skipped;
--   * rows strictly between row 1 and the marker are loaded as 'name';
--   * rows after the marker are loaded as 'class';
--   * row 1 is skipped whenever a marker exists (presumably a header row - confirm).
--
-- NOTE(review): RowId is derived from %%physloc%%, an undocumented virtual
--   column. This presumably reproduces the Excel row order only while the
--   staging table is a freshly truncated-and-loaded heap - confirm, or add an
--   explicit ordering column to the staging table.
-- NOTE(review): when no '2.Class' row exists @ClassStartRow is 0 and every
--   staged row (including row 1) is loaded as 'class'; when staging is empty
--   it is NULL and nothing is loaded. Confirm this is the intended behaviour.
-- NOTE(review): the INSERT has no column list, so it assumes the column order
--   of stag.ttt_test_final is (Date, Category, Type, value, SheetName) - verify
--   against the table definition and add an explicit list.
CREATE PROCEDURE stag.ttt_test (@ExcelFileName varchar(100))
AS
BEGIN
    -- Evaluate the year once so every row is stamped against the same year.
    DECLARE @CurrentYear int = YEAR(GETDATE());

    -- Position of the last '2.Class' marker row.
    DECLARE @ClassStartRow int;

    SELECT @ClassStartRow = MAX(sub.RowId)
    FROM (
        SELECT
            CASE
                WHEN st.Details = '2.Class' THEN ROW_NUMBER() OVER (ORDER BY %%physloc%%)
                ELSE 0
            END AS RowId
        FROM stag.ttt_test_stage AS st
    ) AS sub;

    -- Number the staged rows once and load both sections in a single,
    -- atomic INSERT so a failure cannot leave only one section loaded.
    WITH numbered AS (
        SELECT
            ROW_NUMBER() OVER (ORDER BY %%physloc%%) AS RowId,
            st.Details,
            st.January,
            st.February,
            st.March,
            st.April,
            st.May,
            st.June,
            st.July,
            st.August,
            st.September,
            st.October,
            st.November,
            st.December
        FROM stag.ttt_test_stage AS st
    )
    INSERT INTO stag.ttt_test_final
    SELECT
        -- Last day of the first month that has a value. Built from integer
        -- parts so the result does not depend on the session DATEFORMAT or
        -- language; yields NULL when every month column is NULL.
        EOMONTH(
            DATEFROMPARTS(
                @CurrentYear,
                CASE
                    WHEN n.January   IS NOT NULL THEN 1
                    WHEN n.February  IS NOT NULL THEN 2
                    WHEN n.March     IS NOT NULL THEN 3
                    WHEN n.April     IS NOT NULL THEN 4
                    WHEN n.May       IS NOT NULL THEN 5
                    WHEN n.June      IS NOT NULL THEN 6
                    WHEN n.July      IS NOT NULL THEN 7
                    WHEN n.August    IS NOT NULL THEN 8
                    WHEN n.September IS NOT NULL THEN 9
                    WHEN n.October   IS NOT NULL THEN 10
                    WHEN n.November  IS NOT NULL THEN 11
                    WHEN n.December  IS NOT NULL THEN 12
                END,
                1
            )
        ) AS [Date],
        -- Rows before the marker are 'name'; everything kept after it is 'class'.
        CASE
            WHEN n.RowId < @ClassStartRow THEN 'name'
            ELSE 'class'
        END AS Category,
        n.Details AS [Type],
        COALESCE(
            n.January, n.February, n.March, n.April, n.May, n.June,
            n.July, n.August, n.September, n.October, n.November, n.December
        ) AS [value],
        @ExcelFileName AS SheetName
    FROM numbered AS n
    WHERE (n.RowId > 1 AND n.RowId < @ClassStartRow)
       OR n.RowId > @ClassStartRow;
END;
如果这对您有用,请采纳为答案,如果您有任何其他意见或问题,请告诉我。谢谢