计算来自不同数据源的方差 Oracle SQL
Calculating Variance from different data Sources Oracle SQL
我正在尝试根据从两个不同来源获得的数据创建一个差异列表。此数据包含一个日期、一系列参考和包含数字计数的列等。
这背后的想法是检查数据源 1 中的数据与数据源 2 中的数据是否具有相同的数值计数,然后记录每 5 分钟间隔的差异。
这里我有创建简化场景table和示例数据所需的代码
Create Table ABP_PROFILE
( ABP_DATE Date not Null,
ABP_SOURCE_UID Number(10) not Null,
ABP_REFERENCE_1 Varchar2(30) not Null,
ABP_CHARGE Number(18,6),
ABP_COUNT Number(18)
);
insert into ABP_PROFILE (ABP_DATE, ABP_SOURCE_UID, ABP_REFERENCE_1, ABP_CHARGE, ABP_COUNT)
values (to_date('15-06-2015 00:05:00', 'dd-mm-yyyy hh24:mi:ss'), 1, 'Another Reference', 757.500000, 101);
insert into ABP_PROFILE (ABP_DATE, ABP_SOURCE_UID, ABP_REFERENCE_1, ABP_CHARGE, ABP_COUNT)
values (to_date('15-06-2015 00:05:00', 'dd-mm-yyyy hh24:mi:ss'), 1, 'Some Reference', 2954.000000, 211);
insert into ABP_PROFILE (ABP_DATE, ABP_SOURCE_UID, ABP_REFERENCE_1, ABP_CHARGE, ABP_COUNT)
values (to_date('15-06-2015 00:05:00', 'dd-mm-yyyy hh24:mi:ss'), 2, 'Another Reference', 757.500000, 101);
insert into ABP_PROFILE (ABP_DATE, ABP_SOURCE_UID, ABP_REFERENCE_1, ABP_CHARGE, ABP_COUNT)
values (to_date('15-06-2015 00:05:00', 'dd-mm-yyyy hh24:mi:ss'), 2, 'Some Reference', 2954.000000, 211);
insert into ABP_PROFILE (ABP_DATE, ABP_SOURCE_UID, ABP_REFERENCE_1, ABP_CHARGE, ABP_COUNT)
values (to_date('15-06-2015 00:10:00', 'dd-mm-yyyy hh24:mi:ss'), 1, 'Another Reference', 5300.250000, 191);
insert into ABP_PROFILE (ABP_DATE, ABP_SOURCE_UID, ABP_REFERENCE_1, ABP_CHARGE, ABP_COUNT)
values (to_date('15-06-2015 00:10:00', 'dd-mm-yyyy hh24:mi:ss'), 1, 'Some Reference', 9568.000000, 208);
insert into ABP_PROFILE (ABP_DATE, ABP_SOURCE_UID, ABP_REFERENCE_1, ABP_CHARGE, ABP_COUNT)
values (to_date('15-06-2015 00:10:00', 'dd-mm-yyyy hh24:mi:ss'), 2, 'Another Reference', 5300.250000, 5555);
insert into ABP_PROFILE (ABP_DATE, ABP_SOURCE_UID, ABP_REFERENCE_1, ABP_CHARGE, ABP_COUNT)
values (to_date('15-06-2015 00:10:00', 'dd-mm-yyyy hh24:mi:ss'), 2, 'Some Reference', 1111.000000, 208);
我在这里创建了一个 BASIC SQL 查询来了解我想做什么。
With A_DATA As (
Select ABP_DATE As A_DATE,
ABP_REFERENCE_1 As A_REFERENCE_1,
ABP_CHARGE As A_CHARGE,
ABP_COUNT As A_COUNT
From ABP_PROFILE
Where ABP_SOURCE_UID = 1
), B_DATA As (
Select ABP_DATE As B_DATE,
ABP_REFERENCE_1 As B_REFERENCE_1,
ABP_CHARGE As B_CHARGE,
ABP_COUNT As B_COUNT
From ABP_PROFILE
Where ABP_SOURCE_UID = 2
)
Select A_DATE,
A_REFERENCE_1,
B_CHARGE - A_CHARGE As ChargeDifference,
B_COUNT - A_COUNT As CountDifference
From A_DATA,
B_DATA
Where A_DATE = B_DATE
And A_REFERENCE_1 = B_REFERENCE_1
;
这会根据两个数据源的日期和引用进行联接。我需要一个更加通用的解决方案,如果缺少一侧的数据,还需要显示差异,是的,可以为此使用完整的外部联接,但我想探索其他选项。
我一直在研究分析函数,我确信那里有一个可以做我想做的事情。我想知道是否有任何 Oracle SQL 专家有任何想法可以在这里提供帮助。
仅供参考,我是 运行 11gR2 Enterprise
带有 FULL JOIN
的解决方案似乎更具可读性,但如果您搜索替代方案 - 在这里 - 具有函数 lag()
和 lead()
:
with data as (
select abp_date dt, abp_source_uid id, abp_reference_1 ref,
abp_charge charge, abp_count cnt,
lag(abp_source_uid) over (partition by abp_date, abp_reference_1
order by abp_source_uid) lgid,
lead(abp_charge) over (partition by abp_date, abp_reference_1
order by abp_source_uid) ldcharge,
lead( abp_count) over (partition by abp_date, abp_reference_1
order by abp_source_uid) ldcnt
from abp_profile a)
select dt, ref,
case when id = 1 then nvl(ldcharge, 0) - charge else charge end chrg_diff,
case when id = 1 then nvl(ldcnt, 0) - cnt else cnt end cnt_diff
from data
where id = 1 or id = 2 and lgid is null;
... 并且您修改后的查询已转换为 full join
版本,我将其用于比较结果:
With A_DATA As (
Select ABP_DATE As A_DATE,
ABP_REFERENCE_1 As A_REFERENCE_1,
ABP_CHARGE As A_CHARGE,
ABP_COUNT As A_COUNT
From ABP_PROFILE
Where ABP_SOURCE_UID = 1
), B_DATA As (
Select ABP_DATE As B_DATE,
ABP_REFERENCE_1 As B_REFERENCE_1,
ABP_CHARGE As B_CHARGE,
ABP_COUNT As B_COUNT
From ABP_PROFILE
Where ABP_SOURCE_UID = 2
)
Select nvl(A_DATE, b_date) dt,
nvl(A_REFERENCE_1, b_reference_1) ref,
nvl(B_CHARGE, 0) - nvl(A_CHARGE, 0) As Chrg_Diff,
nvl(B_COUNT, 0) - nvl(A_COUNT, 0) As Cnt_Diff
From A_DATA
full join B_DATA on A_DATE = B_DATE and A_REFERENCE_1 = B_REFERENCE_1;
两个查询给出了相同的结果。在示例中,我添加了两行来展示如何处理丢失的数据。
这里我使用了 nvl(..., 0) 但当然你可以保留空值或添加列来告知这种情况。
我正在尝试根据从两个不同来源获得的数据创建一个差异列表。此数据包含一个日期、一系列参考和包含数字计数的列等。
这背后的想法是检查数据源 1 中的数据与数据源 2 中的数据是否具有相同的数值计数,然后记录每 5 分钟间隔的差异。
这里我有创建简化场景table和示例数据所需的代码
Create Table ABP_PROFILE
( ABP_DATE Date not Null,
ABP_SOURCE_UID Number(10) not Null,
ABP_REFERENCE_1 Varchar2(30) not Null,
ABP_CHARGE Number(18,6),
ABP_COUNT Number(18)
);
insert into ABP_PROFILE (ABP_DATE, ABP_SOURCE_UID, ABP_REFERENCE_1, ABP_CHARGE, ABP_COUNT)
values (to_date('15-06-2015 00:05:00', 'dd-mm-yyyy hh24:mi:ss'), 1, 'Another Reference', 757.500000, 101);
insert into ABP_PROFILE (ABP_DATE, ABP_SOURCE_UID, ABP_REFERENCE_1, ABP_CHARGE, ABP_COUNT)
values (to_date('15-06-2015 00:05:00', 'dd-mm-yyyy hh24:mi:ss'), 1, 'Some Reference', 2954.000000, 211);
insert into ABP_PROFILE (ABP_DATE, ABP_SOURCE_UID, ABP_REFERENCE_1, ABP_CHARGE, ABP_COUNT)
values (to_date('15-06-2015 00:05:00', 'dd-mm-yyyy hh24:mi:ss'), 2, 'Another Reference', 757.500000, 101);
insert into ABP_PROFILE (ABP_DATE, ABP_SOURCE_UID, ABP_REFERENCE_1, ABP_CHARGE, ABP_COUNT)
values (to_date('15-06-2015 00:05:00', 'dd-mm-yyyy hh24:mi:ss'), 2, 'Some Reference', 2954.000000, 211);
insert into ABP_PROFILE (ABP_DATE, ABP_SOURCE_UID, ABP_REFERENCE_1, ABP_CHARGE, ABP_COUNT)
values (to_date('15-06-2015 00:10:00', 'dd-mm-yyyy hh24:mi:ss'), 1, 'Another Reference', 5300.250000, 191);
insert into ABP_PROFILE (ABP_DATE, ABP_SOURCE_UID, ABP_REFERENCE_1, ABP_CHARGE, ABP_COUNT)
values (to_date('15-06-2015 00:10:00', 'dd-mm-yyyy hh24:mi:ss'), 1, 'Some Reference', 9568.000000, 208);
insert into ABP_PROFILE (ABP_DATE, ABP_SOURCE_UID, ABP_REFERENCE_1, ABP_CHARGE, ABP_COUNT)
values (to_date('15-06-2015 00:10:00', 'dd-mm-yyyy hh24:mi:ss'), 2, 'Another Reference', 5300.250000, 5555);
insert into ABP_PROFILE (ABP_DATE, ABP_SOURCE_UID, ABP_REFERENCE_1, ABP_CHARGE, ABP_COUNT)
values (to_date('15-06-2015 00:10:00', 'dd-mm-yyyy hh24:mi:ss'), 2, 'Some Reference', 1111.000000, 208);
我在这里创建了一个 BASIC SQL 查询来了解我想做什么。
With A_DATA As (
Select ABP_DATE As A_DATE,
ABP_REFERENCE_1 As A_REFERENCE_1,
ABP_CHARGE As A_CHARGE,
ABP_COUNT As A_COUNT
From ABP_PROFILE
Where ABP_SOURCE_UID = 1
), B_DATA As (
Select ABP_DATE As B_DATE,
ABP_REFERENCE_1 As B_REFERENCE_1,
ABP_CHARGE As B_CHARGE,
ABP_COUNT As B_COUNT
From ABP_PROFILE
Where ABP_SOURCE_UID = 2
)
Select A_DATE,
A_REFERENCE_1,
B_CHARGE - A_CHARGE As ChargeDifference,
B_COUNT - A_COUNT As CountDifference
From A_DATA,
B_DATA
Where A_DATE = B_DATE
And A_REFERENCE_1 = B_REFERENCE_1
;
这会根据两个数据源的日期和引用进行联接。我需要一个更加通用的解决方案,如果缺少一侧的数据,还需要显示差异,是的,可以为此使用完整的外部联接,但我想探索其他选项。
我一直在研究分析函数,我确信那里有一个可以做我想做的事情。我想知道是否有任何 Oracle SQL 专家有任何想法可以在这里提供帮助。
仅供参考,我是 运行 11gR2 Enterprise
带有 FULL JOIN
的解决方案似乎更具可读性,但如果您搜索替代方案 - 在这里 - 具有函数 lag()
和 lead()
:
with data as (
select abp_date dt, abp_source_uid id, abp_reference_1 ref,
abp_charge charge, abp_count cnt,
lag(abp_source_uid) over (partition by abp_date, abp_reference_1
order by abp_source_uid) lgid,
lead(abp_charge) over (partition by abp_date, abp_reference_1
order by abp_source_uid) ldcharge,
lead( abp_count) over (partition by abp_date, abp_reference_1
order by abp_source_uid) ldcnt
from abp_profile a)
select dt, ref,
case when id = 1 then nvl(ldcharge, 0) - charge else charge end chrg_diff,
case when id = 1 then nvl(ldcnt, 0) - cnt else cnt end cnt_diff
from data
where id = 1 or id = 2 and lgid is null;
... 并且您修改后的查询已转换为 full join
版本,我将其用于比较结果:
With A_DATA As (
Select ABP_DATE As A_DATE,
ABP_REFERENCE_1 As A_REFERENCE_1,
ABP_CHARGE As A_CHARGE,
ABP_COUNT As A_COUNT
From ABP_PROFILE
Where ABP_SOURCE_UID = 1
), B_DATA As (
Select ABP_DATE As B_DATE,
ABP_REFERENCE_1 As B_REFERENCE_1,
ABP_CHARGE As B_CHARGE,
ABP_COUNT As B_COUNT
From ABP_PROFILE
Where ABP_SOURCE_UID = 2
)
Select nvl(A_DATE, b_date) dt,
nvl(A_REFERENCE_1, b_reference_1) ref,
nvl(B_CHARGE, 0) - nvl(A_CHARGE, 0) As Chrg_Diff,
nvl(B_COUNT, 0) - nvl(A_COUNT, 0) As Cnt_Diff
From A_DATA
full join B_DATA on A_DATE = B_DATE and A_REFERENCE_1 = B_REFERENCE_1;
两个查询给出了相同的结果。在示例中,我添加了两行来展示如何处理丢失的数据。 这里我使用了 nvl(..., 0) 但当然你可以保留空值或添加列来告知这种情况。