选择汇率的最新时间(小时)
Select the latest hour for rates
我遇到这样一个场景:我需要获取若干货币对的汇率。我有 2 个表,一个包含与银行业务操作相关的信息,另一个包含银行采用的每日汇率。我刚开始学习数据分析,所以请多包涵。我的英语也不是很好。
考虑这个例子:
Table 1(银行业务):
Op Number | Coin_1 | Coin_2 | Date | Hour 1 | Weekday |
1 | EUR | GBP | 2020/06/01 | 03:30 | Monday |
Table 2(汇率):
Coin_1 | Coin_2 | Date | Hour 2 | Weekday | Rate
EUR | GBP | 2020/03/01 | 11:30 | Friday | 0.6
EUR | GBP | 2020/03/01 | 18:30 | Friday | 0.5
EUR | GBP | 2020/06/01 | 12:30 | Monday | 0.55
注意:汇率在周末不会更新。
我不知道如何获得这个值。使用脚本组件?如果是这样,你能帮我解决这个算法吗?到目前为止,我已经完成了所需的所有 ETL,但似乎找不到解决此任务的方法。
这可以在 sql 中使用 lead 窗口函数和一些日期时间数学来完成。
-- Sample bank operations. [Case] is the join key used by the matching query.
create table #t1(
    [Case] int,
    [Op Number] int,
    [Coin_1] varchar(10),
    [Coin_2] varchar(10),
    [Date] date,
    [Hour 1] time,
    [Weekday] varchar(10)
);

-- Explicit column list: the insert no longer depends on the physical column
-- order of the table, so adding or reordering columns cannot shift values.
insert into #t1 ([Case], [Op Number], [Coin_1], [Coin_2], [Date], [Hour 1], [Weekday])
values
    (1, 1, 'EUR', 'GBP', '2020/06/01', '03:30', 'Monday');

-- Sample exchange rates; each row is the rate published at [Date] + [Hour 2].
create table #t2(
    [Case] int,
    [Coin_1] varchar(10),
    [Coin_2] varchar(10),
    [Date] date,
    [Hour 2] time,
    [Weekday] varchar(10),
    [Rate] decimal(10,2)
);

-- NOTE(review): the [Weekday] labels do not agree with the dates when they are
-- read as year/month/day -- confirm the intended date format of the source data.
insert into #t2 ([Case], [Coin_1], [Coin_2], [Date], [Hour 2], [Weekday], [Rate])
values
    (1, 'EUR', 'GBP', '2020/03/01', '11:30', 'Friday', 0.6),
    (1, 'EUR', 'GBP', '2020/03/01', '18:30', 'Friday', 0.5),
    (1, 'EUR', 'GBP', '2020/06/01', '12:30', 'Monday', 0.55)
-- Match every bank operation to the exchange rate that was in force at its
-- timestamp. Each rate row is turned into a half-open validity interval
-- [start_dt, end_dt) that ends where the next rate for the same key begins.
-- The leading semicolon terminates whatever statement precedes the CTE.
; with t1 as (
    -- operations, with date and time folded into one comparable datetime2
    select *, dt = cast(concat([Date], ' ', [Hour 1]) as datetime2(0))
    from #t1
)
, x as (
    -- rates, with date and time folded into one comparable datetime2
    select *, dt = cast(concat([Date], ' ', [Hour 2]) as datetime2(0))
    from #t2
)
, t2 as (
    select [Case],
        [Coin_1],
        [Coin_2],
        [Rate],
        [Date],     -- the comma was missing here, which aliased [Date] as [Hour 2]
        [Hour 2],
        [Weekday],
        dt as start_dt,
        -- the newest rate has no successor, so it stays valid until a far-future sentinel;
        -- partitioning by the coin pair keeps rates of different pairs from cutting
        -- each other's intervals short
        isnull(
            lead(dt) over (partition by [Case], [Coin_1], [Coin_2] order by dt asc),
            '20990101'
        ) as end_dt
    from x
)
select *
from t1
inner join t2
    on t2.[Case] = t1.[Case]
    -- a rate only applies to operations on the same coin pair
    and t2.[Coin_1] = t1.[Coin_1]
    and t2.[Coin_2] = t1.[Coin_2]
    and t1.dt >= t2.start_dt
    and t1.dt < t2.end_dt;
如果这是一个学习练习,最好使用 SSIS 的组件来完成。如果这是真实世界的东西,请相信我在这方面的经验,尝试使用 SSIS 片段来实现这一点不会令人愉快。
现有数据模型中较大的挑战之一是日期和时间被分开存储。我假设源系统将它们分别存储为 date 和 time(0) 数据类型。我在查询中构造了一个真正的 datetime2 列,这样就可以把"比较逻辑是否正确"这件事交给 Microsoft 的优秀工程师去操心。
与史蒂夫提议的 lead/lag 解决方案不同,我把它看作一个"OUTER APPLY 加 TOP 1"的问题。
-- Bank operations: one row per transaction, with date and time kept in separate columns.
CREATE TABLE dbo.BankOperations (
    CaseNumber      int,
    Coin_1          char(3),
    Coin_2          char(3),
    TransactionDate date,
    TransactionTime time(0)
);

-- Exchange rates: same layout as the operations table plus the published rate.
CREATE TABLE dbo.ExchangeRates (
    CaseNumber      int,
    Coin_1          char(3),
    Coin_2          char(3),
    TransactionDate date,
    TransactionTime time(0),
    Rate            decimal(4, 2)
);
-- Sample operations chosen to probe each boundary of the rate lookup.
-- Explicit column lists keep the inserts valid if the tables gain or reorder columns.
INSERT INTO dbo.BankOperations
    (CaseNumber, Coin_1, Coin_2, TransactionDate, TransactionTime)
VALUES
    -- falls between the second and third rate rows
    (1, 'EUR', 'GBP', '2020-06-01', '03:30')
    -- boundary checking exact
,   (2, 'EUR', 'GBP', '2020-06-01', '12:30')
    -- boundary beyond/not defined
,   (3, 'EUR', 'GBP', '2020-06-01', '13:30')
    -- boundary before
,   (4, 'EUR', 'GBP', '2020-03-01', '10:30')
    -- boundary first at
,   (5, 'EUR', 'GBP', '2020-03-01', '11:30');

-- Sample rates for the EUR/GBP pair.
INSERT INTO dbo.ExchangeRates
    (CaseNumber, Coin_1, Coin_2, TransactionDate, TransactionTime, Rate)
VALUES
    (1, 'EUR', 'GBP', '2020-03-01', '11:30', .6)
,   (2, 'EUR', 'GBP', '2020-03-01', '18:30', .5)
,   (3, 'EUR', 'GBP', '2020-06-01', '12:30', .55);
-- Build temp-table copies of both tables with date and time merged into a single
-- datetime2(0) column (IsThisWorking). The original author's rationale: comparing on
-- the separate date/time columns will hurt performance at scale, although copying
-- the data like this may hurt as well.
SELECT
    src.CaseNumber
,   src.Coin_1
,   src.Coin_2
,   src.TransactionDate
,   src.TransactionTime
,   CONVERT(datetime2(0), CONCAT(src.TransactionDate, 'T', src.TransactionTime)) AS IsThisWorking
INTO #BankOperations
FROM dbo.BankOperations AS src;

SELECT
    src.CaseNumber
,   src.Coin_1
,   src.Coin_2
,   src.TransactionDate
,   src.TransactionTime
,   src.Rate
,   CONVERT(datetime2(0), CONCAT(src.TransactionDate, 'T', src.TransactionTime)) AS IsThisWorking
INTO #ExchangeRates
FROM dbo.ExchangeRates AS src;
-- Option A for pinning data
-- For each operation, OUTER APPLY with TOP (1) picks the newest rate row for the
-- same coin pair whose timestamp is not later than the operation ("closest without
-- going over"). Operations with no such rate are kept, with NULL rate columns.
SELECT
    Ops.*
    -- assuming surrogate key
,   Pinned.CaseNumber
,   Pinned.Rate
FROM #BankOperations AS Ops
OUTER APPLY
(
    SELECT TOP (1)
        Rates.CaseNumber
    ,   Rates.Rate
    FROM #ExchangeRates AS Rates
    WHERE
        -- same coin pair
        Rates.Coin_1 = Ops.Coin_1
        AND Rates.Coin_2 = Ops.Coin_2
        -- discard rates published after the operation
        AND Rates.IsThisWorking <= Ops.IsThisWorking
    -- newest qualifying rate first
    ORDER BY Rates.IsThisWorking DESC
) AS Pinned;
-- Option B
-- Use lead/lag function to get the value
-- but my brain isn't seeing it at the moment
/*
SELECT
BO.*
-- assuming surrogate key
, LAG()
FROM
#BankOperations AS BO
INNER JOIn #ExchangeRates
*/
如果我被迫提供一个纯粹基于 SSIS 的答案,我会使用查找(Lookup)组件,并且不使用默认的 Full 缓存模式,而是以 None(无缓存)模式运行它。性能上的影响是:对于进入缓冲区的每一行,我们都要向源系统发出一次查询来检索一行数据。取决于数据量,这可能会比较"重"。
作为源,您有一个指向 BankOperations 的 OLE DB 源组件。这会流入我们将参数化的查找。
-- Parameterized lookup: newest exchange-rate row at or before the incoming operation.
-- Placeholders, in order: Coin_1, Coin_2, transaction date, transaction time.
SELECT TOP (1)
    Stamped.*
FROM
(
    -- every rate row plus its date and time merged into one datetime2(0)
    SELECT
        ER.*
    ,   CAST(CONCAT(ER.TransactionDate, 'T', ER.TransactionTime) AS datetime2(0)) AS IsThisWorking
    FROM dbo.ExchangeRates AS ER
) AS Stamped
WHERE
    -- same coin pair
    Stamped.Coin_1 = ?
    AND Stamped.Coin_2 = ?
    -- drop rates newer than the operation
    AND Stamped.IsThisWorking <= CAST(CONCAT(?, 'T', ?) AS datetime2(0))
ORDER BY
    Stamped.IsThisWorking DESC
所有的 ? 都是按序号对应的占位符,序号从 0 开始。我们要做的是模仿原始查询的逻辑。坦白说,我上一次做参数化的 none/partial 缓存查找已经是很久以前的事了,所以更详细的要点您需要自己查阅资料。我记得的是,您需要点开高级(Advanced)"选项"才能使其正常工作。
我见过的一种使用 SSIS 组件的不同方法将涉及两个源和一个连接。我认为是 Matt Masson 演示了这项技术,但我已经有好几年没有这样做了。同样,如果您在源查询中执行此操作,您将获得更好的性能,因为这种方法将需要两种排序 + Join 的阻塞转换。
最好的脚本组件做法是模仿参数化查找组件的方法。它保持同步(1 行输入,1 行输出),我们通过添加汇率(Rate)列来丰富数据流。
伪代码大约
// Copy the values this lookup needs out of the current row buffer.
var coin_1 = Row.coin1;
var coin_2 = Row.coin2;
var transactionDate = Row.IsThisWorking;

// Parameterized SqlClient query: fetch the newest rate at or before the transaction timestamp.
using (SqlConnection conn = new SqlConnection())
{
    // TODO: assign conn.ConnectionString before opening; the original pseudocode
    // does not show where the connection details come from.
    conn.Open();
    using (SqlCommand command = new SqlCommand())
    {
        // The command must be bound to the open connection before it can run.
        command.Connection = conn;

        // dbo.ExchangeRates has no combined timestamp column, so it is computed inline.
        // ORDER BY ... DESC makes TOP 1 return the newest qualifying rate rather than
        // an arbitrary one.
        command.CommandText =
            "SELECT TOP 1 ER.Rate " +
            "FROM dbo.ExchangeRates AS ER " +
            "CROSS APPLY (SELECT CAST(CONCAT(ER.TransactionDate, 'T', ER.TransactionTime) AS datetime2(0)) AS IsThisWorking) AS ITW " +
            "WHERE @txnDate >= ITW.IsThisWorking AND ER.Coin_1 = @coin1 AND ER.Coin_2 = @coin2 " +
            "ORDER BY ITW.IsThisWorking DESC;";

        command.Parameters.AddWithValue("@txnDate", transactionDate);
        command.Parameters.AddWithValue("@coin1", coin_1);
        command.Parameters.AddWithValue("@coin2", coin_2);

        // ExecuteScalar returns null when no rate exists at or before the transaction.
        object rate = command.ExecuteScalar();
        if (rate is decimal)
        {
            // assumes the output buffer exposes a decimal Rate column -- TODO confirm
            Row.Rate = (decimal)rate;
        }
    }
}
我遇到这样一个场景:我需要获取若干货币对的汇率。我有 2 个表,一个包含与银行业务操作相关的信息,另一个包含银行采用的每日汇率。我刚开始学习数据分析,所以请多包涵。我的英语也不是很好。
考虑这个例子:
Table 1(银行业务):
Op Number | Coin_1 | Coin_2 | Date | Hour 1 | Weekday |
1 | EUR | GBP | 2020/06/01 | 03:30 | Monday |
Table 2(汇率):
Coin_1 | Coin_2 | Date | Hour 2 | Weekday | Rate
EUR | GBP | 2020/03/01 | 11:30 | Friday | 0.6
EUR | GBP | 2020/03/01 | 18:30 | Friday | 0.5
EUR | GBP | 2020/06/01 | 12:30 | Monday | 0.55
注意:汇率在周末不会更新。
我不知道如何获得这个值。使用脚本组件?如果是这样,你能帮我解决这个算法吗?到目前为止,我已经完成了所需的所有 ETL,但似乎找不到解决此任务的方法。
这可以在 sql 中使用 lead 窗口函数和一些日期时间数学来完成。
-- Sample bank operations. [Case] is the join key used by the matching query.
create table #t1(
    [Case] int,
    [Op Number] int,
    [Coin_1] varchar(10),
    [Coin_2] varchar(10),
    [Date] date,
    [Hour 1] time,
    [Weekday] varchar(10)
);

-- Explicit column list: the insert no longer depends on the physical column
-- order of the table, so adding or reordering columns cannot shift values.
insert into #t1 ([Case], [Op Number], [Coin_1], [Coin_2], [Date], [Hour 1], [Weekday])
values
    (1, 1, 'EUR', 'GBP', '2020/06/01', '03:30', 'Monday');

-- Sample exchange rates; each row is the rate published at [Date] + [Hour 2].
create table #t2(
    [Case] int,
    [Coin_1] varchar(10),
    [Coin_2] varchar(10),
    [Date] date,
    [Hour 2] time,
    [Weekday] varchar(10),
    [Rate] decimal(10,2)
);

-- NOTE(review): the [Weekday] labels do not agree with the dates when they are
-- read as year/month/day -- confirm the intended date format of the source data.
insert into #t2 ([Case], [Coin_1], [Coin_2], [Date], [Hour 2], [Weekday], [Rate])
values
    (1, 'EUR', 'GBP', '2020/03/01', '11:30', 'Friday', 0.6),
    (1, 'EUR', 'GBP', '2020/03/01', '18:30', 'Friday', 0.5),
    (1, 'EUR', 'GBP', '2020/06/01', '12:30', 'Monday', 0.55)
-- Match every bank operation to the exchange rate that was in force at its
-- timestamp. Each rate row is turned into a half-open validity interval
-- [start_dt, end_dt) that ends where the next rate for the same key begins.
-- The leading semicolon terminates whatever statement precedes the CTE.
; with t1 as (
    -- operations, with date and time folded into one comparable datetime2
    select *, dt = cast(concat([Date], ' ', [Hour 1]) as datetime2(0))
    from #t1
)
, x as (
    -- rates, with date and time folded into one comparable datetime2
    select *, dt = cast(concat([Date], ' ', [Hour 2]) as datetime2(0))
    from #t2
)
, t2 as (
    select [Case],
        [Coin_1],
        [Coin_2],
        [Rate],
        [Date],     -- the comma was missing here, which aliased [Date] as [Hour 2]
        [Hour 2],
        [Weekday],
        dt as start_dt,
        -- the newest rate has no successor, so it stays valid until a far-future sentinel;
        -- partitioning by the coin pair keeps rates of different pairs from cutting
        -- each other's intervals short
        isnull(
            lead(dt) over (partition by [Case], [Coin_1], [Coin_2] order by dt asc),
            '20990101'
        ) as end_dt
    from x
)
select *
from t1
inner join t2
    on t2.[Case] = t1.[Case]
    -- a rate only applies to operations on the same coin pair
    and t2.[Coin_1] = t1.[Coin_1]
    and t2.[Coin_2] = t1.[Coin_2]
    and t1.dt >= t2.start_dt
    and t1.dt < t2.end_dt;
如果这是一个学习练习,最好使用 SSIS 的组件来完成。如果这是真实世界的东西,请相信我在这方面的经验,尝试使用 SSIS 片段来实现这一点不会令人愉快。
现有数据模型中较大的挑战之一是日期和时间被分开存储。我假设源系统将它们分别存储为 date 和 time(0) 数据类型。我在查询中构造了一个真正的 datetime2 列,这样就可以把"比较逻辑是否正确"这件事交给 Microsoft 的优秀工程师去操心。
与史蒂夫提议的 lead/lag 解决方案不同,我把它看作一个"OUTER APPLY 加 TOP 1"的问题。
-- Bank operations: one row per transaction, with date and time kept in separate columns.
CREATE TABLE dbo.BankOperations (
    CaseNumber      int,
    Coin_1          char(3),
    Coin_2          char(3),
    TransactionDate date,
    TransactionTime time(0)
);

-- Exchange rates: same layout as the operations table plus the published rate.
CREATE TABLE dbo.ExchangeRates (
    CaseNumber      int,
    Coin_1          char(3),
    Coin_2          char(3),
    TransactionDate date,
    TransactionTime time(0),
    Rate            decimal(4, 2)
);
-- Sample operations chosen to probe each boundary of the rate lookup.
-- Explicit column lists keep the inserts valid if the tables gain or reorder columns.
INSERT INTO dbo.BankOperations
    (CaseNumber, Coin_1, Coin_2, TransactionDate, TransactionTime)
VALUES
    -- falls between the second and third rate rows
    (1, 'EUR', 'GBP', '2020-06-01', '03:30')
    -- boundary checking exact
,   (2, 'EUR', 'GBP', '2020-06-01', '12:30')
    -- boundary beyond/not defined
,   (3, 'EUR', 'GBP', '2020-06-01', '13:30')
    -- boundary before
,   (4, 'EUR', 'GBP', '2020-03-01', '10:30')
    -- boundary first at
,   (5, 'EUR', 'GBP', '2020-03-01', '11:30');

-- Sample rates for the EUR/GBP pair.
INSERT INTO dbo.ExchangeRates
    (CaseNumber, Coin_1, Coin_2, TransactionDate, TransactionTime, Rate)
VALUES
    (1, 'EUR', 'GBP', '2020-03-01', '11:30', .6)
,   (2, 'EUR', 'GBP', '2020-03-01', '18:30', .5)
,   (3, 'EUR', 'GBP', '2020-06-01', '12:30', .55);
-- Build temp-table copies of both tables with date and time merged into a single
-- datetime2(0) column (IsThisWorking). The original author's rationale: comparing on
-- the separate date/time columns will hurt performance at scale, although copying
-- the data like this may hurt as well.
SELECT
    src.CaseNumber
,   src.Coin_1
,   src.Coin_2
,   src.TransactionDate
,   src.TransactionTime
,   CONVERT(datetime2(0), CONCAT(src.TransactionDate, 'T', src.TransactionTime)) AS IsThisWorking
INTO #BankOperations
FROM dbo.BankOperations AS src;

SELECT
    src.CaseNumber
,   src.Coin_1
,   src.Coin_2
,   src.TransactionDate
,   src.TransactionTime
,   src.Rate
,   CONVERT(datetime2(0), CONCAT(src.TransactionDate, 'T', src.TransactionTime)) AS IsThisWorking
INTO #ExchangeRates
FROM dbo.ExchangeRates AS src;
-- Option A for pinning data
-- For each operation, OUTER APPLY with TOP (1) picks the newest rate row for the
-- same coin pair whose timestamp is not later than the operation ("closest without
-- going over"). Operations with no such rate are kept, with NULL rate columns.
SELECT
    Ops.*
    -- assuming surrogate key
,   Pinned.CaseNumber
,   Pinned.Rate
FROM #BankOperations AS Ops
OUTER APPLY
(
    SELECT TOP (1)
        Rates.CaseNumber
    ,   Rates.Rate
    FROM #ExchangeRates AS Rates
    WHERE
        -- same coin pair
        Rates.Coin_1 = Ops.Coin_1
        AND Rates.Coin_2 = Ops.Coin_2
        -- discard rates published after the operation
        AND Rates.IsThisWorking <= Ops.IsThisWorking
    -- newest qualifying rate first
    ORDER BY Rates.IsThisWorking DESC
) AS Pinned;
-- Option B
-- Use lead/lag function to get the value
-- but my brain isn't seeing it at the moment
/*
SELECT
BO.*
-- assuming surrogate key
, LAG()
FROM
#BankOperations AS BO
INNER JOIn #ExchangeRates
*/
如果我被迫提供一个纯粹基于 SSIS 的答案,我会使用查找(Lookup)组件,并且不使用默认的 Full 缓存模式,而是以 None(无缓存)模式运行它。性能上的影响是:对于进入缓冲区的每一行,我们都要向源系统发出一次查询来检索一行数据。取决于数据量,这可能会比较"重"。
作为源,您有一个指向 BankOperations 的 OLE DB 源组件。这会流入我们将参数化的查找。
-- Parameterized lookup: newest exchange-rate row at or before the incoming operation.
-- Placeholders, in order: Coin_1, Coin_2, transaction date, transaction time.
SELECT TOP (1)
    Stamped.*
FROM
(
    -- every rate row plus its date and time merged into one datetime2(0)
    SELECT
        ER.*
    ,   CAST(CONCAT(ER.TransactionDate, 'T', ER.TransactionTime) AS datetime2(0)) AS IsThisWorking
    FROM dbo.ExchangeRates AS ER
) AS Stamped
WHERE
    -- same coin pair
    Stamped.Coin_1 = ?
    AND Stamped.Coin_2 = ?
    -- drop rates newer than the operation
    AND Stamped.IsThisWorking <= CAST(CONCAT(?, 'T', ?) AS datetime2(0))
ORDER BY
    Stamped.IsThisWorking DESC
所有的 ? 都是按序号对应的占位符,序号从 0 开始。我们要做的是模仿原始查询的逻辑。坦白说,我上一次做参数化的 none/partial 缓存查找已经是很久以前的事了,所以更详细的要点您需要自己查阅资料。我记得的是,您需要点开高级(Advanced)"选项"才能使其正常工作。
我见过的一种使用 SSIS 组件的不同方法将涉及两个源和一个连接。我认为是 Matt Masson 演示了这项技术,但我已经有好几年没有这样做了。同样,如果您在源查询中执行此操作,您将获得更好的性能,因为这种方法将需要两种排序 + Join 的阻塞转换。
最好的脚本组件做法是模仿参数化查找组件的方法。它保持同步(1 行输入,1 行输出),我们通过添加汇率(Rate)列来丰富数据流。
伪代码大约
// Copy the values this lookup needs out of the current row buffer.
var coin_1 = Row.coin1;
var coin_2 = Row.coin2;
var transactionDate = Row.IsThisWorking;

// Parameterized SqlClient query: fetch the newest rate at or before the transaction timestamp.
using (SqlConnection conn = new SqlConnection())
{
    // TODO: assign conn.ConnectionString before opening; the original pseudocode
    // does not show where the connection details come from.
    conn.Open();
    using (SqlCommand command = new SqlCommand())
    {
        // The command must be bound to the open connection before it can run.
        command.Connection = conn;

        // dbo.ExchangeRates has no combined timestamp column, so it is computed inline.
        // ORDER BY ... DESC makes TOP 1 return the newest qualifying rate rather than
        // an arbitrary one.
        command.CommandText =
            "SELECT TOP 1 ER.Rate " +
            "FROM dbo.ExchangeRates AS ER " +
            "CROSS APPLY (SELECT CAST(CONCAT(ER.TransactionDate, 'T', ER.TransactionTime) AS datetime2(0)) AS IsThisWorking) AS ITW " +
            "WHERE @txnDate >= ITW.IsThisWorking AND ER.Coin_1 = @coin1 AND ER.Coin_2 = @coin2 " +
            "ORDER BY ITW.IsThisWorking DESC;";

        command.Parameters.AddWithValue("@txnDate", transactionDate);
        command.Parameters.AddWithValue("@coin1", coin_1);
        command.Parameters.AddWithValue("@coin2", coin_2);

        // ExecuteScalar returns null when no rate exists at or before the transaction.
        object rate = command.ExecuteScalar();
        if (rate is decimal)
        {
            // assumes the output buffer exposes a decimal Rate column -- TODO confirm
            Row.Rate = (decimal)rate;
        }
    }
}