如何通过 BigQuery 中的依赖匹配键连接两个表?
How to join two tables by dependent match keys in BigQuery?
我在 BigQuery 中有两个表。
第一个表是费率列表。对于每个 code - offer 组合,都有一个 source 等于 -1 的默认费率。除此之外,部分 code - offer 组合还为特定的 source 单独指定了费率。
第二个表与第一个表具有相同的列,只是没有费率(rate)列,另外还包含若干其他数据列。
我的目标是:优先按 code - offer - source 匹配来连接费率;如果没有匹配,则使用 code - offer 匹配且 source 等于 -1 的默认费率。
下面的示例查询只返回默认费率:
-- Sample data plus the asker's first attempt at the rate lookup.
-- t1: rate list keyed by (source, code, offer); rows with source = -1 carry the
-- default rate for a code/offer pair.
WITH t1 AS (SELECT 21 as source, 'SA' as code, 'offer1' as offer, 2.4 as rate
UNION ALL
SELECT 33, 'SA', 'offer1', 2.5
UNION ALL
SELECT 39, 'SA', 'offer1', 2.1
UNION ALL
SELECT -1, 'SA', 'offer1', 3
UNION ALL
SELECT -1, 'SA', 'offer2', 4
UNION ALL
SELECT 47, 'YN', 'offer1', 2.7
UNION ALL
SELECT -1, 'YN', 'offer1', 5.4
UNION ALL
SELECT -1, 'YN', 'offer2', 0.9
UNION ALL
SELECT -1, 'RE', 'offer1', 5.7
UNION ALL
SELECT -1, 'RE', 'offer2', 3.4),
-- t2: rows that need a rate; same key columns as t1 but no rate column.
-- Note that it contains duplicate rows.
t2 as (SELECT 21 as source, 'SA' as code, 'offer1' as offer, "any data" as other_columns
UNION ALL SELECT 21, 'SA', 'offer1', "any data"
UNION ALL SELECT 21, 'SA', 'offer1', "any data"
UNION ALL SELECT 21, 'SA', 'offer2', "any data"
UNION ALL SELECT 47, 'YN', 'offer1', "any data"
UNION ALL SELECT 47, 'YN', 'offer2', "any data"
UNION ALL SELECT 50, 'YN', 'offer1', "any data"
UNION ALL SELECT 47, 'YN', 'offer2', "any data"
UNION ALL SELECT 78, 'RE', 'offer1', "any data"
UNION ALL SELECT 66, 'RE', 'offer2', "any data")
-- Attempted join. `t1.source = t1.source` is true for every t1 row above and
-- t1.rate is never NULL in this sample data, so `rate IS NULL` makes the IF
-- condition always false. Only the ELSE branch (t1.source = -1) is applied,
-- which is why every t2 row receives the default rate.
SELECT t2.*, rate FROM t2
LEFT JOIN t1 ON t1.offer = t2.offer AND t1.code = t2.code AND IF (t1.source = t1.source AND rate IS NULL, t1.source = t2.source, t1.source = - 1)
下一个查询会返回指定 source 的费率,而当 source 不匹配时返回 null:
-- Second attempt; assumes the t1/t2 CTEs from the sample query are in scope.
-- With `rate IS NOT NULL` the IF condition is true for every t1 row in the
-- sample data, so only `t1.source = t2.source` is applied: exact source matches
-- get their rate and every other t2 row gets NULL instead of the default.
SELECT t2.*, rate FROM t2
LEFT JOIN t1 ON t1.offer = t2.offer AND t1.code = t2.code AND IF (t1.source = t1.source AND rate IS NOT NULL, t1.source = t2.source, t1.source = - 1)
我怎样才能正确地连接费率?
您可以使用两次 left join,并结合条件逻辑:
-- Look up the rate in two passes over t1: first an exact (code, offer, source)
-- match, then the source = -1 default. The default is only joined when the
-- exact match found nothing, and COALESCE picks whichever rate is present.
SELECT
    t2.*,
    COALESCE(exact_match.rate, fallback.rate) AS rate
FROM t2
LEFT JOIN t1 AS exact_match
    ON exact_match.source = t2.source
    AND exact_match.offer = t2.offer
    AND exact_match.code = t2.code
LEFT JOIN t1 AS fallback
    ON exact_match.code IS NULL  -- skip the default when an exact match exists
    AND fallback.source = -1
    AND fallback.offer = t2.offer
    AND fallback.code = t2.code
以下适用于 BigQuery 标准 SQL
#standardSQL
-- Single-join variant: pair each t2 row with its candidate t1 rates and keep
-- the best one.
-- Candidates are limited to the exact-source row and the source = -1 default,
-- so a rate belonging to some other source can never be picked when a
-- code/offer pair has neither an exact match nor a default (rate is then NULL).
-- Ordering by `t1.source = t2.source desc` puts the exact match ahead of the
-- default; `limit 1` + `[offset(0)]` extracts that single best rate.
-- Grouping by the formatted t2 row collapses identical t2 rows into one
-- output row.
select any_value(t2).*,
  array_agg(rate order by t1.source = t2.source desc limit 1)[offset(0)] rate
from t2
left join t1
  on t1.code = t2.code
  and t1.offer = t2.offer
  and t1.source in (t2.source, -1)
group by format('%t', t2)
如果应用于您问题中的示例数据,输出如下
上述写法避免了两次连接;唯一的副作用是结果会被去重,也就是说,表 2 中出现的重复行会被去除。
I need duplicate rows
当然,只需对上面的查询稍作修改,就可以得到所有行:
#standardSQL
-- Single-join variant that keeps duplicate t2 rows.
-- `r` is a random tag attached to each t2 row and added to the grouping key,
-- so identical t2 rows are intended to land in separate groups
-- (assumes rand() values do not collide).
-- Candidates from t1 are limited to the exact-source row and the source = -1
-- default, so a rate belonging to some other source can never be picked when a
-- code/offer pair has neither an exact match nor a default (rate is then NULL).
-- Ordering by `t1.source = t2.source desc` puts the exact match ahead of the
-- default; `limit 1` + `[offset(0)]` extracts that single best rate.
select any_value(t2).*,
  array_agg(rate order by t1.source = t2.source desc limit 1)[offset(0)] rate
from t2
cross join unnest([rand()]) as r
left join t1
  on t1.code = t2.code
  and t1.offer = t2.offer
  and t1.source in (t2.source, -1)
group by format('%t', t2), r
有输出
我在 BigQuery 中有两个表。
第一个表是费率列表。对于每个 code - offer 组合,都有一个 source 等于 -1 的默认费率。除此之外,部分 code - offer 组合还为特定的 source 单独指定了费率。
第二个表与第一个表具有相同的列,只是没有费率(rate)列,另外还包含若干其他数据列。
我的目标是:优先按 code - offer - source 匹配来连接费率;如果没有匹配,则使用 code - offer 匹配且 source 等于 -1 的默认费率。
下面的示例查询只返回默认费率:
-- Sample data plus the asker's first attempt at the rate lookup.
-- t1: rate list keyed by (source, code, offer); rows with source = -1 carry the
-- default rate for a code/offer pair.
WITH t1 AS (SELECT 21 as source, 'SA' as code, 'offer1' as offer, 2.4 as rate
UNION ALL
SELECT 33, 'SA', 'offer1', 2.5
UNION ALL
SELECT 39, 'SA', 'offer1', 2.1
UNION ALL
SELECT -1, 'SA', 'offer1', 3
UNION ALL
SELECT -1, 'SA', 'offer2', 4
UNION ALL
SELECT 47, 'YN', 'offer1', 2.7
UNION ALL
SELECT -1, 'YN', 'offer1', 5.4
UNION ALL
SELECT -1, 'YN', 'offer2', 0.9
UNION ALL
SELECT -1, 'RE', 'offer1', 5.7
UNION ALL
SELECT -1, 'RE', 'offer2', 3.4),
-- t2: rows that need a rate; same key columns as t1 but no rate column.
-- Note that it contains duplicate rows.
t2 as (SELECT 21 as source, 'SA' as code, 'offer1' as offer, "any data" as other_columns
UNION ALL SELECT 21, 'SA', 'offer1', "any data"
UNION ALL SELECT 21, 'SA', 'offer1', "any data"
UNION ALL SELECT 21, 'SA', 'offer2', "any data"
UNION ALL SELECT 47, 'YN', 'offer1', "any data"
UNION ALL SELECT 47, 'YN', 'offer2', "any data"
UNION ALL SELECT 50, 'YN', 'offer1', "any data"
UNION ALL SELECT 47, 'YN', 'offer2', "any data"
UNION ALL SELECT 78, 'RE', 'offer1', "any data"
UNION ALL SELECT 66, 'RE', 'offer2', "any data")
-- Attempted join. `t1.source = t1.source` is true for every t1 row above and
-- t1.rate is never NULL in this sample data, so `rate IS NULL` makes the IF
-- condition always false. Only the ELSE branch (t1.source = -1) is applied,
-- which is why every t2 row receives the default rate.
SELECT t2.*, rate FROM t2
LEFT JOIN t1 ON t1.offer = t2.offer AND t1.code = t2.code AND IF (t1.source = t1.source AND rate IS NULL, t1.source = t2.source, t1.source = - 1)
下一个查询会返回指定 source 的费率,而当 source 不匹配时返回 null:
-- Second attempt; assumes the t1/t2 CTEs from the sample query are in scope.
-- With `rate IS NOT NULL` the IF condition is true for every t1 row in the
-- sample data, so only `t1.source = t2.source` is applied: exact source matches
-- get their rate and every other t2 row gets NULL instead of the default.
SELECT t2.*, rate FROM t2
LEFT JOIN t1 ON t1.offer = t2.offer AND t1.code = t2.code AND IF (t1.source = t1.source AND rate IS NOT NULL, t1.source = t2.source, t1.source = - 1)
我怎样才能正确地连接费率?
您可以使用两次 left join,并结合条件逻辑:
-- Look up the rate in two passes over t1: first an exact (code, offer, source)
-- match, then the source = -1 default. The default is only joined when the
-- exact match found nothing, and COALESCE picks whichever rate is present.
SELECT
    t2.*,
    COALESCE(exact_match.rate, fallback.rate) AS rate
FROM t2
LEFT JOIN t1 AS exact_match
    ON exact_match.source = t2.source
    AND exact_match.offer = t2.offer
    AND exact_match.code = t2.code
LEFT JOIN t1 AS fallback
    ON exact_match.code IS NULL  -- skip the default when an exact match exists
    AND fallback.source = -1
    AND fallback.offer = t2.offer
    AND fallback.code = t2.code
以下适用于 BigQuery 标准 SQL
#standardSQL
-- Single-join variant: pair each t2 row with its candidate t1 rates and keep
-- the best one.
-- Candidates are limited to the exact-source row and the source = -1 default,
-- so a rate belonging to some other source can never be picked when a
-- code/offer pair has neither an exact match nor a default (rate is then NULL).
-- Ordering by `t1.source = t2.source desc` puts the exact match ahead of the
-- default; `limit 1` + `[offset(0)]` extracts that single best rate.
-- Grouping by the formatted t2 row collapses identical t2 rows into one
-- output row.
select any_value(t2).*,
  array_agg(rate order by t1.source = t2.source desc limit 1)[offset(0)] rate
from t2
left join t1
  on t1.code = t2.code
  and t1.offer = t2.offer
  and t1.source in (t2.source, -1)
group by format('%t', t2)
如果应用于您问题中的示例数据,输出如下
上述写法避免了两次连接;唯一的副作用是结果会被去重,也就是说,表 2 中出现的重复行会被去除。
I need duplicate rows
当然,只需对上面的查询稍作修改,就可以得到所有行:
#standardSQL
-- Single-join variant that keeps duplicate t2 rows.
-- `r` is a random tag attached to each t2 row and added to the grouping key,
-- so identical t2 rows are intended to land in separate groups
-- (assumes rand() values do not collide).
-- Candidates from t1 are limited to the exact-source row and the source = -1
-- default, so a rate belonging to some other source can never be picked when a
-- code/offer pair has neither an exact match nor a default (rate is then NULL).
-- Ordering by `t1.source = t2.source desc` puts the exact match ahead of the
-- default; `limit 1` + `[offset(0)]` extracts that single best rate.
select any_value(t2).*,
  array_agg(rate order by t1.source = t2.source desc limit 1)[offset(0)] rate
from t2
cross join unnest([rand()]) as r
left join t1
  on t1.code = t2.code
  and t1.offer = t2.offer
  and t1.source in (t2.source, -1)
group by format('%t', t2), r
有输出