BigQuery:联合具有不同字段顺序的重复字段
BigQuery: Union on repeated fields with different order of fields
如果字段顺序不匹配,如何使 UNION ALL
对重复字段起作用?
在下面的示例中,我尝试 UNION
data_1_nested 和 data_2_nested,而重复字段 nested
有两个字段:age 和 grade,但顺序不同。
我可以先 UNNEST 再重新嵌套,但如果有多个嵌套字段需要 UNION,
这种做法就不太实用了。
示例:
-- Reproduction of the problem: both *_nested CTEs aggregate the same two
-- columns (age, grade) into a repeated STRUCT, but list the struct fields in
-- opposite order, so the two `nested` columns do not have the same type.
WITH
-- Sample rows for id 'a123'; age mixes whole-number and fractional literals.
data_1 AS (
    SELECT 'a123' AS id, 1 AS age, 'a' AS grade
    UNION ALL
    SELECT 'a123' AS id, 3 AS age, 'b' AS grade
    UNION ALL
    SELECT 'a123' AS id, 4.5 AS age, 'c' AS grade
),

-- Sample rows for id 'b456', same columns as data_1.
data_2 AS (
    SELECT 'b456' AS id, 6 AS age, 'e' AS grade
    UNION ALL
    SELECT 'b456' AS id, 5 AS age, 'f' AS grade
    UNION ALL
    SELECT 'b456' AS id, 2.5 AS age, 'g' AS grade
),

-- One row per id; struct field order is (age, grade).
data_1_nested AS (
    SELECT
        id,
        ARRAY_AGG(STRUCT(age, grade)) AS nested
    FROM data_1
    GROUP BY id
),

-- One row per id; struct field order is (grade, age) -- deliberately
-- different from data_1_nested.
data_2_nested AS (
    SELECT
        id,
        ARRAY_AGG(STRUCT(grade, age)) AS nested
    FROM data_2
    GROUP BY id
)

-- The union whose `nested` columns differ only in struct field order.
SELECT id, nested FROM data_1_nested
UNION ALL
SELECT id, nested FROM data_2_nested
我稍微修改了您的数据以创建 2 个需要合并的嵌套字段。我还添加了一个 JS 函数来解析 JSON。这是一个丑陋的解决方案,但它似乎有效。不确定它是否可扩展(必须创建多少函数来处理不同的嵌套字段)。
-- Parses a JSON array of {age, grade} objects (as produced by TO_JSON_STRING
-- on the nested arrays below) back into ARRAY<STRUCT<age, grade>>.
-- Object properties are mapped to STRUCT fields by name, so the key order in
-- the JSON text does not matter; this is what normalises the two layouts.
-- age is declared FLOAT64 because the sample data contains non-integral ages
-- (4.5 and 2.5), which an INT64 field cannot represent.
CREATE TEMP FUNCTION JsonToItems(input STRING)
RETURNS ARRAY<STRUCT<age FLOAT64, grade STRING>>
LANGUAGE js AS """
  return JSON.parse(input);
""";

WITH
-- Sample rows for id 'a123'; the 4.5 literal makes age a FLOAT64 column.
data_1 AS (
    SELECT 'a123' AS id, 1 AS age, 'a' AS grade
    UNION ALL
    SELECT 'a123' AS id, 3 AS age, 'b' AS grade
    UNION ALL
    SELECT 'a123' AS id, 4.5 AS age, 'c' AS grade
),

-- Sample rows for id 'b456', same columns as data_1.
data_2 AS (
    SELECT 'b456' AS id, 6 AS age, 'e' AS grade
    UNION ALL
    SELECT 'b456' AS id, 5 AS age, 'f' AS grade
    UNION ALL
    SELECT 'b456' AS id, 2.5 AS age, 'g' AS grade
),

-- Two repeated fields per id, struct field order (age, grade).
data_1_nested AS (
    SELECT
        id,
        ARRAY_AGG(STRUCT(age, grade)) AS nested,
        ARRAY_AGG(STRUCT(age, grade)) AS nested2
    FROM data_1
    GROUP BY id
),

-- Two repeated fields per id, struct field order (grade, age).
data_2_nested AS (
    SELECT
        id,
        ARRAY_AGG(STRUCT(grade, age)) AS nested,
        ARRAY_AGG(STRUCT(grade, age)) AS nested2
    FROM data_2
    GROUP BY id
)

-- Serialise both repeated fields to JSON strings so the two branches have
-- identical column types (STRING) and can be unioned, then rebuild the arrays
-- in a single field order. The rebuilt columns are named explicitly instead
-- of being left with auto-generated names.
SELECT
    id,
    JsonToItems(nested_json) AS nested,
    JsonToItems(nested2_json) AS nested2
FROM (
    SELECT
        id,
        TO_JSON_STRING(nested) AS nested_json,
        TO_JSON_STRING(nested2) AS nested2_json
    FROM data_1_nested
    UNION ALL
    SELECT
        id,
        TO_JSON_STRING(nested) AS nested_json,
        TO_JSON_STRING(nested2) AS nested2_json
    FROM data_2_nested
);
以下应该适合您
-- First branch: data_1_nested already uses the target layout STRUCT<age, grade>.
SELECT *
FROM data_1_nested
UNION ALL
-- Second branch: rebuild every element of data_2_nested.nested as a struct
-- whose fields are listed as (age, grade), so both branches line up.
SELECT
    t.id,
    ARRAY(
        SELECT AS STRUCT
            item.age,
            item.grade
        FROM t.nested AS item
    )
FROM data_2_nested AS t
如果应用于您问题中的示例数据 - 输出为
如果字段顺序不匹配,如何使 UNION ALL
对重复字段起作用?
在下面的示例中,我尝试 UNION
data_1_nested 和 data_2_nested,而重复字段 nested
有两个字段:age 和 grade,但顺序不同。
我可以先 UNNEST 再重新嵌套,但如果有多个嵌套字段需要 UNION,
这种做法就不太实用了。
示例:
-- Reproduction of the problem: both *_nested CTEs aggregate the same two
-- columns (age, grade) into a repeated STRUCT, but list the struct fields in
-- opposite order, so the two `nested` columns do not have the same type.
WITH
-- Sample rows for id 'a123'; age mixes whole-number and fractional literals.
data_1 AS (
    SELECT 'a123' AS id, 1 AS age, 'a' AS grade
    UNION ALL
    SELECT 'a123' AS id, 3 AS age, 'b' AS grade
    UNION ALL
    SELECT 'a123' AS id, 4.5 AS age, 'c' AS grade
),

-- Sample rows for id 'b456', same columns as data_1.
data_2 AS (
    SELECT 'b456' AS id, 6 AS age, 'e' AS grade
    UNION ALL
    SELECT 'b456' AS id, 5 AS age, 'f' AS grade
    UNION ALL
    SELECT 'b456' AS id, 2.5 AS age, 'g' AS grade
),

-- One row per id; struct field order is (age, grade).
data_1_nested AS (
    SELECT
        id,
        ARRAY_AGG(STRUCT(age, grade)) AS nested
    FROM data_1
    GROUP BY id
),

-- One row per id; struct field order is (grade, age) -- deliberately
-- different from data_1_nested.
data_2_nested AS (
    SELECT
        id,
        ARRAY_AGG(STRUCT(grade, age)) AS nested
    FROM data_2
    GROUP BY id
)

-- The union whose `nested` columns differ only in struct field order.
SELECT id, nested FROM data_1_nested
UNION ALL
SELECT id, nested FROM data_2_nested
我稍微修改了您的数据以创建 2 个需要合并的嵌套字段。我还添加了一个 JS 函数来解析 JSON。这是一个丑陋的解决方案,但它似乎有效。不确定它是否可扩展(必须创建多少函数来处理不同的嵌套字段)。
-- Parses a JSON array of {age, grade} objects (as produced by TO_JSON_STRING
-- on the nested arrays below) back into ARRAY<STRUCT<age, grade>>.
-- Object properties are mapped to STRUCT fields by name, so the key order in
-- the JSON text does not matter; this is what normalises the two layouts.
-- age is declared FLOAT64 because the sample data contains non-integral ages
-- (4.5 and 2.5), which an INT64 field cannot represent.
CREATE TEMP FUNCTION JsonToItems(input STRING)
RETURNS ARRAY<STRUCT<age FLOAT64, grade STRING>>
LANGUAGE js AS """
  return JSON.parse(input);
""";

WITH
-- Sample rows for id 'a123'; the 4.5 literal makes age a FLOAT64 column.
data_1 AS (
    SELECT 'a123' AS id, 1 AS age, 'a' AS grade
    UNION ALL
    SELECT 'a123' AS id, 3 AS age, 'b' AS grade
    UNION ALL
    SELECT 'a123' AS id, 4.5 AS age, 'c' AS grade
),

-- Sample rows for id 'b456', same columns as data_1.
data_2 AS (
    SELECT 'b456' AS id, 6 AS age, 'e' AS grade
    UNION ALL
    SELECT 'b456' AS id, 5 AS age, 'f' AS grade
    UNION ALL
    SELECT 'b456' AS id, 2.5 AS age, 'g' AS grade
),

-- Two repeated fields per id, struct field order (age, grade).
data_1_nested AS (
    SELECT
        id,
        ARRAY_AGG(STRUCT(age, grade)) AS nested,
        ARRAY_AGG(STRUCT(age, grade)) AS nested2
    FROM data_1
    GROUP BY id
),

-- Two repeated fields per id, struct field order (grade, age).
data_2_nested AS (
    SELECT
        id,
        ARRAY_AGG(STRUCT(grade, age)) AS nested,
        ARRAY_AGG(STRUCT(grade, age)) AS nested2
    FROM data_2
    GROUP BY id
)

-- Serialise both repeated fields to JSON strings so the two branches have
-- identical column types (STRING) and can be unioned, then rebuild the arrays
-- in a single field order. The rebuilt columns are named explicitly instead
-- of being left with auto-generated names.
SELECT
    id,
    JsonToItems(nested_json) AS nested,
    JsonToItems(nested2_json) AS nested2
FROM (
    SELECT
        id,
        TO_JSON_STRING(nested) AS nested_json,
        TO_JSON_STRING(nested2) AS nested2_json
    FROM data_1_nested
    UNION ALL
    SELECT
        id,
        TO_JSON_STRING(nested) AS nested_json,
        TO_JSON_STRING(nested2) AS nested2_json
    FROM data_2_nested
);
以下应该适合您
-- First branch: data_1_nested already uses the target layout STRUCT<age, grade>.
SELECT *
FROM data_1_nested
UNION ALL
-- Second branch: rebuild every element of data_2_nested.nested as a struct
-- whose fields are listed as (age, grade), so both branches line up.
SELECT
    t.id,
    ARRAY(
        SELECT AS STRUCT
            item.age,
            item.grade
        FROM t.nested AS item
    )
FROM data_2_nested AS t
如果应用于您问题中的示例数据 - 输出为