使用 Snowflake 从数组中提取值
Extracting values from an array using Snowflake
我的表中有一列以如下格式存储数据:
[
{
"arrival_date": "2022-02-15T08:00:00.000Z",
"cargo_available_timestamp": "2022-02-16T13:00:00.000Z",
"cargo_type": "unable_to_provide",
"carton_count": null,
"lfd": "2022-02-17T08:00:00.000Z"
},
{
"arrival_date": "2022-02-16T08:00:00.000Z",
"flight_status": "in_transit",
"flight_status_other": null
}
]
我正在尝试使用以下代码提取上述数据中 lfd 的值:
-- Question's original attempt, kept exactly as posted. It fails to compile:
-- col1 is a VARCHAR, so GET() is being called with (VARCHAR, NUMBER) arguments.
select col1,
json_extract_path_text(get(col1,0),'lfd') as value
from table
但是 get() 函数似乎无法读取这个数组。我收到以下错误:
SQL compilation error: error line 4 at position 20 Invalid argument types for
function 'GET': (VARCHAR(16777216), NUMBER(1,0))
当我检查 col1 的数据类型时,发现它是 varchar。请问该如何解析这个 varchar 以提取 lfd 的值?谢谢。
下面用一个 CTE 来提供示例数据,并用 parse_json 把字符串解析为 JSON:
-- Sample data: one row whose text value is the JSON array from the question.
-- PARSE_JSON converts that text into a semi-structured value so that
-- positional ([n]) and key (:name) access can be used on it.
WITH fake_data AS (
    SELECT PARSE_JSON(column1) AS json
    FROM VALUES
        ('[
{
"arrival_date": "2022-02-15T08:00:00.000Z",
"cargo_available_timestamp": "2022-02-16T13:00:00.000Z",
"cargo_type": "unable_to_provide",
"carton_count": null,
"lfd": "2022-02-17T08:00:00.000Z"
},
{
"arrival_date": "2022-02-16T08:00:00.000Z",
"flight_status": "in_transit",
"flight_status_other": null
}
]')
)
-- Take the first two array elements by position, then read "lfd" from each
-- element through the aliases defined earlier in the same select list.
SELECT
    json[0] AS array_0,
    json[1] AS array_1,
    array_0:lfd AS lfd_0,
    array_1:lfd AS lfd_1
FROM fake_data;
我们得到:
ARRAY_0 | ARRAY_1 | LFD_0 | LFD_1 |
---|---|---|---|
{ "arrival_date": "2022-02-15T08:00:00.000Z", "cargo_available_timestamp": "2022-02-16T13:00:00.000Z", "cargo_type": "unable_to_provide", "carton_count": null, "lfd": "2022-02-17T08:00:00.000Z" } | { "arrival_date": "2022-02-16T08:00:00.000Z", "flight_status": "in_transit", "flight_status_other": null } | "2022-02-17T08:00:00.000Z" | NULL |
因此,如果您确定 JSON 数组中元素的顺序始终固定(即 lfd 总是在第一个元素中),可以直接按下标取值:
-- Read "lfd" from the first array element and also expose it converted with
-- TO_TIMESTAMP_NTZ. This fragment selects from the fake_data CTE shown in the
-- previous query.
SELECT
    json[0]:lfd AS lfd,
    TO_TIMESTAMP_NTZ(lfd) AS lfd_as_timestamp
FROM fake_data;
LFD | LFD_AS_TIMESTAMP |
---|---|
"2022-02-17T08:00:00.000Z" | 2022-02-17 08:00:00.000 |
现在,如果您不确定含有 lfd 的元素在数组中的位置,或者需要按条件挑选数组元素,就需要用 FLATTEN 把数组展开成多行。
-- Sample data: two rows, each a JSON array parsed with PARSE_JSON. The element
-- that carries "lfd" is first in one row and second in the other.
WITH fake_data AS (
    SELECT PARSE_JSON(column1) AS json
    FROM VALUES
        ('[
{
"arrival_date": "2022-02-15T08:00:00.000Z",
"cargo_available_timestamp": "2022-02-16T13:00:00.000Z",
"cargo_type": "unable_to_provide",
"carton_count": null,
"lfd": "2022-02-17T08:00:00.000Z"
},
{
"arrival_date": "2022-02-16T08:00:00.000Z",
"flight_status": "in_transit",
"flight_status_other": null
}
]'),
        ('[
{
"arrival_date": "2022-02-16T08:00:00.000Z",
"flight_status": "in_transit",
"flight_status_other": null
},
{
"arrival_date": "2022-02-15T08:00:00.000Z",
"cargo_available_timestamp": "2022-02-16T13:00:00.000Z",
"cargo_type": "unable_to_provide",
"carton_count": null,
"lfd": "2022-02-18T08:00:00.000Z"
}
]')
)
-- FLATTEN yields one output row per array element (exposed as f.value); the
-- WHERE clause keeps only the elements whose "lfd" converted to a timestamp.
SELECT
    TO_TIMESTAMP_NTZ(f.value:lfd) AS lfd_ntz
FROM
    fake_data AS d,
    TABLE(FLATTEN(input => d.json)) AS f
WHERE lfd_ntz IS NOT NULL;
LFD_NTZ
2022-02-17 08:00:00.000
2022-02-18 08:00:00.000
我的表中有一列以如下格式存储数据:
[
{
"arrival_date": "2022-02-15T08:00:00.000Z",
"cargo_available_timestamp": "2022-02-16T13:00:00.000Z",
"cargo_type": "unable_to_provide",
"carton_count": null,
"lfd": "2022-02-17T08:00:00.000Z"
},
{
"arrival_date": "2022-02-16T08:00:00.000Z",
"flight_status": "in_transit",
"flight_status_other": null
}
]
我正在尝试使用以下代码提取上述数据中 lfd 的值:
-- Question's original attempt, kept exactly as posted. It fails to compile:
-- col1 is a VARCHAR, so GET() is being called with (VARCHAR, NUMBER) arguments.
select col1,
json_extract_path_text(get(col1,0),'lfd') as value
from table
但是 get() 函数似乎无法读取这个数组。我收到以下错误:
SQL compilation error: error line 4 at position 20 Invalid argument types for
function 'GET': (VARCHAR(16777216), NUMBER(1,0))
当我检查 col1 的数据类型时,发现它是 varchar。请问该如何解析这个 varchar 以提取 lfd 的值?谢谢。
下面用一个 CTE 来提供示例数据,并用 parse_json 把字符串解析为 JSON:
-- Sample data: one row whose text value is the JSON array from the question.
-- PARSE_JSON converts that text into a semi-structured value so that
-- positional ([n]) and key (:name) access can be used on it.
WITH fake_data AS (
    SELECT PARSE_JSON(column1) AS json
    FROM VALUES
        ('[
{
"arrival_date": "2022-02-15T08:00:00.000Z",
"cargo_available_timestamp": "2022-02-16T13:00:00.000Z",
"cargo_type": "unable_to_provide",
"carton_count": null,
"lfd": "2022-02-17T08:00:00.000Z"
},
{
"arrival_date": "2022-02-16T08:00:00.000Z",
"flight_status": "in_transit",
"flight_status_other": null
}
]')
)
-- Take the first two array elements by position, then read "lfd" from each
-- element through the aliases defined earlier in the same select list.
SELECT
    json[0] AS array_0,
    json[1] AS array_1,
    array_0:lfd AS lfd_0,
    array_1:lfd AS lfd_1
FROM fake_data;
我们得到:
ARRAY_0 | ARRAY_1 | LFD_0 | LFD_1 |
---|---|---|---|
{ "arrival_date": "2022-02-15T08:00:00.000Z", "cargo_available_timestamp": "2022-02-16T13:00:00.000Z", "cargo_type": "unable_to_provide", "carton_count": null, "lfd": "2022-02-17T08:00:00.000Z" } | { "arrival_date": "2022-02-16T08:00:00.000Z", "flight_status": "in_transit", "flight_status_other": null } | "2022-02-17T08:00:00.000Z" | NULL |
因此,如果您确定 JSON 数组中元素的顺序始终固定(即 lfd 总是在第一个元素中),可以直接按下标取值:
-- Read "lfd" from the first array element and also expose it converted with
-- TO_TIMESTAMP_NTZ. This fragment selects from the fake_data CTE shown in the
-- previous query.
SELECT
    json[0]:lfd AS lfd,
    TO_TIMESTAMP_NTZ(lfd) AS lfd_as_timestamp
FROM fake_data;
LFD | LFD_AS_TIMESTAMP |
---|---|
"2022-02-17T08:00:00.000Z" | 2022-02-17 08:00:00.000 |
现在,如果您不确定含有 lfd 的元素在数组中的位置,或者需要按条件挑选数组元素,就需要用 FLATTEN 把数组展开成多行。
-- Sample data: two rows, each a JSON array parsed with PARSE_JSON. The element
-- that carries "lfd" is first in one row and second in the other.
WITH fake_data AS (
    SELECT PARSE_JSON(column1) AS json
    FROM VALUES
        ('[
{
"arrival_date": "2022-02-15T08:00:00.000Z",
"cargo_available_timestamp": "2022-02-16T13:00:00.000Z",
"cargo_type": "unable_to_provide",
"carton_count": null,
"lfd": "2022-02-17T08:00:00.000Z"
},
{
"arrival_date": "2022-02-16T08:00:00.000Z",
"flight_status": "in_transit",
"flight_status_other": null
}
]'),
        ('[
{
"arrival_date": "2022-02-16T08:00:00.000Z",
"flight_status": "in_transit",
"flight_status_other": null
},
{
"arrival_date": "2022-02-15T08:00:00.000Z",
"cargo_available_timestamp": "2022-02-16T13:00:00.000Z",
"cargo_type": "unable_to_provide",
"carton_count": null,
"lfd": "2022-02-18T08:00:00.000Z"
}
]')
)
-- FLATTEN yields one output row per array element (exposed as f.value); the
-- WHERE clause keeps only the elements whose "lfd" converted to a timestamp.
SELECT
    TO_TIMESTAMP_NTZ(f.value:lfd) AS lfd_ntz
FROM
    fake_data AS d,
    TABLE(FLATTEN(input => d.json)) AS f
WHERE lfd_ntz IS NOT NULL;
LFD_NTZ |
---|
2022-02-17 08:00:00.000 |
2022-02-18 08:00:00.000 |