在 R 中展平嵌套 JSON
Flattening nested JSON in R
大家好:我已经在 Stack Overflow 和互联网的其他地方搜索过这个问题的答案,但我找到的答案似乎都不适用于我的情况。
我有数千行 json 数据,其中包含相机陷阱(camera trap)研究图像的相关信息。我在解析并展开这些数据时遇到了很多麻烦。我试过 jsonlite::fromJSON,但没有成功;tidyjson 的 as.tbl_json 也是如此。
我的目标是编写一些代码,得到一个数据框,使 json 中存储的每个变量各占一列。你能帮忙吗?
下面是我正在使用的数据向量;实际上,这些数据是一个较大的 .csv 文件中的一列,第一行是列名。
# Raw JSON strings taken from the `annotations` column of the CSV export.
# The CSV header row ("annotations") is a column name, not data, so it is not
# an element of the vector. The CSV-style doubled quotes ("") are written as
# plain JSON quotes inside single-quoted R strings, and the elements are
# comma-separated so the expression parses as valid R.
annotations <- c(
  '[{"task":"T0","value":[{"choice":"NOTHINGHERE","answers":{},"filters":{}}]}]',
  '[{"task":"T0","value":[{"choice":"NOTHINGHERE","answers":{},"filters":{}}]}]',
  '[{"task":"T0","value":[{"choice":"DEERWHITETAILED","answers":{"HOWMANY":"1","YOUNGPRESENT":"NO","ANTLERSPRESENT":"NO","WHATBEHAVIORSDOYOUSEE":["ALERT"],"ESTIMATEOFSNOWDEPTHSEETUTORIAL":"NOSNOWBAREGROUND","ISITACTIVELYRAININGORSNOWINGINTHEPICTURE":"NO"},"filters":{}}]}]',
  '[{"task":"T0","value":[{"choice":"NOTHINGHERE","answers":{},"filters":{}}]}]',
  '[{"task":"T0","value":[{"choice":"NOTHINGHERE","answers":{},"filters":{}}]}]',
  '[{"task":"T0","value":[{"choice":"NOTHINGHERE","answers":{},"filters":{}}]}]',
  '[{"task":"T0","value":[{"choice":"NOTHINGHERE","answers":{},"filters":{}}]}]',
  '[{"task":"T0","value":[{"choice":"NOTHINGHERE","answers":{},"filters":{}}]}]',
  '[{"task":"T0","value":[{"choice":"FISHER","answers":{"HOWMANY":"1","YOUNGPRESENT":"NO","WHATBEHAVIORSDOYOUSEE":["WALKINGRUNNING","ALERT"],"ESTIMATEOFSNOWDEPTHSEETUTORIAL":"1020CM","ISITACTIVELYRAININGORSNOWINGINTHEPICTURE":"NO"},"filters":{}}]}]',
  '[{"task":"T0","value":[{"choice":"NOTHINGHERE","answers":{},"filters":{}}]}]'
)
这是我运行 dput(annotations) 得到的结果:
structure(list(annotations = c("[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"DEERWHITETAILED\",\"answers\":{\"HOWMANY\":\"1\",\"YOUNGPRESENT\":\"NO\",\"ANTLERSPRESENT\":\"NO\",\"WHATBEHAVIORSDOYOUSEE\":[\"ALERT\"],\"ESTIMATEOFSNOWDEPTHSEETUTORIAL\":\"NOSNOWBAREGROUND\",\"ISITACTIVELYRAININGORSNOWINGINTHEPICTURE\":\"NO\"},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"FISHER\",\"answers\":{\"HOWMANY\":\"1\",\"YOUNGPRESENT\":\"NO\",\"WHATBEHAVIORSDOYOUSEE\":[\"WALKINGRUNNING\",\"ALERT\"],\"ESTIMATEOFSNOWDEPTHSEETUTORIAL\":\"1020CM\",\"ISITACTIVELYRAININGORSNOWINGINTHEPICTURE\":\"NO\"},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]"
)), class = "data.frame", row.names = c(NA, -10L))
我不太确定您想要的输出格式,实现的方法有很多种。此外,数据结构中的数组(目前每个数组中只有一个对象)让事情稍微复杂了一些,因为它们也可能包含多个对象。
无论如何,借助 tidyjson 的 spread_all(),不需要太多代码就能完成。您也可以使用 spread_values() 只展开特定的值,或者使用 enter_object(answers) 只展开 answers 中的内容,等等。希望对您有所帮助!
library(tidyjson)
#>
#> Attaching package: 'tidyjson'
#> The following object is masked from 'package:stats':
#>
#> filter
library(tibble)
# Rebuild the question's data frame from its dput() output: a data.frame with
# a single character column `annotations` and ten rows, one JSON string each.
annotations <- structure(list(annotations = c("[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"DEERWHITETAILED\",\"answers\":{\"HOWMANY\":\"1\",\"YOUNGPRESENT\":\"NO\",\"ANTLERSPRESENT\":\"NO\",\"WHATBEHAVIORSDOYOUSEE\":[\"ALERT\"],\"ESTIMATEOFSNOWDEPTHSEETUTORIAL\":\"NOSNOWBAREGROUND\",\"ISITACTIVELYRAININGORSNOWINGINTHEPICTURE\":\"NO\"},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"FISHER\",\"answers\":{\"HOWMANY\":\"1\",\"YOUNGPRESENT\":\"NO\",\"WHATBEHAVIORSDOYOUSEE\":[\"WALKINGRUNNING\",\"ALERT\"],\"ESTIMATEOFSNOWDEPTHSEETUTORIAL\":\"1020CM\",\"ISITACTIVELYRAININGORSNOWINGINTHEPICTURE\":\"NO\"},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]"
)), class = "data.frame", row.names = c(NA, -10L))
# Put the raw JSON strings into a one-column tibble so tidyjson can be told
# which column holds the JSON.
ant <- tibble(raw = annotations$annotations)

# Flatten the two array levels (outer task array, inner "value" array) and
# spread the fields found at each level into columns.
# Note: the printed result has no column for the array-valued
# WHATBEHAVIORSDOYOUSEE field; it would need separate handling.
ant %>%
  as.tbl_json(json.column = "raw") %>%
  # One row per element of the outer array, indexed by `object_id`.
  gather_array("object_id") %>%
  # Spread the task-level fields (e.g. `task`) into columns.
  spread_all() %>%
  # Step into each task's "value" array ...
  enter_object("value") %>%
  # ... and produce one row per element, indexed by `value_id`.
  gather_array("value_id") %>%
  # Spread `choice` and the nested `answers.*` fields into columns.
  spread_all() %>%
  # Drop the tbl_json wrapper so a plain tibble is printed.
  as_tibble()
#> # A tibble: 10 x 9
#> object_id task value_id choice answers.HOWMANY answers.YOUNGPR…
#> <int> <chr> <int> <chr> <chr> <chr>
#> 1 1 T0 1 NOTHI… <NA> <NA>
#> 2 1 T0 1 NOTHI… <NA> <NA>
#> 3 1 T0 1 DEERW… 1 NO
#> 4 1 T0 1 NOTHI… <NA> <NA>
#> 5 1 T0 1 NOTHI… <NA> <NA>
#> 6 1 T0 1 NOTHI… <NA> <NA>
#> 7 1 T0 1 NOTHI… <NA> <NA>
#> 8 1 T0 1 NOTHI… <NA> <NA>
#> 9 1 T0 1 FISHER 1 NO
#> 10 1 T0 1 NOTHI… <NA> <NA>
#> # … with 3 more variables: answers.ANTLERSPRESENT <chr>,
#> # answers.ESTIMATEOFSNOWDEPTHSEETUTORIAL <chr>,
#> # answers.ISITACTIVELYRAININGORSNOWINGINTHEPICTURE <chr>
由 reprex 包 (v0.3.0) 创建于 2020 年 3 月 14 日
大家好:我已经在 Stack Overflow 和互联网的其他地方搜索过这个问题的答案,但我找到的答案似乎都不适用于我的情况。
我有数千行 json 数据,其中包含相机陷阱(camera trap)研究图像的相关信息。我在解析并展开这些数据时遇到了很多麻烦。我试过 jsonlite::fromJSON,但没有成功;tidyjson 的 as.tbl_json 也是如此。
我的目标是编写一些代码,得到一个数据框,使 json 中存储的每个变量各占一列。你能帮忙吗?
下面是我正在使用的数据向量;实际上,这些数据是一个较大的 .csv 文件中的一列,第一行是列名。
# Raw JSON strings taken from the `annotations` column of the CSV export.
# The CSV header row ("annotations") is a column name, not data, so it is not
# an element of the vector. The CSV-style doubled quotes ("") are written as
# plain JSON quotes inside single-quoted R strings, and the elements are
# comma-separated so the expression parses as valid R.
annotations <- c(
  '[{"task":"T0","value":[{"choice":"NOTHINGHERE","answers":{},"filters":{}}]}]',
  '[{"task":"T0","value":[{"choice":"NOTHINGHERE","answers":{},"filters":{}}]}]',
  '[{"task":"T0","value":[{"choice":"DEERWHITETAILED","answers":{"HOWMANY":"1","YOUNGPRESENT":"NO","ANTLERSPRESENT":"NO","WHATBEHAVIORSDOYOUSEE":["ALERT"],"ESTIMATEOFSNOWDEPTHSEETUTORIAL":"NOSNOWBAREGROUND","ISITACTIVELYRAININGORSNOWINGINTHEPICTURE":"NO"},"filters":{}}]}]',
  '[{"task":"T0","value":[{"choice":"NOTHINGHERE","answers":{},"filters":{}}]}]',
  '[{"task":"T0","value":[{"choice":"NOTHINGHERE","answers":{},"filters":{}}]}]',
  '[{"task":"T0","value":[{"choice":"NOTHINGHERE","answers":{},"filters":{}}]}]',
  '[{"task":"T0","value":[{"choice":"NOTHINGHERE","answers":{},"filters":{}}]}]',
  '[{"task":"T0","value":[{"choice":"NOTHINGHERE","answers":{},"filters":{}}]}]',
  '[{"task":"T0","value":[{"choice":"FISHER","answers":{"HOWMANY":"1","YOUNGPRESENT":"NO","WHATBEHAVIORSDOYOUSEE":["WALKINGRUNNING","ALERT"],"ESTIMATEOFSNOWDEPTHSEETUTORIAL":"1020CM","ISITACTIVELYRAININGORSNOWINGINTHEPICTURE":"NO"},"filters":{}}]}]',
  '[{"task":"T0","value":[{"choice":"NOTHINGHERE","answers":{},"filters":{}}]}]'
)
这是我运行 dput(annotations) 得到的结果:
structure(list(annotations = c("[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"DEERWHITETAILED\",\"answers\":{\"HOWMANY\":\"1\",\"YOUNGPRESENT\":\"NO\",\"ANTLERSPRESENT\":\"NO\",\"WHATBEHAVIORSDOYOUSEE\":[\"ALERT\"],\"ESTIMATEOFSNOWDEPTHSEETUTORIAL\":\"NOSNOWBAREGROUND\",\"ISITACTIVELYRAININGORSNOWINGINTHEPICTURE\":\"NO\"},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"FISHER\",\"answers\":{\"HOWMANY\":\"1\",\"YOUNGPRESENT\":\"NO\",\"WHATBEHAVIORSDOYOUSEE\":[\"WALKINGRUNNING\",\"ALERT\"],\"ESTIMATEOFSNOWDEPTHSEETUTORIAL\":\"1020CM\",\"ISITACTIVELYRAININGORSNOWINGINTHEPICTURE\":\"NO\"},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]"
)), class = "data.frame", row.names = c(NA, -10L))
我不太确定您想要的输出格式,实现的方法有很多种。此外,数据结构中的数组(目前每个数组中只有一个对象)让事情稍微复杂了一些,因为它们也可能包含多个对象。
无论如何,借助 tidyjson 的 spread_all(),不需要太多代码就能完成。您也可以使用 spread_values() 只展开特定的值,或者使用 enter_object(answers) 只展开 answers 中的内容,等等。希望对您有所帮助!
library(tidyjson)
#>
#> Attaching package: 'tidyjson'
#> The following object is masked from 'package:stats':
#>
#> filter
library(tibble)
# Rebuild the question's data frame from its dput() output: a data.frame with
# a single character column `annotations` and ten rows, one JSON string each.
annotations <- structure(list(annotations = c("[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"DEERWHITETAILED\",\"answers\":{\"HOWMANY\":\"1\",\"YOUNGPRESENT\":\"NO\",\"ANTLERSPRESENT\":\"NO\",\"WHATBEHAVIORSDOYOUSEE\":[\"ALERT\"],\"ESTIMATEOFSNOWDEPTHSEETUTORIAL\":\"NOSNOWBAREGROUND\",\"ISITACTIVELYRAININGORSNOWINGINTHEPICTURE\":\"NO\"},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"FISHER\",\"answers\":{\"HOWMANY\":\"1\",\"YOUNGPRESENT\":\"NO\",\"WHATBEHAVIORSDOYOUSEE\":[\"WALKINGRUNNING\",\"ALERT\"],\"ESTIMATEOFSNOWDEPTHSEETUTORIAL\":\"1020CM\",\"ISITACTIVELYRAININGORSNOWINGINTHEPICTURE\":\"NO\"},\"filters\":{}}]}]",
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]"
)), class = "data.frame", row.names = c(NA, -10L))
# Put the raw JSON strings into a one-column tibble so tidyjson can be told
# which column holds the JSON.
ant <- tibble(raw = annotations$annotations)

# Flatten the two array levels (outer task array, inner "value" array) and
# spread the fields found at each level into columns.
# Note: the printed result has no column for the array-valued
# WHATBEHAVIORSDOYOUSEE field; it would need separate handling.
ant %>%
  as.tbl_json(json.column = "raw") %>%
  # One row per element of the outer array, indexed by `object_id`.
  gather_array("object_id") %>%
  # Spread the task-level fields (e.g. `task`) into columns.
  spread_all() %>%
  # Step into each task's "value" array ...
  enter_object("value") %>%
  # ... and produce one row per element, indexed by `value_id`.
  gather_array("value_id") %>%
  # Spread `choice` and the nested `answers.*` fields into columns.
  spread_all() %>%
  # Drop the tbl_json wrapper so a plain tibble is printed.
  as_tibble()
#> # A tibble: 10 x 9
#> object_id task value_id choice answers.HOWMANY answers.YOUNGPR…
#> <int> <chr> <int> <chr> <chr> <chr>
#> 1 1 T0 1 NOTHI… <NA> <NA>
#> 2 1 T0 1 NOTHI… <NA> <NA>
#> 3 1 T0 1 DEERW… 1 NO
#> 4 1 T0 1 NOTHI… <NA> <NA>
#> 5 1 T0 1 NOTHI… <NA> <NA>
#> 6 1 T0 1 NOTHI… <NA> <NA>
#> 7 1 T0 1 NOTHI… <NA> <NA>
#> 8 1 T0 1 NOTHI… <NA> <NA>
#> 9 1 T0 1 FISHER 1 NO
#> 10 1 T0 1 NOTHI… <NA> <NA>
#> # … with 3 more variables: answers.ANTLERSPRESENT <chr>,
#> # answers.ESTIMATEOFSNOWDEPTHSEETUTORIAL <chr>,
#> # answers.ISITACTIVELYRAININGORSNOWINGINTHEPICTURE <chr>
由 reprex 包 (v0.3.0) 创建于 2020 年 3 月 14 日