在 R 中展平嵌套 JSON

Flattening nested JSON in R

大家好:我已经在 stack overflow 和互联网的其他地方搜索了这个问题的答案,但是 none 我能找到的答案似乎对我有用。

我有数千行 json 数据,其中包含有关相机陷阱研究图像的信息。我在解压缩数据时遇到了很多麻烦。我使用 jsonlite::fromJSON 无济于事。来自 tidyjson 的 as.tbl_json 也是如此。

我的目标是编写一些代码,为我提供一个数据框,每个变量都有一列以 json 格式存储。你能帮忙吗?

这是我正在使用的数据向量,尽管我实际上将数据作为一个较大的 .csv 文件中的单列。第一行是列名。

annotations<-c(annotations,
"[{""task"":""T0"",""value"":[{""choice"":""NOTHINGHERE"",""answers"":{},""filters"":{}}]}]"
"[{""task"":""T0"",""value"":[{""choice"":""NOTHINGHERE"",""answers"":{},""filters"":{}}]}]"
"[{""task"":""T0"",""value"":[{""choice"":""DEERWHITETAILED"",""answers"":{""HOWMANY"":""1"",""YOUNGPRESENT"":""NO"",""ANTLERSPRESENT"":""NO"",""WHATBEHAVIORSDOYOUSEE"":[""ALERT""],""ESTIMATEOFSNOWDEPTHSEETUTORIAL"":""NOSNOWBAREGROUND"",""ISITACTIVELYRAININGORSNOWINGINTHEPICTURE"":""NO""},""filters"":{}}]}]"
"[{""task"":""T0"",""value"":[{""choice"":""NOTHINGHERE"",""answers"":{},""filters"":{}}]}]"
"[{""task"":""T0"",""value"":[{""choice"":""NOTHINGHERE"",""answers"":{},""filters"":{}}]}]"
"[{""task"":""T0"",""value"":[{""choice"":""NOTHINGHERE"",""answers"":{},""filters"":{}}]}]"
"[{""task"":""T0"",""value"":[{""choice"":""NOTHINGHERE"",""answers"":{},""filters"":{}}]}]"
"[{""task"":""T0"",""value"":[{""choice"":""NOTHINGHERE"",""answers"":{},""filters"":{}}]}]"
"[{""task"":""T0"",""value"":[{""choice"":""FISHER"",""answers"":{""HOWMANY"":""1"",""YOUNGPRESENT"":""NO"",""WHATBEHAVIORSDOYOUSEE"":[""WALKINGRUNNING"",""ALERT""],""ESTIMATEOFSNOWDEPTHSEETUTORIAL"":""1020CM"",""ISITACTIVELYRAININGORSNOWINGINTHEPICTURE"":""NO""},""filters"":{}}]}]"
"[{""task"":""T0"",""value"":[{""choice"":""NOTHINGHERE"",""answers"":{},""filters"":{}}]}]")

这就是我 运行 dput(annotations):

得到的结果
structure(list(annotations = c("[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]", 
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]", 
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"DEERWHITETAILED\",\"answers\":{\"HOWMANY\":\"1\",\"YOUNGPRESENT\":\"NO\",\"ANTLERSPRESENT\":\"NO\",\"WHATBEHAVIORSDOYOUSEE\":[\"ALERT\"],\"ESTIMATEOFSNOWDEPTHSEETUTORIAL\":\"NOSNOWBAREGROUND\",\"ISITACTIVELYRAININGORSNOWINGINTHEPICTURE\":\"NO\"},\"filters\":{}}]}]", 
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]", 
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]", 
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]", 
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]", 
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]", 
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"FISHER\",\"answers\":{\"HOWMANY\":\"1\",\"YOUNGPRESENT\":\"NO\",\"WHATBEHAVIORSDOYOUSEE\":[\"WALKINGRUNNING\",\"ALERT\"],\"ESTIMATEOFSNOWDEPTHSEETUTORIAL\":\"1020CM\",\"ISITACTIVELYRAININGORSNOWINGINTHEPICTURE\":\"NO\"},\"filters\":{}}]}]", 
"[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]"
)), class = "data.frame", row.names = c(NA, -10L))

我不完全清楚您要寻找的输出格式。有很多不同的方法可以做到这一点。此外,数据结构中的数组(每个数组中只有一个对象)使事情变得有点复杂,因为它们 可以 包含更多对象。

无论如何,由于 spread_all()tidyjson 不需要太多代码。您也可以使用 spread_values()enter_object(answers) 仅传播特定值以传播答案等。希望它有所帮助!

library(tidyjson)
#> 
#> Attaching package: 'tidyjson'
#> The following object is masked from 'package:stats':
#> 
#>     filter
library(tibble)

annotations <- structure(list(annotations = c("[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]", 
                                              "[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]", 
                                              "[{\"task\":\"T0\",\"value\":[{\"choice\":\"DEERWHITETAILED\",\"answers\":{\"HOWMANY\":\"1\",\"YOUNGPRESENT\":\"NO\",\"ANTLERSPRESENT\":\"NO\",\"WHATBEHAVIORSDOYOUSEE\":[\"ALERT\"],\"ESTIMATEOFSNOWDEPTHSEETUTORIAL\":\"NOSNOWBAREGROUND\",\"ISITACTIVELYRAININGORSNOWINGINTHEPICTURE\":\"NO\"},\"filters\":{}}]}]", 
                                              "[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]", 
                                              "[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]", 
                                              "[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]", 
                                              "[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]", 
                                              "[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]", 
                                              "[{\"task\":\"T0\",\"value\":[{\"choice\":\"FISHER\",\"answers\":{\"HOWMANY\":\"1\",\"YOUNGPRESENT\":\"NO\",\"WHATBEHAVIORSDOYOUSEE\":[\"WALKINGRUNNING\",\"ALERT\"],\"ESTIMATEOFSNOWDEPTHSEETUTORIAL\":\"1020CM\",\"ISITACTIVELYRAININGORSNOWINGINTHEPICTURE\":\"NO\"},\"filters\":{}}]}]", 
                                              "[{\"task\":\"T0\",\"value\":[{\"choice\":\"NOTHINGHERE\",\"answers\":{},\"filters\":{}}]}]"
)), class = "data.frame", row.names = c(NA, -10L))

ant <- tibble(raw = annotations$annotations)

as.tbl_json(ant, json.column = "raw") %>%
  gather_array("object_id") %>% 
  spread_all() %>%
  enter_object("value") %>%
  gather_array("value_id") %>%
  spread_all() %>%
  as_tibble()
#> # A tibble: 10 x 9
#>    object_id task  value_id choice answers.HOWMANY answers.YOUNGPR…
#>        <int> <chr>    <int> <chr>  <chr>           <chr>           
#>  1         1 T0           1 NOTHI… <NA>            <NA>            
#>  2         1 T0           1 NOTHI… <NA>            <NA>            
#>  3         1 T0           1 DEERW… 1               NO              
#>  4         1 T0           1 NOTHI… <NA>            <NA>            
#>  5         1 T0           1 NOTHI… <NA>            <NA>            
#>  6         1 T0           1 NOTHI… <NA>            <NA>            
#>  7         1 T0           1 NOTHI… <NA>            <NA>            
#>  8         1 T0           1 NOTHI… <NA>            <NA>            
#>  9         1 T0           1 FISHER 1               NO              
#> 10         1 T0           1 NOTHI… <NA>            <NA>            
#> # … with 3 more variables: answers.ANTLERSPRESENT <chr>,
#> #   answers.ESTIMATEOFSNOWDEPTHSEETUTORIAL <chr>,
#> #   answers.ISITACTIVELYRAININGORSNOWINGINTHEPICTURE <chr>

reprex package (v0.3.0)

于 2020 年 3 月 14 日创建