将嵌套列表转换为具有不同列长度的 data.frame

Convert Nested List into data.frame with different column length

我试图将下面的嵌套列表转换为 data.frame,但没有成功。这里有一些棘手之处,主要是位置 1 的 “results” 元素与位置 2 的不一致,因为位置 2 没有任何结果。

不同位置的项目长度不一致

[[1]]
[[1]]$html_attributions
list()

[[1]]$results
  geometry.location.lat geometry.location.lng
1              25.66544             -100.4354
                                        id                    place_id
1 6ce0a030663144c8e992cbce51eb00479ef7db89 ChIJVy7b7FW9YoYRdaH2I_gOJIk
                                                                                                                                                                                       reference
1 CmRSAAAATdtVfB4Tz1aQ8GhGaw4-nRJ5lZlVNgiOR3ciF4QjmYC56bn6b7omWh1SJEWWqQQEFNXxGZndgEwSgl8sRCOtdF8aXpngUY878Q__yH4in8EMZMCIqSHLARqNgGlV4mKgEhDlvkHLXLiBW4F_KQVT83jIGhS5DJipk6PAnpPDXP2p-4X5NPuG9w

[[1]]$status
[1] "OK"

[[2]]
[[2]]$html_attributions
list()

[[2]]$results
list()

[[2]]$status
[1] "ZERO_RESULTS"

我尝试了以下代码,但它们不起作用。

# Attempt 1: rbind the components of each list element, rbind those results
# together, then relist() the combined object against the original list.
# NOTE(review): `myDataFrames` is not defined in this snippet; presumably it
# is the nested list printed above -- confirm.
m1 <- do.call(rbind, lapply(myDataFrames, function(y) do.call(rbind, y)))
relist(m1, skeleton = myDataFrames)

# Attempt 2: unlist everything into a 4-column matrix filled by row, then
# relist() it against the original list.
# NOTE(review): `T` is a reassignable alias; `TRUE` is the safer spelling.
relist(matrix(unlist(myDataFrames), ncol = 4, byrow = T), skeleton = myDataFrames)

# Attempt 3: data.table::rbindlist(), first with an id column named "index",
# then with fill = TRUE. The second assignment overwrites `df` from the first.
library(data.table)

df<-rbindlist(myDataFrames, idcol = "index")
df<-rbindlist(myDataFrames, fill=TRUE)

# Attempt 4: call rbind.data.frame() on all list elements at once, passing
# stringsAsFactors = FALSE as an extra argument.
myDataFrame <- do.call(rbind.data.frame, c(myDataFrames, list(stringsAsFactors = FALSE)))

我想我已经有了足够多的原始 JSON,可以构造一个可重现的例子:

# Canned response without hits: empty "results" array, status "ZERO_RESULTS".
emptyjson <- '{"html_attributions":[],"results":[],"status":"ZERO_RESULTS"}'
# Canned response with a single entry in "results" and status "OK".
okjson <- '{"html_attributions":[],"results":[{"geometry":{"location":{"lat":25.66544,"lon":-100.4354},"id":"foo","place_id":"quux"}}],"status":"OK"}'
# Sample input: an OK response on either side of an empty one.
jsons <- list(okjson, emptyjson)[c(1L, 2L, 1L)]

从这里开始,我将一步一步(慢慢地)走完整个过程。为了便于重现,我保留了许多中间结构,内容因此比较冗长,敬请见谅。这些步骤可以很容易地合并在一起,和/或放进 magrittr 管道中。

# Parse each JSON string into a nested R structure (one parsed response per
# input string), then print the resulting structure for inspection.
lists <- lapply(jsons, function(txt) jsonlite::fromJSON(txt))
str(lists)
# List of 3
#  $ :List of 3
#   ..$ html_attributions: list()
#   ..$ results          :'data.frame': 1 obs. of  1 variable:
#   .. ..$ geometry:'data.frame':   1 obs. of  3 variables:
#   .. .. ..$ location:'data.frame':    1 obs. of  2 variables:
#   .. .. .. ..$ lat: num 25.7
#   .. .. .. ..$ lon: num -100
#   .. .. ..$ id      : chr "foo"
#   .. .. ..$ place_id: chr "quux"
#   ..$ status           : chr "OK"
#  $ :List of 3
#   ..$ html_attributions: list()
#   ..$ results          : list()
#   ..$ status           : chr "ZERO_RESULTS"
#  $ :List of 3
#   ..$ html_attributions: list()
#   ..$ results          :'data.frame': 1 obs. of  1 variable:
#   .. ..$ geometry:'data.frame':   1 obs. of  3 variables:
#   .. .. ..$ location:'data.frame':    1 obs. of  2 variables:
#   .. .. .. ..$ lat: num 25.7
#   .. .. .. ..$ lon: num -100
#   .. .. ..$ id      : chr "foo"
#   .. .. ..$ place_id: chr "quux"
#   ..$ status           : chr "OK"


# Keep only the responses that carry a non-empty "results" element, then pull
# that element out of every surviving response.
goodlists <- lists[vapply(
  lists,
  function(resp) "results" %in% names(resp) && length(resp$results) > 0,
  logical(1)
)]
goodresults <- lapply(goodlists, function(resp) resp[["results"]])
str(goodresults)
# List of 2
#  $ :'data.frame': 1 obs. of  1 variable:
#   ..$ geometry:'data.frame':  1 obs. of  3 variables:
#   .. ..$ location:'data.frame':   1 obs. of  2 variables:
#   .. .. ..$ lat: num 25.7
#   .. .. ..$ lon: num -100
#   .. ..$ id      : chr "foo"
#   .. ..$ place_id: chr "quux"
#  $ :'data.frame': 1 obs. of  1 variable:
#   ..$ geometry:'data.frame':  1 obs. of  3 variables:
#   .. ..$ location:'data.frame':   1 obs. of  2 variables:
#   .. .. ..$ lat: num 25.7
#   .. .. ..$ lon: num -100
#   .. ..$ id      : chr "foo"
#   .. ..$ place_id: chr "quux"

# Turn each nested result into a plain data.frame whose nested data.frame
# columns are expanded into top-level, dot-named columns.
goodresultsdf <- lapply(goodresults, function(res) {
  res_df <- as.data.frame(res)
  jsonlite::flatten(res_df)
})
str(goodresultsdf)
# List of 2
#  $ :'data.frame': 1 obs. of  4 variables:
#   ..$ geometry.id          : chr "foo"
#   ..$ geometry.place_id    : chr "quux"
#   ..$ geometry.location.lat: num 25.7
#   ..$ geometry.location.lon: num -100
#  $ :'data.frame': 1 obs. of  4 variables:
#   ..$ geometry.id          : chr "foo"
#   ..$ geometry.place_id    : chr "quux"
#   ..$ geometry.location.lat: num 25.7
#   ..$ geometry.location.lon: num -100

我们现在得到了一个由 data.frame 组成的列表,这是一个不错的起点。

# Stack the flattened per-response data.frames row-wise into one data.frame,
# passing stringsAsFactors = FALSE alongside them as an rbind option.
do.call(
  what = rbind.data.frame,
  args = c(goodresultsdf, list(stringsAsFactors = FALSE))
)
#   geometry.id geometry.place_id geometry.location.lat geometry.location.lon
# 1         foo              quux              25.66544             -100.4354
# 2         foo              quux              25.66544             -100.4354