用 purrr 重组列表
Restructuring list with purrr
我在使用 R 和 purrr 清理复杂列表时遇到了下面的问题,下面用一个示例来说明。
需要加载的包:
library(jsonlite)
library(purrr)
library(tibble)
library(dplyr)
让我们从 NHL 的 JSON 接口获取示例数据。
# Addresses of the two live game feeds used as sample data.
url <- c(
  "https://statsapi.web.nhl.com/api/v1/game/2017010002/feed/live",
  "https://statsapi.web.nhl.com/api/v1/game/2017010005/feed/live"
)

# Download and parse each feed; data_list holds one parsed object per URL.
data_list <- map(.x = url, .f = fromJSON)
这是一个非常复杂的列表,而我最终想要的是下面的 result。
主要思路是:我想要一个由 tibble 组成的列表列,而每个 tibble 都由从列表不同部分选取的向量/列构成。下面是我失败的尝试:
# Failed attempt: every map() call returns a list with one element per entry
# of data_list, so the inner tibble() receives three list-columns and builds
# a single tibble with one row per game. Wrapping it in list() produces a
# length-1 list, which is recycled to every row of the outer tibble instead
# of giving each game its own tibble of plays.
wrong <- tibble(
  gamepk = map_chr(data_list, "gamePk", .default = NA),
  home = map_chr(
    data_list, c("gameData", "teams", "home", "triCode"),
    .default = NA
  ),
  away = map_chr(
    data_list, c("gameData", "teams", "away", "triCode"),
    .default = NA
  ),
  test = list(
    tibble(
      event = map(
        data_list, c("liveData", "plays", "allPlays", "result", "event"),
        .default = NA
      ),
      x_cord = map(
        data_list, c("liveData", "plays", "allPlays", "coordinates", "x"),
        .default = NA
      ),
      y_cord = map(
        data_list, c("liveData", "plays", "allPlays", "coordinates", "y"),
        .default = NA
      )
    )
  )
)
# A tibble: 2 x 4
gamepk home away test
<chr> <chr> <chr> <list>
1 2017010002 VAN VGK <tibble [2 x 3]>
2 2017010005 BUF CAR <tibble [2 x 3]>
下面是一种笨拙的做法,但它能得到正确的结果。
# Manual construction of the desired output: the game-level columns first,
# then one tibble of plays per game, built by hand for each element of
# data_list.
result <- tibble(
  gamepk = map_chr(data_list, c("gamePk"), .default = NA),
  home = map_chr(
    data_list, c("gameData", "teams", "home", "triCode"),
    .default = NA
  ),
  away = map_chr(
    data_list, c("gameData", "teams", "away", "triCode"),
    .default = NA
  )
)

# Plays of the first game: event name plus x/y coordinates, side by side.
test1 <- bind_cols(
  event = data_list[[1]]$liveData$plays$allPlays$result$event,
  x_cord = data_list[[1]]$liveData$plays$allPlays$coordinates$x,
  y_cord = data_list[[1]]$liveData$plays$allPlays$coordinates$y
)

# Plays of the second game, same three columns.
test2 <- bind_cols(
  event = data_list[[2]]$liveData$plays$allPlays$result$event,
  x_cord = data_list[[2]]$liveData$plays$allPlays$coordinates$x,
  y_cord = data_list[[2]]$liveData$plays$allPlays$coordinates$y
)

# Assign the whole list-column in one step. Indexing into result$test before
# the column exists makes tibble warn about an unknown column and relies on
# a length-1 list being recycled to every row before the second assignment.
result$test <- list(test1, test2)
result
# A tibble: 2 x 4
gamepk home away test
<chr> <chr> <chr> <list>
1 2017010002 VAN VGK <tibble [314 x 3]>
2 2017010005 BUF CAR <tibble [283 x 3]>
在您已经完成的工作的基础上,如果把 test 构造成一个由列表组成的列表,就可以把它和 tibble() 一起传给 pmap()。
pmap() 会同时遍历各个嵌套列表中位置对应的元素,并把它们合并成 tibble(即 "entibble"),最终得到一个包含两个 tibble 的列表。
test 对应的新代码如下所示:
# Fragment meant to serve as the `test` argument of a tibble() call: the
# three map() results are gathered in a named list, and pmap() calls tibble()
# once per position across them, yielding one tibble per element of
# data_list.
test = list(event = map(data_list, c("liveData", "plays", "allPlays", "result", "event"), .default = NA),
x_cord = map(data_list, c("liveData", "plays", "allPlays", "coordinates", "x"), .default = NA),
y_cord = map(data_list, c("liveData", "plays", "allPlays", "coordinates", "y"), .default = NA)) %>%
pmap(tibble)
把它与您已有的代码结合起来,就能得到正确的("right")输出。
# One row per element of data_list; `test` is a list-column holding that
# game's plays as a tibble.
right <- tibble(
  gamepk = map_chr(data_list, "gamePk", .default = NA),
  home = map_chr(
    data_list, c("gameData", "teams", "home", "triCode"),
    .default = NA
  ),
  away = map_chr(
    data_list, c("gameData", "teams", "away", "triCode"),
    .default = NA
  ),
  # pmap() walks the three per-game lists in parallel and calls tibble()
  # once per game with that game's event, x_cord and y_cord.
  test = pmap(
    list(
      event = map(
        data_list, c("liveData", "plays", "allPlays", "result", "event"),
        .default = NA
      ),
      x_cord = map(
        data_list, c("liveData", "plays", "allPlays", "coordinates", "x"),
        .default = NA
      ),
      y_cord = map(
        data_list, c("liveData", "plays", "allPlays", "coordinates", "y"),
        .default = NA
      )
    ),
    tibble
  )
)
right
# A tibble: 2 x 4
gamepk home away test
<chr> <chr> <chr> <list>
1 2017010002 VAN VGK <tibble [314 x 3]>
2 2017010005 BUF CAR <tibble [283 x 3]>
我在使用 R 和 purrr 清理复杂列表时遇到了下面的问题,下面用一个示例来说明。
需要加载的包:
library(jsonlite)
library(purrr)
library(tibble)
library(dplyr)
让我们从 NHL 的 JSON 接口获取示例数据。
# Fetch the sample data. The stray prose that had been fused onto the front
# of this assignment is removed so that the vector is bound to `url`, the
# name the map() call below reads.
url <- c(
  "https://statsapi.web.nhl.com/api/v1/game/2017010002/feed/live",
  "https://statsapi.web.nhl.com/api/v1/game/2017010005/feed/live"
)

# Download and parse each feed; data_list holds one parsed object per URL.
data_list <- map(url, fromJSON)
这是一个非常复杂的列表,而我最终想要的是下面的 result。
主要思路是:我想要一个由 tibble 组成的列表列,而每个 tibble 都由从列表不同部分选取的向量/列构成。下面是我失败的尝试:
# Failed attempt: every map() call returns a list with one element per entry
# of data_list, so the inner tibble() receives three list-columns and builds
# a single tibble with one row per game. Wrapping it in list() produces a
# length-1 list, which is recycled to every row of the outer tibble instead
# of giving each game its own tibble of plays.
wrong <- tibble(
  gamepk = map_chr(data_list, "gamePk", .default = NA),
  home = map_chr(
    data_list, c("gameData", "teams", "home", "triCode"),
    .default = NA
  ),
  away = map_chr(
    data_list, c("gameData", "teams", "away", "triCode"),
    .default = NA
  ),
  test = list(
    tibble(
      event = map(
        data_list, c("liveData", "plays", "allPlays", "result", "event"),
        .default = NA
      ),
      x_cord = map(
        data_list, c("liveData", "plays", "allPlays", "coordinates", "x"),
        .default = NA
      ),
      y_cord = map(
        data_list, c("liveData", "plays", "allPlays", "coordinates", "y"),
        .default = NA
      )
    )
  )
)
# A tibble: 2 x 4
gamepk home away test
<chr> <chr> <chr> <list>
1 2017010002 VAN VGK <tibble [2 x 3]>
2 2017010005 BUF CAR <tibble [2 x 3]>
下面是一种笨拙的做法,但它能得到正确的结果。
# Manual construction of the desired output: the game-level columns first,
# then one tibble of plays per game, built by hand for each element of
# data_list.
result <- tibble(
  gamepk = map_chr(data_list, c("gamePk"), .default = NA),
  home = map_chr(
    data_list, c("gameData", "teams", "home", "triCode"),
    .default = NA
  ),
  away = map_chr(
    data_list, c("gameData", "teams", "away", "triCode"),
    .default = NA
  )
)

# Plays of the first game: event name plus x/y coordinates, side by side.
test1 <- bind_cols(
  event = data_list[[1]]$liveData$plays$allPlays$result$event,
  x_cord = data_list[[1]]$liveData$plays$allPlays$coordinates$x,
  y_cord = data_list[[1]]$liveData$plays$allPlays$coordinates$y
)

# Plays of the second game, same three columns.
test2 <- bind_cols(
  event = data_list[[2]]$liveData$plays$allPlays$result$event,
  x_cord = data_list[[2]]$liveData$plays$allPlays$coordinates$x,
  y_cord = data_list[[2]]$liveData$plays$allPlays$coordinates$y
)

# Assign the whole list-column in one step. Indexing into result$test before
# the column exists makes tibble warn about an unknown column and relies on
# a length-1 list being recycled to every row before the second assignment.
result$test <- list(test1, test2)
result
# A tibble: 2 x 4
gamepk home away test
<chr> <chr> <chr> <list>
1 2017010002 VAN VGK <tibble [314 x 3]>
2 2017010005 BUF CAR <tibble [283 x 3]>
在您已经完成的工作的基础上,如果把 test 构造成一个由列表组成的列表,就可以把它和 tibble() 一起传给 pmap()。
pmap() 会同时遍历各个嵌套列表中位置对应的元素,并把它们合并成 tibble(即 "entibble"),最终得到一个包含两个 tibble 的列表。
test 对应的新代码如下所示:
# Fragment meant to serve as the `test` argument of a tibble() call: the
# three map() results are gathered in a named list, and pmap() calls tibble()
# once per position across them, yielding one tibble per element of
# data_list.
test = list(event = map(data_list, c("liveData", "plays", "allPlays", "result", "event"), .default = NA),
x_cord = map(data_list, c("liveData", "plays", "allPlays", "coordinates", "x"), .default = NA),
y_cord = map(data_list, c("liveData", "plays", "allPlays", "coordinates", "y"), .default = NA)) %>%
pmap(tibble)
把它与您已有的代码结合起来,就能得到正确的("right")输出。
# One row per element of data_list; `test` is a list-column holding that
# game's plays as a tibble.
right <- tibble(
  gamepk = map_chr(data_list, "gamePk", .default = NA),
  home = map_chr(
    data_list, c("gameData", "teams", "home", "triCode"),
    .default = NA
  ),
  away = map_chr(
    data_list, c("gameData", "teams", "away", "triCode"),
    .default = NA
  ),
  # pmap() walks the three per-game lists in parallel and calls tibble()
  # once per game with that game's event, x_cord and y_cord.
  test = pmap(
    list(
      event = map(
        data_list, c("liveData", "plays", "allPlays", "result", "event"),
        .default = NA
      ),
      x_cord = map(
        data_list, c("liveData", "plays", "allPlays", "coordinates", "x"),
        .default = NA
      ),
      y_cord = map(
        data_list, c("liveData", "plays", "allPlays", "coordinates", "y"),
        .default = NA
      )
    ),
    tibble
  )
)
right
# A tibble: 2 x 4
gamepk home away test
<chr> <chr> <chr> <list>
1 2017010002 VAN VGK <tibble [314 x 3]>
2 2017010005 BUF CAR <tibble [283 x 3]>