按行绑定多个数据框,并将来源数据框的名称添加为一列
row bind multiple dataframes and add name of originating dataframe into column
我的环境中有很多数据框:
# x1: three rows; "time" is POSIXct (UTC) and the value column is named "x1".
x1 <- structure(list(time = structure(c(1327241343, 1327327803, 1327414263
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), x1 = c(22.5,
12, 0)), .Names = c("time", "x1"), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -3L))
# x2: same layout as x1, with the value column named "x2".
x2 <- structure(list(time = structure(c(1326636543, 1326636603, 1326636663
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), x2 = c(8,
6, 1)), .Names = c("time", "x2"), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -3L))
# x3: a zero-row data frame.
# NOTE(review): the list element is written as `x3`, but .Names sets the
# column name to "x1" -- confirm which name is intended.
x3 <- structure(list(time = structure(numeric(0), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), x3 = numeric(0)), .Names = c("time",
"x1"), class = c("tbl_df", "tbl", "data.frame"), row.names = integer(0))
##-----------------------------------------------------------------------------
## PREVIEW
##-----------------------------------------------------------------------------
> knitr::kable(x1)
|time | x1|
|:-------------------|----:|
|2012-01-22 14:09:03 | 22.5|
|2012-01-23 14:10:03 | 12.0|
|2012-01-24 14:11:03 | 0.0|
> knitr::kable(x2)
|time | x2|
|:-------------------|--:|
|2012-01-15 14:09:03 | 8|
|2012-01-15 14:10:03 | 6|
|2012-01-15 14:11:03 | 1|
> knitr::kable(x3)
|time | x1|
|:----|--:|
请注意,x3 是一个空数据框,这反映了我的实际情况。我希望得到下面这个按行绑定后的单个数据框:
# Desired result: the rows of x1 followed by the rows of x2, with the value
# column renamed to "x" and a "which" column holding the source frame's name.
x.all <- structure(list(time = structure(c(1327241343, 1327327803, 1327414263,
1326636543, 1326636603, 1326636663), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), x = c(22.5, 12, 0, 8, 6, 1), which = c("x1",
"x1", "x1", "x2", "x2", "x2")), .Names = c("time", "x", "which"
), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-6L))
##-----------------------------------------------------------------------------
## PREVIEW
##-----------------------------------------------------------------------------
> knitr::kable(x.all)
|time | x|which |
|:-------------------|----:|:-----|
|2012-01-22 14:09:03 | 22.5|x1 |
|2012-01-23 14:10:03 | 12.0|x1 |
|2012-01-24 14:11:03 | 0.0|x1 |
|2012-01-15 14:09:03 | 8.0|x2 |
|2012-01-15 14:10:03 | 6.0|x2 |
|2012-01-15 14:11:03 | 1.0|x2 |
我知道该怎么做。但是,由于有 100 多个数据框(每个数据框包含 2 列、超过 500,000 行),我正在寻找一种高效的实现方法。
谢谢。
这个方法应该适合你:它使用 tidyverse,并通过 get 按名称获取数据框。
# Names of the data frames to combine.
listofdf <- paste0("x", 1:3)
# "x1" "x2" "x3"
library(tidyverse)
# Look up each data frame by name and give it the common column names
# "time" and "x"; .id = "which" records each frame's position in listofdf.
map_df(
  listofdf,
  function(df_name) setNames(get(df_name), c("time", "x")),
  .id = "which"
) %>%
  # Turn the position ("1", "2", ...) back into the name "x1", "x2", ...
  mutate(which = paste0("x", which)) %>%
  select(time, x, which)
# # A tibble: 6 x 3
# time x which
# <dttm> <dbl> <chr>
# 1 2012-01-22 14:09:03 22.5 x1
# 2 2012-01-23 14:10:03 12.0 x1
# 3 2012-01-24 14:11:03 0.0 x1
# 4 2012-01-15 14:09:03 8.0 x2
# 5 2012-01-15 14:10:03 6.0 x2
# 6 2012-01-15 14:11:03 1.0 x2
编辑:处理名称并非总是以相同模式开头的数据框
您需要进行 2 处修改:
使用 ls() 获取 R 环境中的对象名称:
# ls() lists every object in the environment (including listofdf itself once
# it exists), so keep only the names bound to two-column data frames.
listofdf <- names(Filter(
  function(obj) is.data.frame(obj) && ncol(obj) == 2L,
  mget(ls())
))
# "x1" "x2" "x3"
使用 mutate(which = listofdf[as.integer(which)]) 代替 mutate(which = paste0("x", which)) 来获取 ID:
# Fetch each data frame named in listofdf, rename its columns to "time" and
# "x", and row-bind the results.
map_df(
  listofdf,
  function(df_name) setNames(get(df_name), c("time", "x")),
  .id = "which"
) %>%
  # .id holds each frame's position in listofdf; index back to get its name.
  mutate(which = listofdf[as.integer(which)]) %>% # 2nd change
  select(time, x, which)
我的环境中有很多数据框:
# x1: three rows; "time" is POSIXct (UTC) and the value column is named "x1".
x1 <- structure(list(time = structure(c(1327241343, 1327327803, 1327414263
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), x1 = c(22.5,
12, 0)), .Names = c("time", "x1"), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -3L))
# x2: same layout as x1, with the value column named "x2".
x2 <- structure(list(time = structure(c(1326636543, 1326636603, 1326636663
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), x2 = c(8,
6, 1)), .Names = c("time", "x2"), class = c("tbl_df", "tbl",
"data.frame"), row.names = c(NA, -3L))
# x3: a zero-row data frame.
# NOTE(review): the list element is written as `x3`, but .Names sets the
# column name to "x1" -- confirm which name is intended.
x3 <- structure(list(time = structure(numeric(0), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), x3 = numeric(0)), .Names = c("time",
"x1"), class = c("tbl_df", "tbl", "data.frame"), row.names = integer(0))
##-----------------------------------------------------------------------------
## PREVIEW
##-----------------------------------------------------------------------------
> knitr::kable(x1)
|time | x1|
|:-------------------|----:|
|2012-01-22 14:09:03 | 22.5|
|2012-01-23 14:10:03 | 12.0|
|2012-01-24 14:11:03 | 0.0|
> knitr::kable(x2)
|time | x2|
|:-------------------|--:|
|2012-01-15 14:09:03 | 8|
|2012-01-15 14:10:03 | 6|
|2012-01-15 14:11:03 | 1|
> knitr::kable(x3)
|time | x1|
|:----|--:|
请注意,x3 是一个空数据框,这反映了我的实际情况。我希望得到下面这个按行绑定后的单个数据框:
# Desired result: the rows of x1 followed by the rows of x2, with the value
# column renamed to "x" and a "which" column holding the source frame's name.
x.all <- structure(list(time = structure(c(1327241343, 1327327803, 1327414263,
1326636543, 1326636603, 1326636663), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), x = c(22.5, 12, 0, 8, 6, 1), which = c("x1",
"x1", "x1", "x2", "x2", "x2")), .Names = c("time", "x", "which"
), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-6L))
##-----------------------------------------------------------------------------
## PREVIEW
##-----------------------------------------------------------------------------
> knitr::kable(x.all)
|time | x|which |
|:-------------------|----:|:-----|
|2012-01-22 14:09:03 | 22.5|x1 |
|2012-01-23 14:10:03 | 12.0|x1 |
|2012-01-24 14:11:03 | 0.0|x1 |
|2012-01-15 14:09:03 | 8.0|x2 |
|2012-01-15 14:10:03 | 6.0|x2 |
|2012-01-15 14:11:03 | 1.0|x2 |
我知道该怎么做。但是,由于有 100 多个数据框(每个数据框包含 2 列、超过 500,000 行),我正在寻找一种高效的实现方法。
谢谢。
这个方法应该适合你:它使用 tidyverse,并通过 get 按名称获取数据框。
# Names of the data frames to combine.
listofdf <- paste0("x", 1:3)
# "x1" "x2" "x3"
library(tidyverse)
# Look up each data frame by name and give it the common column names
# "time" and "x"; .id = "which" records each frame's position in listofdf.
map_df(
  listofdf,
  function(df_name) setNames(get(df_name), c("time", "x")),
  .id = "which"
) %>%
  # Turn the position ("1", "2", ...) back into the name "x1", "x2", ...
  mutate(which = paste0("x", which)) %>%
  select(time, x, which)
# # A tibble: 6 x 3
# time x which
# <dttm> <dbl> <chr>
# 1 2012-01-22 14:09:03 22.5 x1
# 2 2012-01-23 14:10:03 12.0 x1
# 3 2012-01-24 14:11:03 0.0 x1
# 4 2012-01-15 14:09:03 8.0 x2
# 5 2012-01-15 14:10:03 6.0 x2
# 6 2012-01-15 14:11:03 1.0 x2
编辑:处理名称并非总是以相同模式开头的数据框
您需要进行 2 处修改:
使用 ls() 获取 R 环境中的对象名称:
# ls() lists every object in the environment (including listofdf itself once
# it exists), so keep only the names bound to two-column data frames.
listofdf <- names(Filter(
  function(obj) is.data.frame(obj) && ncol(obj) == 2L,
  mget(ls())
))
# "x1" "x2" "x3"
使用 mutate(which = listofdf[as.integer(which)]) 代替 mutate(which = paste0("x", which)) 来获取 ID:
# Fetch each data frame named in listofdf, rename its columns to "time" and
# "x", and row-bind the results.
map_df(
  listofdf,
  function(df_name) setNames(get(df_name), c("time", "x")),
  .id = "which"
) %>%
  # .id holds each frame's position in listofdf; index back to get its name.
  mutate(which = listofdf[as.integer(which)]) %>% # 2nd change
  select(time, x, which)