在 R 中从宽到长重塑,其中 id 和 id 的值在同一行
Reshape from wide to long in R where id and value of id are in the same row
我无法将我的数据集重塑为面板数据集。我的 df 如下所示
id s1 s2 s3 s4 ct1 ct2 ret1 ret2 ret3 ret4
1 a b c d 0.5 0.5 0.6 0.7 0.8 0.5
2 c b a d 0.6 0.6 0.7 0.6 0.5 0.4
3 a c d b 0.7 0.7 0.7 0.8 0.2 0.1
我想整形成如下样子
id s ct1 ct2 ret
1 a 0.5 0.5 0.6
1 b 0.5 0.5 0.7
1 c 0.5 0.5 0.8
1 d 0.5 0.5 0.5
2 a 0.6 0.6 0.5
2 b 0.6 0.6 0.6
2 c 0.6 0.6 0.7
2 d 0.6 0.6 0.4
3 a 0.7 0.7 0.7
3 b 0.7 0.7 0.1
3 c 0.7 0.7 0.8
3 d 0.7 0.7 0.2
我经常从宽变长,但不知何故我的脑袋无法解决这个问题。
您可以使用 tidyr 包中的 gather 和 spread 来完成。为了能够把数据重新展开(spread),需要先创建一个临时 id 变量:
library(dplyr)
library(tidyr)
library(stringr) # str_extract() lives in stringr; dplyr/tidyr do not attach it

# Reshape the wide `df` (columns s1..s4 and ret1..ret4) into one row per
# (id, position) pair with a single `s` column and a single `ret` column.
df %>%
  # Stack every s*/ret* column into key/value pairs; id, ct1 and ct2 stay as
  # identifier columns. Because s* is text and ret* is numeric, `value` ends
  # up as character here.
  gather(key, value, -id, -ct1, -ct2) %>%
  # Strip the trailing position digit: "s1" -> "s", "ret3" -> "ret".
  mutate(key = str_extract(key, "[:alpha:]+")) %>%
  # Number the rows within each key so that the k-th "s" row and the k-th
  # "ret" row share a tmp_id; spread() needs this to identify rows uniquely.
  # This pairing relies on the s* and ret* columns being in the same order.
  group_by(key) %>%
  mutate(tmp_id = row_number()) %>%
  ungroup() %>%
  # convert = TRUE re-infers column types, so `ret` becomes numeric again
  # instead of staying character after the gather() coercion.
  spread(key, value, convert = TRUE) %>%
  select(id, s, ct1, ct2, ret)
下面是使用 tidyr 开发版(通过 devtools::install_github("tidyverse/tidyr") 安装)的一种做法:借助 pivot_longer,这个操作会容易得多。我们创建一个 spec,指明所有 s 列应合并到同一个变量 s 中,ret 列同理。最后的 obs 列记录的是 s 或 ret 后面的数字,如果不需要可以将其删除。
library(tidyverse)

# Example data, parsed from whitespace-separated text.
tbl <- read_table2(
"id s1 s2 s3 s4 ct1 ct2 ret1 ret2 ret3 ret4
1 a b c d 0.5 0.5 0.6 0.7 0.8 0.5
2 c b a d 0.6 0.6 0.7 0.6 0.5 0.4
3 a c d b 0.7 0.7 0.7 0.8 0.2 0.1"
)

# Pivoting spec: one row per wide column to be reshaped.
#   .name  - the existing wide column (s1..s4, ret1..ret4)
#   .value - the long column it feeds ("s" or "ret"): the name minus its
#            trailing digit
#   obs    - the digit itself, kept as an extra identifier column
# The backslash must be doubled inside an R string literal: "\d" is not a
# valid escape and fails at parse time, while "\\d" yields the regex \d.
spec <- tibble(
  `.name` = tbl %>% select(matches("^s|ret")) %>% colnames(),
  `.value` = str_remove(`.name`, "\\d$"),
  obs = str_extract(`.name`, "\\d")
)

# Released tidyr (>= 1.0.0) takes a hand-built spec through
# pivot_longer_spec(); the `spec =` argument of pivot_longer() only existed
# in the pre-release development version.
tbl %>%
  pivot_longer_spec(spec)
#> # A tibble: 12 x 6
#> id ct1 ct2 obs s ret
#> <dbl> <dbl> <dbl> <chr> <chr> <dbl>
#> 1 1 0.5 0.5 1 a 0.6
#> 2 1 0.5 0.5 2 b 0.7
#> 3 1 0.5 0.5 3 c 0.8
#> 4 1 0.5 0.5 4 d 0.5
#> 5 2 0.6 0.6 1 c 0.7
#> 6 2 0.6 0.6 2 b 0.6
#> 7 2 0.6 0.6 3 a 0.5
#> 8 2 0.6 0.6 4 d 0.4
#> 9 3 0.7 0.7 1 a 0.7
#> 10 3 0.7 0.7 2 c 0.8
#> 11 3 0.7 0.7 3 d 0.2
#> 12 3 0.7 0.7 4 b 0.1
由 reprex package (v0.3.0)
于 2019-07-23 创建
1) 基础 R
一种做法是使用 reshape:
# Base R wide-to-long: s1..s4 collapse into `s` and ret1..ret4 into `ret`,
# while id, ct1 and ct2 identify each original row. `varying` lists the wide
# columns interleaved as s1, ret1, s2, ret2, ...; sep = "" tells reshape()
# that the variable name and the time index are not separated by a character.
out <- reshape(
  data = dat,
  direction = "long",
  idvar = c("id", "ct1", "ct2"),
  varying = paste0(rep(c("s", "ret"), times = 4), rep(1:4, each = 2)),
  sep = ""
)
删除行名和 time 列:
# Remove the `time` column, then reset the row names to the default sequence.
out$time <- NULL
rownames(out) <- NULL
结果
# Show the long table with rows sorted by id.
out[order(out[["id"]]), ]
# id ct1 ct2 s ret
#1 1 0.5 0.5 a 0.6
#4 1 0.5 0.5 b 0.7
#7 1 0.5 0.5 c 0.8
#10 1 0.5 0.5 d 0.5
#2 2 0.6 0.6 c 0.7
#5 2 0.6 0.6 b 0.6
#8 2 0.6 0.6 a 0.5
#11 2 0.6 0.6 d 0.4
#3 3 0.7 0.7 a 0.7
#6 3 0.7 0.7 c 0.8
#9 3 0.7 0.7 d 0.2
#12 3 0.7 0.7 b 0.1
2) data.table
使用来自 data.table
的 melt
library(data.table)

# Melt two groups of measure columns at once: columns matching ^s<digit> go
# into `s`, columns matching ^ret<digit> go into `ret`. The backslash must be
# doubled inside an R string literal ("\\d"); a bare "\d" is an invalid escape
# and fails at parse time.
# Note: setDT() converts `dat` to a data.table by reference, so `dat` itself
# is changed by this call.
out <- melt(
  setDT(dat),
  id.vars = c("id", "ct1", "ct2"),
  measure.vars = patterns("^s\\d", "^ret\\d"),
  value.name = c("s", "ret")
)[, variable := NULL] # drop the index column that melt() adds
out
数据
# Sample data: three ids, four string slots (s1..s4, stored as factors with
# per-column levels), two constants per id (ct1, ct2) and four returns
# (ret1..ret4).
dat <- data.frame(
  id = 1:3,
  s1 = factor(c("a", "c", "a"), levels = c("a", "c")),
  s2 = factor(c("b", "b", "c"), levels = c("b", "c")),
  s3 = factor(c("c", "a", "d"), levels = c("a", "c", "d")),
  s4 = factor(c("d", "d", "b"), levels = c("b", "d")),
  ct1 = c(0.5, 0.6, 0.7),
  ct2 = c(0.5, 0.6, 0.7),
  ret1 = c(0.6, 0.7, 0.7),
  ret2 = c(0.7, 0.6, 0.8),
  ret3 = c(0.8, 0.5, 0.2),
  ret4 = c(0.5, 0.4, 0.1)
)
我无法将我的数据集重塑为面板数据集。我的 df 如下所示
id s1 s2 s3 s4 ct1 ct2 ret1 ret2 ret3 ret4
1 a b c d 0.5 0.5 0.6 0.7 0.8 0.5
2 c b a d 0.6 0.6 0.7 0.6 0.5 0.4
3 a c d b 0.7 0.7 0.7 0.8 0.2 0.1
我想整形成如下样子
id s ct1 ct2 ret
1 a 0.5 0.5 0.6
1 b 0.5 0.5 0.7
1 c 0.5 0.5 0.8
1 d 0.5 0.5 0.5
2 a 0.6 0.6 0.5
2 b 0.6 0.6 0.6
2 c 0.6 0.6 0.7
2 d 0.6 0.6 0.4
3 a 0.7 0.7 0.7
3 b 0.7 0.7 0.1
3 c 0.7 0.7 0.8
3 d 0.7 0.7 0.2
我经常从宽变长,但不知何故我的脑袋无法解决这个问题。
您可以使用 tidyr 包中的 gather 和 spread 来完成。为了能够把数据重新展开(spread),需要先创建一个临时 id 变量:
library(dplyr)
library(tidyr)
library(stringr) # str_extract() lives in stringr; dplyr/tidyr do not attach it

# Reshape the wide `df` (columns s1..s4 and ret1..ret4) into one row per
# (id, position) pair with a single `s` column and a single `ret` column.
df %>%
  # Stack every s*/ret* column into key/value pairs; id, ct1 and ct2 stay as
  # identifier columns. Because s* is text and ret* is numeric, `value` ends
  # up as character here.
  gather(key, value, -id, -ct1, -ct2) %>%
  # Strip the trailing position digit: "s1" -> "s", "ret3" -> "ret".
  mutate(key = str_extract(key, "[:alpha:]+")) %>%
  # Number the rows within each key so that the k-th "s" row and the k-th
  # "ret" row share a tmp_id; spread() needs this to identify rows uniquely.
  # This pairing relies on the s* and ret* columns being in the same order.
  group_by(key) %>%
  mutate(tmp_id = row_number()) %>%
  ungroup() %>%
  # convert = TRUE re-infers column types, so `ret` becomes numeric again
  # instead of staying character after the gather() coercion.
  spread(key, value, convert = TRUE) %>%
  select(id, s, ct1, ct2, ret)
下面是使用 tidyr 开发版(通过 devtools::install_github("tidyverse/tidyr") 安装)的一种做法:借助 pivot_longer,这个操作会容易得多。我们创建一个 spec,指明所有 s 列应合并到同一个变量 s 中,ret 列同理。最后的 obs 列记录的是 s 或 ret 后面的数字,如果不需要可以将其删除。
library(tidyverse)

# Example data, parsed from whitespace-separated text.
tbl <- read_table2(
"id s1 s2 s3 s4 ct1 ct2 ret1 ret2 ret3 ret4
1 a b c d 0.5 0.5 0.6 0.7 0.8 0.5
2 c b a d 0.6 0.6 0.7 0.6 0.5 0.4
3 a c d b 0.7 0.7 0.7 0.8 0.2 0.1"
)

# Pivoting spec: one row per wide column to be reshaped.
#   .name  - the existing wide column (s1..s4, ret1..ret4)
#   .value - the long column it feeds ("s" or "ret"): the name minus its
#            trailing digit
#   obs    - the digit itself, kept as an extra identifier column
# The backslash must be doubled inside an R string literal: "\d" is not a
# valid escape and fails at parse time, while "\\d" yields the regex \d.
spec <- tibble(
  `.name` = tbl %>% select(matches("^s|ret")) %>% colnames(),
  `.value` = str_remove(`.name`, "\\d$"),
  obs = str_extract(`.name`, "\\d")
)

# Released tidyr (>= 1.0.0) takes a hand-built spec through
# pivot_longer_spec(); the `spec =` argument of pivot_longer() only existed
# in the pre-release development version.
tbl %>%
  pivot_longer_spec(spec)
#> # A tibble: 12 x 6
#> id ct1 ct2 obs s ret
#> <dbl> <dbl> <dbl> <chr> <chr> <dbl>
#> 1 1 0.5 0.5 1 a 0.6
#> 2 1 0.5 0.5 2 b 0.7
#> 3 1 0.5 0.5 3 c 0.8
#> 4 1 0.5 0.5 4 d 0.5
#> 5 2 0.6 0.6 1 c 0.7
#> 6 2 0.6 0.6 2 b 0.6
#> 7 2 0.6 0.6 3 a 0.5
#> 8 2 0.6 0.6 4 d 0.4
#> 9 3 0.7 0.7 1 a 0.7
#> 10 3 0.7 0.7 2 c 0.8
#> 11 3 0.7 0.7 3 d 0.2
#> 12 3 0.7 0.7 4 b 0.1
由 reprex package (v0.3.0) 于 2019-07-23 创建
1) 基础 R
一种做法是使用 reshape:
# Base R wide-to-long: s1..s4 collapse into `s` and ret1..ret4 into `ret`,
# while id, ct1 and ct2 identify each original row. `varying` lists the wide
# columns interleaved as s1, ret1, s2, ret2, ...; sep = "" tells reshape()
# that the variable name and the time index are not separated by a character.
out <- reshape(
  data = dat,
  direction = "long",
  idvar = c("id", "ct1", "ct2"),
  varying = paste0(rep(c("s", "ret"), times = 4), rep(1:4, each = 2)),
  sep = ""
)
删除行名和 time 列:
# Remove the `time` column, then reset the row names to the default sequence.
out$time <- NULL
rownames(out) <- NULL
结果
# Show the long table with rows sorted by id.
out[order(out[["id"]]), ]
# id ct1 ct2 s ret
#1 1 0.5 0.5 a 0.6
#4 1 0.5 0.5 b 0.7
#7 1 0.5 0.5 c 0.8
#10 1 0.5 0.5 d 0.5
#2 2 0.6 0.6 c 0.7
#5 2 0.6 0.6 b 0.6
#8 2 0.6 0.6 a 0.5
#11 2 0.6 0.6 d 0.4
#3 3 0.7 0.7 a 0.7
#6 3 0.7 0.7 c 0.8
#9 3 0.7 0.7 d 0.2
#12 3 0.7 0.7 b 0.1
2) data.table
使用来自 data.table
的 melt
library(data.table)

# Melt two groups of measure columns at once: columns matching ^s<digit> go
# into `s`, columns matching ^ret<digit> go into `ret`. The backslash must be
# doubled inside an R string literal ("\\d"); a bare "\d" is an invalid escape
# and fails at parse time.
# Note: setDT() converts `dat` to a data.table by reference, so `dat` itself
# is changed by this call.
out <- melt(
  setDT(dat),
  id.vars = c("id", "ct1", "ct2"),
  measure.vars = patterns("^s\\d", "^ret\\d"),
  value.name = c("s", "ret")
)[, variable := NULL] # drop the index column that melt() adds
out
数据
# Sample data: three ids, four string slots (s1..s4, stored as factors with
# per-column levels), two constants per id (ct1, ct2) and four returns
# (ret1..ret4).
dat <- data.frame(
  id = 1:3,
  s1 = factor(c("a", "c", "a"), levels = c("a", "c")),
  s2 = factor(c("b", "b", "c"), levels = c("b", "c")),
  s3 = factor(c("c", "a", "d"), levels = c("a", "c", "d")),
  s4 = factor(c("d", "d", "b"), levels = c("b", "d")),
  ct1 = c(0.5, 0.6, 0.7),
  ct2 = c(0.5, 0.6, 0.7),
  ret1 = c(0.6, 0.7, 0.7),
  ret2 = c(0.7, 0.6, 0.8),
  ret3 = c(0.8, 0.5, 0.2),
  ret4 = c(0.5, 0.4, 0.1)
)