使用 tidyr spread 从长到宽
Using tidyr spread to move from long to wide
假设我有以下数据,表示由成分组 A、B、C 中的 2 种成分组成的混合物:
# Example data: each row describes a mixture of up to two components taken
# from A, B and C. var1/var2 name the components ("-" marks a missing second
# component) and val1/val2 hold the matching amounts.
# The outer parentheses make the assignment print its value.
(dat <- data.frame(
  var1 = c("A", "A", "A", "A", "A", "B", "B", "B", "C"),
  var2 = c("-", "B", "B", "C", "C", "-", "C", "C", "-"),
  val1 = c(100, 25, 50, 25, 50, 100, 25, 50, 100),
  val2 = c(0, 75, 50, 75, 50, 0, 75, 50, 0),
  stringsAsFactors = FALSE
))
# var1 var2 val1 val2
# 1 A - 100 0
# 2 A B 25 75
# 3 A B 50 50
# 4 A C 25 75
# 5 A C 50 50
# 6 B - 100 0
# 7 B C 25 75
# 8 B C 50 50
# 9 C - 100 0
我现在想转换这些数据:希望得到以 A、B、C 命名的列,分别给出每种成分的含量:
# A B C
# 1 100 0 0
# 2 25 75 0
# 3 50 50 0
# 4 25 0 75
# 5 50 0 50
# 6 0 100 0
# 7 0 25 75
# 8 0 50 50
# 9 0 0 100
我如何使用 tidyr 解决这个问题?是否可以通过 spread 和 unite 的某种组合来实现?
我相信有更优雅的方法可以做到这一点,但您可以执行以下操作:
library(dplyr)
library(tidyr)

# Stack both (component, amount) column pairs into one long table keyed by
# the original row number, drop the "-" placeholder entries, then spread the
# components out into one column each; components absent from a row get 0.
wideDf <- data.frame(
  # seq_len() instead of 1:nrow(dat): the latter yields c(1, 0) when `dat`
  # has no rows.
  id = rep(seq_len(nrow(dat)), 2),
  var = c(dat$var1, dat$var2),
  val = c(dat$val1, dat$val2)
) %>%
  filter(var != "-") %>%
  tidyr::spread(key = var, value = val, fill = 0)
结果如下:
> wideDf
# id A B C
# 1 1 100 0 0
# 2 2 25 75 0
# 3 3 50 50 0
# 4 4 25 0 75
# 5 5 50 0 50
# 6 6 0 100 0
# 7 7 0 25 75
# 8 8 0 50 50
# 9 9 0 0 100
这是一种不太可靠的方法,但似乎适用于您的示例。或许您可以将其作为更好解决方案的灵感。
# Row-wise base-R approach: build one named amount vector (A, B, C) per row
# of `dat`, then transpose so that components end up as columns.
t(apply(dat, MARGIN = 1, FUN = function(x) {
  # apply() passes each row as a character vector, so the two amounts
  # (columns 3 and 4) have to be converted back to numbers. Works only for
  # two component/amount column pairs.
  amounts <- as.numeric(x[3:4])
  names(amounts) <- x[1:2]
  # start from zero for every known component
  out <- c(A = 0, B = 0, C = 0)
  # Fill in by component name rather than by position, so the result does not
  # depend on the ordering of `amounts` or `out`; the "-" placeholder is not
  # a known component and is skipped.
  known <- names(amounts) %in% names(out)
  out[names(amounts)[known]] <- amounts[known]
  out
}))
A B C
[1,] 100 0 0
[2,] 25 75 0
[3,] 50 50 0
[4,] 25 0 75
[5,] 50 0 50
[6,] 0 100 0
[7,] 0 25 75
[8,] 0 50 50
[9,] 0 0 100
我会分两部分进行,然后将它们加在一起
library("tidyverse")

# Spread each (component, amount) pair on its own, then add the two results
# element-wise.
# rowid_to_column() prepends an integer row id. spread() orders its output by
# the remaining columns, so a character key made from the row names would
# sort as "1", "10", "2", ... once `dat` has ten or more rows.
v1 <- dat %>%
  rowid_to_column() %>%
  spread(key = var1, value = val1, fill = 0) %>%
  select(A, B, C)
# "A" never occurs in var2 for this data, so the "-" column (the val2 of rows
# without a second component) is reused as column A to give v2 the same shape
# as v1.
v2 <- dat %>%
  rowid_to_column() %>%
  spread(key = var2, value = val2, fill = 0) %>%
  select(A = `-`, B, C)
v1 + v2
如果你想使用 reshape2:
library(reshape2)

# Integer row id, so that ids compare numerically rather than as text.
dat$id <- seq_len(nrow(dat))
# Stack the (var1, val1) and (var2, val2) pairs under common column names,
# cast to one column per component (missing combinations become 0), and keep
# only the id and the real components, which drops the "-" placeholder column.
# setNames() is the base R function; columns are picked by name instead of by
# position.
dcast(
  rbind(
    dat[, c("id", "var1", "val1")],
    setNames(dat[, c("id", "var2", "val2")], c("id", "var1", "val1"))
  ),
  id ~ var1,
  value.var = "val1",
  fill = 0
)[, c("id", "A", "B", "C")]
# id A B C
# 1 1 100 0 0
# 2 2 25 75 0
# 3 3 50 50 0
# 4 4 25 0 75
# 5 5 50 0 50
# 6 6 0 100 0
# 7 7 0 25 75
# 8 8 0 50 50
# 9 9 0 0 100
假设我有以下数据,表示由成分组 A、B、C 中的 2 种成分组成的混合物:
# Example data: each row describes a mixture of up to two components taken
# from A, B and C. var1/var2 name the components ("-" marks a missing second
# component) and val1/val2 hold the matching amounts.
# The outer parentheses make the assignment print its value.
(dat <- data.frame(
  var1 = c("A", "A", "A", "A", "A", "B", "B", "B", "C"),
  var2 = c("-", "B", "B", "C", "C", "-", "C", "C", "-"),
  val1 = c(100, 25, 50, 25, 50, 100, 25, 50, 100),
  val2 = c(0, 75, 50, 75, 50, 0, 75, 50, 0),
  stringsAsFactors = FALSE
))
# var1 var2 val1 val2
# 1 A - 100 0
# 2 A B 25 75
# 3 A B 50 50
# 4 A C 25 75
# 5 A C 50 50
# 6 B - 100 0
# 7 B C 25 75
# 8 B C 50 50
# 9 C - 100 0
我现在想转换这些数据:希望得到以 A、B、C 命名的列,分别给出每种成分的含量:
# A B C
# 1 100 0 0
# 2 25 75 0
# 3 50 50 0
# 4 25 0 75
# 5 50 0 50
# 6 0 100 0
# 7 0 25 75
# 8 0 50 50
# 9 0 0 100
我如何使用 tidyr 解决这个问题?是否可以通过 spread 和 unite 的某种组合来实现?
我相信有更优雅的方法可以做到这一点,但您可以执行以下操作:
library(dplyr)
library(tidyr)

# Stack both (component, amount) column pairs into one long table keyed by
# the original row number, drop the "-" placeholder entries, then spread the
# components out into one column each; components absent from a row get 0.
wideDf <- data.frame(
  # seq_len() instead of 1:nrow(dat): the latter yields c(1, 0) when `dat`
  # has no rows.
  id = rep(seq_len(nrow(dat)), 2),
  var = c(dat$var1, dat$var2),
  val = c(dat$val1, dat$val2)
) %>%
  filter(var != "-") %>%
  tidyr::spread(key = var, value = val, fill = 0)
结果如下:
> wideDf
# id A B C
# 1 1 100 0 0
# 2 2 25 75 0
# 3 3 50 50 0
# 4 4 25 0 75
# 5 5 50 0 50
# 6 6 0 100 0
# 7 7 0 25 75
# 8 8 0 50 50
# 9 9 0 0 100
这是一种不太可靠的方法,但似乎适用于您的示例。或许您可以将其作为更好解决方案的灵感。
# Row-wise base-R approach: build one named amount vector (A, B, C) per row
# of `dat`, then transpose so that components end up as columns.
t(apply(dat, MARGIN = 1, FUN = function(x) {
  # apply() passes each row as a character vector, so the two amounts
  # (columns 3 and 4) have to be converted back to numbers. Works only for
  # two component/amount column pairs.
  amounts <- as.numeric(x[3:4])
  names(amounts) <- x[1:2]
  # start from zero for every known component
  out <- c(A = 0, B = 0, C = 0)
  # Fill in by component name rather than by position, so the result does not
  # depend on the ordering of `amounts` or `out`; the "-" placeholder is not
  # a known component and is skipped.
  known <- names(amounts) %in% names(out)
  out[names(amounts)[known]] <- amounts[known]
  out
}))
A B C
[1,] 100 0 0
[2,] 25 75 0
[3,] 50 50 0
[4,] 25 0 75
[5,] 50 0 50
[6,] 0 100 0
[7,] 0 25 75
[8,] 0 50 50
[9,] 0 0 100
我会分两部分进行,然后将它们加在一起
library("tidyverse")

# Spread each (component, amount) pair on its own, then add the two results
# element-wise.
# rowid_to_column() prepends an integer row id. spread() orders its output by
# the remaining columns, so a character key made from the row names would
# sort as "1", "10", "2", ... once `dat` has ten or more rows.
v1 <- dat %>%
  rowid_to_column() %>%
  spread(key = var1, value = val1, fill = 0) %>%
  select(A, B, C)
# "A" never occurs in var2 for this data, so the "-" column (the val2 of rows
# without a second component) is reused as column A to give v2 the same shape
# as v1.
v2 <- dat %>%
  rowid_to_column() %>%
  spread(key = var2, value = val2, fill = 0) %>%
  select(A = `-`, B, C)
v1 + v2
如果你想使用 reshape2:
library(reshape2)

# Integer row id, so that ids compare numerically rather than as text.
dat$id <- seq_len(nrow(dat))
# Stack the (var1, val1) and (var2, val2) pairs under common column names,
# cast to one column per component (missing combinations become 0), and keep
# only the id and the real components, which drops the "-" placeholder column.
# setNames() is the base R function; columns are picked by name instead of by
# position.
dcast(
  rbind(
    dat[, c("id", "var1", "val1")],
    setNames(dat[, c("id", "var2", "val2")], c("id", "var1", "val1"))
  ),
  id ~ var1,
  value.var = "val1",
  fill = 0
)[, c("id", "A", "B", "C")]
# id A B C
# 1 1 100 0 0
# 2 2 25 75 0
# 3 3 50 50 0
# 4 4 25 0 75
# 5 5 50 0 50
# 6 6 0 100 0
# 7 7 0 25 75
# 8 8 0 50 50
# 9 9 0 0 100