使用 tidyr spread 从长到宽

Using tidyr spread to move from long to wide

假设我有以下数据,表示由 A、B、C 三种成分中的两种组成的混合物("-" 表示没有第二种成分):

# Example data: var1/var2 name the components of each mixture ("-" marks an
# absent second component) and val1/val2 hold the matching amounts.
# The surrounding parentheses print the data frame right after assignment.
(dat <- data.frame(
  var1 = rep(c("A", "B", "C"), times = c(5, 3, 1)),
  var2 = c("-", "B", "B", "C", "C", "-", "C", "C", "-"),
  val1 = c(100, 25, 50, 25, 50, 100, 25, 50, 100),
  val2 = 100 - c(100, 25, 50, 25, 50, 100, 25, 50, 100),
  stringsAsFactors = FALSE
))

#   var1 var2 val1 val2
# 1    A    -  100    0
# 2    A    B   25   75
# 3    A    B   50   50
# 4    A    C   25   75
# 5    A    C   50   50
# 6    B    -  100    0
# 7    B    C   25   75
# 8    B    C   50   50
# 9    C    -  100    0

我现在想转换此数据:我想将列标记为 A, B, C,给出每种成分的含量:

#     A   B   C
# 1 100   0   0
# 2  25  75   0
# 3  50  50   0
# 4  25   0  75
# 5  50   0  50
# 6   0 100   0
# 7   0  25  75
# 8   0  50  50
# 9   0   0 100

我如何使用 tidyr 解决这个问题?是否可以用 spread、unite 等函数的某种组合?

我相信有更优雅的方法可以做到这一点,但您可以执行以下操作:

library(dplyr)
library(tidyr)
# Stack both (component, amount) column pairs into one long table keyed by the
# original row number, drop the "-" placeholder rows, then spread the
# component names into columns; combinations that do not occur become 0.
# seq_len() replaces 1:nrow(dat) so that a zero-row `dat` gives an empty id
# vector instead of c(1, 0).
# NOTE: tidyr documents spread() as superseded by pivot_wider(); it is kept
# here because the question asks about spread().
wideDf <- data.frame(
  id = rep(seq_len(nrow(dat)), times = 2),
  var = c(dat$var1, dat$var2),
  val = c(dat$val1, dat$val2)
) %>%
  filter(var != "-") %>%
  tidyr::spread(key = var, value = val, fill = 0)

这给你:

> wideDf
#   id   A   B   C
# 1  1 100   0   0
# 2  2  25  75   0
# 3  3  50  50   0
# 4  4  25   0  75
# 5  5  50   0  50
# 6  6   0 100   0
# 7  7   0  25  75
# 8  8   0  50  50
# 9  9   0   0 100

这是一种不太可靠的方法,但似乎适用于您的示例。或许您可以将其作为更好解决方案的灵感。

# For every row of `dat`, build a named amount vector over A/B/C; apply()
# returns these as columns, so t() turns them back into one row per input row.
# apply() converts the data frame to a matrix first, so each row arrives as a
# character vector -- hence the as.numeric() below.
t(apply(dat, MARGIN = 1, FUN = function(rec) {
  # Amounts (columns 3-4) labelled with their component names (columns 1-2);
  # this only works for exactly two component columns.
  amounts <- as.numeric(rec[3:4])
  names(amounts) <- rec[1:2]
  # Sort by name so the positional assignment below lines up with the
  # A, B, C order of the template.
  amounts <- amounts[order(names(amounts))]
  # Template with every component set to zero.
  template <- c(A = 0, B = 0, C = 0)
  # Fill the slots whose component occurs in this row; names that are not in
  # the template (the "-" placeholder) are left out.
  present <- is.element(names(template), names(amounts))
  known <- is.element(names(amounts), names(template))
  template[present] <- amounts[known]

  template
}))

        A   B   C
 [1,] 100   0   0
 [2,]  25  75   0
 [3,]  50  50   0
 [4,]  25   0  75
 [5,]  50   0  50
 [6,]   0 100   0
 [7,]   0  25  75
 [8,]   0  50  50
 [9,]   0   0 100

我会分两部分进行,然后将它们加在一起

library("tidyverse")
# Spread each (component, amount) pair on its own, then add the two results.
# rowid_to_column() adds an integer `rowid` as the FIRST column. It keeps every
# input row distinct and, because spread() orders rows by the remaining
# columns, both results come back in the same numeric row order. The character
# id from rownames_to_column() would sort "10" before "2" once `dat` has ten
# or more rows.
v1 <- dat %>%
  rowid_to_column() %>%
  spread(key = var1, value = val1, fill = 0) %>%
  select(A, B, C)
# var2 never contains "A", so the "-" column is renamed to A here. This relies
# on val2 being 0 wherever var2 is "-", as it is in the example data.
v2 <- dat %>%
  rowid_to_column() %>%
  spread(key = var2, value = val2, fill = 0) %>%
  select(A = `-`, B, C)

v1 + v2

如果你想使用 reshape2:

# Long-to-wide with reshape2::dcast().
# Use an integer row id: the character row names would order "10" before "2"
# in the cast result once `dat` has ten or more rows.
dat$id <- seq_len(nrow(dat))
local({
  # Stack both (component, amount) pairs under common column names, selecting
  # columns by name rather than by position. Base setNames() is used because
  # setnames() belongs to data.table, which this snippet never loads.
  long <- rbind(
    dat[, c("id", "var1", "val1")],
    setNames(dat[, c("id", "var2", "val2")], c("id", "var1", "val1"))
  )
  wide <- reshape2::dcast(long, id ~ var1, value.var = "val1", fill = 0)
  # Drop the "-" placeholder column by name instead of assuming it is the
  # second column.
  wide[, names(wide) != "-", drop = FALSE]
})

#   id   A   B   C
# 1  1 100   0   0
# 2  2  25  75   0
# 3  3  50  50   0
# 4  4  25   0  75
# 5  5  50   0  50
# 6  6   0 100   0
# 7  7   0  25  75
# 8  8   0  50  50
# 9  9   0   0 100