将宽数据转换为长格式

Bring wide data into long format

我有一个如下所示的文件,想将其转换为输出中给出的 R 数据框

        A    B    C     D   E   
2010    25   74   85    88  89
2011    27   86   97    99  
2012    37   115  131   
2013    47   146            
2014    56  

输出:

Year  Year_No   Division   Amount 
2010    1       A          25
2010    1       B          74
2010    1       C          85
2010    1       D          88
2010    1       E          89
2011    2       A          27
2011    2       B          86
2011    2       C          97
2011    2       D          99
2012    3       A          37
2012    3       B          115
2012    3       C          131
2013    4       A          47
2013    4       B          146
2014    5       A          56

如果有人能帮我解决这个问题,我将不胜感激

我们可以转换为 matrix 然后 melt

library(reshape2)
library(data.table)
dt <- setDT(melt(as.matrix(df1), na.rm = TRUE))[ , YearNo := .GRP, Var1][order(Var1)]
setnames(dt, c("Var1", "Var2", "value"), c("Year", "Division", "Amount"))[]
#     Year Division Amount YearNo
# 1: 2010        A     25      1
# 2: 2010        B     74      1
# 3: 2010        C     85      1
# 4: 2010        D     88      1
# 5: 2010        E     89      1
# 6: 2011        A     27      2
# 7: 2011        B     86      2
# 8: 2011        C     97      2
# 9: 2011        D     99      2
#10: 2012        A     37      3
#11: 2012        B    115      3
#12: 2012        C    131      3
#13: 2013        A     47      4
#14: 2013        B    146      4
#15: 2014        A     56      5
 

注意:假设缺失值是 NA,因为它似乎是数字列,即 'A' 到 'E'

数据

df1 <- structure(list(A = c(25L, 27L, 37L, 47L, 56L), B = c(74L, 86L, 
115L, 146L, NA), C = c(85L, 97L, 131L, NA, NA), D = c(88L, 99L, 
NA, NA, NA), E = c(89L, NA, NA, NA, NA)), .Names = c("A", "B", 
"C", "D", "E"), class = "data.frame", row.names = c("2010", "2011", 
"2012", "2013", "2014"))

我们可以使用 tidyverse.

中的函数
library(tidyverse)

dt2 <- dt %>%
  rownames_to_column("Year") %>%
  rowid_to_column("Year_No") %>%
  gather(Division, Amount, -Year, -Year_No, na.rm = TRUE) %>%
  arrange(Year_No, Division) %>% 
  select(Year_No, Year, Division, Amount)
dt2
   Year_No Year Division Amount
1        1 2010        A     25
2        1 2010        B     74
3        1 2010        C     85
4        1 2010        D     88
5        1 2010        E     89
6        2 2011        A     27
7        2 2011        B     86
8        2 2011        C     97
9        2 2011        D     99
10       3 2012        A     37
11       3 2012        B    115
12       3 2012        C    131
13       4 2013        A     47
14       4 2013        B    146
15       5 2014        A     56

数据

dt <- read.table(text = "        A    B    C     D   E   
2010    25   74   85    88  89
                 2011    27   86   97    99  NA 
                 2012    37   115  131   NA  NA
                 2013    47   146  NA    NA  NA           
                 2014    56  NA  NA  NA  NA",
                 header = TRUE)

基础中的简单解决方案

# create the data
mat <- matrix(1:30,6,5)
for(z in 1:nrow(mat)){
  a <- (1:(5-z+1))*-1
  mat[z,a] <- NA
}
rownames(mat) <- 2012:2017
colnames(mat) <- LETTERS[1:5]
mat

# start the task
col <- rep(colnames(mat), each=nrow(mat))
value <- as.vector(mat)
row <- rownames(mat)
table <- data.frame(col,row,value)
table <- table[!is.na(table$value),]
table