将宽数据转换为长格式
Bring wide data into long format
我有一个如下所示的文件,想将其转换为输出中给出的 R 数据框
A B C D E
2010 25 74 85 88 89
2011 27 86 97 99
2012 37 115 131
2013 47 146
2014 56
输出:
Year Year_No Division Amount
2010 1 A 25
2010 1 B 74
2010 1 C 85
2010 1 D 88
2010 1 E 89
2011 2 A 27
2011 2 B 86
2011 2 C 97
2011 2 D 99
2012 3 A 37
2012 3 B 115
2012 3 C 131
2013 4 A 47
2013 4 B 146
2014 5 A 56
如果有人能帮我解决这个问题,我将不胜感激
我们可以先把数据转换为 `matrix`,然后对它使用 `melt`:
library(reshape2)
library(data.table)
# Melt the matrix form of df1: row names end up in Var1, column names in Var2,
# and cells that are NA are dropped.
dt <- setDT(melt(as.matrix(df1), na.rm = TRUE))
# Number the Var1 groups in order of first appearance.
dt[, YearNo := .GRP, by = Var1]
dt <- dt[order(Var1)]
# Rename by reference, then print the result.
setnames(
  dt,
  old = c("Var1", "Var2", "value"),
  new = c("Year", "Division", "Amount")
)
dt[]
# Year Division Amount YearNo
# 1: 2010 A 25 1
# 2: 2010 B 74 1
# 3: 2010 C 85 1
# 4: 2010 D 88 1
# 5: 2010 E 89 1
# 6: 2011 A 27 2
# 7: 2011 B 86 2
# 8: 2011 C 97 2
# 9: 2011 D 99 2
#10: 2012 A 37 3
#11: 2012 B 115 3
#12: 2012 C 131 3
#13: 2013 A 47 4
#14: 2013 B 146 4
#15: 2014 A 56 5
注意:这里假设缺失值为 `NA`,因为 'A' 到 'E' 这几列看起来都是数值列。
数据
# Example data: years are stored as row names, one integer column per
# division, and NA marks the cells that have no value.
df1 <- data.frame(
  A = c(25L, 27L, 37L, 47L, 56L),
  B = c(74L, 86L, 115L, 146L, NA),
  C = c(85L, 97L, 131L, NA, NA),
  D = c(88L, 99L, NA, NA, NA),
  E = c(89L, NA, NA, NA, NA),
  row.names = c("2010", "2011", "2012", "2013", "2014")
)
我们可以使用 `tidyverse` 中的函数。
library(tidyverse)
# Reshape the wide table `dt` (years as row names, one column per division)
# into one row per (year, division) pair, dropping the NA cells.
dt2 <- dt %>%
  # Move the row names into an explicit Year column.
  rownames_to_column("Year") %>%
  # Add a 1-based row index as Year_No.
  rowid_to_column("Year_No") %>%
  # pivot_longer() replaces the superseded gather(); values_drop_na = TRUE
  # plays the role of gather(na.rm = TRUE).
  pivot_longer(
    cols = -c(Year_No, Year),
    names_to = "Division",
    values_to = "Amount",
    values_drop_na = TRUE
  ) %>%
  arrange(Year_No, Division) %>%
  select(Year_No, Year, Division, Amount) %>%
  # pivot_longer() may hand back a tibble; convert so the result still prints
  # as a plain data frame.
  as.data.frame()
dt2
Year_No Year Division Amount
1 1 2010 A 25
2 1 2010 B 74
3 1 2010 C 85
4 1 2010 D 88
5 1 2010 E 89
6 2 2011 A 27
7 2 2011 B 86
8 2 2011 C 97
9 2 2011 D 99
10 3 2012 A 37
11 3 2012 B 115
12 3 2012 C 131
13 4 2013 A 47
14 4 2013 B 146
15 5 2014 A 56
数据
# Example input. The header line has one fewer field than the data lines, so
# read.table() takes the first column (the years) as row names.
dt <- read.table(
  header = TRUE,
  text = " A B C D E
2010 25 74 85 88 89
2011 27 86 97 99 NA
2012 37 115 131 NA NA
2013 47 146 NA NA NA
2014 56 NA NA NA NA"
)
使用基础 R(base R)的简单解决方案
# create the data
# 6 x 5 integer matrix filled column-wise with 1:30. Row i then keeps only its
# first (ncol - i + 1) cells and the rest become NA; max(..., 1) makes the
# last row keep exactly one cell instead of depending on how 1:0 expands.
mat <- matrix(1:30, nrow = 6, ncol = 5)
for (i in seq_len(nrow(mat))) {
  keep <- seq_len(max(ncol(mat) - i + 1, 1))
  mat[i, -keep] <- NA
}
rownames(mat) <- 2012:2017
colnames(mat) <- LETTERS[1:5]
mat
# start the task
# as.vector() unrolls the matrix column by column, so each column name is
# repeated once per row and the row names cycle once per column. Both are
# spelled out with rep() instead of relying on data.frame() recycling, and no
# intermediate globals named `col` / `row` are created (they would shadow the
# base functions of the same name).
table <- data.frame(
  col = rep(colnames(mat), each = nrow(mat)),
  row = rep(rownames(mat), times = ncol(mat)),
  value = as.vector(mat)
)
# Keep only the cells that actually hold a value.
table <- table[!is.na(table$value), ]
table
我有一个如下所示的文件,想将其转换为输出中给出的 R 数据框
A B C D E
2010 25 74 85 88 89
2011 27 86 97 99
2012 37 115 131
2013 47 146
2014 56
输出:
Year Year_No Division Amount
2010 1 A 25
2010 1 B 74
2010 1 C 85
2010 1 D 88
2010 1 E 89
2011 2 A 27
2011 2 B 86
2011 2 C 97
2011 2 D 99
2012 3 A 37
2012 3 B 115
2012 3 C 131
2013 4 A 47
2013 4 B 146
2014 5 A 56
如果有人能帮我解决这个问题,我将不胜感激
我们可以先把数据转换为 `matrix`,然后对它使用 `melt`:
library(reshape2)
library(data.table)
# Melt the matrix form of df1: row names end up in Var1, column names in Var2,
# and cells that are NA are dropped.
dt <- setDT(melt(as.matrix(df1), na.rm = TRUE))
# Number the Var1 groups in order of first appearance.
dt[, YearNo := .GRP, by = Var1]
dt <- dt[order(Var1)]
# Rename by reference, then print the result.
setnames(
  dt,
  old = c("Var1", "Var2", "value"),
  new = c("Year", "Division", "Amount")
)
dt[]
# Year Division Amount YearNo
# 1: 2010 A 25 1
# 2: 2010 B 74 1
# 3: 2010 C 85 1
# 4: 2010 D 88 1
# 5: 2010 E 89 1
# 6: 2011 A 27 2
# 7: 2011 B 86 2
# 8: 2011 C 97 2
# 9: 2011 D 99 2
#10: 2012 A 37 3
#11: 2012 B 115 3
#12: 2012 C 131 3
#13: 2013 A 47 4
#14: 2013 B 146 4
#15: 2014 A 56 5
注意:这里假设缺失值为 `NA`,因为 'A' 到 'E' 这几列看起来都是数值列。
数据
# Example data: years are stored as row names, one integer column per
# division, and NA marks the cells that have no value.
df1 <- data.frame(
  A = c(25L, 27L, 37L, 47L, 56L),
  B = c(74L, 86L, 115L, 146L, NA),
  C = c(85L, 97L, 131L, NA, NA),
  D = c(88L, 99L, NA, NA, NA),
  E = c(89L, NA, NA, NA, NA),
  row.names = c("2010", "2011", "2012", "2013", "2014")
)
我们可以使用 `tidyverse` 中的函数。
library(tidyverse)
# Reshape the wide table `dt` (years as row names, one column per division)
# into one row per (year, division) pair, dropping the NA cells.
dt2 <- dt %>%
  # Move the row names into an explicit Year column.
  rownames_to_column("Year") %>%
  # Add a 1-based row index as Year_No.
  rowid_to_column("Year_No") %>%
  # pivot_longer() replaces the superseded gather(); values_drop_na = TRUE
  # plays the role of gather(na.rm = TRUE).
  pivot_longer(
    cols = -c(Year_No, Year),
    names_to = "Division",
    values_to = "Amount",
    values_drop_na = TRUE
  ) %>%
  arrange(Year_No, Division) %>%
  select(Year_No, Year, Division, Amount) %>%
  # pivot_longer() may hand back a tibble; convert so the result still prints
  # as a plain data frame.
  as.data.frame()
dt2
Year_No Year Division Amount
1 1 2010 A 25
2 1 2010 B 74
3 1 2010 C 85
4 1 2010 D 88
5 1 2010 E 89
6 2 2011 A 27
7 2 2011 B 86
8 2 2011 C 97
9 2 2011 D 99
10 3 2012 A 37
11 3 2012 B 115
12 3 2012 C 131
13 4 2013 A 47
14 4 2013 B 146
15 5 2014 A 56
数据
# Example input. The header line has one fewer field than the data lines, so
# read.table() takes the first column (the years) as row names.
dt <- read.table(
  header = TRUE,
  text = " A B C D E
2010 25 74 85 88 89
2011 27 86 97 99 NA
2012 37 115 131 NA NA
2013 47 146 NA NA NA
2014 56 NA NA NA NA"
)
使用基础 R(base R)的简单解决方案
# create the data
# 6 x 5 integer matrix filled column-wise with 1:30. Row i then keeps only its
# first (ncol - i + 1) cells and the rest become NA; max(..., 1) makes the
# last row keep exactly one cell instead of depending on how 1:0 expands.
mat <- matrix(1:30, nrow = 6, ncol = 5)
for (i in seq_len(nrow(mat))) {
  keep <- seq_len(max(ncol(mat) - i + 1, 1))
  mat[i, -keep] <- NA
}
rownames(mat) <- 2012:2017
colnames(mat) <- LETTERS[1:5]
mat
# start the task
# as.vector() unrolls the matrix column by column, so each column name is
# repeated once per row and the row names cycle once per column. Both are
# spelled out with rep() instead of relying on data.frame() recycling, and no
# intermediate globals named `col` / `row` are created (they would shadow the
# base functions of the same name).
table <- data.frame(
  col = rep(colnames(mat), each = nrow(mat)),
  row = rep(rownames(mat), times = ncol(mat)),
  value = as.vector(mat)
)
# Keep only the cells that actually hold a value.
table <- table[!is.na(table$value), ]
table