在 R 中计算数据框任意两行之间的匹配项数量
count matches between two rows of dataframe R
我有一个数据框如下:
LA LE LI LO LU
A 1 0 0 0 0
B 0 0 1 1 1
C 0 0 0 0 0
D 1 0 0 0 0
E 1 0 1 1 0
我想计算两行之间有多少个值匹配,但不把同为 0 的情况算作匹配,只统计同为 1 的情况。例如 AxD = 1,BxE = 2,并且我希望能对每一种两行组合都得到这些值。我正在使用
# Load the tab-separated 0/1 table; the first column supplies the row names.
df <- read.table(file = 'rmatr.txt', sep = "\t", header = TRUE, row.names = 1)
来读取我的文件
# For every unordered pair of rows (i < j) of `df`, count the columns in which
# both rows are equal to 1; positions where both rows are 0 are not counted.
#
# `tmp` is a 3-row integer matrix with one column per pair:
#   row 1 = index of the first row, row 2 = index of the second row,
#   row 3 = number of columns where both rows equal 1.
#
# The previous nested sapply() returned a plain matrix instead of a list when
# `df` had exactly two rows, which made do.call(cbind, ...) fail, and
# 1:(nrow(df) - 1) counted downwards for fewer than two rows. vapply() with a
# fixed integer(3) template always yields a 3 x k matrix, including k = 0.
tmp <- local({
  # Compare against 1 once instead of re-slicing the data frame per pair.
  is_one <- df == 1
  n_rows <- nrow(df)
  # combn() errors when asked for pairs out of fewer than two items.
  pairs <- if (n_rows >= 2L) {
    combn(seq_len(n_rows), 2L)
  } else {
    matrix(integer(0), nrow = 2L)
  }
  vapply(
    seq_len(ncol(pairs)),
    function(k) {
      i <- pairs[1L, k]
      j <- pairs[2L, k]
      c(i, j, sum(is_one[i, ] & is_one[j, ]))
    },
    integer(3)
  )
})
# Translate the row indices back to row names for the final result table.
data.frame(
  Row1 = rownames(df)[tmp[1, ]],
  Row2 = rownames(df)[tmp[2, ]],
  cnt = tmp[3, ]
)
Row1 Row2 cnt
1 A B 0
2 A C 0
3 A D 1
4 A E 1
5 B C 0
6 B D 0
7 B E 2
8 C D 0
9 C E 0
10 D E 1
也许你可以像下面那样尝试combn
# Enumerate every unordered pair of rows with combn(): once over the row names
# (for the label columns) and once over the rows themselves, where each pair is
# multiplied elementwise and summed to give the number of shared 1s.
cbind(
  data.frame(t(combn(rownames(df), 2))),
  cnt = combn(
    asplit(df, 1), 2,
    FUN = function(pair) sum(pair[[1]] * pair[[2]])
  )
)
这给出了
X1 X2 cnt
1 A B 0
2 A C 0
3 A D 1
4 A E 1
5 B C 0
6 B D 0
7 B E 2
8 C D 0
9 C E 0
10 D E 1
数据
> dput(df)
structure(list(LA = c(1L, 0L, 0L, 1L, 1L), LE = c(0L, 0L, 0L,
0L, 0L), LI = c(0L, 1L, 0L, 0L, 1L), LO = c(0L, 1L, 0L, 0L, 1L
), LU = c(0L, 1L, 0L, 0L, 0L)), class = "data.frame", row.names = c("A",
"B", "C", "D", "E"))
我们还可以使用 base R
中的 tcrossprod
# tcrossprod() multiplies the row matrix by its own transpose, so cell (i, j)
# is the sum over columns of row i times row j (for 0/1 data: the number of
# shared 1s). as.data.frame.table() unrolls that matrix into long form with
# columns Var1, Var2 and Freq.
out <- as.data.frame.table(tcrossprod(as.matrix(df)))
# Sort the two labels within each row so that (B, A) and (A, B) share one key.
out[c("Var1", "Var2")] <- t(apply(out[c("Var1", "Var2")], 1, sort))
# Keep the first occurrence of each pair and drop the self-pairs (diagonal).
out[which(out$Var1 != out$Var2 & !duplicated(out[c("Var1", "Var2")])), ]
# Var1 Var2 Freq
#2 A B 0
#3 A C 0
#4 A D 1
#5 A E 1
#8 B C 0
#9 B D 0
#10 B E 2
#14 C D 0
#15 C E 0
#20 D E 1
数据
# Example data: five rows (A-E) of 0/1 integer flags in columns LA, LE, LI, LO
# and LU.
df <- data.frame(
  LA = c(1L, 0L, 0L, 1L, 1L),
  LE = c(0L, 0L, 0L, 0L, 0L),
  LI = c(0L, 1L, 0L, 0L, 1L),
  LO = c(0L, 1L, 0L, 0L, 1L),
  LU = c(0L, 1L, 0L, 0L, 0L),
  row.names = c("A", "B", "C", "D", "E")
)
我有一个数据框如下:
LA LE LI LO LU
A 1 0 0 0 0
B 0 0 1 1 1
C 0 0 0 0 0
D 1 0 0 0 0
E 1 0 1 1 0
我想计算两行之间有多少个值匹配,但不把同为 0 的情况算作匹配,只统计同为 1 的情况。例如 AxD = 1,BxE = 2,并且我希望能对每一种两行组合都得到这些值。我正在使用
# Load the tab-separated 0/1 table; the first column supplies the row names.
df <- read.table(file = 'rmatr.txt', sep = "\t", header = TRUE, row.names = 1)
来读取我的文件
# For every unordered pair of rows (i < j) of `df`, count the columns in which
# both rows are equal to 1; positions where both rows are 0 are not counted.
#
# `tmp` is a 3-row integer matrix with one column per pair:
#   row 1 = index of the first row, row 2 = index of the second row,
#   row 3 = number of columns where both rows equal 1.
#
# The previous nested sapply() returned a plain matrix instead of a list when
# `df` had exactly two rows, which made do.call(cbind, ...) fail, and
# 1:(nrow(df) - 1) counted downwards for fewer than two rows. vapply() with a
# fixed integer(3) template always yields a 3 x k matrix, including k = 0.
tmp <- local({
  # Compare against 1 once instead of re-slicing the data frame per pair.
  is_one <- df == 1
  n_rows <- nrow(df)
  # combn() errors when asked for pairs out of fewer than two items.
  pairs <- if (n_rows >= 2L) {
    combn(seq_len(n_rows), 2L)
  } else {
    matrix(integer(0), nrow = 2L)
  }
  vapply(
    seq_len(ncol(pairs)),
    function(k) {
      i <- pairs[1L, k]
      j <- pairs[2L, k]
      c(i, j, sum(is_one[i, ] & is_one[j, ]))
    },
    integer(3)
  )
})
# Translate the row indices back to row names for the final result table.
data.frame(
  Row1 = rownames(df)[tmp[1, ]],
  Row2 = rownames(df)[tmp[2, ]],
  cnt = tmp[3, ]
)
Row1 Row2 cnt
1 A B 0
2 A C 0
3 A D 1
4 A E 1
5 B C 0
6 B D 0
7 B E 2
8 C D 0
9 C E 0
10 D E 1
也许你可以像下面那样尝试combn
# Enumerate every unordered pair of rows with combn(): once over the row names
# (for the label columns) and once over the rows themselves, where each pair is
# multiplied elementwise and summed to give the number of shared 1s.
cbind(
  data.frame(t(combn(rownames(df), 2))),
  cnt = combn(
    asplit(df, 1), 2,
    FUN = function(pair) sum(pair[[1]] * pair[[2]])
  )
)
这给出了
X1 X2 cnt
1 A B 0
2 A C 0
3 A D 1
4 A E 1
5 B C 0
6 B D 0
7 B E 2
8 C D 0
9 C E 0
10 D E 1
数据
> dput(df)
structure(list(LA = c(1L, 0L, 0L, 1L, 1L), LE = c(0L, 0L, 0L,
0L, 0L), LI = c(0L, 1L, 0L, 0L, 1L), LO = c(0L, 1L, 0L, 0L, 1L
), LU = c(0L, 1L, 0L, 0L, 0L)), class = "data.frame", row.names = c("A",
"B", "C", "D", "E"))
我们还可以使用 base R
中的 tcrossprod
# tcrossprod() multiplies the row matrix by its own transpose, so cell (i, j)
# is the sum over columns of row i times row j (for 0/1 data: the number of
# shared 1s). as.data.frame.table() unrolls that matrix into long form with
# columns Var1, Var2 and Freq.
out <- as.data.frame.table(tcrossprod(as.matrix(df)))
# Sort the two labels within each row so that (B, A) and (A, B) share one key.
out[c("Var1", "Var2")] <- t(apply(out[c("Var1", "Var2")], 1, sort))
# Keep the first occurrence of each pair and drop the self-pairs (diagonal).
out[which(out$Var1 != out$Var2 & !duplicated(out[c("Var1", "Var2")])), ]
# Var1 Var2 Freq
#2 A B 0
#3 A C 0
#4 A D 1
#5 A E 1
#8 B C 0
#9 B D 0
#10 B E 2
#14 C D 0
#15 C E 0
#20 D E 1
数据
# Example data: five rows (A-E) of 0/1 integer flags in columns LA, LE, LI, LO
# and LU.
df <- data.frame(
  LA = c(1L, 0L, 0L, 1L, 1L),
  LE = c(0L, 0L, 0L, 0L, 0L),
  LI = c(0L, 1L, 0L, 0L, 1L),
  LO = c(0L, 1L, 0L, 0L, 1L),
  LU = c(0L, 1L, 0L, 0L, 0L),
  row.names = c("A", "B", "C", "D", "E")
)