R: 在 apply() 函数中使用 data.table
R: Using data.table inside the function of apply()
我有一个距离矩阵,每一行是一个个体,每一列是一个设施,每个单元格是该个体到该设施的距离。
> head(ODMatrix, 5)
toFacility1 toFacility2 toFacility3 toFacility4 toFacility5 toFacility6 toFacility7 toFacility8 toFacility9 toFacility10
1: 4154.229 1835.176 5228.835 8093.985 7813.0557 2396.326 4055.081 4199.636 6790.750 4206.637
2: 4075.044 4848.875 3403.399 2575.370 501.4027 1072.520 1860.508 3188.388 2639.671 6118.273
3: 5660.299 3767.281 7249.469 4276.207 1917.6547 1288.333 3956.757 4511.083 1576.480 4940.198
4: 6853.425 1385.334 8696.045 7012.102 3201.9396 1708.367 4052.216 5352.751 5315.842 3218.540
5: 6746.253 1735.916 8397.047 5014.986 4820.9541 1681.347 3728.737 5334.818 6826.545 2085.071
一些设施是车站,一些设施是投票站。我想知道到这两类设施的最小距离中哪一个更短。设施 1、2 和 3 是车站,因此 station_col_numbers <- c(1,2,3)。其他设施是投票站。
例如,在第一行的情况下,离他最近的车站是 Facility2(1835.176m),离他最近的投票站是 Facility6(2396.326)。然后,我真正想知道的是哪个更接近。在这种情况下,因为 1835.176 < 2396.326,车站对他来说更近,所以 0 是这一行的虚拟变量。
# Asker's original attempt, kept exactly as posted; it fails when called
# through apply(). Intended result: 1 when the nearest polling station is at
# least as close as the nearest station, otherwise 0.
analyse <- function(row_I){
# apply() hands each row over as a plain vector, so the two-index,
# data.table-style subsetting below raises "incorrect number of dimensions".
row_I_withoutStation <- row_I[ , -station_col_numbers, with=F]
row_I_ToStation <- row_I[ , station_col_numbers, with=F]
toStation_min <- min(row_I_ToStation)
# NOTE: col_I_withoutStation is never defined in this function; the variable
# assigned above is named row_I_withoutStation.
toPollStation_min <- min(col_I_withoutStation)
if (toStation_min >= toPollStation_min){
return(1)
}else{
return(0)
}
}
但是,当我使用 apply() 时,它失败了。
Dummy <- apply(ODMatrix, 1, analyse)
Error in row_I[, -station_col_numbers, with = F] :
incorrect number of dimensions
这是对 apply() 的误用吗?我该如何解决?
你的函数里有一些拼写错误和用法错误:apply() 传给函数的每一行是普通向量,应使用单下标取子集;另外变量名 col_I_withoutStation 与前面定义的 row_I_withoutStation 不一致。修改如下:
# Return 1 when the closest polling station is at least as near as the closest
# station, and 0 when a station is strictly nearer.
#
# row_I: one row of distances as a plain vector, which is what
#   apply(ODMatrix, 1, analyse) passes in.
# The station columns are taken from the global `station_col_numbers`; every
# other position is treated as a polling station.
analyse <- function(row_I) {
  nearest_station <- min(row_I[station_col_numbers])
  nearest_poll <- min(row_I[-station_col_numbers])
  if (nearest_station >= nearest_poll) 1 else 0
}
# Call analyse() once per row (MARGIN = 1); each row arrives as a plain vector.
apply(ODMatrix, 1, analyse)
你会得到
[1] 0 1 1 0 1
在 base R 中,您可以创建一个 0/1 整数向量,指示投票站是否最接近:
# 1 when the nearest polling station (columns 4-10) is strictly closer than the
# nearest station (columns 1-3), otherwise 0; unary + converts logical to 0/1.
ODMatrix$poll.closest <- +(
  apply(ODMatrix[, 4:10], 1, min) < apply(ODMatrix[, 1:3], 1, min)
)
给出:
> ODMatrix
toFacility1 toFacility2 toFacility3 toFacility4 toFacility5 toFacility6 toFacility7 toFacility8 toFacility9 toFacility10 poll.closest
1: 4154.229 1835.176 5228.835 8093.985 7813.0557 2396.326 4055.081 4199.636 6790.750 4206.637 0
2: 4075.044 4848.875 3403.399 2575.370 501.4027 1072.520 1860.508 3188.388 2639.671 6118.273 1
3: 5660.299 3767.281 7249.469 4276.207 1917.6547 1288.333 3956.757 4511.083 1576.480 4940.198 1
4: 6853.425 1385.334 8696.045 7012.102 3201.9396 1708.367 4052.216 5352.751 5315.842 3218.540 0
5: 6746.253 1735.916 8397.047 5014.986 4820.9541 1681.347 3728.737 5334.818 6826.545 2085.071 1
使用 data.table 你可以:
# Column names of the two facility groups.
stations <- names(ODMatrix)[1:3]
pollstations <- names(ODMatrix)[4:10]
# Temporary row id so that every row forms its own group.
ODMatrix[, idx := .I]
# Per-row minimum over each group of columns.
ODMatrix[, dist.station := min(.SD), by = idx, .SDcols = stations]
ODMatrix[, dist.poll := min(.SD), by = idx, .SDcols = pollstations]
# 1 when the nearest polling station is strictly closer, otherwise 0.
ODMatrix[, poll.closest := +(dist.station > dist.poll)]
# Remove the helper columns again.
ODMatrix[, c("idx", "dist.station", "dist.poll") := NULL]
得到相同的结果。或者,您也可以使用:
# pmin() already works element-wise across whole columns, so no per-row
# grouping is needed. The unary + turns the logical comparison into 0/1 so the
# column has the same type as in the other approaches.
ODMatrix[, poll.closest := +(
  pmin(toFacility1, toFacility2, toFacility3) >
    pmin(toFacility4, toFacility5, toFacility6, toFacility7,
         toFacility8, toFacility9, toFacility10)
)]
我有一个距离矩阵,每一行是一个个体,每一列是一个设施,每个单元格是该个体到该设施的距离。
> head(ODMatrix, 5)
toFacility1 toFacility2 toFacility3 toFacility4 toFacility5 toFacility6 toFacility7 toFacility8 toFacility9 toFacility10
1: 4154.229 1835.176 5228.835 8093.985 7813.0557 2396.326 4055.081 4199.636 6790.750 4206.637
2: 4075.044 4848.875 3403.399 2575.370 501.4027 1072.520 1860.508 3188.388 2639.671 6118.273
3: 5660.299 3767.281 7249.469 4276.207 1917.6547 1288.333 3956.757 4511.083 1576.480 4940.198
4: 6853.425 1385.334 8696.045 7012.102 3201.9396 1708.367 4052.216 5352.751 5315.842 3218.540
5: 6746.253 1735.916 8397.047 5014.986 4820.9541 1681.347 3728.737 5334.818 6826.545 2085.071
一些设施是车站,一些设施是投票站。我想知道到这两类设施的最小距离中哪一个更短。设施 1、2 和 3 是车站,因此 station_col_numbers <- c(1,2,3)。其他设施是投票站。
例如,在第一行的情况下,离他最近的车站是 Facility2(1835.176m),离他最近的投票站是 Facility6(2396.326)。然后,我真正想知道的是哪个更接近。在这种情况下,因为 1835.176 < 2396.326,车站对他来说更近,所以 0 是这一行的虚拟变量。
# Asker's original attempt, kept exactly as posted; it fails when called
# through apply(). Intended result: 1 when the nearest polling station is at
# least as close as the nearest station, otherwise 0.
analyse <- function(row_I){
# apply() hands each row over as a plain vector, so the two-index,
# data.table-style subsetting below raises "incorrect number of dimensions".
row_I_withoutStation <- row_I[ , -station_col_numbers, with=F]
row_I_ToStation <- row_I[ , station_col_numbers, with=F]
toStation_min <- min(row_I_ToStation)
# NOTE: col_I_withoutStation is never defined in this function; the variable
# assigned above is named row_I_withoutStation.
toPollStation_min <- min(col_I_withoutStation)
if (toStation_min >= toPollStation_min){
return(1)
}else{
return(0)
}
}
但是,当我使用 apply() 时,它失败了。
Dummy <- apply(ODMatrix, 1, analyse)
Error in row_I[, -station_col_numbers, with = F] :
incorrect number of dimensions
这是对 apply() 的误用吗?我该如何解决?
你的函数里有一些拼写错误和用法错误:apply() 传给函数的每一行是普通向量,应使用单下标取子集;另外变量名 col_I_withoutStation 与前面定义的 row_I_withoutStation 不一致。修改如下:
# Return 1 when the closest polling station is at least as near as the closest
# station, and 0 when a station is strictly nearer.
#
# row_I: one row of distances as a plain vector, which is what
#   apply(ODMatrix, 1, analyse) passes in.
# The station columns are taken from the global `station_col_numbers`; every
# other position is treated as a polling station.
analyse <- function(row_I) {
  nearest_station <- min(row_I[station_col_numbers])
  nearest_poll <- min(row_I[-station_col_numbers])
  if (nearest_station >= nearest_poll) 1 else 0
}
# Call analyse() once per row (MARGIN = 1); each row arrives as a plain vector.
apply(ODMatrix, 1, analyse)
你会得到
[1] 0 1 1 0 1
在 base R 中,您可以创建一个 0/1 整数向量,指示投票站是否最接近:
# 1 when the nearest polling station (columns 4-10) is strictly closer than the
# nearest station (columns 1-3), otherwise 0; unary + converts logical to 0/1.
ODMatrix$poll.closest <- +(
  apply(ODMatrix[, 4:10], 1, min) < apply(ODMatrix[, 1:3], 1, min)
)
给出:
> ODMatrix
toFacility1 toFacility2 toFacility3 toFacility4 toFacility5 toFacility6 toFacility7 toFacility8 toFacility9 toFacility10 poll.closest
1: 4154.229 1835.176 5228.835 8093.985 7813.0557 2396.326 4055.081 4199.636 6790.750 4206.637 0
2: 4075.044 4848.875 3403.399 2575.370 501.4027 1072.520 1860.508 3188.388 2639.671 6118.273 1
3: 5660.299 3767.281 7249.469 4276.207 1917.6547 1288.333 3956.757 4511.083 1576.480 4940.198 1
4: 6853.425 1385.334 8696.045 7012.102 3201.9396 1708.367 4052.216 5352.751 5315.842 3218.540 0
5: 6746.253 1735.916 8397.047 5014.986 4820.9541 1681.347 3728.737 5334.818 6826.545 2085.071 1
使用 data.table 你可以:
# Column names of the two facility groups.
stations <- names(ODMatrix)[1:3]
pollstations <- names(ODMatrix)[4:10]
# Temporary row id so that every row forms its own group.
ODMatrix[, idx := .I]
# Per-row minimum over each group of columns.
ODMatrix[, dist.station := min(.SD), by = idx, .SDcols = stations]
ODMatrix[, dist.poll := min(.SD), by = idx, .SDcols = pollstations]
# 1 when the nearest polling station is strictly closer, otherwise 0.
ODMatrix[, poll.closest := +(dist.station > dist.poll)]
# Remove the helper columns again.
ODMatrix[, c("idx", "dist.station", "dist.poll") := NULL]
得到相同的结果。或者,您也可以使用:
# pmin() already works element-wise across whole columns, so no per-row
# grouping is needed. The unary + turns the logical comparison into 0/1 so the
# column has the same type as in the other approaches.
ODMatrix[, poll.closest := +(
  pmin(toFacility1, toFacility2, toFacility3) >
    pmin(toFacility4, toFacility5, toFacility6, toFacility7,
         toFacility8, toFacility9, toFacility10)
)]