如何按组在R中进行多项算术运算

Question

有几个数据集。第一个

# Single-row data frame holding six integer constants (x1 .. x6).
lid <- data.frame(
  x1 = 619490L,
  x2 = 10L,
  x3 = 0L,
  x4 = 6089230L,
  x5 = 0L,
  x6 = -10L
)

第二个数据集

# Second data set: a 20-row data frame (row.names = c(NA, -20L)) with numeric
# columns A, B, C and gpstime, six integer attribute columns, and an integer
# `group` column (rows 1-10 are group 1, rows 11-20 are group 2).
# NOTE(review): A[13] is 63239.29 while every other A value is ~6382xx --
# possibly a dropped digit; confirm before reusing this data.
lidar=structure(list(A = c(638238.76, 638238.76, 638239.29, 638235.39, 
638233.86, 638233.86, 638235.55, 638231.97, 638231.91, 638228.41, 
638238.76, 638238.76, 63239.29, 638235.39, 638233.86, 638233.86, 
638235.55, 638231.97, 638231.91, 638228.41), B = c(6078001.09, 
6078001.09, 6078001.15, 6078001.15, 6078001.07, 6078001.07, 6078001.02, 
6078001.08, 6078001.09, 6078001.01, 6078001.09, 6078001.09, 6078001.15, 
6078001.15, 6078001.07, 6078001.07, 6078001.02, 6078001.08, 6078001.09, 
6078001.01), C = c(186.64, 186.59, 199.28, 189.37, 186.67, 186.67, 
198.04, 200.03, 199.73, 192.14, 186.64, 186.59, 199.28, 189.37, 
196.67, 186.67, 198.04, 200.03, 199.73, 100.14), gpstime = c(319805734.664265, 
319805734.664265, 319805734.67875, 319805734.678768, 319805734.678777, 
319805734.678777, 319805734.687338, 319805734.701928, 319805734.701928, 
319805734.701945, 319805734.664265, 319805734.664265, 319805734.67875, 
319805734.678768, 319805734.678777, 319805734.678777, 319805734.687338, 
319805734.701928, 319805734.701928, 319805734.701945), Intensity = c(13L, 
99L, 5L, 2L, 20L, 189L, 2L, 11L, 90L, 1L, 13L, 99L, 5L, 2L, 20L, 
189L, 2L, 11L, 90L, 1L), ReturnNumber = c(2L, 1L, 1L, 2L, 1L, 
1L, 2L, 1L, 1L, 3L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 3L), 
    NumberOfReturns = c(2L, 1L, 3L, 2L, 1L, 1L, 3L, 1L, 1L, 4L, 
    2L, 1L, 3L, 2L, 1L, 1L, 3L, 1L, 1L, 4L), ScanDirectionFlag = c(1L, 
    1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 
    1L, 0L, 0L, 0L), EdgeOfFlightline = c(0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
    ), Classification = c(1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), group = c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L)), class = "data.frame", row.names = c(NA, 
-20L))

lid 是静态数据集，其值始终不变。我需要执行以下算术运算：

# Derive three columns over the whole table at once (no grouping):
#   row  - A offset by lid$x1 and divided by lid$x3, rounded to 0 decimals
#   col  - B offset by lid$x4 and divided by lid$x6
#   cdif - max(C) - min(C) over all rows; the single value is recycled
lidar$row <- round((lidar$A - lid$x1) / lid$x3, 0)

lidar$col <- (lidar$B - lid$x4) / lid$x6

lidar$cdif <- max(lidar$C) - min(lidar$C)

但需要对 lidar$group 的每个组分别进行这些运算。

如何使用 dplyr 做得更好？感谢您的帮助。

Answer 1

library(dplyr)
# Compute all three derived columns inside each `group`.
# `lid` has a single row, so lid$x1, lid$x3, ... are length-one values that
# are recycled across the rows of every group.
lidar %>%
  group_by(group) %>%
  mutate(
    # round() keeps `row` identical to the calculation asked for in the question
    row = round((A - lid$x1) / lid$x3, 0),
    col = (B - lid$x4) / lid$x6,
    # spread of C within the current group, recycled to each of its rows
    cdif = max(C) - min(C)
  ) %>%
  ungroup()
# # A tibble: 20 x 14
#          A        B     C    gpstime Intensity ReturnNumber NumberOfReturns ScanDirectionFlag EdgeOfFlightline Classification group   row   col  cdif
#      <dbl>    <dbl> <dbl>      <dbl>     <int>        <int>           <int>             <int>            <int>          <int> <int> <dbl> <dbl> <dbl>
#  1 638239. 6078001.  187. 319805735.        13            2               2                 1                0              1     1   Inf 1123.  13.4
#  2 638239. 6078001.  187. 319805735.        99            1               1                 1                0              2     1   Inf 1123.  13.4
#  3 638239. 6078001.  199. 319805735.         5            1               3                 0                0              1     1   Inf 1123.  13.4
#  4 638235. 6078001.  189. 319805735.         2            2               2                 0                0              1     1   Inf 1123.  13.4
#  5 638234. 6078001.  187. 319805735.        20            1               1                 0                0              1     1   Inf 1123.  13.4
#  6 638234. 6078001.  187. 319805735.       189            1               1                 0                0              1     1   Inf 1123.  13.4
#  7 638236. 6078001.  198. 319805735.         2            2               3                 1                0              1     1   Inf 1123.  13.4
#  8 638232. 6078001.  200. 319805735.        11            1               1                 0                0              1     1   Inf 1123.  13.4
#  9 638232. 6078001.  200. 319805735.        90            1               1                 0                0              1     1   Inf 1123.  13.4
# 10 638228. 6078001.  192. 319805735.         1            3               4                 0                0              1     1   Inf 1123.  13.4
# 11 638239. 6078001.  187. 319805735.        13            2               2                 1                0              1     2   Inf 1123.  99.9
# 12 638239. 6078001.  187. 319805735.        99            1               1                 1                0              2     2   Inf 1123.  99.9
# 13  63239. 6078001.  199. 319805735.         5            1               3                 0                0              1     2  -Inf 1123.  99.9
# 14 638235. 6078001.  189. 319805735.         2            2               2                 0                0              1     2   Inf 1123.  99.9
# 15 638234. 6078001.  197. 319805735.        20            1               1                 0                0              1     2   Inf 1123.  99.9
# 16 638234. 6078001.  187. 319805735.       189            1               1                 0                0              1     2   Inf 1123.  99.9
# 17 638236. 6078001.  198. 319805735.         2            2               3                 1                0              1     2   Inf 1123.  99.9
# 18 638232. 6078001.  200. 319805735.        11            1               1                 0                0              1     2   Inf 1123.  99.9
# 19 638232. 6078001.  200. 319805735.        90            1               1                 0                0              1     2   Inf 1123.  99.9
# 20 638228. 6078001.  100. 319805735.         1            3               4                 0                0              1     2   Inf 1123.  99.9

row 总是 Inf（或 -Inf），因为 lid$x3 是 0。唯一必须分组的部分是 cdif，因为它是唯一进行分组聚合的部分，其余部分可以不分组地完成。

# Same result with less grouped work: `row` and `col` are plain element-wise
# arithmetic, so they are computed once on the ungrouped table; only `cdif`
# aggregates (max/min) and therefore needs group_by().
lidar %>%
  mutate(
    # round() keeps `row` identical to the calculation asked for in the question
    row = round((A - lid$x1) / lid$x3, 0),
    col = (B - lid$x4) / lid$x6
  ) %>%
  group_by(group) %>%
  mutate(cdif = max(C) - min(C)) %>%
  ungroup()

为什么要这样做？对于更大的数据集或分组很多的情况，一次处理整个向量而不是逐组处理会更高效（可能明显更快）。实际计算应该返回相同的结果。

如何按组在R中进行多项算术运算

How to do multiple arithmetic operations in R by group

r

dplyr