计算多个值时分组依据
Group by while calculating multiple values
我正在尝试使用一个函数在 data.table
中按组计算一些统计数据,如下所示:
# Return the smallest and largest value of `vec` as an unnamed length-2
# vector, in the order (minimum, maximum).
minmax <- function(vec) {
  lowest <- min(vec)
  highest <- max(vec)
  c(lowest, highest)
}
# Load data.table and convert iris to a data.table.
library(data.table)
iris <- as.data.table(iris)
# Attempt under discussion: assign the result of minmax() to the columns Min
# and Max for each Species. `:=` adds columns to the existing table rather than
# summarising it, so this does not give the one-row-per-Species result that is
# wanted below.
iris[, c('Min', 'Max') := minmax(Petal.Length), by = Species]
结果应该是每个 Species 的 Petal.Length 的最小值和最大值,并且行数与物种数一样多。即,与以下代码的结果相同:
# Compute the per-Species minimum and maximum as two separate summaries, then
# join them on the shared Species column. merge() takes the join column through
# `by`; `on` is not a merge() parameter (it belongs to the DT[i, on = ] join
# syntax).
merge(
  iris[, .(Min = min(Petal.Length)), Species],
  iris[, .(Max = max(Petal.Length)), Species],
  by = "Species"
)
Species Min Max
1: setosa 1.0 1.9
2: versicolor 3.0 5.1
3: virginica 4.5 6.9
注意:在我自己的代码中,我想一次性完成,而不是使用 merge()
。
这里有几点:
:=
用于向现有框架添加列,因此它不会像您演示的那样进行汇总。也就是说,DT[, a := b]
应该始终具有完全相同的行数。我想这不是你需要的。
你可以在你的总结代码中做多重赋值并取消 merge
(也许这类似于你说你“一次性”做的代码):
# One grouped summary that returns both statistics per Species, so no merge()
# is needed.
iris[, list(
  Min = min(Petal.Length),
  Max = max(Petal.Length)
), by = "Species"]
# Species Min Max
# <fctr> <num> <num>
# 1: setosa 1.0 1.9
# 2: versicolor 3.0 5.1
# 3: virginica 4.5 6.9
但最后,你问的是如何使用你的函数来得到这个。第一次尝试可能是:
# Step 1: minmax() returns an atomic vector. Used directly in j, the two values
# of each group are stacked in a single column (V1), giving two rows per
# Species -- not the shape we want.
minmax <- function(vec) c(min(vec), max(vec))
iris[, minmax(Petal.Length), by = .(Species)]
# Species V1
# <fctr> <num>
# 1: setosa 1.0
# 2: setosa 1.9
# 3: versicolor 3.0
# 4: versicolor 5.1
# 5: virginica 4.5
# 6: virginica 6.9
# Step 2: wrapping the result in as.list() turns each element into its own
# column (V1, V2), one row per Species.
iris[, as.list(minmax(Petal.Length)), by = .(Species)]
# Species V1 V2
# <fctr> <num> <num>
# 1: setosa 1.0 1.9
# 2: versicolor 3.0 5.1
# 3: virginica 4.5 6.9
# Step 3: setNames() on that list gives the columns their final names.
iris[, setNames(as.list(minmax(Petal.Length)), c("Min", "Max")), by = .(Species)]
# Species Min Max
# <fctr> <num> <num>
# 1: setosa 1.0 1.9
# 2: versicolor 3.0 5.1
# 3: virginica 4.5 6.9
# Step 4: alternatively, name the elements inside the function; as.list()
# carries those names through to the column names.
minmax <- function(vec) c(Min = min(vec), Max = max(vec))
iris[, as.list(minmax(Petal.Length)), by = .(Species)]
# Species Min Max
# <fctr> <num> <num>
# 1: setosa 1.0 1.9
# 2: versicolor 3.0 5.1
# 3: virginica 4.5 6.9
因此,我们可以把函数改为直接返回列表(元素可选择命名)。
# Variant A: minmax() returns an unnamed list, so j yields one column per
# element without as.list(); the columns get the default names V1 and V2.
minmax <- function(vec) list(min(vec), max(vec))
iris[, minmax(Petal.Length), by = .(Species)]
# Species V1 V2
# <fctr> <num> <num>
# 1: setosa 1.0 1.9
# 2: versicolor 3.0 5.1
# 3: virginica 4.5 6.9
# Variant B: same function, with the column names supplied at the call site
# via setNames().
iris[, setNames(minmax(Petal.Length), c("Min", "Max")), by = .(Species)]
# Species Min Max
# <fctr> <num> <num>
# 1: setosa 1.0 1.9
# 2: versicolor 3.0 5.1
# 3: virginica 4.5 6.9
# Variant C: minmax() returns a named list, so the result columns are already
# called Min and Max.
minmax <- function(vec) list(Min = min(vec), Max = max(vec))
iris[, minmax(Petal.Length), by = .(Species)]
# Species Min Max
# <fctr> <num> <num>
# 1: setosa 1.0 1.9
# 2: versicolor 3.0 5.1
# 3: virginica 4.5 6.9
我正在尝试使用一个函数在 data.table
中按组计算一些统计数据,如下所示:
# Return the smallest and largest value of `vec` as an unnamed length-2
# vector, in the order (minimum, maximum).
minmax <- function(vec) {
  lowest <- min(vec)
  highest <- max(vec)
  c(lowest, highest)
}
# Load data.table and convert iris to a data.table.
library(data.table)
iris <- as.data.table(iris)
# Attempt under discussion: assign the result of minmax() to the columns Min
# and Max for each Species. `:=` adds columns to the existing table rather than
# summarising it, so this does not give the one-row-per-Species result that is
# wanted below.
iris[, c('Min', 'Max') := minmax(Petal.Length), by = Species]
结果应该是每个 Species 的 Petal.Length 的最小值和最大值,并且行数与物种数一样多。即,与以下代码的结果相同:
# Compute the per-Species minimum and maximum as two separate summaries, then
# join them on the shared Species column. merge() takes the join column through
# `by`; `on` is not a merge() parameter (it belongs to the DT[i, on = ] join
# syntax).
merge(
  iris[, .(Min = min(Petal.Length)), Species],
  iris[, .(Max = max(Petal.Length)), Species],
  by = "Species"
)
Species Min Max
1: setosa 1.0 1.9
2: versicolor 3.0 5.1
3: virginica 4.5 6.9
注意:在我自己的代码中,我想一次性完成,而不是使用 merge()
。
这里有几点:
:=
用于向现有框架添加列,因此它不会像您演示的那样进行汇总。也就是说,DT[, a := b]
应该始终具有完全相同的行数。我想这不是你需要的。你可以在你的总结代码中做多重赋值并取消
merge
(也许这类似于你说你“一次性”做的代码):
iris[, .(Min = min(Petal.Length), Max = max(Petal.Length)), by = .(Species)]
# Species Min Max
# <fctr> <num> <num>
# 1: setosa 1.0 1.9
# 2: versicolor 3.0 5.1
# 3: virginica 4.5 6.9
但最后,你问的是如何使用你的函数来得到这个。第一次尝试可能是:
# Step 1: minmax() returns an atomic vector. Used directly in j, the two values
# of each group are stacked in a single column (V1), giving two rows per
# Species -- not the shape we want.
minmax <- function(vec) c(min(vec), max(vec))
iris[, minmax(Petal.Length), by = .(Species)]
# Species V1
# <fctr> <num>
# 1: setosa 1.0
# 2: setosa 1.9
# 3: versicolor 3.0
# 4: versicolor 5.1
# 5: virginica 4.5
# 6: virginica 6.9

# Step 2: wrapping the result in as.list() turns each element into its own
# column (V1, V2), one row per Species.
iris[, as.list(minmax(Petal.Length)), by = .(Species)]
# Species V1 V2
# <fctr> <num> <num>
# 1: setosa 1.0 1.9
# 2: versicolor 3.0 5.1
# 3: virginica 4.5 6.9

# Step 3: setNames() on that list gives the columns their final names.
iris[, setNames(as.list(minmax(Petal.Length)), c("Min", "Max")), by = .(Species)]
# Species Min Max
# <fctr> <num> <num>
# 1: setosa 1.0 1.9
# 2: versicolor 3.0 5.1
# 3: virginica 4.5 6.9

# Step 4: alternatively, name the elements inside the function; as.list()
# carries those names through to the column names.
minmax <- function(vec) c(Min = min(vec), Max = max(vec))
iris[, as.list(minmax(Petal.Length)), by = .(Species)]
# Species Min Max
# <fctr> <num> <num>
# 1: setosa 1.0 1.9
# 2: versicolor 3.0 5.1
# 3: virginica 4.5 6.9
因此,我们可以把函数改为直接返回列表(元素可选择命名)。
# Variant A: minmax() returns an unnamed list, so j yields one column per
# element without as.list(); the columns get the default names V1 and V2.
minmax <- function(vec) list(min(vec), max(vec))
iris[, minmax(Petal.Length), by = .(Species)]
# Species V1 V2
# <fctr> <num> <num>
# 1: setosa 1.0 1.9
# 2: versicolor 3.0 5.1
# 3: virginica 4.5 6.9

# Variant B: same function, with the column names supplied at the call site
# via setNames().
iris[, setNames(minmax(Petal.Length), c("Min", "Max")), by = .(Species)]
# Species Min Max
# <fctr> <num> <num>
# 1: setosa 1.0 1.9
# 2: versicolor 3.0 5.1
# 3: virginica 4.5 6.9

# Variant C: minmax() returns a named list, so the result columns are already
# called Min and Max.
minmax <- function(vec) list(Min = min(vec), Max = max(vec))
iris[, minmax(Petal.Length), by = .(Species)]
# Species Min Max
# <fctr> <num> <num>
# 1: setosa 1.0 1.9
# 2: versicolor 3.0 5.1
# 3: virginica 4.5 6.9