在 summarize 中对返回多个值的函数的结果取消嵌套(unnest)
Unnesting results of function returning multiple values in summarize
想要的("wanted")结果由下面使用 do() 的代码给出。我原以为可以通过某种方式使用 unnest() 得到相同的结果,但一直没能成功。
library(dplyr)
library(tidyr)
# Function rr is given
rr <- function(x) {
  # Stand-in for an expensive and possibly random computation:
  # add normal noise (mean 0.1, default sd) to x and take the range
  noisy <- x + rnorm(length(x), mean = 0.1)
  bounds <- range(noisy)
  # Return shapes that were tried and rejected:
  #   setNames(bounds, c("min", "max"))       # fails, expecting single value
  #   list(min = bounds[1], max = bounds[2])  # fails
  # Wrapping in a list works, but the result is in "long" form without min/max
  list(bounds)
}
# Works, but syntactically awkward: do() runs the braced expression once per
# Species group and row-binds the one-row tibbles it returns.
iris %>%
  group_by(Species) %>%
  do({
    # rr() wraps its length-2 range vector in a list, so unwrap it first
    r <- rr(.$Sepal.Width)[[1]]
    # tibble() replaces the deprecated data_frame() constructor
    tibble(min = r[1], max = r[2])
  })
# This gives the long format (the range values stacked in a single column),
# but without the column names min/max
iris %>%
  group_by(Species) %>%
  summarize(range = rr(Sepal.Length)) %>%
  unnest(range)
unnest() 总是以“长”(long)格式展开嵌套列;但如果您另外创建一个 key 列,就可以使用 spread() 获得所需的输出。
library(dplyr)
library(tidyr)
# Label the unnested rows as min/max, then spread them into separate columns.
# range() returns c(min, max), so every group contributes two consecutive rows
# in that order. length.out = n() sizes the key column from the data itself
# instead of hard-coding the number of Species groups.
iris %>%
  group_by(Species) %>%
  summarize(range = rr(Sepal.Length)) %>%
  unnest(range) %>%
  mutate(newcols = rep(c("min", "max"), length.out = n())) %>%
  spread(newcols, range)
# Example output (values differ between runs because rr() adds random noise):
# Species max min
# (fctr) (dbl) (dbl)
#1 setosa 7.636698 3.292692
#2 versicolor 9.792319 3.337382
#3 virginica 9.810723 3.367066
下面是使用 data.table 包的一个非常直接的替代方案:
# Function rr is given
rr <- function(x) {
  # Range of x after adding normal noise (mean 0.1, default sd)
  bounds <- range(x + rnorm(length(x), mean = 0.1))
  names(bounds) <- c("min", "max")
  # Return a named list with elements min and max
  as.list(bounds)
}
library(data.table)
# Evaluate rr() once per Species; the named list it returns supplies the
# min and max columns shown below
data.table(iris)[, rr(Sepal.Width), by = "Species"]
# Species min max
# 1: setosa 1.839845 6.341040
# 2: versicolor 1.063727 5.498810
# 3: virginica 1.232525 5.402483
想要的("wanted")结果由下面使用 do() 的代码给出。我原以为可以通过某种方式使用 unnest() 得到相同的结果,但一直没能成功。
library(dplyr)
library(tidyr)
# Function rr is given
rr <- function(x) {
  # Stand-in for an expensive and possibly random computation:
  # add normal noise (mean 0.1, default sd) to x and take the range
  noisy <- x + rnorm(length(x), mean = 0.1)
  bounds <- range(noisy)
  # Return shapes that were tried and rejected:
  #   setNames(bounds, c("min", "max"))       # fails, expecting single value
  #   list(min = bounds[1], max = bounds[2])  # fails
  # Wrapping in a list works, but the result is in "long" form without min/max
  list(bounds)
}
# Works, but syntactically awkward: do() runs the braced expression once per
# Species group and row-binds the one-row tibbles it returns.
iris %>%
  group_by(Species) %>%
  do({
    # rr() wraps its length-2 range vector in a list, so unwrap it first
    r <- rr(.$Sepal.Width)[[1]]
    # tibble() replaces the deprecated data_frame() constructor
    tibble(min = r[1], max = r[2])
  })
# This gives the long format (the range values stacked in a single column),
# but without the column names min/max
iris %>%
  group_by(Species) %>%
  summarize(range = rr(Sepal.Length)) %>%
  unnest(range)
unnest() 总是以“长”(long)格式展开嵌套列;但如果您另外创建一个 key 列,就可以使用 spread() 获得所需的输出。
library(dplyr)
library(tidyr)
# Label the unnested rows as min/max, then spread them into separate columns.
# range() returns c(min, max), so every group contributes two consecutive rows
# in that order. length.out = n() sizes the key column from the data itself
# instead of hard-coding the number of Species groups.
iris %>%
  group_by(Species) %>%
  summarize(range = rr(Sepal.Length)) %>%
  unnest(range) %>%
  mutate(newcols = rep(c("min", "max"), length.out = n())) %>%
  spread(newcols, range)
# Example output (values differ between runs because rr() adds random noise):
# Species max min
# (fctr) (dbl) (dbl)
#1 setosa 7.636698 3.292692
#2 versicolor 9.792319 3.337382
#3 virginica 9.810723 3.367066
下面是使用 data.table 包的一个非常直接的替代方案:
# Function rr is given
rr <- function(x) {
  # Range of x after adding normal noise (mean 0.1, default sd)
  bounds <- range(x + rnorm(length(x), mean = 0.1))
  names(bounds) <- c("min", "max")
  # Return a named list with elements min and max
  as.list(bounds)
}
library(data.table)
# Evaluate rr() once per Species; the named list it returns supplies the
# min and max columns shown below
data.table(iris)[, rr(Sepal.Width), by = "Species"]
# Species min max
# 1: setosa 1.839845 6.341040
# 2: versicolor 1.063727 5.498810
# 3: virginica 1.232525 5.402483