将 data.table 的列(名称和值)传递给函数
Pass a column (name and value) of a data.table to a function
我想写一个简单的函数来计算 Var1 的平均值:
# Example input: two readings that share one timestamp (epoch second
# 1358832600, no explicit time zone), stored as a data.table.
data <- data.table(
  data.frame(
    time = .POSIXct(rep(1358832600, 2), tz = ""),
    Var1 = c(0.4, 0.2)
  )
)
time Var1
2013-01-22 09:30:00 0.4
2013-01-22 09:30:00 0.2
# Average one column of `data` within each distinct `time` value (plyr).
#
# data: a data frame (or data.table) with a `time` column.
# col:  the column to average, passed as a quoted symbol, e.g. quote(Var1).
#
# Returns a data frame with columns `time` and `col`. The second column is
# literally called "col" because that is the name written in the code below,
# not the value of the `col` argument.
Aggregated.Data <- function(data, col) {
  # `.(time)` quotes the grouping variable for plyr; a bare `time` would be
  # looked up as an ordinary object instead of a column of `data`.
  ddply(data, .(time), function(piece) {
    # Evaluate the quoted symbol against the current group's columns, so the
    # lookup of `col` stays inside this function's own scope.
    data.frame(col = mean(eval(col, piece)))
  })
}
# Pass the column as a quoted symbol so it can be evaluated later.
aggregated.data <- Aggregated.Data(data = data, col = quote(Var1))
Everything works, output:
time col
2013-01-22 09:30:00 0.3
问题:
- 这是正确的方法吗?我的意思是使用 quote 和 eval?
- 为什么输出中的第二列名称是col,如何将其更改为Var1?
编辑:使用data.table
# Average one column of a data.table within each distinct `time` value.
#
# data: a data.table with a `time` column.
# col:  the column to average, written as a bare name, e.g. Var1.
#
# Returns a data.table with columns `time` and `col`.
Aggregated.Data <- function(data, col) {
  # Capture the caller's expression here: substitute() only sees the argument
  # expression when it runs in this function's own frame, not inside `[`.
  col_expr <- substitute(col)
  # Subset with `[`; writing data(...) would call a function named `data`
  # rather than index the table.
  data[, list(col = mean(eval(col_expr))), by = list(time = time)]
}
library(dplyr)
library(lazyeval)
# Mean of one column within each distinct `time` value (dplyr).
#
# data:          a data frame with a `time` column.
# variable_name: the column to average, written as a bare name, e.g. Var1.
#
# Returns an ungrouped data frame with `time` and the averaged column, which
# keeps its original name.
summarize_column <- function(data, variable_name) {
  # `{{ }}` forwards the caller's bare column name; across() replaces the
  # deprecated select_() / summarize_each() / funs() combination.
  data %>%
    group_by(time) %>%
    summarize(across({{ variable_name }}, mean), .groups = "drop")
}
# Call it with the column name unquoted.
summarize_column(data, Var1)
一种选择是使用 eval(as.name(col)) 来计算 mean,之后再用 setnames 更改列名。
# Mean of the column named by `col` within each distinct `time` value.
#
# dat: a data.table with a `time` column.
# col: name of the column to average, as a string (e.g. "Var1").
#
# Returns a data.table with `time` and the mean, the latter renamed to `col`.
f1 <- function(dat, col) {
  grouped_mean <- dat[, mean(eval(as.name(col))), by = time]
  # The unnamed result gets an automatic name; whichever column is new
  # relative to the input is the one holding the mean.
  fresh <- setdiff(names(grouped_mean), names(dat))
  setnames(grouped_mean, old = fresh, new = col)
  grouped_mean
}
# The column name is supplied as a string.
f1(dat = data, col = "Var1")
# time Var1
#1: 2013-01-22 00:30:00 0.3
我想写一个简单的函数来计算 Var1 的平均值:
# Example input: two readings that share one timestamp (epoch second
# 1358832600, no explicit time zone), stored as a data.table.
data <- data.table(
  data.frame(
    time = .POSIXct(rep(1358832600, 2), tz = ""),
    Var1 = c(0.4, 0.2)
  )
)
time Var1
2013-01-22 09:30:00 0.4
2013-01-22 09:30:00 0.2
# Average one column of `data` within each distinct `time` value (plyr).
#
# data: a data frame (or data.table) with a `time` column.
# col:  the column to average, passed as a quoted symbol, e.g. quote(Var1).
#
# Returns a data frame with columns `time` and `col`. The second column is
# literally called "col" because that is the name written in the code below,
# not the value of the `col` argument.
Aggregated.Data <- function(data, col) {
  # `.(time)` quotes the grouping variable for plyr; a bare `time` would be
  # looked up as an ordinary object instead of a column of `data`.
  ddply(data, .(time), function(piece) {
    # Evaluate the quoted symbol against the current group's columns, so the
    # lookup of `col` stays inside this function's own scope.
    data.frame(col = mean(eval(col, piece)))
  })
}
# Pass the column as a quoted symbol so it can be evaluated later.
aggregated.data <- Aggregated.Data(data = data, col = quote(Var1))
Everything works, output:
time col
2013-01-22 09:30:00 0.3
问题:
- 这是正确的方法吗?我的意思是使用 quote 和 eval?
- 为什么输出中的第二列名称是col,如何将其更改为Var1?
编辑:使用data.table
# Average one column of a data.table within each distinct `time` value.
#
# data: a data.table with a `time` column.
# col:  the column to average, written as a bare name, e.g. Var1.
#
# Returns a data.table with columns `time` and `col`.
Aggregated.Data <- function(data, col) {
  # Capture the caller's expression here: substitute() only sees the argument
  # expression when it runs in this function's own frame, not inside `[`.
  col_expr <- substitute(col)
  # Subset with `[`; writing data(...) would call a function named `data`
  # rather than index the table.
  data[, list(col = mean(eval(col_expr))), by = list(time = time)]
}
library(dplyr)
library(lazyeval)
# Mean of one column within each distinct `time` value (dplyr).
#
# data:          a data frame with a `time` column.
# variable_name: the column to average, written as a bare name, e.g. Var1.
#
# Returns an ungrouped data frame with `time` and the averaged column, which
# keeps its original name.
summarize_column <- function(data, variable_name) {
  # `{{ }}` forwards the caller's bare column name; across() replaces the
  # deprecated select_() / summarize_each() / funs() combination.
  data %>%
    group_by(time) %>%
    summarize(across({{ variable_name }}, mean), .groups = "drop")
}
# Call it with the column name unquoted.
summarize_column(data, Var1)
一种选择是使用 eval(as.name(col)) 来计算 mean,之后再用 setnames 更改列名。
# Mean of the column named by `col` within each distinct `time` value.
#
# dat: a data.table with a `time` column.
# col: name of the column to average, as a string (e.g. "Var1").
#
# Returns a data.table with `time` and the mean, the latter renamed to `col`.
f1 <- function(dat, col) {
  grouped_mean <- dat[, mean(eval(as.name(col))), by = time]
  # The unnamed result gets an automatic name; whichever column is new
  # relative to the input is the one holding the mean.
  fresh <- setdiff(names(grouped_mean), names(dat))
  setnames(grouped_mean, old = fresh, new = col)
  grouped_mean
}
# The column name is supplied as a string.
f1(dat = data, col = "Var1")
# time Var1
#1: 2013-01-22 00:30:00 0.3