在 R 中,是否可以使用 mutate 在数据框中创建一个新列,使该列的值为某个函数的输出?
Is it possible to create a new column in a dataframe that is the output of a function using mutate in R?
我需要对数据框逐行运行一个自定义函数,并在同一数据框中创建一个输出列(列名为 tt_daily)。下面是一个虚构的示例。
# Made-up example data: 13 rows parsed from the inline CSV text below.
# The `doy,tmx,tmn,relHum,srad` line supplies the column names.
data1 <- read.csv(text = "
doy,tmx,tmn,relHum,srad
148,31.3,13.8,68.3,30.4
149,31.1,17.2,62.2,30
150,30.1,16.1,69.7,20.9
151,27.3,16.2,77.1,26.1
152,33.4,18.4,65.9,27.4
153,27.2,18,70.3,26.6
154,30.3,13,71.5,28.4
155,36.2,22,62.2,28.8
156,32.9,22.2,61.1,24.9
157,30.5,16.2,63.2,27.9
158,25.7,19.3,71,18.3
159,29.1,18.3,87.2,12.7
160,28.5,20.3,70.2,24.8
")
这是函数:
# Function intended to be run row-wise (one scalar tmx/tmn pair per call).
# Threshold values that are passed to tt() as tb, topt and tmax.
tb<- 11
topt<- 30
tmax<- 42
# tt(): computes tmean = (tmx + tmn) / 2 and maps it to t1 as follows
# (the third branch is shown as intended, see the note inside the body):
#   tmean <= tb          -> 0
#   tb < tmean <= topt   -> tmean - tb
#   topt < tmean < tmax  -> (topt - tb) / (topt - tmax) * (tmean - tmax)
#   tmean >= tmax        -> 0
tt<-function(tmx, tmn, tb, topt, tmax){
tmean<- (tmx + tmn) / 2
if(tmean <= tb) {t1 = 0}
if(tmean >tb & tmean <=topt) {t1 = tmean - tb}
# NOTE(review): `max` is not a variable defined in this function, so it resolves
# to the base function max(). Comparing a number with a function is what raises
# "comparison (3) is possible only for atomic and list types"; `tmax` was meant.
if(tmean>topt & tmean<max) {t1 = (topt - tb) / (topt - tmax) * (tmean - tmax)}
if(tmean >= tmax) {t1 <- 0}
return(t1)
}
这是我所做的两个选项:
# Option 1: attempt using purrr::pmap() inside mutate().
# NOTE(review): pmap() is given a single argument here (an anonymous function
# whose body is just the symbol `tt`), so nothing is supplied as `.f`. This
# matches the 'argument ".f" is missing' error reported for this attempt.
library(dplyr)
tt.example <- data1 %>%
mutate(tt_daily = purrr::pmap(function(tmx, tmn, tb, topt, tmax) tt))
这是错误:
Error: Problem with `mutate()` column `tt_daily`.
i `tt_daily = purrr::pmap(function(tmx, tmn, tb, topt, tmax) tt)`.
x argument ".f" is missing, with no default
这是选项2:
# Option 2: rowwise() makes mutate() evaluate tt() once per row, so tmx and tmn
# are scalars taken from that row. tb, topt and tmax are not columns of data1,
# so they are taken from the variables defined before tt().
# NOTE(review): the call itself is well-formed; the reported error is raised
# inside tt().
tt.example <- data1 %>%
rowwise() %>%
mutate(tt_daily = tt(tmx, tmn, tb, topt, tmax))
这是我得到的错误:
Error: Problem with `mutate()` column `tt_daily`.
i `tt_daily = tt(tmx, tmn, tb, topt, tmax)`.
x comparison (3) is possible only for atomic and list types
i The error occurred in row 1.
感谢任何建议。
函数中有一个拼写错误:应该是 tmax,而不是 max。
# Piecewise-linear function of the mean of `tmx` and `tmn`.
#
# With tmean = (tmx + tmn) / 2 the result is:
#   tmean >= tmax         -> 0
#   topt < tmean < tmax   -> (topt - tb) / (topt - tmax) * (tmean - tmax)
#   tb < tmean <= topt    -> tmean - tb
#   tmean <= tb           -> 0
#
# The function is vectorised: `tmx` and `tmn` may be scalars or whole columns,
# so it can be called directly inside mutate() as well as row by row
# (rowwise() / pmap_dbl()). Scalar calls return the same value as the original
# if-based version. NA inputs give NA instead of raising an error.
#
# Arguments:
#   tmx, tmn        numeric vectors (or scalars); only their mean is used.
#   tb, topt, tmax  numeric thresholds, normally scalars.
# Returns:
#   a vector with one value per element of `tmean`.
tt <- function(tmx, tmn, tb, topt, tmax) {
  tmean <- (tmx + tmn) / 2
  falling <- (topt - tb) / (topt - tmax) * (tmean - tmax)
  # The tests are nested in this order so that, when several conditions hold at
  # once, the same branch wins as in the original sequence of `if` statements
  # (where a later assignment overwrote an earlier one).
  ifelse(
    tmean >= tmax,
    0,
    ifelse(
      tmean > topt & tmean < tmax,
      falling,
      ifelse(tmean > tb & tmean <= topt, tmean - tb, 0)
    )
  )
}
现在,我们在 pmap 中把其余参数作为命名 list 附加进去,然后在 mutate 中应用该函数:
library(dplyr)
library(purrr)

# Call tt() once per row: tmx and tmn are the data columns, while the three
# thresholds are length-one values that pmap_dbl() reuses for every row.
# The argument names in the list match tt()'s parameter names.
data1 %>%
  mutate(
    tt_daily = pmap_dbl(
      list(tmx = tmx, tmn = tmn, tb = tb, topt = topt, tmax = tmax),
      tt
    )
  )
-输出
doy tmx tmn relHum srad tt_daily
1 148 31.3 13.8 68.3 30.4 11.55
2 149 31.1 17.2 62.2 30.0 13.15
3 150 30.1 16.1 69.7 20.9 12.10
4 151 27.3 16.2 77.1 26.1 10.75
5 152 33.4 18.4 65.9 27.4 14.90
6 153 27.2 18.0 70.3 26.6 11.60
7 154 30.3 13.0 71.5 28.4 10.65
8 155 36.2 22.0 62.2 28.8 18.10
9 156 32.9 22.2 61.1 24.9 16.55
10 157 30.5 16.2 63.2 27.9 12.35
11 158 25.7 19.3 71.0 18.3 11.50
12 159 29.1 18.3 87.2 12.7 12.70
13 160 28.5 20.3 70.2 24.8 13.40
或使用rowwise
# Row-wise evaluation: each row's tmx/tmn pair is handed to tt() as scalars,
# then the row-wise grouping is removed again.
data1 %>%
  rowwise() %>%
  mutate(
    tt_daily = tt(tmx = tmx, tmn = tmn, tb = tb, topt = topt, tmax = tmax)
  ) %>%
  ungroup()
-输出
# A tibble: 13 x 6
doy tmx tmn relHum srad tt_daily
<int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 148 31.3 13.8 68.3 30.4 11.6
2 149 31.1 17.2 62.2 30 13.2
3 150 30.1 16.1 69.7 20.9 12.1
4 151 27.3 16.2 77.1 26.1 10.8
5 152 33.4 18.4 65.9 27.4 14.9
6 153 27.2 18 70.3 26.6 11.6
7 154 30.3 13 71.5 28.4 10.6
8 155 36.2 22 62.2 28.8 18.1
9 156 32.9 22.2 61.1 24.9 16.5
10 157 30.5 16.2 63.2 27.9 12.4
11 158 25.7 19.3 71 18.3 11.5
12 159 29.1 18.3 87.2 12.7 12.7
13 160 28.5 20.3 70.2 24.8 13.4
如果我们想再添加一个新列(例如 tmean),那么最好让 'tt' 函数返回一个 list 或 tibble:
# Variant of tt() that returns both the piecewise value and the mean it was
# computed from, as a tibble with columns `tt_daily` and `tmean`.
#
# With tmean = (tmx + tmn) / 2, `tt_daily` is:
#   tmean >= tmax         -> 0
#   topt < tmean < tmax   -> (topt - tb) / (topt - tmax) * (tmean - tmax)
#   tb < tmean <= topt    -> tmean - tb
#   tmean <= tb           -> 0
#
# The function is vectorised: scalar input gives a one-row tibble (same values
# as the original if-based version), vector input gives one row per element.
# NA inputs give NA instead of raising an error.
#
# Arguments:
#   tmx, tmn        numeric vectors (or scalars); only their mean is used.
#   tb, topt, tmax  numeric thresholds, normally scalars.
# Returns:
#   tibble(tt_daily, tmean). Requires tibble() to be available, e.g. through
#   library(dplyr).
tt <- function(tmx, tmn, tb, topt, tmax) {
  tmean <- (tmx + tmn) / 2
  falling <- (topt - tb) / (topt - tmax) * (tmean - tmax)
  # The tests are nested in this order so that, when several conditions hold at
  # once, the same branch wins as in the original sequence of `if` statements
  # (where a later assignment overwrote an earlier one).
  t1 <- ifelse(
    tmean >= tmax,
    0,
    ifelse(
      tmean > topt & tmean < tmax,
      falling,
      ifelse(tmean > tb & tmean <= topt, tmean - tb, 0)
    )
  )
  tibble(tt_daily = t1, tmean = tmean)
}
现在,我们把函数的返回值包装在 list 中,再对输出列进行 unnest:
library(tidyr)

# tt() here is the variant that returns a tibble. Each row's result is stored
# in the list-column `out`, which is then spread into separate columns.
data1 %>%
  rowwise() %>%
  mutate(out = list(tt(tmx, tmn, tb, topt, tmax))) %>%
  ungroup() %>%
  unnest_wider(out)
# A tibble: 13 x 7
doy tmx tmn relHum srad tt_daily tmean
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 148 31.3 13.8 68.3 30.4 11.6 22.6
2 149 31.1 17.2 62.2 30 13.2 24.2
3 150 30.1 16.1 69.7 20.9 12.1 23.1
4 151 27.3 16.2 77.1 26.1 10.8 21.8
5 152 33.4 18.4 65.9 27.4 14.9 25.9
6 153 27.2 18 70.3 26.6 11.6 22.6
7 154 30.3 13 71.5 28.4 10.6 21.6
8 155 36.2 22 62.2 28.8 18.1 29.1
9 156 32.9 22.2 61.1 24.9 16.5 27.6
10 157 30.5 16.2 63.2 27.9 12.4 23.4
11 158 25.7 19.3 71 18.3 11.5 22.5
12 159 29.1 18.3 87.2 12.7 12.7 23.7
13 160 28.5 20.3 70.2 24.8 13.4 24.4
我需要对数据框逐行运行一个自定义函数,并在同一数据框中创建一个输出列(列名为 tt_daily)。下面是一个虚构的示例。
# Made-up example data: 13 rows parsed from the inline CSV text below.
# The `doy,tmx,tmn,relHum,srad` line supplies the column names.
data1 <- read.csv(text = "
doy,tmx,tmn,relHum,srad
148,31.3,13.8,68.3,30.4
149,31.1,17.2,62.2,30
150,30.1,16.1,69.7,20.9
151,27.3,16.2,77.1,26.1
152,33.4,18.4,65.9,27.4
153,27.2,18,70.3,26.6
154,30.3,13,71.5,28.4
155,36.2,22,62.2,28.8
156,32.9,22.2,61.1,24.9
157,30.5,16.2,63.2,27.9
158,25.7,19.3,71,18.3
159,29.1,18.3,87.2,12.7
160,28.5,20.3,70.2,24.8
")
这是函数:
# Function intended to be run row-wise (one scalar tmx/tmn pair per call).
# Threshold values that are passed to tt() as tb, topt and tmax.
tb<- 11
topt<- 30
tmax<- 42
# tt(): computes tmean = (tmx + tmn) / 2 and maps it to t1 as follows
# (the third branch is shown as intended, see the note inside the body):
#   tmean <= tb          -> 0
#   tb < tmean <= topt   -> tmean - tb
#   topt < tmean < tmax  -> (topt - tb) / (topt - tmax) * (tmean - tmax)
#   tmean >= tmax        -> 0
tt<-function(tmx, tmn, tb, topt, tmax){
tmean<- (tmx + tmn) / 2
if(tmean <= tb) {t1 = 0}
if(tmean >tb & tmean <=topt) {t1 = tmean - tb}
# NOTE(review): `max` is not a variable defined in this function, so it resolves
# to the base function max(). Comparing a number with a function is what raises
# "comparison (3) is possible only for atomic and list types"; `tmax` was meant.
if(tmean>topt & tmean<max) {t1 = (topt - tb) / (topt - tmax) * (tmean - tmax)}
if(tmean >= tmax) {t1 <- 0}
return(t1)
}
这是我所做的两个选项:
# Option 1: attempt using purrr::pmap() inside mutate().
# NOTE(review): pmap() is given a single argument here (an anonymous function
# whose body is just the symbol `tt`), so nothing is supplied as `.f`. This
# matches the 'argument ".f" is missing' error reported for this attempt.
library(dplyr)
tt.example <- data1 %>%
mutate(tt_daily = purrr::pmap(function(tmx, tmn, tb, topt, tmax) tt))
这是错误:
Error: Problem with `mutate()` column `tt_daily`.
i `tt_daily = purrr::pmap(function(tmx, tmn, tb, topt, tmax) tt)`.
x argument ".f" is missing, with no default
这是选项2:
# Option 2: rowwise() makes mutate() evaluate tt() once per row, so tmx and tmn
# are scalars taken from that row. tb, topt and tmax are not columns of data1,
# so they are taken from the variables defined before tt().
# NOTE(review): the call itself is well-formed; the reported error is raised
# inside tt().
tt.example <- data1 %>%
rowwise() %>%
mutate(tt_daily = tt(tmx, tmn, tb, topt, tmax))
这是我得到的错误:
Error: Problem with `mutate()` column `tt_daily`.
i `tt_daily = tt(tmx, tmn, tb, topt, tmax)`.
x comparison (3) is possible only for atomic and list types
i The error occurred in row 1.
感谢任何建议。
函数中有一个拼写错误:应该是 tmax,而不是 max。
# Piecewise-linear function of the mean of `tmx` and `tmn`.
#
# With tmean = (tmx + tmn) / 2 the result is:
#   tmean >= tmax         -> 0
#   topt < tmean < tmax   -> (topt - tb) / (topt - tmax) * (tmean - tmax)
#   tb < tmean <= topt    -> tmean - tb
#   tmean <= tb           -> 0
#
# The function is vectorised: `tmx` and `tmn` may be scalars or whole columns,
# so it can be called directly inside mutate() as well as row by row
# (rowwise() / pmap_dbl()). Scalar calls return the same value as the original
# if-based version. NA inputs give NA instead of raising an error.
#
# Arguments:
#   tmx, tmn        numeric vectors (or scalars); only their mean is used.
#   tb, topt, tmax  numeric thresholds, normally scalars.
# Returns:
#   a vector with one value per element of `tmean`.
tt <- function(tmx, tmn, tb, topt, tmax) {
  tmean <- (tmx + tmn) / 2
  falling <- (topt - tb) / (topt - tmax) * (tmean - tmax)
  # The tests are nested in this order so that, when several conditions hold at
  # once, the same branch wins as in the original sequence of `if` statements
  # (where a later assignment overwrote an earlier one).
  ifelse(
    tmean >= tmax,
    0,
    ifelse(
      tmean > topt & tmean < tmax,
      falling,
      ifelse(tmean > tb & tmean <= topt, tmean - tb, 0)
    )
  )
}
现在,我们在 pmap 中把其余参数作为命名 list 附加进去,然后在 mutate 中应用该函数:
library(dplyr)
library(purrr)

# Call tt() once per row: tmx and tmn are the data columns, while the three
# thresholds are length-one values that pmap_dbl() reuses for every row.
# The argument names in the list match tt()'s parameter names.
data1 %>%
  mutate(
    tt_daily = pmap_dbl(
      list(tmx = tmx, tmn = tmn, tb = tb, topt = topt, tmax = tmax),
      tt
    )
  )
-输出
doy tmx tmn relHum srad tt_daily
1 148 31.3 13.8 68.3 30.4 11.55
2 149 31.1 17.2 62.2 30.0 13.15
3 150 30.1 16.1 69.7 20.9 12.10
4 151 27.3 16.2 77.1 26.1 10.75
5 152 33.4 18.4 65.9 27.4 14.90
6 153 27.2 18.0 70.3 26.6 11.60
7 154 30.3 13.0 71.5 28.4 10.65
8 155 36.2 22.0 62.2 28.8 18.10
9 156 32.9 22.2 61.1 24.9 16.55
10 157 30.5 16.2 63.2 27.9 12.35
11 158 25.7 19.3 71.0 18.3 11.50
12 159 29.1 18.3 87.2 12.7 12.70
13 160 28.5 20.3 70.2 24.8 13.40
或使用rowwise
# Row-wise evaluation: each row's tmx/tmn pair is handed to tt() as scalars,
# then the row-wise grouping is removed again.
data1 %>%
  rowwise() %>%
  mutate(
    tt_daily = tt(tmx = tmx, tmn = tmn, tb = tb, topt = topt, tmax = tmax)
  ) %>%
  ungroup()
-输出
# A tibble: 13 x 6
doy tmx tmn relHum srad tt_daily
<int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 148 31.3 13.8 68.3 30.4 11.6
2 149 31.1 17.2 62.2 30 13.2
3 150 30.1 16.1 69.7 20.9 12.1
4 151 27.3 16.2 77.1 26.1 10.8
5 152 33.4 18.4 65.9 27.4 14.9
6 153 27.2 18 70.3 26.6 11.6
7 154 30.3 13 71.5 28.4 10.6
8 155 36.2 22 62.2 28.8 18.1
9 156 32.9 22.2 61.1 24.9 16.5
10 157 30.5 16.2 63.2 27.9 12.4
11 158 25.7 19.3 71 18.3 11.5
12 159 29.1 18.3 87.2 12.7 12.7
13 160 28.5 20.3 70.2 24.8 13.4
如果我们想再添加一个新列(例如 tmean),那么最好让 'tt' 函数返回一个 list 或 tibble:
# Variant of tt() that returns both the piecewise value and the mean it was
# computed from, as a tibble with columns `tt_daily` and `tmean`.
#
# With tmean = (tmx + tmn) / 2, `tt_daily` is:
#   tmean >= tmax         -> 0
#   topt < tmean < tmax   -> (topt - tb) / (topt - tmax) * (tmean - tmax)
#   tb < tmean <= topt    -> tmean - tb
#   tmean <= tb           -> 0
#
# The function is vectorised: scalar input gives a one-row tibble (same values
# as the original if-based version), vector input gives one row per element.
# NA inputs give NA instead of raising an error.
#
# Arguments:
#   tmx, tmn        numeric vectors (or scalars); only their mean is used.
#   tb, topt, tmax  numeric thresholds, normally scalars.
# Returns:
#   tibble(tt_daily, tmean). Requires tibble() to be available, e.g. through
#   library(dplyr).
tt <- function(tmx, tmn, tb, topt, tmax) {
  tmean <- (tmx + tmn) / 2
  falling <- (topt - tb) / (topt - tmax) * (tmean - tmax)
  # The tests are nested in this order so that, when several conditions hold at
  # once, the same branch wins as in the original sequence of `if` statements
  # (where a later assignment overwrote an earlier one).
  t1 <- ifelse(
    tmean >= tmax,
    0,
    ifelse(
      tmean > topt & tmean < tmax,
      falling,
      ifelse(tmean > tb & tmean <= topt, tmean - tb, 0)
    )
  )
  tibble(tt_daily = t1, tmean = tmean)
}
现在,我们把函数的返回值包装在 list 中,再对输出列进行 unnest:
library(tidyr)

# tt() here is the variant that returns a tibble. Each row's result is stored
# in the list-column `out`, which is then spread into separate columns.
data1 %>%
  rowwise() %>%
  mutate(out = list(tt(tmx, tmn, tb, topt, tmax))) %>%
  ungroup() %>%
  unnest_wider(out)
# A tibble: 13 x 7
doy tmx tmn relHum srad tt_daily tmean
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 148 31.3 13.8 68.3 30.4 11.6 22.6
2 149 31.1 17.2 62.2 30 13.2 24.2
3 150 30.1 16.1 69.7 20.9 12.1 23.1
4 151 27.3 16.2 77.1 26.1 10.8 21.8
5 152 33.4 18.4 65.9 27.4 14.9 25.9
6 153 27.2 18 70.3 26.6 11.6 22.6
7 154 30.3 13 71.5 28.4 10.6 21.6
8 155 36.2 22 62.2 28.8 18.1 29.1
9 156 32.9 22.2 61.1 24.9 16.5 27.6
10 157 30.5 16.2 63.2 27.9 12.4 23.4
11 158 25.7 19.3 71 18.3 11.5 22.5
12 159 29.1 18.3 87.2 12.7 12.7 23.7
13 160 28.5 20.3 70.2 24.8 13.4 24.4