从 purrr::invoke_map() 监控 progress/Print 到控制台
Monitor progress/Print to console from purrr::invoke_map()
我正在尝试使用 purrr::invoke_map(),按照列表列(list-column)格式(参见:this blog post)在 R 中训练多个 caret 模型。
调用 invoke_map() 时,我希望能够以某种方式监控进度。具体来说,我想在 invoke_map() 遍历模型/数据组合时,把行号或 id 列打印到控制台。有没有办法做到这一点,比如通过修改训练函数(下面的 linearRegModel())?
library(tidyverse)
library(mlbench)
library(caret)
data("BostonHousing") # from mlbench
# Three copies of the Boston housing data, one per row, each split into a
# predictor data frame (train.X) and an outcome vector (train.Y).
starter_df <-
  rep(list(BostonHousing), times = 3) %>%
  enframe(name = "id", value = "rawdata") %>%
  transmute(
    id,
    train.X = map(rawdata, function(housing) select(housing, -medv)),
    train.Y = map(rawdata, function(housing) housing$medv)
  )
# Wrap a caret training method in a plain function of predictors and outcome.
# Linear regression is used here to keep the example simple.
#
# X: predictors, forwarded to train() as `x`.
# Y: outcome, forwarded to train() as `y`.
# Returns whatever train() returns.
linearRegModel <- function(X, Y) {
  # Resampling settings handed to train() through `trControl`.
  resampling <- trainControl(method = "repeatedcv", number = 2)
  train(
    x = X,
    y = Y,
    method = "lm",
    trControl = resampling,
    preProc = c("center", "scale")
  )
}
# Turn the named list of model functions into a two-column tibble:
# modelName (the list names) and model (the functions themselves).
model_list <- enframe(
  list(
    linearRegModel = linearRegModel,
    linearRegModel2 = linearRegModel
  ),
  name = "modelName",
  value = "model"
)
# Combine the model tibble with the data tibble: the rows of starter_df are
# repeated once per model and the rows of model_list once per data set, then
# the two are bound column-wise.
# seq_len() is used instead of 1:nrow() so that an empty tibble yields an
# empty index rather than c(1, 0).
train_df <-
  starter_df[rep(seq_len(nrow(starter_df)), nrow(model_list)), ] %>%
  bind_cols(
    model_list[rep(seq_len(nrow(model_list)), nrow(starter_df)), ] %>%
      arrange(modelName)
  ) %>%
  # Renumber the rows so every model/data combination has its own id.
  mutate(id = row_number())
train_df
# A tibble: 6 x 5
id train.X train.Y modelName model
<int> <list> <list> <chr> <list>
1 1 <data.frame [506 x 13]> <dbl [506]> linearRegModel <fun>
2 2 <data.frame [506 x 13]> <dbl [506]> linearRegModel <fun>
3 3 <data.frame [506 x 13]> <dbl [506]> linearRegModel <fun>
4 4 <data.frame [506 x 13]> <dbl [506]> linearRegModel2 <fun>
5 5 <data.frame [506 x 13]> <dbl [506]> linearRegModel2 <fun>
6 6 <data.frame [506 x 13]> <dbl [506]> linearRegModel2 <fun>
# Fit every model/data combination with invoke_map().
# (takes a few seconds)
data_with_model_fits <- mutate(
  train_df,
  # Bundle each row's predictors and outcome into the named argument list
  # (X, Y) that the model functions take.
  params = map2(
    train.X,
    train.Y,
    function(predictors, outcome) list(X = predictors, Y = outcome)
  ),
  modelFits = invoke_map(model, params)
)
您可能会发现 progress 包很有用。下面我把它整合进了您问题中的代码。请注意两点:
1. 在开始拟合模型之前,先用 progress::progress_bar$new(total = number_of_ticks) 初始化进度条。
2. 在 linearRegModel() 函数中,每个模型拟合完成后调用 pb$tick(),让进度条前进一格("tick")。
pb 是一个 R6 对象,具有引用语义;函数运行时会在其外层环境中找到它,因此您不必把它作为参数传给 linearRegModel() 函数。
希望对您有所帮助。
library(tidyverse)
library(mlbench)
library(caret)
data("BostonHousing") # from mlbench
library(progress)
# Three copies of the Boston housing data, one per row, each split into a
# predictor data frame (train.X) and an outcome vector (train.Y).
starter_df <-
  rep(list(BostonHousing), times = 3) %>%
  enframe(name = "id", value = "rawdata") %>%
  transmute(
    id,
    train.X = map(rawdata, function(housing) select(housing, -medv)),
    train.Y = map(rawdata, function(housing) housing$medv)
  )
# Re-write any caret training method as a function.
# Using linear regression here for simplicity.
#
# Fits the model with train() and then advances the progress bar `pb` by one
# tick.
#
# X: predictors, forwarded to train() as `x`.
# Y: outcome, forwarded to train() as `y`.
#
# Returns the object produced by train(). `pb` is not an argument: it is
# looked up when the function runs, so it must exist before the models are
# trained.
linearRegModel <- function(X, Y) {
  ctrl <- trainControl(
    method = "repeatedcv",
    number = 2
  )
  # Keep the fit in a variable: a function returns the value of its last
  # expression, and that has to be the model rather than the result of
  # pb$tick().
  fit <- train(
    x = X,
    y = Y,
    method = "lm",
    trControl = ctrl,
    preProc = c("center", "scale")
  )
  # Tick the progress bar forward 1 tick after each completed model fit
  pb$tick()
  fit
}
# Turn the named list of model functions into a two-column tibble:
# modelName (the list names) and model (the functions themselves).
model_list <- enframe(
  list(
    linearRegModel = linearRegModel,
    linearRegModel2 = linearRegModel
  ),
  name = "modelName",
  value = "model"
)
# Combine the model tibble with the data tibble: the rows of starter_df are
# repeated once per model and the rows of model_list once per data set, then
# the two are bound column-wise.
# seq_len() is used instead of 1:nrow() so that an empty tibble yields an
# empty index rather than c(1, 0).
train_df <-
  starter_df[rep(seq_len(nrow(starter_df)), nrow(model_list)), ] %>%
  bind_cols(
    model_list[rep(seq_len(nrow(model_list)), nrow(starter_df)), ] %>%
      arrange(modelName)
  ) %>%
  # Renumber the rows so every model/data combination has its own id.
  mutate(id = row_number())
train_df
# Create the progress bar with one tick per row of train_df.
ticks <- nrow(train_df)
pb <- progress::progress_bar$new(total = ticks)
# Fit every model/data combination with invoke_map().
# (takes a few seconds)
data_with_model_fits <- mutate(
  train_df,
  # Bundle each row's predictors and outcome into the named argument list
  # (X, Y) that the model functions take.
  params = map2(
    train.X,
    train.Y,
    function(predictors, outcome) list(X = predictors, Y = outcome)
  ),
  modelFits = invoke_map(model, params)
)
为了增加灵活性,您可以在创建进度条时通过 format 参数使用 token。其中一些是内置的,例如 :current,它会显示当前的迭代次数。这可能更直接地回答了您的问题。在这种情况下,我会在模型运行之前调用 pb$tick()。文档还建议在长时间运行的计算开始之前调用 pb$tick(0),以便立即显示进度条。
# Create a progress bar whose text shows the built-in :current token.
pb <- progress::progress_bar$new(
  format = "running model :current",
  show_after = 0.01
)
# Zero-length tick, issued before the long-running computation starts.
pb$tick(0)
我正在尝试使用 purrr::invoke_map() 训练多个 caret 模型。
调用 invoke_map() 时,我希望能够以某种方式监控进度。具体来说,我想在 invoke_map() 遍历模型/数据组合时,把行号或 id 列打印到控制台。有没有办法做到这一点,比如通过修改训练函数(下面的 linearRegModel())?
library(tidyverse)
library(mlbench)
library(caret)
data("BostonHousing") # from mlbench
# Three copies of the Boston housing data, one per row, each split into a
# predictor data frame (train.X) and an outcome vector (train.Y).
starter_df <-
  rep(list(BostonHousing), times = 3) %>%
  enframe(name = "id", value = "rawdata") %>%
  transmute(
    id,
    train.X = map(rawdata, function(housing) select(housing, -medv)),
    train.Y = map(rawdata, function(housing) housing$medv)
  )
# Wrap a caret training method in a plain function of predictors and outcome.
# Linear regression is used here to keep the example simple.
#
# X: predictors, forwarded to train() as `x`.
# Y: outcome, forwarded to train() as `y`.
# Returns whatever train() returns.
linearRegModel <- function(X, Y) {
  # Resampling settings handed to train() through `trControl`.
  resampling <- trainControl(method = "repeatedcv", number = 2)
  train(
    x = X,
    y = Y,
    method = "lm",
    trControl = resampling,
    preProc = c("center", "scale")
  )
}
# Turn the named list of model functions into a two-column tibble:
# modelName (the list names) and model (the functions themselves).
model_list <- enframe(
  list(
    linearRegModel = linearRegModel,
    linearRegModel2 = linearRegModel
  ),
  name = "modelName",
  value = "model"
)
# Combine the model tibble with the data tibble: the rows of starter_df are
# repeated once per model and the rows of model_list once per data set, then
# the two are bound column-wise.
# seq_len() is used instead of 1:nrow() so that an empty tibble yields an
# empty index rather than c(1, 0).
train_df <-
  starter_df[rep(seq_len(nrow(starter_df)), nrow(model_list)), ] %>%
  bind_cols(
    model_list[rep(seq_len(nrow(model_list)), nrow(starter_df)), ] %>%
      arrange(modelName)
  ) %>%
  # Renumber the rows so every model/data combination has its own id.
  mutate(id = row_number())
train_df
# A tibble: 6 x 5
id train.X train.Y modelName model
<int> <list> <list> <chr> <list>
1 1 <data.frame [506 x 13]> <dbl [506]> linearRegModel <fun>
2 2 <data.frame [506 x 13]> <dbl [506]> linearRegModel <fun>
3 3 <data.frame [506 x 13]> <dbl [506]> linearRegModel <fun>
4 4 <data.frame [506 x 13]> <dbl [506]> linearRegModel2 <fun>
5 5 <data.frame [506 x 13]> <dbl [506]> linearRegModel2 <fun>
6 6 <data.frame [506 x 13]> <dbl [506]> linearRegModel2 <fun>
# Fit every model/data combination with invoke_map().
# (takes a few seconds)
data_with_model_fits <- mutate(
  train_df,
  # Bundle each row's predictors and outcome into the named argument list
  # (X, Y) that the model functions take.
  params = map2(
    train.X,
    train.Y,
    function(predictors, outcome) list(X = predictors, Y = outcome)
  ),
  modelFits = invoke_map(model, params)
)
您可能会发现 progress 包很有用。下面我把它整合进了您问题中的代码。请注意两点:
1. 在开始拟合模型之前,先用 progress::progress_bar$new(total = number_of_ticks) 初始化进度条。
2. 在 linearRegModel() 函数中,每个模型拟合完成后调用 pb$tick(),让进度条前进一格("tick")。
pb 是一个 R6 对象,具有引用语义;函数运行时会在其外层环境中找到它,因此您不必把它作为参数传给 linearRegModel() 函数。
希望对您有所帮助。
library(tidyverse)
library(mlbench)
library(caret)
data("BostonHousing") # from mlbench
library(progress)
# Three copies of the Boston housing data, one per row, each split into a
# predictor data frame (train.X) and an outcome vector (train.Y).
starter_df <-
  rep(list(BostonHousing), times = 3) %>%
  enframe(name = "id", value = "rawdata") %>%
  transmute(
    id,
    train.X = map(rawdata, function(housing) select(housing, -medv)),
    train.Y = map(rawdata, function(housing) housing$medv)
  )
# Re-write any caret training method as a function.
# Using linear regression here for simplicity.
#
# Fits the model with train() and then advances the progress bar `pb` by one
# tick.
#
# X: predictors, forwarded to train() as `x`.
# Y: outcome, forwarded to train() as `y`.
#
# Returns the object produced by train(). `pb` is not an argument: it is
# looked up when the function runs, so it must exist before the models are
# trained.
linearRegModel <- function(X, Y) {
  ctrl <- trainControl(
    method = "repeatedcv",
    number = 2
  )
  # Keep the fit in a variable: a function returns the value of its last
  # expression, and that has to be the model rather than the result of
  # pb$tick().
  fit <- train(
    x = X,
    y = Y,
    method = "lm",
    trControl = ctrl,
    preProc = c("center", "scale")
  )
  # Tick the progress bar forward 1 tick after each completed model fit
  pb$tick()
  fit
}
# Turn the named list of model functions into a two-column tibble:
# modelName (the list names) and model (the functions themselves).
model_list <- enframe(
  list(
    linearRegModel = linearRegModel,
    linearRegModel2 = linearRegModel
  ),
  name = "modelName",
  value = "model"
)
# Combine the model tibble with the data tibble: the rows of starter_df are
# repeated once per model and the rows of model_list once per data set, then
# the two are bound column-wise.
# seq_len() is used instead of 1:nrow() so that an empty tibble yields an
# empty index rather than c(1, 0).
train_df <-
  starter_df[rep(seq_len(nrow(starter_df)), nrow(model_list)), ] %>%
  bind_cols(
    model_list[rep(seq_len(nrow(model_list)), nrow(starter_df)), ] %>%
      arrange(modelName)
  ) %>%
  # Renumber the rows so every model/data combination has its own id.
  mutate(id = row_number())
train_df
# Create the progress bar with one tick per row of train_df.
ticks <- nrow(train_df)
pb <- progress::progress_bar$new(total = ticks)
# Fit every model/data combination with invoke_map().
# (takes a few seconds)
data_with_model_fits <- mutate(
  train_df,
  # Bundle each row's predictors and outcome into the named argument list
  # (X, Y) that the model functions take.
  params = map2(
    train.X,
    train.Y,
    function(predictors, outcome) list(X = predictors, Y = outcome)
  ),
  modelFits = invoke_map(model, params)
)
为了增加灵活性,您可以在创建进度条时通过 format 参数使用 token。其中一些是内置的,例如 :current,它会显示当前的迭代次数。这可能更直接地回答了您的问题。在这种情况下,我会在模型运行之前调用 pb$tick()。文档还建议在长时间运行的计算开始之前调用 pb$tick(0),以便立即显示进度条。
# Create a progress bar whose text shows the built-in :current token.
pb <- progress::progress_bar$new(
  format = "running model :current",
  show_after = 0.01
)
# Zero-length tick, issued before the long-running computation starts.
pb$tick(0)