在 R 中实现类似 Stata 的多文件循环命令
Looping command for multiple files similar to Stata in R
我正在使用 Stata 和 R 处理多个文件(每个文件对应于 2008 年至 2020 年的每一年)。出于问题目的,3 个示例 Stata 数据文件被命名为:“file_2008.dta”,“file_2009.dta”和“file_2010.dta”。我想重命名和标记每年对应的变量。然后,我想使用循环合并这些文件。我在 Stata 中尝试使用以下代码。我想了解如何在 R 中使用循环重现类似的输出。
Stata 中的数据示例
file_2008.dta
* Example data for 2008: ids 1-3 with x1, x2, y1, y2 stored as bytes
clear
input byte(id x1 x2 y1 y2)
1 1 1 1 1
2 2 2 2 2
3 3 3 3 3
end
save "C:/Users/sai/Desktop/file_2008.dta"
* Example data for 2009 (same values as 2008)
clear
input byte(id x1 x2 y1 y2)
1 1 1 1 1
2 2 2 2 2
3 3 3 3 3
end
save "C:/Users/sai/Desktop/file_2009.dta"
* Example data for 2010 (same values as 2008)
clear
input byte(id x1 x2 y1 y2)
1 1 1 1 1
2 2 2 2 2
3 3 3 3 3
end
save "C:/Users/sai/Desktop/file_2010.dta"
期望的输出
* Desired merged result: one row per id, each variable suffixed with the
* two-digit year, y-variables listed ahead of x-variables within each year
clear
input byte(id y1_08 y2_08 x1_08 x2_08 y1_09 y2_09 x1_09 x2_09 y1_10 y2_10 x1_10 x2_10)
1 1 1 1 1 1 1 1 1 1 1 1 1
2 2 2 2 2 2 2 2 2 2 2 2 2
3 3 3 3 3 3 3 3 3 3 3 3 3
end
save "C:/Users/sai/Desktop/file_2008_2010.dta"
在 Stata 中,我使用了以下代码来获得所需的输出。
* Folder holding the yearly files; the trailing slash is relied on below
global workdir "C:/Users/sai/Desktop/"

* Step 1: in every yearly file, suffix x1 x2 y1 y2 with the two-digit year,
* label each with the full year, move the y-variables ahead of x1, and
* overwrite the file on disk
foreach yy in "08" "09" "10" {
    use "${workdir}file_20`yy'.dta", clear
    foreach v in x1 x2 y1 y2 {
        rename `v' `v'_`yy'
        label variable `v'_`yy' "year 20`yy'"
    }
    order y*, before (x1*)
    save "${workdir}file_20`yy'.dta", replace
}

* Step 2: start from 2008 and merge the later years onto it one-to-one by id
use "${workdir}file_2008.dta", clear
forvalues yr = 2009/2010 {
    sort id
    merge 1:1 id using "${workdir}file_`yr'.dta", nogen
}
save "${workdir}file_2008_2010.dta", replace
对于 R,示例数据和所需的输出如下。
# Example input for 2008: ids 1-3, one column per measure
file_2008 <- tibble(
  id = c(1, 2, 3),
  x1 = c(1, 2, 3),
  x2 = c(1, 2, 3),
  y1 = c(1, 2, 3),
  y2 = c(1, 2, 3)
)
# Example input for 2009 (same values as 2008)
file_2009 <- tibble(
  id = c(1, 2, 3),
  x1 = c(1, 2, 3),
  x2 = c(1, 2, 3),
  y1 = c(1, 2, 3),
  y2 = c(1, 2, 3)
)
# Example input for 2010 (same values as 2008)
file_2010 <- tibble(
  id = c(1, 2, 3),
  x1 = c(1, 2, 3),
  x2 = c(1, 2, 3),
  y1 = c(1, 2, 3),
  y2 = c(1, 2, 3)
)
# Desired output: one row per id, one column per variable-and-year pair,
# with the y-variables ahead of the x-variables within each year
file_2008_2010 <- tibble(
  id = c(1, 2, 3),
  y1_08 = c(1, 2, 3),
  y2_08 = c(1, 2, 3),
  x1_08 = c(1, 2, 3),
  x2_08 = c(1, 2, 3),
  y1_09 = c(1, 2, 3),
  y2_09 = c(1, 2, 3),
  x1_09 = c(1, 2, 3),
  x2_09 = c(1, 2, 3),
  y1_10 = c(1, 2, 3),
  y2_10 = c(1, 2, 3),
  x1_10 = c(1, 2, 3),
  x2_10 = c(1, 2, 3)
)
假设您的 dta 文件位于 R 工作目录下的子文件夹 "./dta_folder" 中;下面使用 tidyverse 进行数据整理,使用 haven 导入 dta 文件:
library(tidyverse)
library(haven)
# Load one Stata .dta file and tag every row with the two-digit year taken
# from the end of the file name (e.g. "file_2008.dta" -> "08").
#
# path: path to a .dta file whose name ends in two digits followed by ".dta".
# Returns the data frame read from `path` with an extra character column
# `year_label` holding the extracted year suffix.
fx_load_lable_dta <- function(path) {
  # Backslashes must be doubled inside an R string literal; a bare "\d" is an
  # unrecognized escape and stops the script from parsing.
  # The lookahead drops everything before the final two digits.
  str_yearlabel <- sub("^.*(?=\\d\\d[.]dta$)", "", path, perl = TRUE)
  # Then strip the extension, leaving only the two digits.
  str_yearlabel <- sub("[.]dta$", "", str_yearlabel)
  df <- haven::read_dta(path)
  df$year_label <- str_yearlabel
  df
}
# List the Stata files in the folder. The pattern keeps stray non-.dta files
# (notes, scripts, OS metadata) from being handed to read_dta(), which would
# otherwise abort the whole run.
list.files(path = "./dta_folder/", pattern = "[.]dta$", full.names = TRUE) %>%
  # Read each file with fx_load_lable_dta() and row-bind the results
  map_df(.f = fx_load_lable_dta) %>%
  # Reshape to long form: one row per id, year and variable
  pivot_longer(cols = x1:y2) %>%
  # Spread back to wide form: one row per id, columns named <variable>_<year>
  pivot_wider(
    id_cols = id,
    names_from = c(name, year_label),
    values_from = c(value)
  )
# A tibble: 3 x 13
id x1_08 x2_08 y1_08 y2_08 x1_09 x2_09 y1_09 y2_09 x1_10 x2_10 y1_10 y2_10
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 1 1 1 1 1 1 1 1 1 1 1
2 2 2 2 2 2 2 2 2 2 2 2 2 2
3 3 3 3 3 3 3 3 3 3 3 3 3 3
我正在使用 Stata 和 R 处理多个文件(每个文件对应于 2008 年至 2020 年的每一年)。出于问题目的,3 个示例 Stata 数据文件被命名为:“file_2008.dta”,“file_2009.dta”和“file_2010.dta”。我想重命名和标记每年对应的变量。然后,我想使用循环合并这些文件。我在 Stata 中尝试使用以下代码。我想了解如何在 R 中使用循环重现类似的输出。
Stata 中的数据示例 file_2008.dta
* Example data for 2008: ids 1-3 with x1, x2, y1, y2 stored as bytes
clear
input byte(id x1 x2 y1 y2)
1 1 1 1 1
2 2 2 2 2
3 3 3 3 3
end
save "C:/Users/sai/Desktop/file_2008.dta"
* Example data for 2009 (same values as 2008)
clear
input byte(id x1 x2 y1 y2)
1 1 1 1 1
2 2 2 2 2
3 3 3 3 3
end
save "C:/Users/sai/Desktop/file_2009.dta"
* Example data for 2010 (same values as 2008)
clear
input byte(id x1 x2 y1 y2)
1 1 1 1 1
2 2 2 2 2
3 3 3 3 3
end
save "C:/Users/sai/Desktop/file_2010.dta"
期望的输出
* Desired merged result: one row per id, each variable suffixed with the
* two-digit year, y-variables listed ahead of x-variables within each year
clear
input byte(id y1_08 y2_08 x1_08 x2_08 y1_09 y2_09 x1_09 x2_09 y1_10 y2_10 x1_10 x2_10)
1 1 1 1 1 1 1 1 1 1 1 1 1
2 2 2 2 2 2 2 2 2 2 2 2 2
3 3 3 3 3 3 3 3 3 3 3 3 3
end
save "C:/Users/sai/Desktop/file_2008_2010.dta"
在 Stata 中,我使用了以下代码来获得所需的输出。
* Folder holding the yearly files; the trailing slash is relied on below
global workdir "C:/Users/sai/Desktop/"

* Step 1: in every yearly file, suffix x1 x2 y1 y2 with the two-digit year,
* label each with the full year, move the y-variables ahead of x1, and
* overwrite the file on disk
foreach yy in "08" "09" "10" {
    use "${workdir}file_20`yy'.dta", clear
    foreach v in x1 x2 y1 y2 {
        rename `v' `v'_`yy'
        label variable `v'_`yy' "year 20`yy'"
    }
    order y*, before (x1*)
    save "${workdir}file_20`yy'.dta", replace
}

* Step 2: start from 2008 and merge the later years onto it one-to-one by id
use "${workdir}file_2008.dta", clear
forvalues yr = 2009/2010 {
    sort id
    merge 1:1 id using "${workdir}file_`yr'.dta", nogen
}
save "${workdir}file_2008_2010.dta", replace
对于 R,示例数据和所需的输出如下。
# Example input for 2008: ids 1-3, one column per measure
file_2008 <- tibble(
  id = c(1, 2, 3),
  x1 = c(1, 2, 3),
  x2 = c(1, 2, 3),
  y1 = c(1, 2, 3),
  y2 = c(1, 2, 3)
)
# Example input for 2009 (same values as 2008)
file_2009 <- tibble(
  id = c(1, 2, 3),
  x1 = c(1, 2, 3),
  x2 = c(1, 2, 3),
  y1 = c(1, 2, 3),
  y2 = c(1, 2, 3)
)
# Example input for 2010 (same values as 2008)
file_2010 <- tibble(
  id = c(1, 2, 3),
  x1 = c(1, 2, 3),
  x2 = c(1, 2, 3),
  y1 = c(1, 2, 3),
  y2 = c(1, 2, 3)
)
# Desired output: one row per id, one column per variable-and-year pair,
# with the y-variables ahead of the x-variables within each year
file_2008_2010 <- tibble(
  id = c(1, 2, 3),
  y1_08 = c(1, 2, 3),
  y2_08 = c(1, 2, 3),
  x1_08 = c(1, 2, 3),
  x2_08 = c(1, 2, 3),
  y1_09 = c(1, 2, 3),
  y2_09 = c(1, 2, 3),
  x1_09 = c(1, 2, 3),
  x2_09 = c(1, 2, 3),
  y1_10 = c(1, 2, 3),
  y2_10 = c(1, 2, 3),
  x1_10 = c(1, 2, 3),
  x2_10 = c(1, 2, 3)
)
假设您的 dta 文件位于 R 工作目录下的子文件夹 "./dta_folder" 中;下面使用 tidyverse 进行数据整理,使用 haven 导入 dta 文件:
library(tidyverse)
library(haven)
# Load one Stata .dta file and tag every row with the two-digit year taken
# from the end of the file name (e.g. "file_2008.dta" -> "08").
#
# path: path to a .dta file whose name ends in two digits followed by ".dta".
# Returns the data frame read from `path` with an extra character column
# `year_label` holding the extracted year suffix.
fx_load_lable_dta <- function(path) {
  # Backslashes must be doubled inside an R string literal; a bare "\d" is an
  # unrecognized escape and stops the script from parsing.
  # The lookahead drops everything before the final two digits.
  str_yearlabel <- sub("^.*(?=\\d\\d[.]dta$)", "", path, perl = TRUE)
  # Then strip the extension, leaving only the two digits.
  str_yearlabel <- sub("[.]dta$", "", str_yearlabel)
  df <- haven::read_dta(path)
  df$year_label <- str_yearlabel
  df
}
# List the Stata files in the folder. The pattern keeps stray non-.dta files
# (notes, scripts, OS metadata) from being handed to read_dta(), which would
# otherwise abort the whole run.
list.files(path = "./dta_folder/", pattern = "[.]dta$", full.names = TRUE) %>%
  # Read each file with fx_load_lable_dta() and row-bind the results
  map_df(.f = fx_load_lable_dta) %>%
  # Reshape to long form: one row per id, year and variable
  pivot_longer(cols = x1:y2) %>%
  # Spread back to wide form: one row per id, columns named <variable>_<year>
  pivot_wider(
    id_cols = id,
    names_from = c(name, year_label),
    values_from = c(value)
  )
# A tibble: 3 x 13
id x1_08 x2_08 y1_08 y2_08 x1_09 x2_09 y1_09 y2_09 x1_10 x2_10 y1_10 y2_10
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 1 1 1 1 1 1 1 1 1 1 1 1
2 2 2 2 2 2 2 2 2 2 2 2 2 2
3 3 3 3 3 3 3 3 3 3 3 3 3 3