为不同的数据集迭代相同的代码
Iterating same code for different dataset
我编写了以下代码来清理名为 db17 的初始数据集:
# Clean the raw db17 table and derive db17_final.
# Relies on row_to_names(), filter(), relocate() and %>% being attached
# already (presumably janitor / dplyr -- confirm).

# Transpose the matrix and convert the result back to a data frame
db17_t <- as.data.frame(t(db17))
# Promote the first row to column names and drop it from the data
db17_t <- row_to_names(db17_t, 1, remove_row = TRUE)
# Name the first four columns
colnames(db17_t)[1:4] <- c("name", "code", "cluster", "sector")
# Get rid of the Commodities cluster
db17_t_nocomm <- db17_t[!(db17_t$cluster == "Commodities"), ]
rm(db17_t)
# Keep only rows whose sector matches the EORA26 classification pattern.
# grepl() matches substrings, so e.g. "Transport" also matches
# "Transport Equipment".
# NOTE(review): "Electrinal" and "Recylcing" look like typos; kept verbatim
# because the pattern has to match the labels present in the data -- confirm.
db17_t_nocomm_f <- filter(db17_t_nocomm, grepl("Agriculture|Fishing|Mining and Quarrying|Food and Beverages|Textiles and Wearing Apparel|Wood and Paper|Petroleum, Chemical and Non-Metallic Mineral Products|Metal Products|Electrinal and Machinery|Transport Equipment|Other Manufacturing|Recylcing|Electricity, Gas and Water|Construction|Maintenance and Repair|Wholesale Trade|Retail Trade|Hotels and Restaurants|Transport|Post and Telecommunications|Financial Intermediation and Business Activities|Public Administration|Education, Health and Other Services|Private Households|Others|Re-export & Re-import", sector))
rm(db17_t_nocomm)
# Convert the VA values to numeric.
# lapply() always returns one vector per column, whatever the row count.
# NOTE(review): assumes the value columns sit exactly in positions 5-194.
db17_t_nocomm_f[, 5:194] <- lapply(db17_t_nocomm_f[, 5:194], as.numeric)
# Calculate total VA per row
db17_t_nocomm_f$Total <- rowSums(db17_t_nocomm_f[, 5:194], na.rm = TRUE)
db17_t_nocomm_f <- db17_t_nocomm_f %>% relocate(Total, .before = Afghanistan)
# Calculate internal VA: for every row, the value stored in the column whose
# name equals that row's `name`. seq_len() keeps the index matrix empty when
# no rows survive the filter (1:0 would produce c(1, 0) and fail).
db17_t_nocomm_f$Internal_VA <- as.numeric(
  db17_t_nocomm_f[cbind(
    seq_len(nrow(db17_t_nocomm_f)),
    match(db17_t_nocomm_f$name, names(db17_t_nocomm_f))
  )]
)
# Calculate net exported VA
db17_t_nocomm_f$Net_TotalExp <- db17_t_nocomm_f$Total - db17_t_nocomm_f$Internal_VA
db17_t_nocomm_f <- db17_t_nocomm_f %>%
  relocate(c(Internal_VA, Net_TotalExp), .before = Afghanistan)
# Finalize: keep the four identifier columns plus column 7
# (Net_TotalExp when Afghanistan is the first value column -- TODO confirm)
db17_final <- db17_t_nocomm_f[, c(1, 2, 3, 4, 7)]
rm(db17_t_nocomm_f)
现在,我需要对名为 db03 到 db17 的各个数据集应用相同的代码。
是否可以用 for 循环来实现,并以 data.frame 的名称作为索引?
谢谢
不完全是一个循环,但您可以将您的代码变成一个函数并轻松调用它多次。
#' Clean one raw dataset and return its identifier columns plus column 7.
#'
#' Transposes `df`, promotes the first transposed row to column names, drops
#' rows of the "Commodities" cluster, keeps rows whose sector matches the
#' EORA26 pattern, converts columns 5-194 to numeric and derives `Total`,
#' `Internal_VA` and `Net_TotalExp` (= `Total - Internal_VA`).
#'
#' Relies on `row_to_names()`, `filter()`, `relocate()` and `%>%` being
#' attached by the caller (presumably janitor / dplyr -- confirm).
#'
#' @param df A raw dataset such as `db17`. NOTE(review): assumed to yield at
#'   least 194 columns after transposition, including one named
#'   `Afghanistan` -- confirm against the data.
#' @return A data frame with columns 1-4 (`name`, `code`, `cluster`,
#'   `sector`) and column 7 (`Net_TotalExp` when `Afghanistan` is the first
#'   value column -- TODO confirm).
doing_stuff <- function(df) {
  # Transpose, then turn the resulting matrix back into a data frame.
  df_t <- as.data.frame(t(df))
  # Promote the first row to column names and drop it from the data.
  df_t <- row_to_names(df_t, 1, remove_row = TRUE)
  colnames(df_t)[1:4] <- c("name", "code", "cluster", "sector")

  # Get rid of the Commodities cluster.
  df_t <- df_t[!(df_t$cluster == "Commodities"), ]

  # Keep only rows matching the EORA26 sector classification. grepl() matches
  # substrings, so e.g. "Transport" also matches "Transport Equipment".
  # NOTE(review): "Electrinal" and "Recylcing" look like typos; kept verbatim
  # because the pattern has to match the labels present in the data -- confirm.
  eora26_pattern <- paste(
    c(
      "Agriculture", "Fishing", "Mining and Quarrying", "Food and Beverages",
      "Textiles and Wearing Apparel", "Wood and Paper",
      "Petroleum, Chemical and Non-Metallic Mineral Products",
      "Metal Products", "Electrinal and Machinery", "Transport Equipment",
      "Other Manufacturing", "Recylcing", "Electricity, Gas and Water",
      "Construction", "Maintenance and Repair", "Wholesale Trade",
      "Retail Trade", "Hotels and Restaurants", "Transport",
      "Post and Telecommunications",
      "Financial Intermediation and Business Activities",
      "Public Administration", "Education, Health and Other Services",
      "Private Households", "Others", "Re-export & Re-import"
    ),
    collapse = "|"
  )
  df_t <- filter(df_t, grepl(eora26_pattern, sector))

  # Convert the VA values to numeric. lapply() always returns one vector per
  # column, whatever the row count.
  # NOTE(review): assumes the value columns sit exactly in positions 5-194.
  value_cols <- 5:194
  df_t[, value_cols] <- lapply(df_t[, value_cols], as.numeric)

  # Total VA per row, placed just before the Afghanistan column.
  df_t$Total <- rowSums(df_t[, value_cols], na.rm = TRUE)
  df_t <- df_t %>% relocate(Total, .before = Afghanistan)

  # Internal VA: for every row, the value stored in the column whose name
  # equals that row's `name`. seq_len() keeps the index matrix empty when no
  # rows survive the filter (1:0 would produce c(1, 0) and fail).
  own_col <- match(df_t$name, names(df_t))
  df_t$Internal_VA <- as.numeric(df_t[cbind(seq_len(nrow(df_t)), own_col)])

  # Net exported VA.
  df_t$Net_TotalExp <- df_t$Total - df_t$Internal_VA
  df_t <- df_t %>% relocate(c(Internal_VA, Net_TotalExp), .before = Afghanistan)

  # Final output: identifier columns plus column 7.
  df_t[, c(1, 2, 3, 4, 7)]
}
# Run the same cleaning routine on each dataset; the calls are independent.
db03_final <- doing_stuff(db03)
db17_final <- doing_stuff(db17)
db19_final <- doing_stuff(db19)
(因为我没有数据可查,所以我只是将每个“db17”替换为“df”。)
编辑:@MonJeanJean 的答案更巧妙;不过,取决于您的数据量,多次调用函数可能比把数据加载两次更合适。如果您的数据集较小,请忽略我的回答!
如果没有数据样本,很难知道这是否有效。然而:
创建数据框列表:
# Collect the data frames db03, db04, ..., db17 into a named list.
# mget() looks each name up and errors if any of them does not exist.
# NOTE(review): assumes one data frame per number from 3 to 17 -- confirm.
my_list <- mget(sprintf("db%02d", 3:17))
代码:
#' Clean one raw dataset and return its identifier columns plus column 7.
#'
#' Steps: transpose `data`, use the first transposed row as column names,
#' drop the "Commodities" cluster, keep rows whose sector matches the EORA26
#' pattern, convert columns 5-194 to numeric, then derive `Total`,
#' `Internal_VA` and `Net_TotalExp` (= `Total - Internal_VA`).
#'
#' Relies on `row_to_names()`, `filter()`, `relocate()` and `%>%` being
#' attached by the caller (presumably janitor / dplyr -- confirm).
#'
#' @param data A raw dataset. NOTE(review): assumed to yield at least 194
#'   columns after transposition, including one named `Afghanistan` --
#'   confirm against the data.
#' @return A data frame with columns 1-4 (`name`, `code`, `cluster`,
#'   `sector`) and column 7 (`Net_TotalExp` when `Afghanistan` is the first
#'   value column -- TODO confirm).
MyF <- function(data) {
  # Transpose and convert back to a data frame.
  wide <- as.data.frame(t(data))
  # First row becomes the column names.
  wide <- row_to_names(wide, 1, remove_row = TRUE)
  # Name the first four columns.
  colnames(wide)[1:4] <- c("name", "code", "cluster", "sector")

  # Get rid of the Commodities cluster.
  no_commodities <- wide[!(wide$cluster == "Commodities"), ]

  # Keep only rows matching the EORA26 sector classification. grepl() matches
  # substrings, so e.g. "Transport" also matches "Transport Equipment".
  # NOTE(review): "Electrinal" and "Recylcing" look like typos; kept verbatim
  # because the pattern has to match the labels present in the data -- confirm.
  sector_pattern <- paste(
    c(
      "Agriculture", "Fishing", "Mining and Quarrying", "Food and Beverages",
      "Textiles and Wearing Apparel", "Wood and Paper",
      "Petroleum, Chemical and Non-Metallic Mineral Products",
      "Metal Products", "Electrinal and Machinery", "Transport Equipment",
      "Other Manufacturing", "Recylcing", "Electricity, Gas and Water",
      "Construction", "Maintenance and Repair", "Wholesale Trade",
      "Retail Trade", "Hotels and Restaurants", "Transport",
      "Post and Telecommunications",
      "Financial Intermediation and Business Activities",
      "Public Administration", "Education, Health and Other Services",
      "Private Households", "Others", "Re-export & Re-import"
    ),
    collapse = "|"
  )
  cleaned <- filter(no_commodities, grepl(sector_pattern, sector))

  # Convert the VA values to numeric. lapply() always returns one vector per
  # column, whatever the row count.
  # NOTE(review): assumes the value columns sit exactly in positions 5-194.
  cleaned[, 5:194] <- lapply(cleaned[, 5:194], as.numeric)

  # Total VA per row, placed just before the Afghanistan column.
  cleaned$Total <- rowSums(cleaned[, 5:194], na.rm = TRUE)
  cleaned <- cleaned %>% relocate(Total, .before = Afghanistan)

  # Internal VA: the value in the column named after the row's own `name`.
  # seq_len() keeps the index matrix empty when no rows survive the filter
  # (1:0 would produce c(1, 0) and fail).
  row_idx <- seq_len(nrow(cleaned))
  col_idx <- match(cleaned$name, names(cleaned))
  cleaned$Internal_VA <- as.numeric(cleaned[cbind(row_idx, col_idx)])

  # Net exported VA.
  cleaned$Net_TotalExp <- cleaned$Total - cleaned$Internal_VA
  cleaned <- cleaned %>%
    relocate(c(Internal_VA, Net_TotalExp), .before = Afghanistan)

  # Final output: identifier columns plus column 7.
  cleaned[, c(1, 2, 3, 4, 7)]
}
# Clean every dataset in the list with MyF.
list_of_df <- lapply(my_list, MyF)

# Global counter used to number the exported data frames; starts at 3.
i <- 3

# Store `input` in the global environment under the name df_final_<i>
# (with a leading zero while i is below 10), then advance the global counter.
MyF2 <- function(input) {
  prefix <- if (i < 10) "df_final_0" else "df_final_"
  assign(paste0(prefix, i), input, envir = .GlobalEnv)
  i <<- i + 1
}

# Export each cleaned data frame in list order.
lapply(list_of_df, MyF2)
这将在您的全局环境中创建从 df_final_03 到 df_final_17 的所有数据框。
我编写了以下代码来清理名为 db17 的初始数据集:
# Clean the raw db17 table and derive db17_final.
# Relies on row_to_names(), filter(), relocate() and %>% being attached
# already (presumably janitor / dplyr -- confirm).

# Transpose the matrix and convert the result back to a data frame
db17_t <- as.data.frame(t(db17))
# Promote the first row to column names and drop it from the data
db17_t <- row_to_names(db17_t, 1, remove_row = TRUE)
# Name the first four columns
colnames(db17_t)[1:4] <- c("name", "code", "cluster", "sector")
# Get rid of the Commodities cluster
db17_t_nocomm <- db17_t[!(db17_t$cluster == "Commodities"), ]
rm(db17_t)
# Keep only rows whose sector matches the EORA26 classification pattern.
# grepl() matches substrings, so e.g. "Transport" also matches
# "Transport Equipment".
# NOTE(review): "Electrinal" and "Recylcing" look like typos; kept verbatim
# because the pattern has to match the labels present in the data -- confirm.
db17_t_nocomm_f <- filter(db17_t_nocomm, grepl("Agriculture|Fishing|Mining and Quarrying|Food and Beverages|Textiles and Wearing Apparel|Wood and Paper|Petroleum, Chemical and Non-Metallic Mineral Products|Metal Products|Electrinal and Machinery|Transport Equipment|Other Manufacturing|Recylcing|Electricity, Gas and Water|Construction|Maintenance and Repair|Wholesale Trade|Retail Trade|Hotels and Restaurants|Transport|Post and Telecommunications|Financial Intermediation and Business Activities|Public Administration|Education, Health and Other Services|Private Households|Others|Re-export & Re-import", sector))
rm(db17_t_nocomm)
# Convert the VA values to numeric.
# lapply() always returns one vector per column, whatever the row count.
# NOTE(review): assumes the value columns sit exactly in positions 5-194.
db17_t_nocomm_f[, 5:194] <- lapply(db17_t_nocomm_f[, 5:194], as.numeric)
# Calculate total VA per row
db17_t_nocomm_f$Total <- rowSums(db17_t_nocomm_f[, 5:194], na.rm = TRUE)
db17_t_nocomm_f <- db17_t_nocomm_f %>% relocate(Total, .before = Afghanistan)
# Calculate internal VA: for every row, the value stored in the column whose
# name equals that row's `name`. seq_len() keeps the index matrix empty when
# no rows survive the filter (1:0 would produce c(1, 0) and fail).
db17_t_nocomm_f$Internal_VA <- as.numeric(
  db17_t_nocomm_f[cbind(
    seq_len(nrow(db17_t_nocomm_f)),
    match(db17_t_nocomm_f$name, names(db17_t_nocomm_f))
  )]
)
# Calculate net exported VA
db17_t_nocomm_f$Net_TotalExp <- db17_t_nocomm_f$Total - db17_t_nocomm_f$Internal_VA
db17_t_nocomm_f <- db17_t_nocomm_f %>%
  relocate(c(Internal_VA, Net_TotalExp), .before = Afghanistan)
# Finalize: keep the four identifier columns plus column 7
# (Net_TotalExp when Afghanistan is the first value column -- TODO confirm)
db17_final <- db17_t_nocomm_f[, c(1, 2, 3, 4, 7)]
rm(db17_t_nocomm_f)
现在,我需要对名为 db03 到 db17 的各个数据集应用相同的代码。
是否可以用 for 循环来实现,并以 data.frame 的名称作为索引?
谢谢
不完全是一个循环,但您可以将您的代码变成一个函数并轻松调用它多次。
#' Clean one raw dataset and return its identifier columns plus column 7.
#'
#' Transposes `df`, promotes the first transposed row to column names, drops
#' rows of the "Commodities" cluster, keeps rows whose sector matches the
#' EORA26 pattern, converts columns 5-194 to numeric and derives `Total`,
#' `Internal_VA` and `Net_TotalExp` (= `Total - Internal_VA`).
#'
#' Relies on `row_to_names()`, `filter()`, `relocate()` and `%>%` being
#' attached by the caller (presumably janitor / dplyr -- confirm).
#'
#' @param df A raw dataset such as `db17`. NOTE(review): assumed to yield at
#'   least 194 columns after transposition, including one named
#'   `Afghanistan` -- confirm against the data.
#' @return A data frame with columns 1-4 (`name`, `code`, `cluster`,
#'   `sector`) and column 7 (`Net_TotalExp` when `Afghanistan` is the first
#'   value column -- TODO confirm).
doing_stuff <- function(df) {
  # Transpose, then turn the resulting matrix back into a data frame.
  df_t <- as.data.frame(t(df))
  # Promote the first row to column names and drop it from the data.
  df_t <- row_to_names(df_t, 1, remove_row = TRUE)
  colnames(df_t)[1:4] <- c("name", "code", "cluster", "sector")

  # Get rid of the Commodities cluster.
  df_t <- df_t[!(df_t$cluster == "Commodities"), ]

  # Keep only rows matching the EORA26 sector classification. grepl() matches
  # substrings, so e.g. "Transport" also matches "Transport Equipment".
  # NOTE(review): "Electrinal" and "Recylcing" look like typos; kept verbatim
  # because the pattern has to match the labels present in the data -- confirm.
  eora26_pattern <- paste(
    c(
      "Agriculture", "Fishing", "Mining and Quarrying", "Food and Beverages",
      "Textiles and Wearing Apparel", "Wood and Paper",
      "Petroleum, Chemical and Non-Metallic Mineral Products",
      "Metal Products", "Electrinal and Machinery", "Transport Equipment",
      "Other Manufacturing", "Recylcing", "Electricity, Gas and Water",
      "Construction", "Maintenance and Repair", "Wholesale Trade",
      "Retail Trade", "Hotels and Restaurants", "Transport",
      "Post and Telecommunications",
      "Financial Intermediation and Business Activities",
      "Public Administration", "Education, Health and Other Services",
      "Private Households", "Others", "Re-export & Re-import"
    ),
    collapse = "|"
  )
  df_t <- filter(df_t, grepl(eora26_pattern, sector))

  # Convert the VA values to numeric. lapply() always returns one vector per
  # column, whatever the row count.
  # NOTE(review): assumes the value columns sit exactly in positions 5-194.
  value_cols <- 5:194
  df_t[, value_cols] <- lapply(df_t[, value_cols], as.numeric)

  # Total VA per row, placed just before the Afghanistan column.
  df_t$Total <- rowSums(df_t[, value_cols], na.rm = TRUE)
  df_t <- df_t %>% relocate(Total, .before = Afghanistan)

  # Internal VA: for every row, the value stored in the column whose name
  # equals that row's `name`. seq_len() keeps the index matrix empty when no
  # rows survive the filter (1:0 would produce c(1, 0) and fail).
  own_col <- match(df_t$name, names(df_t))
  df_t$Internal_VA <- as.numeric(df_t[cbind(seq_len(nrow(df_t)), own_col)])

  # Net exported VA.
  df_t$Net_TotalExp <- df_t$Total - df_t$Internal_VA
  df_t <- df_t %>% relocate(c(Internal_VA, Net_TotalExp), .before = Afghanistan)

  # Final output: identifier columns plus column 7.
  df_t[, c(1, 2, 3, 4, 7)]
}
# Run the same cleaning routine on each dataset; the calls are independent.
db03_final <- doing_stuff(db03)
db17_final <- doing_stuff(db17)
db19_final <- doing_stuff(db19)
(因为我没有数据可查,所以我只是将每个“db17”替换为“df”。)
编辑:@MonJeanJean 的答案更巧妙;不过,取决于您的数据量,多次调用函数可能比把数据加载两次更合适。如果您的数据集较小,请忽略我的回答!
如果没有数据样本,很难知道这是否有效。然而:
创建数据框列表:
# Collect the data frames db03, db04, ..., db17 into a named list.
# mget() looks each name up and errors if any of them does not exist.
# NOTE(review): assumes one data frame per number from 3 to 17 -- confirm.
my_list <- mget(sprintf("db%02d", 3:17))
代码:
#' Clean one raw dataset and return its identifier columns plus column 7.
#'
#' Steps: transpose `data`, use the first transposed row as column names,
#' drop the "Commodities" cluster, keep rows whose sector matches the EORA26
#' pattern, convert columns 5-194 to numeric, then derive `Total`,
#' `Internal_VA` and `Net_TotalExp` (= `Total - Internal_VA`).
#'
#' Relies on `row_to_names()`, `filter()`, `relocate()` and `%>%` being
#' attached by the caller (presumably janitor / dplyr -- confirm).
#'
#' @param data A raw dataset. NOTE(review): assumed to yield at least 194
#'   columns after transposition, including one named `Afghanistan` --
#'   confirm against the data.
#' @return A data frame with columns 1-4 (`name`, `code`, `cluster`,
#'   `sector`) and column 7 (`Net_TotalExp` when `Afghanistan` is the first
#'   value column -- TODO confirm).
MyF <- function(data) {
  # Transpose and convert back to a data frame.
  wide <- as.data.frame(t(data))
  # First row becomes the column names.
  wide <- row_to_names(wide, 1, remove_row = TRUE)
  # Name the first four columns.
  colnames(wide)[1:4] <- c("name", "code", "cluster", "sector")

  # Get rid of the Commodities cluster.
  no_commodities <- wide[!(wide$cluster == "Commodities"), ]

  # Keep only rows matching the EORA26 sector classification. grepl() matches
  # substrings, so e.g. "Transport" also matches "Transport Equipment".
  # NOTE(review): "Electrinal" and "Recylcing" look like typos; kept verbatim
  # because the pattern has to match the labels present in the data -- confirm.
  sector_pattern <- paste(
    c(
      "Agriculture", "Fishing", "Mining and Quarrying", "Food and Beverages",
      "Textiles and Wearing Apparel", "Wood and Paper",
      "Petroleum, Chemical and Non-Metallic Mineral Products",
      "Metal Products", "Electrinal and Machinery", "Transport Equipment",
      "Other Manufacturing", "Recylcing", "Electricity, Gas and Water",
      "Construction", "Maintenance and Repair", "Wholesale Trade",
      "Retail Trade", "Hotels and Restaurants", "Transport",
      "Post and Telecommunications",
      "Financial Intermediation and Business Activities",
      "Public Administration", "Education, Health and Other Services",
      "Private Households", "Others", "Re-export & Re-import"
    ),
    collapse = "|"
  )
  cleaned <- filter(no_commodities, grepl(sector_pattern, sector))

  # Convert the VA values to numeric. lapply() always returns one vector per
  # column, whatever the row count.
  # NOTE(review): assumes the value columns sit exactly in positions 5-194.
  cleaned[, 5:194] <- lapply(cleaned[, 5:194], as.numeric)

  # Total VA per row, placed just before the Afghanistan column.
  cleaned$Total <- rowSums(cleaned[, 5:194], na.rm = TRUE)
  cleaned <- cleaned %>% relocate(Total, .before = Afghanistan)

  # Internal VA: the value in the column named after the row's own `name`.
  # seq_len() keeps the index matrix empty when no rows survive the filter
  # (1:0 would produce c(1, 0) and fail).
  row_idx <- seq_len(nrow(cleaned))
  col_idx <- match(cleaned$name, names(cleaned))
  cleaned$Internal_VA <- as.numeric(cleaned[cbind(row_idx, col_idx)])

  # Net exported VA.
  cleaned$Net_TotalExp <- cleaned$Total - cleaned$Internal_VA
  cleaned <- cleaned %>%
    relocate(c(Internal_VA, Net_TotalExp), .before = Afghanistan)

  # Final output: identifier columns plus column 7.
  cleaned[, c(1, 2, 3, 4, 7)]
}
# Clean every dataset in the list with MyF.
list_of_df <- lapply(my_list, MyF)

# Global counter used to number the exported data frames; starts at 3.
i <- 3

# Store `input` in the global environment under the name df_final_<i>
# (with a leading zero while i is below 10), then advance the global counter.
MyF2 <- function(input) {
  prefix <- if (i < 10) "df_final_0" else "df_final_"
  assign(paste0(prefix, i), input, envir = .GlobalEnv)
  i <<- i + 1
}

# Export each cleaned data frame in list order.
lapply(list_of_df, MyF2)
这将在您的全局环境中创建从 df_final_03 到 df_final_17 的所有数据框。