如何遍历列表并创建数据框
How to loop through a list and create a data frame
提前致谢。
我有一个由 list.files 得到的文件路径列表。我正在尝试读取每个文件并统一其列数,以便把所有文件合并(rbind)成一个大的数据框。
以 Team 作为该路径列表,这是我的代码(注意 Add_X_Col 是用于统一列数的函数):
# Load the first CSV listed in `Team`, using its first column as row names.
x1 <- read.csv(Team[1], row.names = 1)
# The column count decides which padding helper (if any) is applied.
y <- ncol(x1)
# Standardise the number of columns so the frames can be bound later; any
# other column count leaves x1 unchanged.
x1 <- switch(
  as.character(y),
  "37" = Add_5_Col(x1),
  "38" = Add_4_Col(x1),
  "39" = Add_3_Col(x1),
  x1
)
# Start the combined data frame from this first file.
Team_Split <- x1
# Drop the path that was just processed.
Team <- Team[-1]
第二部分 - 我必须对该目录下的所有文件重复这段代码约 275 次
# Load the CSV now at the head of `Team`, using its first column as row names.
x1 <- read.csv(Team[1], row.names = 1)
# The column count decides which padding helper (if any) is applied.
y <- ncol(x1)
# Standardise the number of columns so the frame can be bound; any other
# column count leaves x1 unchanged.
x1 <- switch(
  as.character(y),
  "37" = Add_5_Col(x1),
  "38" = Add_4_Col(x1),
  "39" = Add_3_Col(x1),
  x1
)
# Append this file's rows to the accumulated Team_Split data frame.
Team_Split <- rbind(Team_Split, x1)
# Drop the path that was just processed.
Team <- Team[-1]
我的预期输出是一个大的数据框:把该路径下的每个文件读成数据框,再把它们合并在一起。
我不熟悉循环和 apply 系列函数;我尝试过弄明白如何在这里使用它们,但没有成功!任何帮助将不胜感激。
根据要求进行编辑。
#' Pad a 39-column data frame to 42 columns.
#'
#' Renames an existing "X60" column to "X60.1", appends zero-filled columns
#' "X60", "X105" and "X110", then reorders so the new columns land at
#' positions 13, 26 and 27.
#'
#' @param x A data frame with exactly 39 columns.
#' @return A data frame with 42 columns.
Add_3_Col <- function(x) {
  # The reorder below uses hard-coded positions, so any other width would
  # misplace or drop columns.
  stopifnot(is.data.frame(x), ncol(x) == 39L)
  # Work on the argument itself rather than on a global variable.
  names(x)[names(x) == "X60"] <- "X60.1"
  x$X60 <- 0
  x$X105 <- 0
  x$X110 <- 0
  # Columns 40:42 are the three just appended; slot them into place.
  x[c(1:12, 40, 13:24, 41, 42, 25:39)]
}
#' Pad a 37-column data frame to 42 columns.
#'
#' Renames existing "X55" and "X60" columns to "X55.1" and "X60.1", appends
#' zero-filled columns "X55", "X60", "X100", "X105" and "X110", then reorders
#' so the new columns land at positions 12, 13, 25, 26 and 27.
#'
#' @param x A data frame with exactly 37 columns.
#' @return A data frame with 42 columns.
Add_5_Col <- function(x) {
  # The reorder below uses hard-coded positions, so any other width would
  # misplace or drop columns.
  stopifnot(is.data.frame(x), ncol(x) == 37L)
  # Work on the argument itself rather than on a global variable.
  names(x)[names(x) == "X55"] <- "X55.1"
  names(x)[names(x) == "X60"] <- "X60.1"
  x$X55 <- 0
  x$X60 <- 0
  x$X100 <- 0
  x$X105 <- 0
  x$X110 <- 0
  # Columns 38:42 are the five just appended; slot them into place.
  x[c(1:11, 38, 39, 12:22, 40, 41, 42, 23:37)]
}
#' Pad a 38-column data frame to 42 columns.
#'
#' Renames existing "X55" and "X60" columns to "X55.1" and "X60.1", appends
#' zero-filled columns "X55", "X60", "X105" and "X110", then reorders so the
#' new columns land at positions 12, 13, 26 and 27.
#'
#' @param x A data frame with exactly 38 columns.
#' @return A data frame with 42 columns.
Add_4_Col <- function(x) {
  # The reorder below uses hard-coded positions, so any other width would
  # misplace or drop columns.
  stopifnot(is.data.frame(x), ncol(x) == 38L)
  # Work on the argument itself rather than on a global variable.
  names(x)[names(x) == "X55"] <- "X55.1"
  names(x)[names(x) == "X60"] <- "X60.1"
  x$X55 <- 0
  x$X60 <- 0
  x$X105 <- 0
  x$X110 <- 0
  # Columns 39:42 are the four just appended; slot them into place.
  x[c(1:11, 39, 40, 12:23, 41, 42, 24:38)]
}
My intended output would be a large dataframe, reading each file in the file path as a dataframe and binding them together.
取决于文件在目录中的位置:
library(readr)
library(dplyr)
# List every CSV file in data/ with its path. The dot is written "\\." because
# "\." is not a valid escape sequence inside an R string literal.
files <- dir("data/", pattern = "\\.csv$", full.names = TRUE)
# Read each file into a list element named after its path.
df2_list <- lapply(setNames(files, files), read_csv)
# Stack the data frames; columns missing from a file are filled with NA.
df2 <- bind_rows(df2_list)
然后是这样的?
library(dplyr)
# Reduce every column to its non-NA values. summarise_all() with the bare
# function replaces the deprecated summarise_each(funs(...)) spelling.
df2 %>% summarise_all(na.omit)
此解决方案合并了 3 个列数各不相同的 .csv 文件,它们都位于我机器上工作目录下的 data 文件夹中。
`df2 %>% summarise_each(funs(na.omit(.)))` 用于处理合并后产生的 NA。
我用一批包含模拟数据的 CSV 文件测试过,下面的代码可以正常运行。
# Read every file in the Teams folder, standardise its column count, and stack
# all of them into Team_Split.
# Backslashes in a Windows path must be doubled inside an R string literal.
Team <- list.files("c:\\Test\\Teams\\", full.names = TRUE)
print(Team)

# Collect the per-file data frames in a preallocated list and bind once at the
# end instead of growing Team_Split with rbind() on every iteration.
team_frames <- vector("list", length(Team))
for (i in seq_along(Team)) {
  Team_File <- Team[[i]]
  # One variable (x1) is used for the current file throughout, so the padded
  # data frame is the one that gets stored.
  x1 <- read.csv(Team_File) # Reads the csv at the current file path
  y <- ncol(x1) # Number of columns in this file
  # Standardise the number of columns so the files can be bound
  if (y == 37L) {
    x1 <- Add_5_Col(x1)
  } else if (y == 38L) {
    x1 <- Add_4_Col(x1)
  } else if (y == 39L) {
    x1 <- Add_3_Col(x1)
  }
  print(x1)
  team_frames[[i]] <- x1
}

# With no input files, fall back to an empty data frame.
Team_Split <- if (length(team_frames) > 0L) {
  do.call(rbind, team_frames)
} else {
  data.frame()
}
print(Team_Split)
提前致谢。
我有一个由 list.files 得到的文件路径列表。我正在尝试读取每个文件并统一其列数,以便把所有文件合并(rbind)成一个大的数据框。
以 Team 作为该路径列表,这是我的代码(注意 Add_X_Col 是用于统一列数的函数):
# Load the first CSV listed in `Team`, using its first column as row names.
x1 <- read.csv(Team[1], row.names = 1)
# The column count decides which padding helper (if any) is applied.
y <- ncol(x1)
# Standardise the number of columns so the frames can be bound later; any
# other column count leaves x1 unchanged.
x1 <- switch(
  as.character(y),
  "37" = Add_5_Col(x1),
  "38" = Add_4_Col(x1),
  "39" = Add_3_Col(x1),
  x1
)
# Start the combined data frame from this first file.
Team_Split <- x1
# Drop the path that was just processed.
Team <- Team[-1]
第二部分 - 我必须对该目录下的所有文件重复这段代码约 275 次
# Load the CSV now at the head of `Team`, using its first column as row names.
x1 <- read.csv(Team[1], row.names = 1)
# The column count decides which padding helper (if any) is applied.
y <- ncol(x1)
# Standardise the number of columns so the frame can be bound; any other
# column count leaves x1 unchanged.
x1 <- switch(
  as.character(y),
  "37" = Add_5_Col(x1),
  "38" = Add_4_Col(x1),
  "39" = Add_3_Col(x1),
  x1
)
# Append this file's rows to the accumulated Team_Split data frame.
Team_Split <- rbind(Team_Split, x1)
# Drop the path that was just processed.
Team <- Team[-1]
我的预期输出是一个大的数据框:把该路径下的每个文件读成数据框,再把它们合并在一起。
我不熟悉循环和 apply 系列函数;我尝试过弄明白如何在这里使用它们,但没有成功!任何帮助将不胜感激。
根据要求进行编辑。
#' Pad a 39-column data frame to 42 columns.
#'
#' Renames an existing "X60" column to "X60.1", appends zero-filled columns
#' "X60", "X105" and "X110", then reorders so the new columns land at
#' positions 13, 26 and 27.
#'
#' @param x A data frame with exactly 39 columns.
#' @return A data frame with 42 columns.
Add_3_Col <- function(x) {
  # The reorder below uses hard-coded positions, so any other width would
  # misplace or drop columns.
  stopifnot(is.data.frame(x), ncol(x) == 39L)
  # Work on the argument itself rather than on a global variable.
  names(x)[names(x) == "X60"] <- "X60.1"
  x$X60 <- 0
  x$X105 <- 0
  x$X110 <- 0
  # Columns 40:42 are the three just appended; slot them into place.
  x[c(1:12, 40, 13:24, 41, 42, 25:39)]
}
#' Pad a 37-column data frame to 42 columns.
#'
#' Renames existing "X55" and "X60" columns to "X55.1" and "X60.1", appends
#' zero-filled columns "X55", "X60", "X100", "X105" and "X110", then reorders
#' so the new columns land at positions 12, 13, 25, 26 and 27.
#'
#' @param x A data frame with exactly 37 columns.
#' @return A data frame with 42 columns.
Add_5_Col <- function(x) {
  # The reorder below uses hard-coded positions, so any other width would
  # misplace or drop columns.
  stopifnot(is.data.frame(x), ncol(x) == 37L)
  # Work on the argument itself rather than on a global variable.
  names(x)[names(x) == "X55"] <- "X55.1"
  names(x)[names(x) == "X60"] <- "X60.1"
  x$X55 <- 0
  x$X60 <- 0
  x$X100 <- 0
  x$X105 <- 0
  x$X110 <- 0
  # Columns 38:42 are the five just appended; slot them into place.
  x[c(1:11, 38, 39, 12:22, 40, 41, 42, 23:37)]
}
#' Pad a 38-column data frame to 42 columns.
#'
#' Renames existing "X55" and "X60" columns to "X55.1" and "X60.1", appends
#' zero-filled columns "X55", "X60", "X105" and "X110", then reorders so the
#' new columns land at positions 12, 13, 26 and 27.
#'
#' @param x A data frame with exactly 38 columns.
#' @return A data frame with 42 columns.
Add_4_Col <- function(x) {
  # The reorder below uses hard-coded positions, so any other width would
  # misplace or drop columns.
  stopifnot(is.data.frame(x), ncol(x) == 38L)
  # Work on the argument itself rather than on a global variable.
  names(x)[names(x) == "X55"] <- "X55.1"
  names(x)[names(x) == "X60"] <- "X60.1"
  x$X55 <- 0
  x$X60 <- 0
  x$X105 <- 0
  x$X110 <- 0
  # Columns 39:42 are the four just appended; slot them into place.
  x[c(1:11, 39, 40, 12:23, 41, 42, 24:38)]
}
My intended output would be a large dataframe, reading each file in the file path as a dataframe and binding them together.
取决于文件在目录中的位置:
library(readr)
library(dplyr)
# List every CSV file in data/ with its path. The dot is written "\\." because
# "\." is not a valid escape sequence inside an R string literal.
files <- dir("data/", pattern = "\\.csv$", full.names = TRUE)
# Read each file into a list element named after its path.
df2_list <- lapply(setNames(files, files), read_csv)
# Stack the data frames; columns missing from a file are filled with NA.
df2 <- bind_rows(df2_list)
然后是这样的?
library(dplyr)
# Reduce every column to its non-NA values. summarise_all() with the bare
# function replaces the deprecated summarise_each(funs(...)) spelling.
df2 %>% summarise_all(na.omit)
此解决方案合并了 3 个列数各不相同的 .csv 文件,它们都位于我机器上工作目录下的 data 文件夹中。
`df2 %>% summarise_each(funs(na.omit(.)))` 用于处理合并后产生的 NA。
我用一批包含模拟数据的 CSV 文件测试过,下面的代码可以正常运行。
# Read every file in the Teams folder, standardise its column count, and stack
# all of them into Team_Split.
# Backslashes in a Windows path must be doubled inside an R string literal.
Team <- list.files("c:\\Test\\Teams\\", full.names = TRUE)
print(Team)

# Collect the per-file data frames in a preallocated list and bind once at the
# end instead of growing Team_Split with rbind() on every iteration.
team_frames <- vector("list", length(Team))
for (i in seq_along(Team)) {
  Team_File <- Team[[i]]
  # One variable (x1) is used for the current file throughout, so the padded
  # data frame is the one that gets stored.
  x1 <- read.csv(Team_File) # Reads the csv at the current file path
  y <- ncol(x1) # Number of columns in this file
  # Standardise the number of columns so the files can be bound
  if (y == 37L) {
    x1 <- Add_5_Col(x1)
  } else if (y == 38L) {
    x1 <- Add_4_Col(x1)
  } else if (y == 39L) {
    x1 <- Add_3_Col(x1)
  }
  print(x1)
  team_frames[[i]] <- x1
}

# With no input files, fall back to an empty data frame.
Team_Split <- if (length(team_frames) > 0L) {
  do.call(rbind, team_frames)
} else {
  data.frame()
}
print(Team_Split)