在 R 中将 TXT 批量编辑为 CSV
Batch Editing TXT to CSV in R
我有一系列 .txt 文件,如下所示:
Button,Intensity,Acc,Intensity,RT,Time
0,30,0,0,0,77987.931
1,30,1,13.5,0,78084.57
1,30,1,15,0,78098.624
1,30,1,6,0,78114.132
1,30,1,15,0,78120.669
它们的文件名类似于 1531_Day49.txt、1531_Day50.txt、1532_Day49.txt、1532_Day50.txt 等
我想把这个目录中的所有文件加载到一个数据框中,并追加三列:一列是当前行与上一行的 Time 之差 (tdelta);一列是文件名的前 4 位数字(即 1531、1532),作为 ID;还有一列是解码后的日期代码,列名为 PrePost——如果文件名含 Day49,则该文件的每一行为 "Pre",如果文件名含 Day50,则为 "Post"。
1531 第 49 天文件的理想输出为:
Button,Intensity,Acc,Intensity,RT,Time,Tdelta,ID,PrePost
0,30,0,0,0,77987.931,0,1531,Pre
1,30,1,13.5,0,78084.57,96.639,1531,Pre
1,30,1,15,0,78098.624,14.054,1531,Pre
到目前为止我有:
# Load data.table for fread().
library(data.table)

# Read every .txt file in the data directory into a list of data tables.
# NOTE(review): setwd() is kept because the relative file names below depend
# on it; passing the directory to list.files() would avoid the global change.
setwd("~/Documents/PVTPASAT/PVT")
# list.files() takes a regular expression, not a shell glob.
temp <- list.files(pattern = "\\.txt$")
# The file names live in `temp`, so that is what gets read.
list.DFs <- lapply(temp, fread)

# View the list to check it visually.
View(list.DFs)

# Add the time difference to the previous row, computed separately for each
# table; the first row has no predecessor and gets 0.
list.DFs <- lapply(list.DFs, function(d) {
  cbind(d, tDelta = c(0, diff(d$Time))[seq_len(nrow(d))])
})

# Add empty placeholder columns for ID and PrePost.
list.DFs <- lapply(list.DFs, cbind, ID = "")
list.DFs <- lapply(list.DFs, cbind, PrePost = "")

# View one table to check it visually; [[ ]] extracts the table itself rather
# than a one-element list.
View(list.DFs[[3]])
我会创建一个函数来进行处理,然后像这样将其应用于您的文件列表:
# Build a small example table with the same layout as the real files.
# read.csv(text = ...) manages its own connection, so nothing is left open;
# check.names = FALSE keeps the duplicated "Intensity" header unchanged.
example <- read.csv(
  text = "Button,Intensity,Acc,Intensity,RT,Time
0,30,0,0,0,77987.931
1,30,1,13.5,0,78084.57
1,30,1,15,0,78098.624
1,30,1,6,0,78114.132
1,30,1,15,0,78120.669",
  header = TRUE,
  check.names = FALSE
)

# Write it out comma-separated and unquoted, like the real input files.
write.table(
  example, "1531_Day49.txt",
  sep = ",", quote = FALSE, row.names = FALSE
)

# list.files() expects a regular expression, so anchor on the ".txt" extension.
temp <- list.files(pattern = "\\.txt$")
#' Read one session file and add the derived columns.
#'
#' @param x Path to a single file named like "1531_Day49.txt".
#' @return The table read by data.table::fread() with three extra columns:
#'   `tdelta` (difference between each `Time` value and the previous one, 0
#'   for the first row), `ID` (first four characters of the file name) and
#'   `PrePost` ("Pre" when the file name contains "49.", otherwise "Post").
process_txt <- function(x) {
  dat <- data.table::fread(x, header = TRUE)
  n_rows <- nrow(dat)
  # Work on the file name only, so a directory prefix cannot leak into ID or
  # be mistaken for the day code.
  file_name <- basename(x)
  # New columns are sized to the row count instead of relying on recycling.
  dat$tdelta <- c(0, diff(dat$Time))[seq_len(n_rows)]
  dat$ID <- rep(substr(file_name, 1, 4), n_rows)
  # The dot needs a doubled backslash: "\." alone is not a valid escape in an
  # R string literal.
  dat$PrePost <- rep(if (grepl("49\\.", file_name)) "Pre" else "Post", n_rows)
  dat
}
# Run process_txt() on every listed file; `out` holds one table per file.
out <- lapply(temp, function(path) process_txt(path))
@Heather,主要的建议是:先把单个文件处理正确,然后把所有能正常运行的代码放进一个函数里。
library(dplyr) ## for lag function
library(stringr) ## for str_detect
# Create two fixture files that share the same five sample rows.
# write.csv() is left at its defaults, so row names are written as an extra
# leading column in each file.
dt <- read.csv(
  text = paste(
    c(
      "Button,Intensity,Acc,Intensity,RT,Time",
      "0,30,0,0,0,77987.931",
      "1,30,1,13.5,0,78084.57",
      "1,30,1,15,0,78098.624",
      "1,30,1,6,0,78114.132",
      "1,30,1,15,0,78120.669",
      ""
    ),
    collapse = "\n"
  )
)
invisible(lapply(
  c("1531_Day49.txt", "1532_Day50.txt"),
  function(path) write.csv(dt, path)
))
#' Read one session file and append the Tdelta, ID and PrePost columns.
#'
#' @param file Path to a single file named like "1531_Day49.txt".
#' @param first_delta Value stored in `Tdelta` for the first row, which has no
#'   previous row to subtract from. Defaults to NA; pass 0 to start at 0.
#' @return The table read by data.table::fread() with `Tdelta` (difference
#'   between each `Time` value and the previous one), `ID` (file-name prefix
#'   up to the first underscore) and `PrePost` ("Pre" when the file name
#'   contains "Day49", otherwise "Post") appended.
doOne <- function(file, first_delta = NA_real_) {
  # read; fread() is namespace-qualified so the function does not depend on
  # which packages happen to be attached
  contents <- data.table::fread(file)
  if (!"Time" %in% names(contents)) {
    stop("No 'Time' column found in ", file, call. = FALSE)
  }
  n_rows <- nrow(contents)
  # compute delta with base diff(); an unqualified lag() resolves to
  # stats::lag() when dplyr is not attached, which yields all-zero differences
  contents$Tdelta <- c(first_delta, diff(contents$Time))[seq_len(n_rows)]
  # use the file name only, so directories in the path cannot leak into the
  # ID or be mistaken for the day code
  file_name <- basename(file)
  # prefix up to underscore
  contents$ID <- rep(strsplit(file_name, "_", fixed = TRUE)[[1]][[1]], n_rows)
  # add the PrePost label; a plain if/else is enough for a single file name
  label <- if (grepl("Day49", file_name, fixed = TRUE)) "Pre" else "Post"
  contents$PrePost <- rep(label, n_rows)
  contents
}
# The two fixture files to process.
files <- c("1531_Day49.txt", "1532_Day50.txt")
# Apply doOne() to each file name; the result is a list with one data frame
# per file.
lapply(X = files, FUN = doOne)
# Stack the per-file data frames into a single one for analysis.
do.call(what = rbind, args = lapply(X = files, FUN = doOne))
# V1 Button Intensity Acc Intensity.1 RT Time Tdelta ID PrePost
# 1: 1 0 30 0 0.0 0 77987.93 NA 1531 Pre
# 2: 2 1 30 1 13.5 0 78084.57 96.639 1531 Pre
# 3: 3 1 30 1 15.0 0 78098.62 14.054 1531 Pre
# 4: 4 1 30 1 6.0 0 78114.13 15.508 1531 Pre
# 5: 5 1 30 1 15.0 0 78120.67 6.537 1531 Pre
# 6: 1 0 30 0 0.0 0 77987.93 NA 1532 Post
# 7: 2 1 30 1 13.5 0 78084.57 96.639 1532 Post
# 8: 3 1 30 1 15.0 0 78098.62 14.054 1532 Post
# 9: 4 1 30 1 6.0 0 78114.13 15.508 1532 Post
# 10: 5 1 30 1 15.0 0 78120.67 6.537 1532 Post
我有一系列 .txt 文件,如下所示:
Button,Intensity,Acc,Intensity,RT,Time
0,30,0,0,0,77987.931
1,30,1,13.5,0,78084.57
1,30,1,15,0,78098.624
1,30,1,6,0,78114.132
1,30,1,15,0,78120.669
它们的文件名类似于 1531_Day49.txt、1531_Day50.txt、1532_Day49.txt、1532_Day50.txt 等
我想把这个目录中的所有文件加载到一个数据框中,并追加三列:一列是当前行与上一行的 Time 之差 (tdelta);一列是文件名的前 4 位数字(即 1531、1532),作为 ID;还有一列是解码后的日期代码,列名为 PrePost——如果文件名含 Day49,则该文件的每一行为 "Pre",如果文件名含 Day50,则为 "Post"。
1531 第 49 天文件的理想输出为:
Button,Intensity,Acc,Intensity,RT,Time,Tdelta,ID,PrePost
0,30,0,0,0,77987.931,0,1531,Pre
1,30,1,13.5,0,78084.57,96.639,1531,Pre
1,30,1,15,0,78098.624,14.054,1531,Pre
到目前为止我有:
# Load data.table for fread().
library(data.table)

# Read every .txt file in the data directory into a list of data tables.
# NOTE(review): setwd() is kept because the relative file names below depend
# on it; passing the directory to list.files() would avoid the global change.
setwd("~/Documents/PVTPASAT/PVT")
# list.files() takes a regular expression, not a shell glob.
temp <- list.files(pattern = "\\.txt$")
# The file names live in `temp`, so that is what gets read.
list.DFs <- lapply(temp, fread)

# View the list to check it visually.
View(list.DFs)

# Add the time difference to the previous row, computed separately for each
# table; the first row has no predecessor and gets 0.
list.DFs <- lapply(list.DFs, function(d) {
  cbind(d, tDelta = c(0, diff(d$Time))[seq_len(nrow(d))])
})

# Add empty placeholder columns for ID and PrePost.
list.DFs <- lapply(list.DFs, cbind, ID = "")
list.DFs <- lapply(list.DFs, cbind, PrePost = "")

# View one table to check it visually; [[ ]] extracts the table itself rather
# than a one-element list.
View(list.DFs[[3]])
我会创建一个函数来进行处理,然后像这样将其应用于您的文件列表:
# Build a small example table with the same layout as the real files.
# read.csv(text = ...) manages its own connection, so nothing is left open;
# check.names = FALSE keeps the duplicated "Intensity" header unchanged.
example <- read.csv(
  text = "Button,Intensity,Acc,Intensity,RT,Time
0,30,0,0,0,77987.931
1,30,1,13.5,0,78084.57
1,30,1,15,0,78098.624
1,30,1,6,0,78114.132
1,30,1,15,0,78120.669",
  header = TRUE,
  check.names = FALSE
)

# Write it out comma-separated and unquoted, like the real input files.
write.table(
  example, "1531_Day49.txt",
  sep = ",", quote = FALSE, row.names = FALSE
)

# list.files() expects a regular expression, so anchor on the ".txt" extension.
temp <- list.files(pattern = "\\.txt$")
#' Read one session file and add the derived columns.
#'
#' @param x Path to a single file named like "1531_Day49.txt".
#' @return The table read by data.table::fread() with three extra columns:
#'   `tdelta` (difference between each `Time` value and the previous one, 0
#'   for the first row), `ID` (first four characters of the file name) and
#'   `PrePost` ("Pre" when the file name contains "49.", otherwise "Post").
process_txt <- function(x) {
  dat <- data.table::fread(x, header = TRUE)
  n_rows <- nrow(dat)
  # Work on the file name only, so a directory prefix cannot leak into ID or
  # be mistaken for the day code.
  file_name <- basename(x)
  # New columns are sized to the row count instead of relying on recycling.
  dat$tdelta <- c(0, diff(dat$Time))[seq_len(n_rows)]
  dat$ID <- rep(substr(file_name, 1, 4), n_rows)
  # The dot needs a doubled backslash: "\." alone is not a valid escape in an
  # R string literal.
  dat$PrePost <- rep(if (grepl("49\\.", file_name)) "Pre" else "Post", n_rows)
  dat
}
# Run process_txt() on every listed file; `out` holds one table per file.
out <- lapply(temp, function(path) process_txt(path))
@Heather,主要的建议是:先把单个文件处理正确,然后把所有能正常运行的代码放进一个函数里。
library(dplyr) ## for lag function
library(stringr) ## for str_detect
# Create two fixture files that share the same five sample rows.
# write.csv() is left at its defaults, so row names are written as an extra
# leading column in each file.
dt <- read.csv(
  text = paste(
    c(
      "Button,Intensity,Acc,Intensity,RT,Time",
      "0,30,0,0,0,77987.931",
      "1,30,1,13.5,0,78084.57",
      "1,30,1,15,0,78098.624",
      "1,30,1,6,0,78114.132",
      "1,30,1,15,0,78120.669",
      ""
    ),
    collapse = "\n"
  )
)
invisible(lapply(
  c("1531_Day49.txt", "1532_Day50.txt"),
  function(path) write.csv(dt, path)
))
#' Read one session file and append the Tdelta, ID and PrePost columns.
#'
#' @param file Path to a single file named like "1531_Day49.txt".
#' @param first_delta Value stored in `Tdelta` for the first row, which has no
#'   previous row to subtract from. Defaults to NA; pass 0 to start at 0.
#' @return The table read by data.table::fread() with `Tdelta` (difference
#'   between each `Time` value and the previous one), `ID` (file-name prefix
#'   up to the first underscore) and `PrePost` ("Pre" when the file name
#'   contains "Day49", otherwise "Post") appended.
doOne <- function(file, first_delta = NA_real_) {
  # read; fread() is namespace-qualified so the function does not depend on
  # which packages happen to be attached
  contents <- data.table::fread(file)
  if (!"Time" %in% names(contents)) {
    stop("No 'Time' column found in ", file, call. = FALSE)
  }
  n_rows <- nrow(contents)
  # compute delta with base diff(); an unqualified lag() resolves to
  # stats::lag() when dplyr is not attached, which yields all-zero differences
  contents$Tdelta <- c(first_delta, diff(contents$Time))[seq_len(n_rows)]
  # use the file name only, so directories in the path cannot leak into the
  # ID or be mistaken for the day code
  file_name <- basename(file)
  # prefix up to underscore
  contents$ID <- rep(strsplit(file_name, "_", fixed = TRUE)[[1]][[1]], n_rows)
  # add the PrePost label; a plain if/else is enough for a single file name
  label <- if (grepl("Day49", file_name, fixed = TRUE)) "Pre" else "Post"
  contents$PrePost <- rep(label, n_rows)
  contents
}
# The two fixture files to process.
files <- c("1531_Day49.txt", "1532_Day50.txt")
# Apply doOne() to each file name; the result is a list with one data frame
# per file.
lapply(X = files, FUN = doOne)
# Stack the per-file data frames into a single one for analysis.
do.call(what = rbind, args = lapply(X = files, FUN = doOne))
# V1 Button Intensity Acc Intensity.1 RT Time Tdelta ID PrePost
# 1: 1 0 30 0 0.0 0 77987.93 NA 1531 Pre
# 2: 2 1 30 1 13.5 0 78084.57 96.639 1531 Pre
# 3: 3 1 30 1 15.0 0 78098.62 14.054 1531 Pre
# 4: 4 1 30 1 6.0 0 78114.13 15.508 1531 Pre
# 5: 5 1 30 1 15.0 0 78120.67 6.537 1531 Pre
# 6: 1 0 30 0 0.0 0 77987.93 NA 1532 Post
# 7: 2 1 30 1 13.5 0 78084.57 96.639 1532 Post
# 8: 3 1 30 1 15.0 0 78098.62 14.054 1532 Post
# 9: 4 1 30 1 6.0 0 78114.13 15.508 1532 Post
# 10: 5 1 30 1 15.0 0 78120.67 6.537 1532 Post