Importing/Conditioning file.txt 在 R 中具有 "kind" 的 json 结构
Importing/Conditioning a file.txt with a "kind" of json structure in R
我想在 R 中导入一个 .txt 文件,但格式非常特殊,看起来像 json 格式,但我不知道如何导入它。有一个我的数据的例子:
{"datetime":"2015-07-08 09:10:00","subject":"MMM","sscore":"-0.2280","smean":"0.2593","svscore":"-0.2795","sdispersion":"0.375","svolume":"8","sbuzz":"0.6026","lastclose":"155.430000000","companyname":"3M Company"},{"datetime":"2015-07-07 09:10:00","subject":"MMM","sscore":"0.2977","smean":"0.2713","svscore":"-0.7436","sdispersion":"0.400","svolume":"5","sbuzz":"0.4895","lastclose":"155.080000000","companyname":"3M Company"},{"datetime":"2015-07-06 09:10:00","subject":"MMM","sscore":"-1.0057","smean":"0.2579","svscore":"-1.3796","sdispersion":"1.000","svolume":"1","sbuzz":"0.4531","lastclose":"155.380000000","companyname":"3M Company"}
要处理此问题,请使用以下代码:
test1 <- read.csv("C:/Users/test1.txt", header=FALSE)
## Import as 5 observations (5th is all empty) of 1700 variables
#(in fact 40 observations of 11 variables). In fact when I imported the
#.txt file, it's having one line (5th obs) empty, and 4 lines of data and
#placed next to each other 4 lines of data of 11 variables.
# Get the different lines
part1=test1[1:10]
part2=test1[11:20]
part3=test1[21:30]
part4=test1[31:40]
...
## Remove the empty line (there were an empty line after each)
part1=part1[-5,]
part2=part2[-5,]
part3=part3[-5,]
...
## Rename the columns
names(part1)=c("Date Time","Subject","Sscore","Smean","Svscore","Sdispersion","Svolume","Sbuzz","Last close","Company name")
names(part2)=c("Date Time","Subject","Sscore","Smean","Svscore","Sdispersion","Svolume","Sbuzz","Last close","Company name")
names(part3)=c("Date Time","Subject","Sscore","Smean","Svscore","Sdispersion","Svolume","Sbuzz","Last close","Company name")
...
## Assemble data to have one dataset
data=rbind(part1,part2,part3,part4,part5,part6,part7,part8,part9,part10)
## Formate Date Time
times <- as.POSIXct(data$`Date Time`, format='{datetime:%Y-%m-%d %H:%M:%S')
data$`Date Time` <- times
## Keep only the Date
data$Date <- as.Date(times)
## Formate data - Remove text
data$Subject <- gsub("subject:", "", data$Subject)
data$Sscore <- gsub("sscore:", "", data$Sscore)
...
所以我的代码正在努力恢复数据,但这可能非常困难,而且时间更长我知道有更好的方法可以做到这一点,所以如果你能帮助我,我将不胜感激。
有很多包读作 JSON,例如rjson
、jsonlite
、RJSONIO
(他们会进行 google 搜索)- 选择一个并试一试。
例如
library(jsonlite)
json.text <- '{"datetime":"2015-07-08 09:10:00","subject":"MMM","sscore":"-0.2280","smean":"0.2593","svscore":"-0.2795","sdispersion":"0.375","svolume":"8","sbuzz":"0.6026","lastclose":"155.430000000","companyname":"3M Company"},{"datetime":"2015-07-07 09:10:00","subject":"MMM","sscore":"0.2977","smean":"0.2713","svscore":"-0.7436","sdispersion":"0.400","svolume":"5","sbuzz":"0.4895","lastclose":"155.080000000","companyname":"3M Company"},{"datetime":"2015-07-06 09:10:00","subject":"MMM","sscore":"-1.0057","smean":"0.2579","svscore":"-1.3796","sdispersion":"1.000","svolume":"1","sbuzz":"0.4531","lastclose":"155.380000000","companyname":"3M Company"}'
x <- fromJSON(paste0('[', json.text, ']'))
datetime subject sscore smean svscore sdispersion svolume sbuzz lastclose companyname
1 2015-07-08 09:10:00 MMM -0.2280 0.2593 -0.2795 0.375 8 0.6026 155.430000000 3M Company
2 2015-07-07 09:10:00 MMM 0.2977 0.2713 -0.7436 0.400 5 0.4895 155.080000000 3M Company
3 2015-07-06 09:10:00 MMM -1.0057 0.2579 -1.3796 1.000 1 0.4531 155.380000000 3M Company
我在你的 JSON 周围粘贴了“[”和“]”,因为你有多个 JSON 元素(上面数据框中的行)并且为了使其格式正确 JSON 它需要是一个数组,即 [ {...}, {...}, {...} ]
而不是 {...}, {...}, {...}
.
我想在 R 中导入一个 .txt 文件,但格式非常特殊,看起来像 json 格式,但我不知道如何导入它。有一个我的数据的例子:
{"datetime":"2015-07-08 09:10:00","subject":"MMM","sscore":"-0.2280","smean":"0.2593","svscore":"-0.2795","sdispersion":"0.375","svolume":"8","sbuzz":"0.6026","lastclose":"155.430000000","companyname":"3M Company"},{"datetime":"2015-07-07 09:10:00","subject":"MMM","sscore":"0.2977","smean":"0.2713","svscore":"-0.7436","sdispersion":"0.400","svolume":"5","sbuzz":"0.4895","lastclose":"155.080000000","companyname":"3M Company"},{"datetime":"2015-07-06 09:10:00","subject":"MMM","sscore":"-1.0057","smean":"0.2579","svscore":"-1.3796","sdispersion":"1.000","svolume":"1","sbuzz":"0.4531","lastclose":"155.380000000","companyname":"3M Company"}
要处理此问题,请使用以下代码:
test1 <- read.csv("C:/Users/test1.txt", header=FALSE)
## Import as 5 observations (5th is all empty) of 1700 variables
#(in fact 40 observations of 11 variables). In fact when I imported the
#.txt file, it's having one line (5th obs) empty, and 4 lines of data and
#placed next to each other 4 lines of data of 11 variables.
# Get the different lines
part1=test1[1:10]
part2=test1[11:20]
part3=test1[21:30]
part4=test1[31:40]
...
## Remove the empty line (there were an empty line after each)
part1=part1[-5,]
part2=part2[-5,]
part3=part3[-5,]
...
## Rename the columns
names(part1)=c("Date Time","Subject","Sscore","Smean","Svscore","Sdispersion","Svolume","Sbuzz","Last close","Company name")
names(part2)=c("Date Time","Subject","Sscore","Smean","Svscore","Sdispersion","Svolume","Sbuzz","Last close","Company name")
names(part3)=c("Date Time","Subject","Sscore","Smean","Svscore","Sdispersion","Svolume","Sbuzz","Last close","Company name")
...
## Assemble data to have one dataset
data=rbind(part1,part2,part3,part4,part5,part6,part7,part8,part9,part10)
## Formate Date Time
times <- as.POSIXct(data$`Date Time`, format='{datetime:%Y-%m-%d %H:%M:%S')
data$`Date Time` <- times
## Keep only the Date
data$Date <- as.Date(times)
## Formate data - Remove text
data$Subject <- gsub("subject:", "", data$Subject)
data$Sscore <- gsub("sscore:", "", data$Sscore)
...
所以我的代码正在努力恢复数据,但这可能非常困难,而且时间更长我知道有更好的方法可以做到这一点,所以如果你能帮助我,我将不胜感激。
有很多包读作 JSON,例如rjson
、jsonlite
、RJSONIO
(他们会进行 google 搜索)- 选择一个并试一试。
例如
library(jsonlite)
json.text <- '{"datetime":"2015-07-08 09:10:00","subject":"MMM","sscore":"-0.2280","smean":"0.2593","svscore":"-0.2795","sdispersion":"0.375","svolume":"8","sbuzz":"0.6026","lastclose":"155.430000000","companyname":"3M Company"},{"datetime":"2015-07-07 09:10:00","subject":"MMM","sscore":"0.2977","smean":"0.2713","svscore":"-0.7436","sdispersion":"0.400","svolume":"5","sbuzz":"0.4895","lastclose":"155.080000000","companyname":"3M Company"},{"datetime":"2015-07-06 09:10:00","subject":"MMM","sscore":"-1.0057","smean":"0.2579","svscore":"-1.3796","sdispersion":"1.000","svolume":"1","sbuzz":"0.4531","lastclose":"155.380000000","companyname":"3M Company"}'
x <- fromJSON(paste0('[', json.text, ']'))
datetime subject sscore smean svscore sdispersion svolume sbuzz lastclose companyname
1 2015-07-08 09:10:00 MMM -0.2280 0.2593 -0.2795 0.375 8 0.6026 155.430000000 3M Company
2 2015-07-07 09:10:00 MMM 0.2977 0.2713 -0.7436 0.400 5 0.4895 155.080000000 3M Company
3 2015-07-06 09:10:00 MMM -1.0057 0.2579 -1.3796 1.000 1 0.4531 155.380000000 3M Company
我在你的 JSON 周围粘贴了“[”和“]”,因为你有多个 JSON 元素(上面数据框中的行)并且为了使其格式正确 JSON 它需要是一个数组,即 [ {...}, {...}, {...} ]
而不是 {...}, {...}, {...}
.