对 tibble 包中子集的更改;以前工作代码的新错误
changes to subsetting within tibble package; new error for previously working code
下面的代码创建了类似于我正在使用的原始数据。我使用 tibble 包中的 add_row 函数编写了一些代码来重新格式化它。现在我收到一个错误(此代码在 2020 年 4 月之前有效)。由于包的更新,子集规则似乎变得更严格了?我想知道是否有人可以帮助纠正这个错误......
首先创建数据
# Create replicate of raw data
date <- seq(from = as.Date('1999-01-01'),
to = as.Date('2013-12-31'),
by = 'day')
temp <- rnorm(5479,15,5)
precip <- rlnorm(5479)
rawdata <- data.frame(date=date,
temp=round(temp, digits = 2),
precip=round(precip, digits = 2))
# Add columns needed to run code
rawdata$year <- as.numeric(substr(rawdata$date,1,4))
rawdata$month <- as.numeric(substr(rawdata$date, 6,7))
rawdata$chardate <- format(rawdata$date, '%Y-%h-%d') # create abbreviated month column
rawdata$charmonth <- substr(rawdata$chardate, 6,8) # for formatting
rawdata$charmonth <- as.character(rawdata$charmonth)
rawdata$day <- as.numeric(substr(rawdata$date, 9,10))
rawdata$uniqdate <- rawdata$year*100+as.numeric(rawdata$day)+rawdata$month*10
rawdata$uniqmonth <- (rawdata$year*100)+rawdata$month# create unique month identifier
rawdata$yr <- NA # This column will be filled only in the new rows to be added
# Create weather object to feed the for loop below----
weather <- data.frame(year = rawdata$year,
month = rawdata$month,
day = rawdata$day,
charmonth = rawdata$charmonth,
uniqmonth = rawdata$uniqmonth,
uniqdate = rawdata$uniqdate,
temp = rawdata$temp,
precip = rawdata$precip,
yr = rawdata$yr)
# weather$charmonth <- as.character(rawdata$charmonth)
现在出现错误...我正在尝试在每个月的数据顶部添加一行,其中包含该月的天数,缩写为三个字母的月份(jan、feb , mar 等) 和年份。
library(tibble) # package containing the add_row function
# create empty list to put all of the monthly dataframes in
newdat <- list()
# the following loop will create a dataframe for each month and put in a list
for(i in unique(weather$uniqmonth)) { # for every unique month value
# create object nam that is of the format 'df.uniqmonth'
nam <- paste("df", i, sep = ".")
# create object dat that contains all data for each unique month
dat <- weather[weather$uniqmonth==i,]
# add a row of data at the start of each dataframe with the days in month, month abbr., year
dat <- add_row(dat, year = NA, month = NA, day = NA,
charmonth = NA, uniqmonth = NA, uniqdate = NA,
# the line below is the info we are adding in the columns we will keep
temp = na.omit(max(dat$day)), precip = unique(dat$charmonth), yr = unique(dat$year),
.before = 1)
# just keep required columns
dat <- data.frame(dat$temp, dat$precip, dat$yr)
# add new dataframe to a list, using the new name
newdat[[nam]] <- dat
}
**您可以 运行 循环或逐行执行(设置 i = 199901)并且错误是相同的:
错误:无法合并 ..1$precip
和 ..2$precip
。
最终我应该能够运行以下内容以获得我需要的输出,我在文本编辑器中完成了这些输出(删除尾随逗号)。**
# Merge all data into a dataframe
full_data <- do.call("rbind", newdat)
# turn NA's into blanks
full_data[is.na(full_data)] <- ""
**感谢解决此错误的任何帮助!
这是我需要的最终产品
a <- c("Jan",
round(rnorm(31,15,5), digits = 2),
"Feb",
round(rnorm(28,5,5), digits = 2),
"Mar",
round(rnorm(31,15,5),digits = 2))
b <- c(31,
rlnorm(31),
28,
rlnorm(28),
31,
rlnorm(31))
c <- c(1999,
rep(NA,31),
1999,
rep(NA,28),
1999,
rep(NA,31))
final_data <- data.frame(temp = a,
precip = round(b,digits=2),
year = c)
经过长时间的讨论,最终结果不是传统的CSV,所以需要一点弯曲。
鉴于 weather
开始看起来像这样:
head(weather)
# # A tibble: 6 x 9
# year month day charmonth uniqmonth uniqdate temp precip yr
# <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <lgl>
# 1 1999 1 1 Jan 199901 199911 13.8 2.03 NA
# 2 1999 1 2 Jan 199901 199912 10.8 2.53 NA
# 3 1999 1 3 Jan 199901 199913 8.78 3.15 NA
# 4 1999 1 4 Jan 199901 199914 14.3 0.63 NA
# 5 1999 1 5 Jan 199901 199915 18.5 0.47 NA
# 6 1999 1 6 Jan 199901 199916 10.4 0.39 NA
所需的输出 (full_data
) 在文件中是这样的:
Jan,31,1999
13.83,2.03
10.76,2.53
8.78,3.15
...truncated...
18.74,0.79
Feb,28,1999
17.47,1.62
9.15,0.48
...truncated...
18.36,2.26
Mar,31,1999
20.53,2.65
11.1,2.58
19.52,0.33
...truncated...
重点是输出实际上是两列:precip
和 temp
,但每一“天”都需要有 3 列 header 之类的。
我认为处理这个问题的最简单方法是首先 group_by
主分组变量 (uniqmonth
),然后 do
每组数据。这个“东西”实际上是:(1) 创建新的 header 行,即 charmonth,max(day)
和 year
。由于它比我们在普通 CSV 中想要的多一个逗号,我将在第一个字段中嵌入一个逗号并告诉 write.table
不要引用它。这是一种解决方法,但是......它有效。
library(dplyr)
weather %>%
group_by(uniqmonth) %>%
do({
bind_rows(
tibble(temp = paste(.$charmonth[1], max(.$day), sep = ","),
precip = as.character(.$year[1])),
mutate_all(select(., temp, precip), as.character)
)
}) %>%
ungroup() %>%
select(-uniqmonth) %>%
write.table(., file = "quux.csv", quote = FALSE,
sep = ",", row.names = FALSE, col.names = FALSE)
下面的代码创建了类似于我正在使用的原始数据。我使用 tibble 包中的 add_row 函数编写了一些代码来重新格式化它。现在我收到一个错误(此代码在 2020 年 4 月之前有效)。由于包的更新,子集规则似乎变得更严格了?我想知道是否有人可以帮助纠正这个错误...... 首先创建数据
# Create replicate of raw data
date <- seq(from = as.Date('1999-01-01'),
to = as.Date('2013-12-31'),
by = 'day')
temp <- rnorm(5479,15,5)
precip <- rlnorm(5479)
rawdata <- data.frame(date=date,
temp=round(temp, digits = 2),
precip=round(precip, digits = 2))
# Add columns needed to run code
rawdata$year <- as.numeric(substr(rawdata$date,1,4))
rawdata$month <- as.numeric(substr(rawdata$date, 6,7))
rawdata$chardate <- format(rawdata$date, '%Y-%h-%d') # create abbreviated month column
rawdata$charmonth <- substr(rawdata$chardate, 6,8) # for formatting
rawdata$charmonth <- as.character(rawdata$charmonth)
rawdata$day <- as.numeric(substr(rawdata$date, 9,10))
rawdata$uniqdate <- rawdata$year*100+as.numeric(rawdata$day)+rawdata$month*10
rawdata$uniqmonth <- (rawdata$year*100)+rawdata$month# create unique month identifier
rawdata$yr <- NA # This column will be filled only in the new rows to be added
# Create weather object to feed the for loop below----
weather <- data.frame(year = rawdata$year,
month = rawdata$month,
day = rawdata$day,
charmonth = rawdata$charmonth,
uniqmonth = rawdata$uniqmonth,
uniqdate = rawdata$uniqdate,
temp = rawdata$temp,
precip = rawdata$precip,
yr = rawdata$yr)
# weather$charmonth <- as.character(rawdata$charmonth)
现在出现错误...我正在尝试在每个月的数据顶部添加一行,其中包含该月的天数,缩写为三个字母的月份(jan、feb , mar 等) 和年份。
library(tibble) # package containing the add_row function
# create empty list to put all of the monthly dataframes in
newdat <- list()
# the following loop will create a dataframe for each month and put in a list
for(i in unique(weather$uniqmonth)) { # for every unique month value
# create object nam that is of the format 'df.uniqmonth'
nam <- paste("df", i, sep = ".")
# create object dat that contains all data for each unique month
dat <- weather[weather$uniqmonth==i,]
# add a row of data at the start of each dataframe with the days in month, month abbr., year
dat <- add_row(dat, year = NA, month = NA, day = NA,
charmonth = NA, uniqmonth = NA, uniqdate = NA,
# the line below is the info we are adding in the columns we will keep
temp = na.omit(max(dat$day)), precip = unique(dat$charmonth), yr = unique(dat$year),
.before = 1)
# just keep required columns
dat <- data.frame(dat$temp, dat$precip, dat$yr)
# add new dataframe to a list, using the new name
newdat[[nam]] <- dat
}
**您可以 运行 循环或逐行执行(设置 i = 199901)并且错误是相同的:
错误:无法合并 ..1$precip
和 ..2$precip
。
最终我应该能够运行以下内容以获得我需要的输出,我在文本编辑器中完成了这些输出(删除尾随逗号)。**
# Merge all data into a dataframe
full_data <- do.call("rbind", newdat)
# turn NA's into blanks
full_data[is.na(full_data)] <- ""
**感谢解决此错误的任何帮助!
这是我需要的最终产品
a <- c("Jan",
round(rnorm(31,15,5), digits = 2),
"Feb",
round(rnorm(28,5,5), digits = 2),
"Mar",
round(rnorm(31,15,5),digits = 2))
b <- c(31,
rlnorm(31),
28,
rlnorm(28),
31,
rlnorm(31))
c <- c(1999,
rep(NA,31),
1999,
rep(NA,28),
1999,
rep(NA,31))
final_data <- data.frame(temp = a,
precip = round(b,digits=2),
year = c)
经过长时间的讨论,最终结果不是传统的CSV,所以需要一点弯曲。
鉴于 weather
开始看起来像这样:
head(weather)
# # A tibble: 6 x 9
# year month day charmonth uniqmonth uniqdate temp precip yr
# <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <lgl>
# 1 1999 1 1 Jan 199901 199911 13.8 2.03 NA
# 2 1999 1 2 Jan 199901 199912 10.8 2.53 NA
# 3 1999 1 3 Jan 199901 199913 8.78 3.15 NA
# 4 1999 1 4 Jan 199901 199914 14.3 0.63 NA
# 5 1999 1 5 Jan 199901 199915 18.5 0.47 NA
# 6 1999 1 6 Jan 199901 199916 10.4 0.39 NA
所需的输出 (full_data
) 在文件中是这样的:
Jan,31,1999
13.83,2.03
10.76,2.53
8.78,3.15
...truncated...
18.74,0.79
Feb,28,1999
17.47,1.62
9.15,0.48
...truncated...
18.36,2.26
Mar,31,1999
20.53,2.65
11.1,2.58
19.52,0.33
...truncated...
重点是输出实际上是两列:precip
和 temp
,但每一“天”都需要有 3 列 header 之类的。
我认为处理这个问题的最简单方法是首先 group_by
主分组变量 (uniqmonth
),然后 do
每组数据。这个“东西”实际上是:(1) 创建新的 header 行,即 charmonth,max(day)
和 year
。由于它比我们在普通 CSV 中想要的多一个逗号,我将在第一个字段中嵌入一个逗号并告诉 write.table
不要引用它。这是一种解决方法,但是......它有效。
library(dplyr)
weather %>%
group_by(uniqmonth) %>%
do({
bind_rows(
tibble(temp = paste(.$charmonth[1], max(.$day), sep = ","),
precip = as.character(.$year[1])),
mutate_all(select(., temp, precip), as.character)
)
}) %>%
ungroup() %>%
select(-uniqmonth) %>%
write.table(., file = "quux.csv", quote = FALSE,
sep = ",", row.names = FALSE, col.names = FALSE)