如何调整 R 中 pivot_longer() 中的特定错误
How to adjust specific error in pivot_longer() in R
你能帮我调整一下我的 Sumpk
变量吗?不幸的是,当我 运行.
时出现错误
library(dplyr)
library(tidyverse)
library(lubridate)
df1 <- structure(
list(date1 = c("2021-06-28","2021-06-28","2021-06-28","2021-06-28"),
date2 = c("2021-04-02","2021-04-02","2021-04-08","2021-04-08"),
Code = c("ABC","CDE","ABC","CDE"),
Week= c("Friday","Friday","Thursday","Thursday"),
DR1 = c(11,17,14,13),
DR01 = c(14,11,14,13), DR02= c(14,12,16,17),DR03= c(19,15,14,13),
DR04 = c(15,14,13,13)),
class = "data.frame", row.names = c(NA, -4L))
> df1
date1 date2 Code Week DR1 DR01 DR02 DR03 DR04
1 2021-06-28 2021-04-02 ABC Friday 11 14 14 19 15
2 2021-06-28 2021-04-02 CDE Friday 17 11 12 15 14
3 2021-06-28 2021-04-08 ABC Thursday 14 14 16 14 13
4 2021-06-28 2021-04-08 CDE Thursday 13 13 17 13 13
x<-df1 %>% select(starts_with("DR"))
x<-cbind(df1, setNames(df1$DR1 - x, paste0(names(x), "_PV")))
pk<-select(x, date2,Code, Week, DR1, ends_with("PV"))
med<-pk %>%
group_by(Code, Week) %>%
summarize(across(ends_with("PV"), median))
> med
# A tibble: 4 x 7
# Groups: Code [2]
Code Week DR1_PV DR01_PV DR02_PV DR03_PV DR04_PV
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ABC Friday 0 -3 -3 -8 -4
2 ABC Thursday 0 0 -2 0 1
3 CDE Friday 0 6 5 2 3
4 CDE Thursday 0 0 -4 0 0
Sumpk<-df1%>%
pivot_longer(-c(date1:Week)) %>%
left_join(med %>% rename_with( ~str_remove(., "_PV")) %>%
pivot_longer(-Week, values_to = "med")) %>%
mutate(new_value = value + med) %>%
select(-c(value:med)) %>%
pivot_wider(names_from = name, values_from = new_value,
names_glue = '{name}_{name}_PV')
Sumpk
的输出
在left_join
部分,由于Code
变量是字符,所以出现错误。如果你删除它,错误将被删除,但取决于你的目的,它可能需要以其他方式处理。
Sumpk<- df1%>%
pivot_longer(-c(date1:Week)) %>%
left_join(med %>% ungroup %>% select(-Code) %>% rename_with( ~str_remove(., "_PV")) %>%
pivot_longer(-Week, values_to = "med")) %>%
mutate(new_value = value + med) %>%
select(-c(value:med)) %>%
pivot_wider(names_from = name, values_from = new_value,
names_glue = '{name}_{name}_PV')
head(Sumpk)
# A tibble: 6 x 12
date1 date2 Code Week DR1_DR1_PV DR01_DR01_PV DR02_DR02_PV DR03_DR03_PV
<chr> <chr> <chr> <chr> <list> <list> <list> <list>
1 2021-06-28 2021~ ABC Frid~ <dbl [3]> <dbl [3]> <dbl [3]> <dbl [3]>
2 2021-06-28 2021~ CDE Satu~ <dbl [3]> <dbl [3]> <dbl [3]> <dbl [3]>
3 2021-06-28 2021~ EFG Thur~ <dbl [2]> <dbl [2]> <dbl [2]> <dbl [2]>
4 2021-06-28 2021~ HIJ Frid~ <dbl [3]> <dbl [3]> <dbl [3]> <dbl [3]>
5 2021-06-28 2021~ ABC Satu~ <dbl [3]> <dbl [3]> <dbl [3]> <dbl [3]>
6 2021-06-28 2021~ CDE Thur~ <dbl [2]> <dbl [2]> <dbl [2]> <dbl [2]>
# ... with 4 more variables: DR04_DR04_PV <list>, DR05_DR05_PV <list>,
# DR06_DR06_PV <list>, DR07_DR07_PV <list>
你可以试试这个inner_join
方法-
library(dplyr)
df1 %>%
inner_join(med, by = c('Code', 'Week')) %>%
mutate(across(DR1:DR04, ~.x + get(paste0(cur_column(), '_PV')),
.names = '{col}_{col}_PV')) %>%
select(date1:Week, DR1_DR1_PV:DR04_DR04_PV)
# date1 date2 Code Week DR1_DR1_PV DR01_DR01_PV DR02_DR02_PV DR03_DR03_PV DR04_DR04_PV
#1 2021-06-28 2021-04-02 ABC Friday 11 11 11 11 11
#2 2021-06-28 2021-04-02 CDE Friday 17 17 17 17 17
#3 2021-06-28 2021-04-08 ABC Thursday 14 14 14 14 14
#4 2021-06-28 2021-04-08 CDE Thursday 13 13 13 13 13
另一种适用于动态列的方法是 -
df3 <- df1 %>% inner_join(med, by = c('Code', 'Week'))
cols <- grep('DR', names(df1), value = TRUE)
new_cols <- paste(cols, cols, 'PV', sep = '_')
df3[new_cols] <- df1[cols] + df3[paste0(cols, '_PV')]
df3 %>% select(date1:Week, all_of(new_cols))
你能帮我调整一下我的 Sumpk
变量吗?不幸的是,当我 运行.
library(dplyr)
library(tidyverse)
library(lubridate)
df1 <- structure(
list(date1 = c("2021-06-28","2021-06-28","2021-06-28","2021-06-28"),
date2 = c("2021-04-02","2021-04-02","2021-04-08","2021-04-08"),
Code = c("ABC","CDE","ABC","CDE"),
Week= c("Friday","Friday","Thursday","Thursday"),
DR1 = c(11,17,14,13),
DR01 = c(14,11,14,13), DR02= c(14,12,16,17),DR03= c(19,15,14,13),
DR04 = c(15,14,13,13)),
class = "data.frame", row.names = c(NA, -4L))
> df1
date1 date2 Code Week DR1 DR01 DR02 DR03 DR04
1 2021-06-28 2021-04-02 ABC Friday 11 14 14 19 15
2 2021-06-28 2021-04-02 CDE Friday 17 11 12 15 14
3 2021-06-28 2021-04-08 ABC Thursday 14 14 16 14 13
4 2021-06-28 2021-04-08 CDE Thursday 13 13 17 13 13
x<-df1 %>% select(starts_with("DR"))
x<-cbind(df1, setNames(df1$DR1 - x, paste0(names(x), "_PV")))
pk<-select(x, date2,Code, Week, DR1, ends_with("PV"))
med<-pk %>%
group_by(Code, Week) %>%
summarize(across(ends_with("PV"), median))
> med
# A tibble: 4 x 7
# Groups: Code [2]
Code Week DR1_PV DR01_PV DR02_PV DR03_PV DR04_PV
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ABC Friday 0 -3 -3 -8 -4
2 ABC Thursday 0 0 -2 0 1
3 CDE Friday 0 6 5 2 3
4 CDE Thursday 0 0 -4 0 0
Sumpk<-df1%>%
pivot_longer(-c(date1:Week)) %>%
left_join(med %>% rename_with( ~str_remove(., "_PV")) %>%
pivot_longer(-Week, values_to = "med")) %>%
mutate(new_value = value + med) %>%
select(-c(value:med)) %>%
pivot_wider(names_from = name, values_from = new_value,
names_glue = '{name}_{name}_PV')
Sumpk
的输出
在left_join
部分,由于Code
变量是字符,所以出现错误。如果你删除它,错误将被删除,但取决于你的目的,它可能需要以其他方式处理。
Sumpk<- df1%>%
pivot_longer(-c(date1:Week)) %>%
left_join(med %>% ungroup %>% select(-Code) %>% rename_with( ~str_remove(., "_PV")) %>%
pivot_longer(-Week, values_to = "med")) %>%
mutate(new_value = value + med) %>%
select(-c(value:med)) %>%
pivot_wider(names_from = name, values_from = new_value,
names_glue = '{name}_{name}_PV')
head(Sumpk)
# A tibble: 6 x 12
date1 date2 Code Week DR1_DR1_PV DR01_DR01_PV DR02_DR02_PV DR03_DR03_PV
<chr> <chr> <chr> <chr> <list> <list> <list> <list>
1 2021-06-28 2021~ ABC Frid~ <dbl [3]> <dbl [3]> <dbl [3]> <dbl [3]>
2 2021-06-28 2021~ CDE Satu~ <dbl [3]> <dbl [3]> <dbl [3]> <dbl [3]>
3 2021-06-28 2021~ EFG Thur~ <dbl [2]> <dbl [2]> <dbl [2]> <dbl [2]>
4 2021-06-28 2021~ HIJ Frid~ <dbl [3]> <dbl [3]> <dbl [3]> <dbl [3]>
5 2021-06-28 2021~ ABC Satu~ <dbl [3]> <dbl [3]> <dbl [3]> <dbl [3]>
6 2021-06-28 2021~ CDE Thur~ <dbl [2]> <dbl [2]> <dbl [2]> <dbl [2]>
# ... with 4 more variables: DR04_DR04_PV <list>, DR05_DR05_PV <list>,
# DR06_DR06_PV <list>, DR07_DR07_PV <list>
你可以试试这个inner_join
方法-
library(dplyr)
df1 %>%
inner_join(med, by = c('Code', 'Week')) %>%
mutate(across(DR1:DR04, ~.x + get(paste0(cur_column(), '_PV')),
.names = '{col}_{col}_PV')) %>%
select(date1:Week, DR1_DR1_PV:DR04_DR04_PV)
# date1 date2 Code Week DR1_DR1_PV DR01_DR01_PV DR02_DR02_PV DR03_DR03_PV DR04_DR04_PV
#1 2021-06-28 2021-04-02 ABC Friday 11 11 11 11 11
#2 2021-06-28 2021-04-02 CDE Friday 17 17 17 17 17
#3 2021-06-28 2021-04-08 ABC Thursday 14 14 14 14 14
#4 2021-06-28 2021-04-08 CDE Thursday 13 13 13 13 13
另一种适用于动态列的方法是 -
df3 <- df1 %>% inner_join(med, by = c('Code', 'Week'))
cols <- grep('DR', names(df1), value = TRUE)
new_cols <- paste(cols, cols, 'PV', sep = '_')
df3[new_cols] <- df1[cols] + df3[paste0(cols, '_PV')]
df3 %>% select(date1:Week, all_of(new_cols))