如何根据 Excel 电子表格中重复 headers 的值在 R 中创建新列?
How to create an new column in R based on value from repeated headers in an Excel spreadsheet?
我正在尝试读取多个 excel 传播 sheet,其中数据被组织成 table。 (图像中当前传播 sheet 的示例):
Data in Excel to be transformed in R, has repeated tables
我在分析中需要实验单位(牛)作为自己的列,但它只位于每个table的header。关于在 R 中重新排列数据的最佳方法的任何想法? spreadsheet 的第一行也作为列名读入,我显然也不想这样做。这是一个可重现的示例,说明 sheet 读入 R 时的样子:
r1<-c("id","Cow 5590","...2","...3","Control","...4","...5","...6","...7","...8")
r2<-c("","","date","day","intake1","intake2","eattime","visits","dmi","vtime")
r3<-c("1","","43173","-21","24384","14.6","214","10","1.46","21.4")
r4<-c("1"," ","43174","-20","24374","17.6","217","17","1.76","27.4")
r5<-c("id","Cow 5591","...2","...3","Control","...4","...5","...6","...7","...8")
r6<-c("","","date","day","intake1","intake2","eattime","visits","dmi","vtime")
r7<-c("1"," ","43173","-21","24364","15.6","234","12","1.36","22.4")
r8<-c("1"," ","43174","-20","24884","18.6","284","18","1.86","28.4")
r9<-c("id","Cow 5592","...2","...3","Control","...4","...5","...6","...7","...8")
r10<-c("","","date","day","intake1","intake2","eattime","visits","dmi","vtime")
r11<-c("1"," ","43173","-21","24564","15.5","234","15","1.56","24.4")
r12<-c("1"," ","43174","-20","24584","15.6","254","18","1.85","34.4")
df<-data.frame(matrix(ncol=10,nrow=11))
colnames(df)<-r1
df[1,]<-r2
df[2,]<-r3
df[3,]<-r4
df[4,]<-r5
df[5,]<-r6
df[6,]<-r7
df[7,]<-r8
df[8,]<-r9
df[9,]<-r10
df[10,]<-r11
df[11,]<-r12
这就是我想要的样子:
c1<-c("id","Cow", "date","day","intake1","intake2","eattime","visits","dmi","vtime")
c2<-c("1","Cow 5590","43173","-21","24384","14.6","214","10","1.46","21.4")
c3<-c("1","Cow 5590","43174","-20","24374","17.6","217","17","1.76","27.4")
c4<-c("1","Cow 5591","43173","-21","24364","15.6","234","12","1.36","22.4")
c5<-c("1","Cow 5591","43174","-20","24884","18.6","284","18","1.86","28.4")
c6<-c("1","Cow 5592","43173","-21","24564","15.5","234","15","1.56","24.4")
c7<-c("1","Cow 5592","43174","-20","24584","15.6","254","18","1.85","34.4")
df2<-data.frame(matrix(ncol=10,nrow=6))
colnames(df2)<-c1
df2[1,]<-c2
df2[2,]<-c3
df2[3,]<-c4
df2[4,]<-c5
df2[5,]<-c6
df2[6,]<-c7
第一次海报。感谢您的帮助。
我们可以
library(dplyr)
library(tidyr)
library(janitor)
df %>%
mutate(`Cow 5590` = na_if(trimws(`Cow 5590`), "")) %>%
fill(`Cow 5590`) %>%
mutate(`Cow 5590` = replace_na(`Cow 5590`, "Cow 5590")) %>%
mutate(id = replace(id, 1, 'id')) %>%
row_to_names(1) %>%
filter(!id %in% c("id", "")) %>%
rename(Cow = `Cow 5590`) %>%
type.convert(as.is = TRUE)
-输出
id Cow date day intake1 intake2 eattime visits dmi vtime
1 1 Cow 5590 43173 -21 24384 14.6 214 10 1.46 21.4
2 1 Cow 5590 43174 -20 24374 17.6 217 17 1.76 27.4
3 1 Cow 5591 43173 -21 24364 15.6 234 12 1.36 22.4
4 1 Cow 5591 43174 -20 24884 18.6 284 18 1.86 28.4
5 1 Cow 5592 43173 -21 24564 15.5 234 15 1.56 24.4
6 1 Cow 5592 43174 -20 24584 15.6 254 18 1.85 34.4
我正在尝试读取多个 excel 传播 sheet,其中数据被组织成 table。 (图像中当前传播 sheet 的示例):
Data in Excel to be transformed in R, has repeated tables
我在分析中需要实验单位(牛)作为自己的列,但它只位于每个table的header。关于在 R 中重新排列数据的最佳方法的任何想法? spreadsheet 的第一行也作为列名读入,我显然也不想这样做。这是一个可重现的示例,说明 sheet 读入 R 时的样子:
r1<-c("id","Cow 5590","...2","...3","Control","...4","...5","...6","...7","...8")
r2<-c("","","date","day","intake1","intake2","eattime","visits","dmi","vtime")
r3<-c("1","","43173","-21","24384","14.6","214","10","1.46","21.4")
r4<-c("1"," ","43174","-20","24374","17.6","217","17","1.76","27.4")
r5<-c("id","Cow 5591","...2","...3","Control","...4","...5","...6","...7","...8")
r6<-c("","","date","day","intake1","intake2","eattime","visits","dmi","vtime")
r7<-c("1"," ","43173","-21","24364","15.6","234","12","1.36","22.4")
r8<-c("1"," ","43174","-20","24884","18.6","284","18","1.86","28.4")
r9<-c("id","Cow 5592","...2","...3","Control","...4","...5","...6","...7","...8")
r10<-c("","","date","day","intake1","intake2","eattime","visits","dmi","vtime")
r11<-c("1"," ","43173","-21","24564","15.5","234","15","1.56","24.4")
r12<-c("1"," ","43174","-20","24584","15.6","254","18","1.85","34.4")
df<-data.frame(matrix(ncol=10,nrow=11))
colnames(df)<-r1
df[1,]<-r2
df[2,]<-r3
df[3,]<-r4
df[4,]<-r5
df[5,]<-r6
df[6,]<-r7
df[7,]<-r8
df[8,]<-r9
df[9,]<-r10
df[10,]<-r11
df[11,]<-r12
这就是我想要的样子:
c1<-c("id","Cow", "date","day","intake1","intake2","eattime","visits","dmi","vtime")
c2<-c("1","Cow 5590","43173","-21","24384","14.6","214","10","1.46","21.4")
c3<-c("1","Cow 5590","43174","-20","24374","17.6","217","17","1.76","27.4")
c4<-c("1","Cow 5591","43173","-21","24364","15.6","234","12","1.36","22.4")
c5<-c("1","Cow 5591","43174","-20","24884","18.6","284","18","1.86","28.4")
c6<-c("1","Cow 5592","43173","-21","24564","15.5","234","15","1.56","24.4")
c7<-c("1","Cow 5592","43174","-20","24584","15.6","254","18","1.85","34.4")
df2<-data.frame(matrix(ncol=10,nrow=6))
colnames(df2)<-c1
df2[1,]<-c2
df2[2,]<-c3
df2[3,]<-c4
df2[4,]<-c5
df2[5,]<-c6
df2[6,]<-c7
第一次海报。感谢您的帮助。
我们可以
library(dplyr)
library(tidyr)
library(janitor)
df %>%
mutate(`Cow 5590` = na_if(trimws(`Cow 5590`), "")) %>%
fill(`Cow 5590`) %>%
mutate(`Cow 5590` = replace_na(`Cow 5590`, "Cow 5590")) %>%
mutate(id = replace(id, 1, 'id')) %>%
row_to_names(1) %>%
filter(!id %in% c("id", "")) %>%
rename(Cow = `Cow 5590`) %>%
type.convert(as.is = TRUE)
-输出
id Cow date day intake1 intake2 eattime visits dmi vtime
1 1 Cow 5590 43173 -21 24384 14.6 214 10 1.46 21.4
2 1 Cow 5590 43174 -20 24374 17.6 217 17 1.76 27.4
3 1 Cow 5591 43173 -21 24364 15.6 234 12 1.36 22.4
4 1 Cow 5591 43174 -20 24884 18.6 284 18 1.86 28.4
5 1 Cow 5592 43173 -21 24564 15.5 234 15 1.56 24.4
6 1 Cow 5592 43174 -20 24584 15.6 254 18 1.85 34.4