如何通过从 R 中的列名生成键来 pivot/gather 多个 groups/pairs 列?
How to pivot/gather multiple groups/pairs of columns by generating keys from the column names in R?
我有以下数据集
# Simulate ten students: one z-score and two 0/1 indicators (passed_lab,
# used_comp) for every grade (9, 10) x course (math, science) combination.
# Every name follows the pattern grade<grade>_<course>_<measure>.
grade9_math_zscore <- rnorm(10, 0, 1)
grade9_science_zscore <- rnorm(10, 0, 1)
grade10_math_zscore <- rnorm(10, 0, 1)
grade10_science_zscore <- rnorm(10, 0, 1)
grade9_math_passed_lab <- sample(0:1, 10, replace = TRUE)
grade10_math_passed_lab <- sample(0:1, 10, replace = TRUE)
grade9_science_passed_lab <- sample(0:1, 10, replace = TRUE)
grade10_science_passed_lab <- sample(0:1, 10, replace = TRUE)
grade9_math_used_comp <- sample(0:1, 10, replace = TRUE)
grade10_math_used_comp <- sample(0:1, 10, replace = TRUE)
grade9_science_used_comp <- sample(0:1, 10, replace = TRUE)
grade10_science_used_comp <- sample(0:1, 10, replace = TRUE)

# Build the data frame with data.frame() instead of as.data.frame(cbind(...)):
# cbind() first forms a single numeric matrix, which coerces the integer 0/1
# indicator columns to double. data.frame() keeps each column's own type and
# names the columns after the variables passed in.
students <- data.frame(
  grade9_math_zscore,
  grade9_science_zscore,
  grade10_math_zscore,
  grade10_science_zscore,
  grade9_math_passed_lab,
  grade10_math_passed_lab,
  grade9_science_passed_lab,
  grade10_science_passed_lab,
  grade9_math_used_comp,
  grade10_math_used_comp,
  grade9_science_used_comp,
  grade10_science_used_comp
)
我需要得到的输出(前 4 行)如下所示
grade course z_score passed_lab used_comp
1 9 math -0.287118228740724 0 0
2 9 science 0.421672812450803 0 0
3 10 math 1.66175637068003 1 1
4 10 science -0.000352193924396851 0 1
我一直在尝试使用 pivot_longer
(来自 R 的 tidyr 包,而非 dplyr)
获得这个。我需要帮助主要是找出 names_pattern
选项。另外,我似乎无法在一个命令中 gather
(以 dplyr 术语)所有三列 z_score , passed_lab , used_comp
。
感谢任何编码解决方案或仅仅是建议。不使用 dplyr 的任何解决方案也很受欢迎。
使用 pivot_longer
你可以:
# Reshape `students` to one row per student x grade x course.
# Each column name has the form grade<grade>_<course>_<measure>; the three
# capture groups in names_pattern map onto names_to in order. The ".value"
# entry makes the third group (zscore, passed_lab, used_comp) the names of the
# output value columns instead of a key column. The lazy "(.*?)" stops the
# course at the first underscore so that "passed_lab" / "used_comp" stay whole.
# The backslash is doubled ("\\d") because "\d" is not a valid escape in an R
# string literal and is rejected when the code is parsed.
tidyr::pivot_longer(
  students,
  cols = everything(),
  names_to = c("grade", "course", ".value"),
  names_pattern = "grade(\\d+)_(.*?)_(.*)"
)
# A tibble: 40 x 5
# grade course zscore passed_lab used_comp
# <chr> <chr> <dbl> <int> <int>
# 1 9 math -1.04 0 1
# 2 9 science 0.608 0 0
# 3 10 math 1.27 0 1
# 4 10 science 1.38 1 1
# 5 9 math -1.30 1 1
# 6 9 science 0.582 1 1
# 7 10 math -0.196 1 1
# 8 10 science -0.198 0 1
# 9 9 math -1.28 1 1
#10 9 science 2.05 0 0
# … with 30 more rows
数据
不要先用 cbind 再套一层 as.data.frame,而应直接用 data.frame 构造数据框:cbind 会先把所有列合并成一个矩阵,整数列因此会被强制转换为 double。
# Build the data frame directly from the twelve vectors defined above;
# data.frame() keeps each column's own type (double z-scores, integer 0/1
# indicators) and names the columns after the variables passed in.
students <- data.frame(
  grade9_math_zscore,
  grade9_science_zscore,
  grade10_math_zscore,
  grade10_science_zscore,
  grade9_math_passed_lab,
  grade10_math_passed_lab,
  grade9_science_passed_lab,
  grade10_science_passed_lab,
  grade9_math_used_comp,
  grade10_math_used_comp,
  grade9_science_used_comp,
  grade10_science_used_comp
)
我有以下数据集
# Simulate ten students: one z-score and two 0/1 indicators (passed_lab,
# used_comp) for every grade (9, 10) x course (math, science) combination.
# Every name follows the pattern grade<grade>_<course>_<measure>.
grade9_math_zscore <- rnorm(10, 0, 1)
grade9_science_zscore <- rnorm(10, 0, 1)
grade10_math_zscore <- rnorm(10, 0, 1)
grade10_science_zscore <- rnorm(10, 0, 1)
grade9_math_passed_lab <- sample(0:1, 10, replace = TRUE)
grade10_math_passed_lab <- sample(0:1, 10, replace = TRUE)
grade9_science_passed_lab <- sample(0:1, 10, replace = TRUE)
grade10_science_passed_lab <- sample(0:1, 10, replace = TRUE)
grade9_math_used_comp <- sample(0:1, 10, replace = TRUE)
grade10_math_used_comp <- sample(0:1, 10, replace = TRUE)
grade9_science_used_comp <- sample(0:1, 10, replace = TRUE)
grade10_science_used_comp <- sample(0:1, 10, replace = TRUE)

# Build the data frame with data.frame() instead of as.data.frame(cbind(...)):
# cbind() first forms a single numeric matrix, which coerces the integer 0/1
# indicator columns to double. data.frame() keeps each column's own type and
# names the columns after the variables passed in.
students <- data.frame(
  grade9_math_zscore,
  grade9_science_zscore,
  grade10_math_zscore,
  grade10_science_zscore,
  grade9_math_passed_lab,
  grade10_math_passed_lab,
  grade9_science_passed_lab,
  grade10_science_passed_lab,
  grade9_math_used_comp,
  grade10_math_used_comp,
  grade9_science_used_comp,
  grade10_science_used_comp
)
我需要得到的输出(前 4 行)如下所示
grade course z_score passed_lab used_comp
1 9 math -0.287118228740724 0 0
2 9 science 0.421672812450803 0 0
3 10 math 1.66175637068003 1 1
4 10 science -0.000352193924396851 0 1
我一直在尝试使用 pivot_longer
(来自 R 的 tidyr 包,而非 dplyr)
获得这个。我需要帮助主要是找出 names_pattern
选项。另外,我似乎无法在一个命令中 gather
(以 dplyr 术语)所有三列 z_score , passed_lab , used_comp
。
感谢任何编码解决方案或仅仅是建议。不使用 dplyr 的任何解决方案也很受欢迎。
使用 pivot_longer
你可以:
# Reshape `students` to one row per student x grade x course.
# Each column name has the form grade<grade>_<course>_<measure>; the three
# capture groups in names_pattern map onto names_to in order. The ".value"
# entry makes the third group (zscore, passed_lab, used_comp) the names of the
# output value columns instead of a key column. The lazy "(.*?)" stops the
# course at the first underscore so that "passed_lab" / "used_comp" stay whole.
# The backslash is doubled ("\\d") because "\d" is not a valid escape in an R
# string literal and is rejected when the code is parsed.
tidyr::pivot_longer(
  students,
  cols = everything(),
  names_to = c("grade", "course", ".value"),
  names_pattern = "grade(\\d+)_(.*?)_(.*)"
)
# A tibble: 40 x 5
# grade course zscore passed_lab used_comp
# <chr> <chr> <dbl> <int> <int>
# 1 9 math -1.04 0 1
# 2 9 science 0.608 0 0
# 3 10 math 1.27 0 1
# 4 10 science 1.38 1 1
# 5 9 math -1.30 1 1
# 6 9 science 0.582 1 1
# 7 10 math -0.196 1 1
# 8 10 science -0.198 0 1
# 9 9 math -1.28 1 1
#10 9 science 2.05 0 0
# … with 30 more rows
数据
不要先用 cbind 再套一层 as.data.frame,而应直接用 data.frame 构造数据框:cbind 会先把所有列合并成一个矩阵,整数列因此会被强制转换为 double。
# Build the data frame directly from the twelve vectors defined above;
# data.frame() keeps each column's own type (double z-scores, integer 0/1
# indicators) and names the columns after the variables passed in.
students <- data.frame(
  grade9_math_zscore,
  grade9_science_zscore,
  grade10_math_zscore,
  grade10_science_zscore,
  grade9_math_passed_lab,
  grade10_math_passed_lab,
  grade9_science_passed_lab,
  grade10_science_passed_lab,
  grade9_math_used_comp,
  grade10_math_used_comp,
  grade9_science_used_comp,
  grade10_science_used_comp
)