R - 数据框的操作
R - Manipulation of data frame
我有以下数据框table
S/N | Index 1 | Index 2 |
---|---|---|
A | 5 | 5 |
B | 5 | 2 |
C | 2 | 4 |
如何改成下面的table?
index | 1 | 2 | 3 | 4 | 5 |
---|---|---|---|---|---|
Index 1 | 0% | 33.3% | 0% | 0% | 66.6% |
Index 2 | 0% | 33.3% | 0% | 33.3% | 33.3% |
提前致谢!
您似乎想得到每个索引(Index)中各个取值的出现次数所占的百分比。我用了好几个步骤,不过下面的结果是否大致符合您的需求?
# Sample data: one row per subject (S_N) and one score column per index.
df1 <- data.frame(
  S_N = c("A", "B", "C"),
  Index1 = c(5, 5, 2),
  Index2 = c(5, 2, 4)
)

# Reshape to long form: one row per (S_N, Index) pair with the score stored
# in `value`. Note that this overwrites the wide `df1` defined above.
df1 <- pivot_longer(
  df1,
  cols = c("Index1", "Index2"),
  names_to = "Index",
  values_to = "value"
)

# Number of observations per index; used as the denominator below.
df2 <- summarise(group_by(df1, Index), idx_cnt = n())

# Percentage of each observed value within each index, spread to one column
# per value. A value that never occurs for an index ends up as NA, and the
# result is still grouped by `Index` (summarise() drops only the last group).
final_df <- df1 %>%
  group_by(Index, value) %>%
  summarise(count = n()) %>%
  left_join(df2, by = "Index") %>%
  mutate(frac = count / idx_cnt * 100) %>%
  select(Index, value, frac) %>%
  pivot_wider(names_from = value, values_from = frac)
结果:
> final_df
# A tibble: 2 x 4
# Groups: Index [2]
Index `2` `5` `4`
<chr> <dbl> <dbl> <dbl>
1 Index1 33.3 66.7 NA
2 Index2 33.3 33.3 33.3
使用 tidyverse 函数是一种方法。这里使用 @Carey Caginalp 分享的数据:
library(tidyverse)
# Share (in percent) of every value from 1 to max(value) within each index
# column, returned as one row per index and one column per value.
# NOTE: expects `df1` in its wide form (S_N plus the index columns).
df1 %>%
  # Long form: every column except S_N is stacked; scores land in `value`.
  pivot_longer(cols = -S_N, names_to = "index") %>%
  # Occurrences of each value per index, stored in `n`.
  count(index, value) %>%
  # Convert the counts to within-index percentages.
  group_by(index) %>%
  mutate(n = n / sum(n) * 100) %>%
  ungroup() %>%
  # Add the values that never occur for an index, with a share of 0.
  complete(index, value = 1:max(value), fill = list(n = 0)) %>%
  pivot_wider(names_from = value, values_from = n)
# index `1` `2` `3` `4` `5`
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 Index1 0 33.3 0 0 66.7
#2 Index2 0 33.3 0 33.3 33.3
使用 janitor 的简洁语法:
library(tidyverse)
library(janitor)
# Cross-tabulate index column against observed value and show each cell as a
# percentage of its row, formatted with two decimal places.
# NOTE: expects `df1` in its wide form (S_N plus the index columns).
df1 %>%
  # Stack every column except S_N; "name"/"value" are the pivot_longer defaults.
  pivot_longer(cols = -S_N, names_to = "name", values_to = "value") %>%
  tabyl(name, value) %>%
  adorn_percentages(denominator = "row") %>%
  adorn_pct_formatting(digits = 2)
name 2 4 5
Index1 33.33% 0.00% 66.67%
Index2 33.33% 33.33% 33.33%
我有以下数据框table
S/N | Index 1 | Index 2 |
---|---|---|
A | 5 | 5 |
B | 5 | 2 |
C | 2 | 4 |
如何改成下面的table?
index | 1 | 2 | 3 | 4 | 5 |
---|---|---|---|---|---|
Index 1 | 0% | 33.3% | 0% | 0% | 66.6% |
Index 2 | 0% | 33.3% | 0% | 33.3% | 33.3% |
提前致谢!
您似乎想得到每个索引(Index)中各个取值的出现次数所占的百分比。我用了好几个步骤,不过下面的结果是否大致符合您的需求?
# Sample data: one row per subject (S_N) and one score column per index.
df1 <- data.frame(
  S_N = c("A", "B", "C"),
  Index1 = c(5, 5, 2),
  Index2 = c(5, 2, 4)
)

# Reshape to long form: one row per (S_N, Index) pair with the score stored
# in `value`. Note that this overwrites the wide `df1` defined above.
df1 <- pivot_longer(
  df1,
  cols = c("Index1", "Index2"),
  names_to = "Index",
  values_to = "value"
)

# Number of observations per index; used as the denominator below.
df2 <- summarise(group_by(df1, Index), idx_cnt = n())

# Percentage of each observed value within each index, spread to one column
# per value. A value that never occurs for an index ends up as NA, and the
# result is still grouped by `Index` (summarise() drops only the last group).
final_df <- df1 %>%
  group_by(Index, value) %>%
  summarise(count = n()) %>%
  left_join(df2, by = "Index") %>%
  mutate(frac = count / idx_cnt * 100) %>%
  select(Index, value, frac) %>%
  pivot_wider(names_from = value, values_from = frac)
结果:
> final_df
# A tibble: 2 x 4
# Groups: Index [2]
Index `2` `5` `4`
<chr> <dbl> <dbl> <dbl>
1 Index1 33.3 66.7 NA
2 Index2 33.3 33.3 33.3
使用 tidyverse 函数是一种方法。这里使用 @Carey Caginalp 分享的数据:
library(tidyverse)
# Share (in percent) of every value from 1 to max(value) within each index
# column, returned as one row per index and one column per value.
# NOTE: expects `df1` in its wide form (S_N plus the index columns).
df1 %>%
  # Long form: every column except S_N is stacked; scores land in `value`.
  pivot_longer(cols = -S_N, names_to = "index") %>%
  # Occurrences of each value per index, stored in `n`.
  count(index, value) %>%
  # Convert the counts to within-index percentages.
  group_by(index) %>%
  mutate(n = n / sum(n) * 100) %>%
  ungroup() %>%
  # Add the values that never occur for an index, with a share of 0.
  complete(index, value = 1:max(value), fill = list(n = 0)) %>%
  pivot_wider(names_from = value, values_from = n)
# index `1` `2` `3` `4` `5`
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 Index1 0 33.3 0 0 66.7
#2 Index2 0 33.3 0 33.3 33.3
使用 janitor 的简洁语法:
library(tidyverse)
library(janitor)
# Cross-tabulate index column against observed value and show each cell as a
# percentage of its row, formatted with two decimal places.
# NOTE: expects `df1` in its wide form (S_N plus the index columns).
df1 %>%
  # Stack every column except S_N; "name"/"value" are the pivot_longer defaults.
  pivot_longer(cols = -S_N, names_to = "name", values_to = "value") %>%
  tabyl(name, value) %>%
  adorn_percentages(denominator = "row") %>%
  adorn_pct_formatting(digits = 2)
name 2 4 5
Index1 33.33% 0.00% 66.67%
Index2 33.33% 33.33% 33.33%