R:在 data.frame 中对数值数据进行排名
R: ranking numerical data in a data.frame
mydata <- data.frame(Train = c(14.2, 2.2, 11.9), Test = c(10, 11.2, 12))
rownames(mydata) <- c("Method1", "Method2", "Method3")
> mydata
Train Test
Method1 14.2 10.0
Method2 2.2 11.2
Method3 11.9 12.0
我想按如下方式对 `Train` 和 `Test` 两列数据分别进行排名:
> mydata
Train Test Train_rank Test_rank
Method1 14.2 10.0 3 1
Method2 2.2 11.2 1 2
Method3 11.9 12.0 2 3
我试过以下方法:
library(plyr)
ddply(mydata, .(stat), transform,
Train_rank = rank(Train),
Test_rank = rank(Test),
)
但我收到以下错误:
Error in unique.default(x) : unique() applies only to vectors
使用 `tidyverse`,我们可以将 `mutate` 与 `across` 一起使用(`across` 来自 `dplyr` 1.0.0;更早的版本可以改用 `mutate_at`/`mutate_all`):
library(dplyr)# 1.0.0
mydata %>%
mutate(across(everything(), rank, .names = "{col}_rank"))
# Train Test Train_rank Test_rank
#1 14.2 10.0 3 1
#2 2.2 11.2 1 2
#3 11.9 12.0 2 3
如果需要保留行名(tidyverse 会丢弃 row.names),请先用 `rownames_to_column` 创建一个保存行名的列,完成排名后再用 `column_to_rownames` 把该列还原为行名:
library(tibble)
mydata %>%
rownames_to_column('rn') %>%
mutate(across(-rn, rank, .names = "{col}_rank")) %>%
column_to_rownames('rn')
# Train Test Train_rank Test_rank
#Method1 14.2 10.0 3 1
#Method2 2.2 11.2 1 2
#Method3 11.9 12.0 2 3
或 base R
mydata[paste0(names(mydata), "_rank")] <- lapply(mydata, rank)
我会用 data.table
library(data.table)
mydata <- data.table(Train = c(14.2, 2.2, 11.9), Test = c(10, 11.2, 12))
col_to_rank <- c('Train', 'Test')
mydata[, paste0('rank', col_to_rank) := lapply(.SD, rank), .SDcols = col_to_rank]
您可以使用 `dense_rank`(出现并列值时,它给出不留空缺的连续名次,而 `rank` 默认取平均名次;本例没有并列值,两者结果相同):
library(dplyr)
mydata %>% mutate(across(everything(), dense_rank, .names = '{col}_rank'))
# Train Test Train_rank Test_rank
#1 14.2 10.0 3 1
#2 2.2 11.2 1 2
#3 11.9 12.0 2 3
mydata <- data.frame(Train = c(14.2, 2.2, 11.9), Test = c(10, 11.2, 12))
rownames(mydata) <- c("Method1", "Method2", "Method3")
> mydata
Train Test
Method1 14.2 10.0
Method2 2.2 11.2
Method3 11.9 12.0
我想按如下方式对 `Train` 和 `Test` 两列数据分别进行排名:
> mydata
Train Test Train_rank Test_rank
Method1 14.2 10.0 3 1
Method2 2.2 11.2 1 2
Method3 11.9 12.0 2 3
我试过以下方法:
library(plyr)
ddply(mydata, .(stat), transform,
Train_rank = rank(Train),
Test_rank = rank(Test),
)
但我收到以下错误:
Error in unique.default(x) : unique() applies only to vectors
使用 `tidyverse`,我们可以将 `mutate` 与 `across` 一起使用(`across` 来自 `dplyr` 1.0.0;更早的版本可以改用 `mutate_at`/`mutate_all`):
library(dplyr)# 1.0.0
mydata %>%
mutate(across(everything(), rank, .names = "{col}_rank"))
# Train Test Train_rank Test_rank
#1 14.2 10.0 3 1
#2 2.2 11.2 1 2
#3 11.9 12.0 2 3
如果需要保留行名(tidyverse 会丢弃 row.names),请先用 `rownames_to_column` 创建一个保存行名的列,完成排名后再用 `column_to_rownames` 把该列还原为行名:
library(tibble)
mydata %>%
rownames_to_column('rn') %>%
mutate(across(-rn, rank, .names = "{col}_rank")) %>%
column_to_rownames('rn')
# Train Test Train_rank Test_rank
#Method1 14.2 10.0 3 1
#Method2 2.2 11.2 1 2
#Method3 11.9 12.0 2 3
或 base R
mydata[paste0(names(mydata), "_rank")] <- lapply(mydata, rank)
我会用 data.table
library(data.table)
mydata <- data.table(Train = c(14.2, 2.2, 11.9), Test = c(10, 11.2, 12))
col_to_rank <- c('Train', 'Test')
mydata[, paste0('rank', col_to_rank) := lapply(.SD, rank), .SDcols = col_to_rank]
您可以使用 `dense_rank`(出现并列值时,它给出不留空缺的连续名次,而 `rank` 默认取平均名次;本例没有并列值,两者结果相同):
library(dplyr)
mydata %>% mutate(across(everything(), dense_rank, .names = '{col}_rank'))
# Train Test Train_rank Test_rank
#1 14.2 10.0 3 1
#2 2.2 11.2 1 2
#3 11.9 12.0 2 3