循环遍历 r 中列的优雅方式
elegant way to loop through columns in r
假设我有一个数据框:
set.seed(10)
col_a <- (c("mouse", "dog", "rabbit", "cat", "horse", "monkey", "pig", "shark", "flea","ant"))
col_b <- (c(round(rnorm(10), digits=0)))
col_c <- (c(round(rnorm(10), digits=0)))
col_d <- (c(round(rnorm(10), digits=0)))
col_e <- (c(round(rnorm(10), digits=0)))
df <- data.frame(col_a, col_b, col_c, col_d, col_e)
df
col_a col_b col_c col_d col_e
> 1 mouse 0 0 0 0
> 2 dog 0 0 0 1
> 3 rabbit -2 0 -1 -1
> 4 cat 0 0 1 0
> 5 horse 1 2 0 0
> 6 monkey 1 1 -1 1
> 7 pig 0 -1 0 1
> 8 shark -2 0 0 -1
> 9 flea 0 1 0 0
> 10 ant 0 2 1 1
>
我想把“shark”对应的“col_b”和“col_d”中的值替换为“mouse”对应的值。我想使用列标题(headers)和 col_a 中的值进行索引。
我可以分两步完成:
df[df$col_a == "shark", "col_b"] <- df[df$col_a == "mouse", "col_b"]
df[df$col_a == "shark", "col_d"] <- df[df$col_a == "mouse", "col_d"]
对于许多列,代码变得相当笨重。有没有办法用 for 循环或 apply 函数来简化这个?
你可以这样做:
library(tidyverse)
df %>%
mutate(across(c(col_b, col_d), ~if_else(col_a == 'shark', .[col_a == 'mouse'], .)))
使用 across,您可以非常灵活地定义要进行替换的列。
请注意,我使用与您相同的随机种子得到不同的结果,因此我选择了另一个,即 set.seed(110)
输入数据:
col_a col_b col_c col_d col_e
1 mouse 0 1 0 0
2 dog 1 1 -2 0
3 rabbit 1 2 1 0
4 cat 1 -1 -1 -1
5 horse 0 2 1 -1
6 monkey 1 0 2 0
7 pig 0 -2 -1 1
8 shark 1 1 -1 0
9 flea -2 2 0 -1
10 ant 0 -2 0 0
输出数据:
col_a col_b col_c col_d col_e
1 mouse 0 1 0 0
2 dog 1 1 -2 0
3 rabbit 1 2 1 0
4 cat 1 -1 -1 -1
5 horse 0 2 1 -1
6 monkey 1 0 2 0
7 pig 0 -2 -1 1
8 shark 0 1 0 0
9 flea -2 2 0 -1
10 ant 0 -2 0 0
如果您要处理许多需要指定的情况,使用长格式数据可能会更容易。下面,我创建了数据的长格式版本、一个列出要交换的名称的查找表(lookup table),以及一个要在其中进行交换的列的列表。
library(tidyverse)
df_long = pivot_longer(df, -col_a)
chg_tbl = tibble(col_a = c("shark", "mouse"), new = c("mouse", "shark"))
cols <- c("col_b", "col_d")
然后可以将它们应用到长格式的表上:
df_long %>%
left_join(chg_tbl) %>%
transmute(col_a = if_else(name %in% cols, coalesce(new, col_a), col_a),
name, value) %>%
pivot_wider(names_from = name, values_from = value)
您不必为每一列单独执行此操作。您可以将多个列一起替换。
cols <- c('col_b', 'col_d')
df[df$col_a == "shark", cols] <- df[df$col_a == "mouse" , cols]
df
下面是一个简化的示例,因为使用相同的 set.seed 在我这里得到了不同的值。
df <- data.frame(col1 = letters[1:5], col2 = 1:5, col3 = 5:1, col4 = 1:5)
df
# col1 col2 col3 col4
#1 a 1 5 1
#2 b 2 4 2
#3 c 3 3 3
#4 d 4 2 4
#5 e 5 1 5
cols <- c('col2', 'col4')
df[df$col1 == "d", cols] <- df[df$col1 == "a" , cols]
df
# col1 col2 col3 col4
#1 a 1 5 1
#2 b 2 4 2
#3 c 3 3 3
#4 d 1 2 1
#5 e 5 1 5
假设我有一个数据框:
set.seed(10)
col_a <- (c("mouse", "dog", "rabbit", "cat", "horse", "monkey", "pig", "shark", "flea","ant"))
col_b <- (c(round(rnorm(10), digits=0)))
col_c <- (c(round(rnorm(10), digits=0)))
col_d <- (c(round(rnorm(10), digits=0)))
col_e <- (c(round(rnorm(10), digits=0)))
df <- data.frame(col_a, col_b, col_c, col_d, col_e)
df
col_a col_b col_c col_d col_e
> 1 mouse 0 0 0 0
> 2 dog 0 0 0 1
> 3 rabbit -2 0 -1 -1
> 4 cat 0 0 1 0
> 5 horse 1 2 0 0
> 6 monkey 1 1 -1 1
> 7 pig 0 -1 0 1
> 8 shark -2 0 0 -1
> 9 flea 0 1 0 0
> 10 ant 0 2 1 1
>
我想把“shark”对应的“col_b”和“col_d”中的值替换为“mouse”对应的值。我想使用列标题(headers)和 col_a 中的值进行索引。
我可以分两步完成:
df[df$col_a == "shark", "col_b"] <- df[df$col_a == "mouse", "col_b"]
df[df$col_a == "shark", "col_d"] <- df[df$col_a == "mouse", "col_d"]
对于许多列,代码变得相当笨重。有没有办法用 for 循环或 apply 函数来简化这个?
你可以这样做:
library(tidyverse)
df %>%
mutate(across(c(col_b, col_d), ~if_else(col_a == 'shark', .[col_a == 'mouse'], .)))
使用 across,您可以非常灵活地定义要进行替换的列。
请注意,我使用与您相同的随机种子得到不同的结果,因此我选择了另一个,即 set.seed(110)
输入数据:
col_a col_b col_c col_d col_e
1 mouse 0 1 0 0
2 dog 1 1 -2 0
3 rabbit 1 2 1 0
4 cat 1 -1 -1 -1
5 horse 0 2 1 -1
6 monkey 1 0 2 0
7 pig 0 -2 -1 1
8 shark 1 1 -1 0
9 flea -2 2 0 -1
10 ant 0 -2 0 0
输出数据:
col_a col_b col_c col_d col_e
1 mouse 0 1 0 0
2 dog 1 1 -2 0
3 rabbit 1 2 1 0
4 cat 1 -1 -1 -1
5 horse 0 2 1 -1
6 monkey 1 0 2 0
7 pig 0 -2 -1 1
8 shark 0 1 0 0
9 flea -2 2 0 -1
10 ant 0 -2 0 0
如果您要处理许多需要指定的情况,使用长格式数据可能会更容易。下面,我创建了数据的长格式版本、一个列出要交换的名称的查找表(lookup table),以及一个要在其中进行交换的列的列表。
library(tidyverse)
df_long = pivot_longer(df, -col_a)
chg_tbl = tibble(col_a = c("shark", "mouse"), new = c("mouse", "shark"))
cols <- c("col_b", "col_d")
然后可以将它们应用到长格式的表上:
df_long %>%
left_join(chg_tbl) %>%
transmute(col_a = if_else(name %in% cols, coalesce(new, col_a), col_a),
name, value) %>%
pivot_wider(names_from = name, values_from = value)
您不必为每一列单独执行此操作。您可以将多个列一起替换。
cols <- c('col_b', 'col_d')
df[df$col_a == "shark", cols] <- df[df$col_a == "mouse" , cols]
df
下面是一个简化的示例,因为使用相同的 set.seed 在我这里得到了不同的值。
df <- data.frame(col1 = letters[1:5], col2 = 1:5, col3 = 5:1, col4 = 1:5)
df
# col1 col2 col3 col4
#1 a 1 5 1
#2 b 2 4 2
#3 c 3 3 3
#4 d 4 2 4
#5 e 5 1 5
cols <- c('col2', 'col4')
df[df$col1 == "d", cols] <- df[df$col1 == "a" , cols]
df
# col1 col2 col3 col4
#1 a 1 5 1
#2 b 2 4 2
#3 c 3 3 3
#4 d 1 2 1
#5 e 5 1 5