我传入三个参数:输入的 df、要清理的列,以及要添加的、用于存放清理后名称的新列。我哪里做错了?
I pass three arguments: the input df, the column I want to clean, and the new column to be added with the cleansed names. Where am I going wrong?
library(dplyr)
# Question's original attempt, kept exactly as posted. The intent is to add a
# cleaned copy of the column `col_name` to `df` under the name `new_col_name`.
# NOTE(review): `df$col_name` and `df$new_col_name` use the literal names
# "col_name" / "new_col_name", not the values of the function arguments.
# NOTE(review): the right-hand sides are the results of mutate_all() /
# transmute_all() on the whole `df`; presumably these are data frames rather
# than vectors, which would explain the reported error -- confirm.
# NOTE(review): "\s" and "\d" are not valid escapes inside an R string
# literal; the original post presumably had "\\s" and "\\d" -- confirm.
clean_name <- function(df,col_name,new_col_name){
# Remove whitespace and common titles.
df$new_col_name <- mutate_all(df,
trimws(gsub("MR.?|MRS.?|MS.?|MISS.?|MASTER.?","",df$col_name)))
# Remove any chunks of text where a number is present.
df$new_col_name<- transmute_all(df,
gsub("[^\s]*[\d]+[^\s]*","",df$col_name,perl = TRUE))
}
我收到以下错误:
"Error: Column `new_col_name` must be a 1d atomic vector or a list"
如果传入的是不带引号的列名,可以先用 `enquo()` 捕获参数,再在 `mutate()` 中用 `!!` 和 `:=` 引用它们:
library(tidyverse)
#' Strip common titles and number-bearing tokens from a name column.
#'
#' @param df A data frame.
#' @param col_name Unquoted name of the column in `df` to clean.
#' @param new_col_name Unquoted name for the cleaned output column.
#' @return A data frame holding only `new_col_name`; `transmute()` drops every
#'   other column of `df`.
clean_name <- function(df, col_name, new_col_name) {
  # Capture the bare column names so they can be unquoted with `!!` below.
  col_name <- enquo(col_name)
  new_col_name <- enquo(new_col_name)

  df %>%
    # Step 1: drop titles. Longer titles come first and the dot is escaped, so
    # "MRS." is removed as a whole rather than as "MR" plus any one character.
    mutate(
      !!new_col_name := trimws(
        str_replace_all(
          !!col_name,
          "MASTER\\.?|MISS\\.?|MRS\\.?|MR\\.?|MS\\.?",
          ""
        )
      )
    ) %>%
    # Step 2: drop every whitespace-delimited token that contains a digit.
    # Backslashes are doubled because "\s" and "\d" are not valid escapes in
    # an R string literal.
    transmute(
      !!new_col_name := trimws(
        str_replace_all(!!new_col_name, "[^\\s]*[\\d]+[^\\s]*", "")
      )
    )
}
# Example: pass the column names unquoted (`dat1` is the sample data frame
# defined in the data section). The commented lines show the printed result.
clean_name(dat1, col1, colN)
#   colN
# 1  one
# 2  two
数据
# Sample data: a single character column `col1` whose values carry a title
# prefix, one of them followed by a numeric token.
dat1 <- data.frame(col1 = c("MR. one", "MS. two 24"), stringsAsFactors = FALSE)
您要做的是确保所用函数的输出是一个向量或一维列表,这样才能把它作为新列添加到目标数据框中。您可以使用 base 包中的 `class()` 函数检查对象的类。
`mutate()` 函数本身就能满足您的需求:它返回原数据框并附加新列:
library(dplyr)
clean_name <- function(df, col_name, new_col_name) {
  # Template: replace the two placeholders below before use. Each should be a
  # vector.
  #   first_cleaning_to_colname  - result of the first change you want to make
  #                                to the col_name column.
  #   second_cleaning_to_colname - result of the change applied after the
  #                                first one.
  after_first <- df %>%
    mutate(col_name = first_cleaning_to_colname)
  after_second <- after_first %>%
    mutate(new_col_name = second_cleaning_to_colname)
  after_second
}
您可以同时进行这两项更改,但我认为这样更容易阅读。
library(dplyr)
# Question's original attempt, kept exactly as posted. The intent is to add a
# cleaned copy of the column `col_name` to `df` under the name `new_col_name`.
# NOTE(review): `df$col_name` and `df$new_col_name` use the literal names
# "col_name" / "new_col_name", not the values of the function arguments.
# NOTE(review): the right-hand sides are the results of mutate_all() /
# transmute_all() on the whole `df`; presumably these are data frames rather
# than vectors, which would explain the reported error -- confirm.
# NOTE(review): "\s" and "\d" are not valid escapes inside an R string
# literal; the original post presumably had "\\s" and "\\d" -- confirm.
clean_name <- function(df,col_name,new_col_name){
# Remove whitespace and common titles.
df$new_col_name <- mutate_all(df,
trimws(gsub("MR.?|MRS.?|MS.?|MISS.?|MASTER.?","",df$col_name)))
# Remove any chunks of text where a number is present.
df$new_col_name<- transmute_all(df,
gsub("[^\s]*[\d]+[^\s]*","",df$col_name,perl = TRUE))
}
我收到以下错误:
"Error: Column `new_col_name` must be a 1d atomic vector or a list"
如果传入的是不带引号的列名,可以先用 `enquo()` 捕获参数,再在 `mutate()` 中用 `!!` 和 `:=` 引用它们:
library(tidyverse)
#' Strip common titles and number-bearing tokens from a name column.
#'
#' @param df A data frame.
#' @param col_name Unquoted name of the column in `df` to clean.
#' @param new_col_name Unquoted name for the cleaned output column.
#' @return A data frame holding only `new_col_name`; `transmute()` drops every
#'   other column of `df`.
clean_name <- function(df, col_name, new_col_name) {
  # Capture the bare column names so they can be unquoted with `!!` below.
  col_name <- enquo(col_name)
  new_col_name <- enquo(new_col_name)

  df %>%
    # Step 1: drop titles. Longer titles come first and the dot is escaped, so
    # "MRS." is removed as a whole rather than as "MR" plus any one character.
    mutate(
      !!new_col_name := trimws(
        str_replace_all(
          !!col_name,
          "MASTER\\.?|MISS\\.?|MRS\\.?|MR\\.?|MS\\.?",
          ""
        )
      )
    ) %>%
    # Step 2: drop every whitespace-delimited token that contains a digit.
    # Backslashes are doubled because "\s" and "\d" are not valid escapes in
    # an R string literal.
    transmute(
      !!new_col_name := trimws(
        str_replace_all(!!new_col_name, "[^\\s]*[\\d]+[^\\s]*", "")
      )
    )
}
# Example: pass the column names unquoted (`dat1` is the sample data frame
# defined in the data section). The commented lines show the printed result.
clean_name(dat1, col1, colN)
#   colN
# 1  one
# 2  two
数据
# Sample data: a single character column `col1` whose values carry a title
# prefix, one of them followed by a numeric token.
dat1 <- data.frame(col1 = c("MR. one", "MS. two 24"), stringsAsFactors = FALSE)
您要做的是确保所用函数的输出是一个向量或一维列表,这样才能把它作为新列添加到目标数据框中。您可以使用 base 包中的 `class()` 函数检查对象的类。
`mutate()` 函数本身就能满足您的需求:它返回原数据框并附加新列:
library(dplyr)
clean_name <- function(df, col_name, new_col_name) {
  # Template: replace the two placeholders below before use. Each should be a
  # vector.
  #   first_cleaning_to_colname  - result of the first change you want to make
  #                                to the col_name column.
  #   second_cleaning_to_colname - result of the change applied after the
  #                                first one.
  after_first <- df %>%
    mutate(col_name = first_cleaning_to_colname)
  after_second <- after_first %>%
    mutate(new_col_name = second_cleaning_to_colname)
  after_second
}
您可以同时进行这两项更改,但我认为这样更容易阅读。