完全由用户定义的过滤数据 r - 多列和过滤器
Filter Data completely user defined r - multiple columns and filters
我正在尝试创建一个函数,允许用户定义无限数量的列并将匹配的过滤器应用于这些列。
df <- data.frame(a=1:10, b=round(runif(10)), c=round(runif(10)))
|a| b|c|
|1| 1|1|
|2| 0|0|
|3| 0|1|
|4| 1|0|
|5| 1|0|
|6| 1|0|
|7| 1|1|
|8| 1|1|
|9| 1|0|
|10|1|1|
我希望用户能够根据任一列过滤数据,并对每一列应用不同的过滤器。我知道以下内容不起作用。但这是一般的想法。
# Asker's sketch of the desired interface; the question states that it does not work.
# fCol is passed as column names in strings and fParam as the values to match.
# As written, `fCol[1] %in% fParam[1]` tests the name string itself against the
# value instead of the contents of the column it names, and only the first two
# entries of fCol / fParam are ever consulted.
test <- function(df, fCol, fParam){
df %>% filter(fCol[1] %in% fParam[1] | fCol[2] %in% fParam[2])
}
# Example call: columns "b" and "c" paired with the values 1 and 0.
test(df, c("b", "c"), c(1, 0))
# Which I would want it to return
|a|b|c|
|4|1|0|
|5|1|0|
|6|1|0|
|9|1|0|
我遇到的问题是:我事先不知道用户要过滤多少列,也不知道列名。
如有任何帮助,我将不胜感激。如果有疑问,请随时提出。我已尽力提供了一个可复现的示例(reprex)。
相信这应该能满足你的需求
library(tidyr)
library(dplyr)
#' Filter a data frame by user-supplied column/value pairs
#'
#' Each element of `fCol` is paired with the element of `fParam` at the same
#' position; a row satisfies a pair when that column equals that value.
#'
#' @param df A data frame or tibble. Grouped input is ungrouped first.
#' @param fCol Character vector of column names to test. A name may be
#'   repeated to test one column against several values.
#' @param fParam Vector of values to match, paired with `fCol` by position.
#'   If the lengths differ, the shorter vector is recycled when its length
#'   divides the longer one; otherwise an error is raised.
#' @param match_type `"any"` keeps rows that satisfy at least one pair (OR);
#'   `"all"` keeps rows that satisfy every pair (AND).
#' @return The rows of `df` that pass the filter, with the original columns.
#'   Rows whose combined result is `NA` are dropped.
test <- function(df, fCol, fParam, match_type = "any") {
  # Scalar `||` short-circuits, so a vector `match_type` never reaches `%in%`.
  if (!is.character(match_type) || length(match_type) != 1L ||
      !match_type %in% c("any", "all")) {
    stop('`match_type` must be either "any" or "all".', call. = FALSE)
  }

  n_col <- length(fCol)
  n_par <- length(fParam)
  if (n_col != n_par) {
    n_pairs <- max(n_col, n_par)
    shorter <- min(n_col, n_par)
    if (shorter == 0L || n_pairs %% shorter != 0L) {
      stop("`fCol` and `fParam` must have compatible lengths.", call. = FALSE)
    }
    fCol <- rep_len(fCol, n_pairs)
    fParam <- rep_len(fParam, n_pairs)
  }

  unknown <- setdiff(fCol, names(df))
  if (length(unknown) > 0L) {
    stop(
      "Column(s) not found in `df`: ", paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }

  if (inherits(df, "grouped_df")) {
    df <- dplyr::ungroup(df)
  }

  # One logical vector per column/value pair, then combined row-wise.
  hits <- Map(function(col, value) df[[col]] == value, fCol, fParam)
  if (match_type == "any") {
    # FALSE is the identity for `|`, so an empty set of pairs keeps no rows.
    keep <- Reduce(`|`, hits, rep(FALSE, nrow(df)))
  } else {
    # TRUE is the identity for `&`, so an empty set of pairs keeps every row.
    keep <- Reduce(`&`, hits, rep(TRUE, nrow(df)))
  }

  # which() drops both FALSE and NA, mirroring dplyr::filter().
  out <- df[which(keep), , drop = FALSE]
  if (!inherits(out, "tbl_df")) {
    # Plain data frames get fresh 1..k row names, as dplyr::filter() returns.
    rownames(out) <- NULL
  }
  out
}
df <- data.frame(a=1:10, b=round(runif(10)), c=round(runif(10)))
df
# a b c
#1 1 0 1
#2 2 1 0
#3 3 0 0
#4 4 0 1
#5 5 0 1
#6 6 0 1
#7 7 1 0
#8 8 1 1
#9 9 1 0
#10 10 1 0
#use "any" to do an | match
test(df, c("b","c"),c(1,0), match_type = "any")
# a b c
#1 2 1 0
#2 3 0 0
#3 7 1 0
#4 8 1 1
#5 9 1 0
#6 10 1 0
#use "all" to do an & match
test(df, c("b","c"),c(1,0), match_type = "all")
# a b c
#1 2 1 0
#2 7 1 0
#3 9 1 0
#4 10 1 0
如果要让同一列匹配多个值,也可以在 fCol 中多次指定相同的列名:
test(df, c("b","b"),c(1,0)) #matches everything but you get the point
(My original response):
I am not sure this gives you quite the process you want, but here's my best attempt before running out of patience! :-)
I am sure there is a good way to make this an AND filter rather than an OR, but I can't quite get there myself. (Maybe a combination of map_dfc and inner_join?)
编辑:终于搞定了!改进后的代码如下(原始代码已删除)。
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(purrr))
my_df <- tibble(
a=1:10,
b=round(runif(10)),
c=round(runif(10))
)
my_df
#> # A tibble: 10 x 3
#> a b c
#> <int> <dbl> <dbl>
#> 1 1 1 0
#> 2 2 1 0
#> 3 3 0 1
#> 4 4 0 0
#> 5 5 1 1
#> 6 6 0 1
#> 7 7 0 0
#> 8 8 0 1
#> 9 9 1 0
#> 10 10 1 0
col_names <- c("b", "c")
tests <- c(1, 0)
# option 1: with a named function:
#' Build a single-column tibble to use as a join key
#'
#' @param col_name Column name, given as a string.
#' @param test Value(s) to store in that column.
#' @return A tibble whose only column is named by `col_name` and holds `test`.
make_test_frame <- function(col_name, test) {
  # `!!` injects the *value* of col_name as the column name, so the result
  # does not depend on how the caller spelled the argument expression.
  tibble(!!col_name := test)
}
# Build the key table (one column per name/value pair) and inner-join my_df
# against it; my_df is the left-hand table, the key table the right-hand one.
my_df1 <- inner_join(
  my_df,
  map2_dfc(col_names, tests, make_test_frame)
)
#> Joining, by = c("b", "c")
my_df1
#> # A tibble: 4 x 3
#> a b c
#> <int> <dbl> <dbl>
#> 1 1 1 0
#> 2 2 1 0
#> 3 9 1 0
#> 4 10 1 0
# 2. or with an anonymous function:
# Same as option 1, with the key-table builder written inline.
# `!!col_name :=` injects the string held in col_name as the column name.
my_df1 <- inner_join(
  my_df,
  map2_dfc(
    col_names, tests,
    function(col_name, test) tibble(!!col_name := test)
  )
)
#> Joining, by = c("b", "c")
my_df1
#> # A tibble: 4 x 3
#> a b c
#> <int> <dbl> <dbl>
#> 1 1 1 0
#> 2 2 1 0
#> 3 9 1 0
#> 4 10 1 0
# 3. or as one big, hairy function:
#' Keep the rows of a data frame that match every column/value pair
#'
#' @param df A data frame or tibble to filter.
#' @param col_names Character vector of column names to match on.
#' @param tests Vector of values, paired with `col_names` by position.
#' @return The result of inner-joining `df` to a key table that has one column
#'   per entry of `col_names`, holding the corresponding entry of `tests`.
filter_df <- function(df, col_names, tests) {
  # One single-column tibble per pair, bound side by side into the key table.
  # `!!col_name :=` injects the string held in col_name as the column name.
  criteria <- map2_dfc(
    col_names, tests,
    function(col_name, test) tibble(!!col_name := test)
  )
  # No `by` is given, so the join uses the columns common to both tables
  # and dplyr reports them in a message.
  inner_join(df, criteria)
}
my_df1 <- filter_df(my_df, col_names = c("b", "c"), tests = c(1, 0))
#> Joining, by = c("b", "c")
my_df1
#> # A tibble: 4 x 3
#> a b c
#> <int> <dbl> <dbl>
#> 1 1 1 0
#> 2 2 1 0
#> 3 9 1 0
#> 4 10 1 0
由 reprex 包 (v0.3.0) 于 2020 年 2 月 28 日创建
我正在尝试创建一个函数,允许用户定义无限数量的列并将匹配的过滤器应用于这些列。
df <- data.frame(a=1:10, b=round(runif(10)), c=round(runif(10)))
|a| b|c|
|1| 1|1|
|2| 0|0|
|3| 0|1|
|4| 1|0|
|5| 1|0|
|6| 1|0|
|7| 1|1|
|8| 1|1|
|9| 1|0|
|10|1|1|
我希望用户能够根据任一列过滤数据,并对每一列应用不同的过滤器。我知道以下内容不起作用。但这是一般的想法。
# Asker's sketch of the desired interface; the question states that it does not work.
# fCol is passed as column names in strings and fParam as the values to match.
# As written, `fCol[1] %in% fParam[1]` tests the name string itself against the
# value instead of the contents of the column it names, and only the first two
# entries of fCol / fParam are ever consulted.
test <- function(df, fCol, fParam){
df %>% filter(fCol[1] %in% fParam[1] | fCol[2] %in% fParam[2])
}
# Example call: columns "b" and "c" paired with the values 1 and 0.
test(df, c("b", "c"), c(1, 0))
# Which I would want it to return
|a|b|c|
|4|1|0|
|5|1|0|
|6|1|0|
|9|1|0|
我遇到的问题是:我事先不知道用户要过滤多少列,也不知道列名。
如有任何帮助,我将不胜感激。如果有疑问,请随时提出。我已尽力提供了一个可复现的示例(reprex)。
相信这应该能满足你的需求
library(tidyr)
library(dplyr)
#' Filter a data frame by user-supplied column/value pairs
#'
#' Each element of `fCol` is paired with the element of `fParam` at the same
#' position; a row satisfies a pair when that column equals that value.
#'
#' @param df A data frame or tibble. Grouped input is ungrouped first.
#' @param fCol Character vector of column names to test. A name may be
#'   repeated to test one column against several values.
#' @param fParam Vector of values to match, paired with `fCol` by position.
#'   If the lengths differ, the shorter vector is recycled when its length
#'   divides the longer one; otherwise an error is raised.
#' @param match_type `"any"` keeps rows that satisfy at least one pair (OR);
#'   `"all"` keeps rows that satisfy every pair (AND).
#' @return The rows of `df` that pass the filter, with the original columns.
#'   Rows whose combined result is `NA` are dropped.
test <- function(df, fCol, fParam, match_type = "any") {
  # Scalar `||` short-circuits, so a vector `match_type` never reaches `%in%`.
  if (!is.character(match_type) || length(match_type) != 1L ||
      !match_type %in% c("any", "all")) {
    stop('`match_type` must be either "any" or "all".', call. = FALSE)
  }

  n_col <- length(fCol)
  n_par <- length(fParam)
  if (n_col != n_par) {
    n_pairs <- max(n_col, n_par)
    shorter <- min(n_col, n_par)
    if (shorter == 0L || n_pairs %% shorter != 0L) {
      stop("`fCol` and `fParam` must have compatible lengths.", call. = FALSE)
    }
    fCol <- rep_len(fCol, n_pairs)
    fParam <- rep_len(fParam, n_pairs)
  }

  unknown <- setdiff(fCol, names(df))
  if (length(unknown) > 0L) {
    stop(
      "Column(s) not found in `df`: ", paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }

  if (inherits(df, "grouped_df")) {
    df <- dplyr::ungroup(df)
  }

  # One logical vector per column/value pair, then combined row-wise.
  hits <- Map(function(col, value) df[[col]] == value, fCol, fParam)
  if (match_type == "any") {
    # FALSE is the identity for `|`, so an empty set of pairs keeps no rows.
    keep <- Reduce(`|`, hits, rep(FALSE, nrow(df)))
  } else {
    # TRUE is the identity for `&`, so an empty set of pairs keeps every row.
    keep <- Reduce(`&`, hits, rep(TRUE, nrow(df)))
  }

  # which() drops both FALSE and NA, mirroring dplyr::filter().
  out <- df[which(keep), , drop = FALSE]
  if (!inherits(out, "tbl_df")) {
    # Plain data frames get fresh 1..k row names, as dplyr::filter() returns.
    rownames(out) <- NULL
  }
  out
}
df <- data.frame(a=1:10, b=round(runif(10)), c=round(runif(10)))
df
# a b c
#1 1 0 1
#2 2 1 0
#3 3 0 0
#4 4 0 1
#5 5 0 1
#6 6 0 1
#7 7 1 0
#8 8 1 1
#9 9 1 0
#10 10 1 0
#use "any" to do an | match
test(df, c("b","c"),c(1,0), match_type = "any")
# a b c
#1 2 1 0
#2 3 0 0
#3 7 1 0
#4 8 1 1
#5 9 1 0
#6 10 1 0
#use "all" to do an & match
test(df, c("b","c"),c(1,0), match_type = "all")
# a b c
#1 2 1 0
#2 7 1 0
#3 9 1 0
#4 10 1 0
如果要让同一列匹配多个值,也可以在 fCol 中多次指定相同的列名:
test(df, c("b","b"),c(1,0)) #matches everything but you get the point
(My original response):
I am not sure this gives you quite the process you want, but here's my best attempt before running out of patience! :-)
I am sure there is a good way to make this an AND filter rather than an OR, but I can't quite get there myself. (Maybe a combination of map_dfc and inner_join?)
编辑:终于搞定了!改进后的代码如下(原始代码已删除)。
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(purrr))
my_df <- tibble(
a=1:10,
b=round(runif(10)),
c=round(runif(10))
)
my_df
#> # A tibble: 10 x 3
#> a b c
#> <int> <dbl> <dbl>
#> 1 1 1 0
#> 2 2 1 0
#> 3 3 0 1
#> 4 4 0 0
#> 5 5 1 1
#> 6 6 0 1
#> 7 7 0 0
#> 8 8 0 1
#> 9 9 1 0
#> 10 10 1 0
col_names <- c("b", "c")
tests <- c(1, 0)
# option 1: with a named function:
#' Build a single-column tibble to use as a join key
#'
#' @param col_name Column name, given as a string.
#' @param test Value(s) to store in that column.
#' @return A tibble whose only column is named by `col_name` and holds `test`.
make_test_frame <- function(col_name, test) {
  # `!!` injects the *value* of col_name as the column name, so the result
  # does not depend on how the caller spelled the argument expression.
  tibble(!!col_name := test)
}
# Build the key table (one column per name/value pair) and inner-join my_df
# against it; my_df is the left-hand table, the key table the right-hand one.
my_df1 <- inner_join(
  my_df,
  map2_dfc(col_names, tests, make_test_frame)
)
#> Joining, by = c("b", "c")
my_df1
#> # A tibble: 4 x 3
#> a b c
#> <int> <dbl> <dbl>
#> 1 1 1 0
#> 2 2 1 0
#> 3 9 1 0
#> 4 10 1 0
# 2. or with an anonymous function:
# Same as option 1, with the key-table builder written inline.
# `!!col_name :=` injects the string held in col_name as the column name.
my_df1 <- inner_join(
  my_df,
  map2_dfc(
    col_names, tests,
    function(col_name, test) tibble(!!col_name := test)
  )
)
#> Joining, by = c("b", "c")
my_df1
#> # A tibble: 4 x 3
#> a b c
#> <int> <dbl> <dbl>
#> 1 1 1 0
#> 2 2 1 0
#> 3 9 1 0
#> 4 10 1 0
# 3. or as one big, hairy function:
#' Keep the rows of a data frame that match every column/value pair
#'
#' @param df A data frame or tibble to filter.
#' @param col_names Character vector of column names to match on.
#' @param tests Vector of values, paired with `col_names` by position.
#' @return The result of inner-joining `df` to a key table that has one column
#'   per entry of `col_names`, holding the corresponding entry of `tests`.
filter_df <- function(df, col_names, tests) {
  # One single-column tibble per pair, bound side by side into the key table.
  # `!!col_name :=` injects the string held in col_name as the column name.
  criteria <- map2_dfc(
    col_names, tests,
    function(col_name, test) tibble(!!col_name := test)
  )
  # No `by` is given, so the join uses the columns common to both tables
  # and dplyr reports them in a message.
  inner_join(df, criteria)
}
my_df1 <- filter_df(my_df, col_names = c("b", "c"), tests = c(1, 0))
#> Joining, by = c("b", "c")
my_df1
#> # A tibble: 4 x 3
#> a b c
#> <int> <dbl> <dbl>
#> 1 1 1 0
#> 2 2 1 0
#> 3 9 1 0
#> 4 10 1 0
由 reprex 包 (v0.3.0) 于 2020 年 2 月 28 日创建