计算满足特定标准的独特组合

Question

问题：

我想计算每个团队使用以下数据满足下述条件的独特 5 人组合的数量 n。

数据：

TEAM <- c("A","A","A","A","A","A","A","A","B","B","B","B","B","B","B","B")
PLAYER <- c("Will","Will","Roy","Roy","Jaylon","Dean","Yosef","Devan","Quincy","Quincy","Luis","Xzavier","Seth","Layne","Layne","Antwan")
LP <- c(1,1,2,2,3,4,5,6,1,1,2,3,4,5,5,6)
POS <- c("3B","OF","1B","OF","SS","OF","C","OF","2B","OF","OF","C","3B","1B","OF","SS")
df <- data.frame(TEAM,PLAYER,LP,POS)

df:

    TEAM  PLAYER   LP  POS
 1  A     Will     1   3B
 2  A     Will     1   OF
 3  A     Roy      2   1B
 4  A     Roy      3   OF
 5  A     Jaylon   3   SS
 6  A     Dean     4   OF
 7  A     Yosef    5   C
 8  A     Devan    6   OF
 9  B     Quincy   1   2B
10  B     Quincy   1   OF
11  B     Luis     2   OF
12  B     Xzavier  3   C
13  B     Seth     4   3B
14  B     Layne    5   1B
15  B     Layne    5   OF
16  B     Antwan   6   SS

编辑：LP 列与输出无关。这并不像我希望的那样清晰 post.

标准：

必须使用五名独特的球员 PLAYER（总是会遗漏一名球员，因为每支球队都有六名球员可供选择）。
每个位置 POS 只能使用一次，但 OF 除外，最多可以使用三次 OF <= 3.
组合不得使用来自多支球队 TEAM 的球员 PLAYER。

例如：

这些只是我期待的众多可能组合中的一小部分 create/count:

   TEAM  1          2          3          4         5
1  A     Will-OF    Roy-1B     Jaylon-SS  Dean-OF   Devan-OF
2  A     Roy-OF     Jaylon-SS  Dean-OF    Yosef-C   Devan-OF
3  A     Will-3B    Roy-OF     Jaylon-SS  Dean-OF   Yosef-C
...
n  A     Will-3B    Roy-1B     Jaylon-SS  Dean-OF   Yosef-C       

   TEAM  1          2          3          4         5
1  B     Quincy-2B  Luis-OF    Xzavier-C  Seth-3B   Layne-1B
2  B     Quincy-2B  Luis-OF    Seth-3B    Layne-1B  Antwan-SS
3  B     Quincy-OF  Luis-OF    Xzavier-C  Seth-3B   Layne-OF
...
n  B     Quincy-2B  Luis-OF    Xzavier-C  Seth-3B   Layne-OF

想要的结果：

TEAM  UNIQUE
A     n
B     n

我试过的：

我知道如何为每支球队获得所有可能的 5 人组合并进行总结。我只是不确定如何使用为其位置定义的特定标准来获得我正在寻找的组合。

我希望我知道从哪里开始。我真的需要你的帮助。谢谢！

Answer 1

考虑几个争论步骤：

将新列指定为 PLAYER 和 POS 的串联。
运行 by 按团队拆分数据帧和运行拆分操作（规则 #3）。
运行 combn 在 PLAYER_POS 上选择 5 个列表。
运行 ave 相似 PLAYER 的运行ning 计数。
运行 Filter 保留 5 行的数据框，5 个不同的球员，并遵守位置标准（规则 #1 和 #2）。

基本 R 代码

# HELPER COLUMN
df$PLAYER_POS <- with(df, paste(PLAYER, POS, sep="_"))

# BUILD LIST OF DFs BY TEAM
df_list <- by(df, df$TEAM, function(sub){
  combn(sub$PLAYER_POS, 5, FUN = function(p) 
    transform(subset(sub, PLAYER_POS %in% p),
              PLAYER_NUM = ave(LP, PLAYER, FUN=seq_along)), 
    simplify = FALSE)
})
  
# FILTER LIST OF DFs BY TEAM
df_list <- lapply(df_list, function(dfs) 
  Filter(function(df) 
           nrow(df) == 5 & 
           max(df$PLAYER_NUM)==1 &
           length(df$POS[df$POS == "OF"]) <= 3 &
           length(df$POS[df$POS != "OF"]) == length(unique(df$POS[df$POS != "OF"])), 
         dfs)
)

# COUNT REMAINING DFs BY TEAM FOR UNIQUE n
lengths(df_list)
#  A  B 
# 18 20 

data.frame(TEAMS=names(df_list), UNIQUE=lengths(df_list), row.names=NULL)
#   TEAMS UNIQUE
# 1     A     18
# 2     B     20

输出 （子集数据帧列表）

df_list$A[[1]]
#   TEAM PLAYER LP POS PLAYER_POS PLAYER_NUM
# 1    A   Will  1  3B    Will_3B          1
# 3    A    Roy  2  1B     Roy_1B          1
# 5    A Jaylon  3  SS  Jaylon_SS          1
# 6    A   Dean  4  OF    Dean_OF          1
# 7    A  Yosef  5   C    Yosef_C          1
df_list$A[[2]]
df_list$A[[3]]
...
df_list$A[[18]]


df_list$B[[1]]
#    TEAM  PLAYER LP POS PLAYER_POS PLAYER_NUM
# 9     B  Quincy  1  2B  Quincy_2B          1
# 11    B    Luis  2  OF    Luis_OF          1
# 12    B Xzavier  3   C  Xzavier_C          1
# 13    B    Seth  4  3B    Seth_3B          1
# 14    B   Layne  5  1B   Layne_1B          1
df_list$B[[2]]
df_list$B[[3]]
...
df_list$B[[20]]

Answer 2

更混乱的解决方案，

我一上午都在研究这个问题，刚刚找到了我的解决方案（只是看到发布了一个更优雅的解决方案。但我还是把它提供给你，以分享我如何得出解决方案的思考过程.

        library(tidyverse)
    
    TEAM <- c("A","A","A","A","A","A","A","A","B","B","B","B","B","B","B","B")
    PLAYER <- c("Will","Will","Roy","Roy","Jaylon","Dean","Yosef","Devan","Quincy","Quincy","Luis","Xzavier","Seth","Layne","Layne","Antwan")
    LP <- c(1,1,2,2,3,4,5,6,1,1,2,3,4,5,5,6)
    POS <- c("3B","OF","1B","OF","SS","OF","C","OF","2B","OF","OF","C","3B","1B","OF","SS")
    df <- data.frame(TEAM,PLAYER,LP,POS)
    rm(TEAM, PLAYER, LP, POS)
    
    # Each team has 6 players and I want to find the groups of 5 that are possible.
    posible_player_combinations <- combn(1:6, 5) %>% as_tibble() 
    team = "A"
    
    make_2nd_column <- function(first_stage, mydata_byteam, pcomp){
      mydf <- mydata_byteam %>% filter(LP == pcomp[2])
      col2_filter <- tibble(
        col1LP =  rep(first_stage$LP, each = nrow(mydf)),
        col1POS = rep(first_stage$POS, each = nrow(mydf)))
      helper <- tibble(
        col2LP = rep(mydf$LP, nrow(first_stage)),
        col2POS = rep(mydf$POS, nrow(first_stage))
      )
      col2_filter <- cbind(col2_filter, helper)
      second_stage <- col2_filter %>% filter(col1POS != col2POS)
      return(second_stage)
    }
    make_3rd_column <- function(second_stage, mydata_byteam, pcomp){
      mydf <- mydata_byteam %>% filter(LP == pcomp[3])
      col3_filter <- tibble(
        col1LP =  rep(second_stage$col1LP, each = nrow(mydf)),
        col1POS = rep(second_stage$col1POS, each = nrow(mydf)),
        col2LP =  rep(second_stage$col2LP, each = nrow(mydf)),
        col2POS = rep(second_stage$col2POS, each = nrow(mydf)))
      helper <- tibble(
        col3LP = rep(mydf$LP, nrow(second_stage)),
        col3POS = rep(mydf$POS, nrow(second_stage))
      )
      col3_filter <- cbind(col3_filter, helper)
      third_stage <- col3_filter %>% filter(col1POS != col2POS,
                                            col2POS != col3POS,
                                            col3POS != col1POS)
      return(third_stage)
    }
    make_4th_column <- function(third_stage, mydata_byteam, pcomp){
      mydf <- mydata_byteam %>% filter(LP == pcomp[4])
      col4_filter <- tibble(
        col1LP =  rep(third_stage$col1LP, each = nrow(mydf)),
        col1POS = rep(third_stage$col1POS, each = nrow(mydf)),
        col2LP =  rep(third_stage$col2LP, each = nrow(mydf)),
        col2POS = rep(third_stage$col2POS, each = nrow(mydf)),
        col3LP =  rep(third_stage$col3LP, each = nrow(mydf)),
        col3POS = rep(third_stage$col3POS, each = nrow(mydf)))
      helper <- tibble(
        col4LP = rep(mydf$LP, nrow(third_stage)),
        col4POS = rep(mydf$POS, nrow(third_stage))
      )
      col4_filter <- cbind(col4_filter, helper)
      fourth_stage <- col4_filter %>% filter(col1POS != col2POS,
                                             col1POS != col3POS,
                                             col1POS != col4POS,
                                             col2POS != col3POS,
                                             col2POS != col4POS,
                                             col3POS != col4POS)
      return(fourth_stage)
    }
    make_5th_column <- function(fourth_stage, mydata_byteam, pcomp){
      mydf <- mydata_byteam %>% filter(LP == pcomp[5])
      col5_filter <- tibble(
        col1LP =  rep(fourth_stage$col1LP, each = nrow(mydf)),
        col1POS = rep(fourth_stage$col1POS, each = nrow(mydf)),
        col2LP =  rep(fourth_stage$col2LP, each = nrow(mydf)),
        col2POS = rep(fourth_stage$col2POS, each = nrow(mydf)),
        col3LP =  rep(fourth_stage$col3LP, each = nrow(mydf)),
        col3POS = rep(fourth_stage$col3POS, each = nrow(mydf)),
        col4LP =  rep(fourth_stage$col4LP, each = nrow(mydf)),
        col4POS = rep(fourth_stage$col4POS, each = nrow(mydf)))
      helper <- tibble(
        col5LP = rep(mydf$LP, nrow(fourth_stage)),
        col5POS = rep(mydf$POS, nrow(fourth_stage))
      )
      col5_filter <- cbind(col5_filter, helper)
      final_stage_prefilter <- col5_filter %>% filter(
        col1POS != col2POS,
        col1POS != col3POS,
        col1POS != col4POS,
        col1POS != col5POS,
        col2POS != col3POS,
        col2POS != col4POS,
        col2POS != col5POS,
        col3POS != col4POS,
        col3POS != col5POS,
        col4POS != col5POS)
      return(final_stage_prefilter)
    }
    make_final <- function(final_stage_prefilter){
      final_stage_prefilter %>% mutate(
        Player1 = paste(col1LP, str_remove_all(col1POS, "-.*")),
        Player2 = paste(col2LP, str_remove_all(col2POS, "-.*")),
        Player3 = paste(col3LP, str_remove_all(col3POS, "-.*")),
        Player4 = paste(col4LP, str_remove_all(col4POS, "-.*")),
        Player5 = paste(col5LP, str_remove_all(col5POS, "-.*"))
      ) %>% select(
        11:15
      ) %>% distinct()
    }
    
    make_teams <- function(posible_player_combinations, mydata, k){
      pcomp  <- posible_player_combinations[k] %>% as_vector() %>% unname()
      mydata_byteam <- mydata %>% filter(LP %in% pcomp)
      first_stage            <- mydata_byteam %>% filter(LP == pcomp[1])
      second_stage           <- make_2nd_column(first_stage, mydata_byteam, pcomp)
      third_stage            <- make_3rd_column(second_stage, mydata_byteam, pcomp)
      fourth_stage           <- make_4th_column(third_stage, mydata_byteam, pcomp)
      final_stage_prefilter  <- make_5th_column(fourth_stage, mydata_byteam, pcomp)
      final_stage            <- make_final(final_stage_prefilter)
      return(final_stage)
    }
    
    
    make_all_combinations <- function(df, team, posible_player_combinations) {
      mydata <- df %>% filter(TEAM == team) %>% select(LP, POS)
      of_p <- mydata %>% filter(POS == "OF") %>% select(LP) %>% as_vector()
      # I want to treat 3 possible "OF"s as separate positions
      # so that that a later restirction on POS can occur.
      # Later I will need to filter out non-unique results
      # by separating the strings with "-" and dropping the letter.
      of_df <- bind_rows(lapply(
        seq_along(of_p),
        function(x, k){
          of_df <- tibble(
            LP = rep(of_p[k], 3),
            POS = c("OF-a", "OF-b", "OF-c")
          )
        },
        x = of_p
      ))
      mydata <- rbind(mydata %>% filter(POS != "OF"), of_df)
      all_combinations <- bind_rows(lapply(
        X = seq_along(posible_player_combinations),
        FUN = make_teams,
        posible_player_combinations = posible_player_combinations,
        mydata = mydata
      ))
    }
mydata_a <- make_all_combinations(df, "A", posible_player_combinations)
mydata_b <- make_all_combinations(df, "B", posible_player_combinations)

tail(mydata_a)
tail(mydata_b)

# > tail(mydata_a)
#      Player1 Player2 Player3 Player4 Player5
# 13    1 3B    2 OF    4 OF     5 C    6 OF
# 14    1 OF    2 1B    4 OF     5 C    6 OF
# 15    1 3B    3 SS    4 OF     5 C    6 OF
# 16    1 OF    3 SS    4 OF     5 C    6 OF
# 17    2 1B    3 SS    4 OF     5 C    6 OF
# 18    2 OF    3 SS    4 OF     5 C    6 OF
# > tail(mydata_b)
#      Player1 Player2 Player3 Player4 Player5
# 15    1 2B     3 C    4 3B    5 1B    6 SS
# 16    1 2B     3 C    4 3B    5 OF    6 SS
# 17    1 OF     3 C    4 3B    5 1B    6 SS
# 18    1 OF     3 C    4 3B    5 OF    6 SS
# 19    2 OF     3 C    4 3B    5 1B    6 SS
# 20    2 OF     3 C    4 3B    5 OF    6 SS

计算满足特定标准的独特组合

Count Unique Combinations Meeting Specific Criteria

combinations

group-by

r

unique

count

更混乱的解决方案，