R 中的嵌套函数
Nesting Functions in R
我对 R 比较陌生,需要一些关于用户自定义函数的帮助。我想知道数据框中的每个观测值,在同一数据框内相似观测值构成的子集中处于什么位置。但我在函数中引用原始观测值时遇到了问题,无法提取它的排名。
这是我的数据示例:
> dput(df)
structure(list(Name = c("Alex Abrines", "Steven Adams", "Cole Aldrich",
"LaMarcus Aldridge", "Kyle Anderson", "Ryan Anderson", "Giannis Antetokounmpo",
"Carmelo Anthony", "OG Anunoby", "Darrell Arthur", "Will Barton",
"Bradley Beal", "Davis Bertans", "Nemanja Bjelica", "Malcolm Brogdon",
"Aaron Brooks", "Dillon Brooks", "Lorenzo Brown", "Sterling Brown",
"Reggie Bullock", "Jimmy Butler", "Dwight Buycks", "Clint Capela",
"Wilson Chandler", "Torrey Craig", "Jamal Crawford", "Deyonta Davis",
"Matthew Dellavedova", "DeMar DeRozan", "Gorgui Dieng", "Andre Drummond",
"James Ennis", "Kenneth Faried", "Raymond Felton", "Terrance Ferguson",
"Bryn Forbes", "Tim Frazier", "Langston Galloway", "Marc Gasol",
"Pau Gasol", "Paul George", "Marcus Georges-Hunt", "Taj Gibson",
"Manu Ginobili", "Marcin Gortat", "Jerami Grant", "Danny Green",
"Gerald Green", "JaMychal Green", "Blake Griffin", "James Harden",
"Gary Harris", "Andrew Harrison", "Myke Henry", "John Henson",
"Nene Hilario", "Darrun Hilliard", "Josh Huestis", "Serge Ibaka",
"Stanley Johnson", "Nikola Jokic", "Tyus Jones", "Luke Kennard",
"Sean Kilpatrick", "Joffrey Lauvergne", "Kyle Lowry", "Trey Lyles",
"Ian Mahinmi", "Thon Maker", "Jarell Martin", "Luc Mbah a Moute",
"Ben McLemore", "Jodie Meeks", "Khris Middleton", "Patty Mills",
"Eric Moreland", "Markieff Morris", "Emmanuel Mudiay", "Shabazz Muhammad",
"Xavier Munford", "Dejounte Murray", "Jamal Murray", "Lucas Nogueira",
"Kelly Oubre", "Tony Parker", "Patrick Patterson", "Brandon Paul",
"Chris Paul", "Marshall Plumlee", "Jakob Poeltl", "Otto Porter",
"Norman Powell", "Willie Reed", "Tomas Satoransky", "Mike Scott",
"Wayne Selden", "Pascal Siakam", "Ish Smith", "Tony Snell", "Jeff Teague",
"Anthony Tolliver", "Karl-Anthony Towns", "P.J. Tucker", "Jonas Valanciunas",
"Rashad Vaughn", "Russell Westbrook", "Andrew Wiggins", "D.J. Wilson",
"Delon Wright"), Pos = structure(c(5L, 1L, 1L, 1L, 3L, 2L, 3L,
2L, 2L, 2L, 4L, 4L, 2L, 2L, 4L, 4L, 5L, 4L, 4L, 5L, 3L, 4L, 1L,
2L, 5L, 4L, 1L, 4L, 5L, 1L, 1L, 2L, 2L, 4L, 5L, 4L, 4L, 4L, 1L,
1L, 2L, 4L, 2L, 4L, 1L, 2L, 5L, 5L, 2L, 2L, 4L, 4L, 4L, 2L, 1L,
1L, 4L, 2L, 1L, 2L, 1L, 4L, 4L, 4L, 1L, 4L, 2L, 1L, 1L, 2L, 2L,
4L, 4L, 3L, 4L, 1L, 2L, 4L, 3L, 4L, 4L, 4L, 1L, 2L, 4L, 2L, 4L,
4L, 1L, 1L, 2L, 4L, 1L, 4L, 2L, 5L, 2L, 4L, 5L, 4L, 1L, 1L, 2L,
1L, 4L, 4L, 3L, 2L, 4L), .Label = c("C", "PF", "SF", "PG", "SG"
), class = "factor"), Date = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "2018-02-01 *", class = "factor"),
Tm = structure(c(7L, 7L, 6L, 8L, 8L, 3L, 5L, 7L, 9L, 1L,
1L, 10L, 8L, 6L, 5L, 6L, 4L, 9L, 5L, 2L, 6L, 2L, 3L, 1L,
1L, 6L, 4L, 5L, 9L, 6L, 2L, 4L, 1L, 7L, 7L, 8L, 10L, 2L,
4L, 8L, 7L, 6L, 6L, 8L, 10L, 7L, 8L, 3L, 4L, 2L, 3L, 1L,
4L, 4L, 5L, 3L, 8L, 7L, 9L, 2L, 1L, 6L, 2L, 5L, 8L, 9L, 1L,
10L, 5L, 4L, 3L, 4L, 10L, 5L, 8L, 2L, 10L, 1L, 6L, 5L, 8L,
1L, 9L, 10L, 8L, 7L, 8L, 3L, 5L, 9L, 10L, 9L, 2L, 10L, 10L,
4L, 9L, 2L, 5L, 6L, 2L, 6L, 3L, 9L, 5L, 7L, 6L, 5L, 9L), .Label = c("DEN",
"DET", "HOU", "MEM", "MIL", "MIN", "OKC", "SAS", "TOR", "WAS"
), class = "factor"), Opp = structure(c(1L, 1L, 5L, 3L, 3L,
8L, 6L, 1L, 10L, 7L, 7L, 9L, 3L, 5L, 6L, 5L, 2L, 10L, 6L,
4L, 5L, 4L, 8L, 7L, 7L, 5L, 2L, 6L, 10L, 5L, 4L, 2L, 7L,
1L, 1L, 3L, 9L, 4L, 2L, 3L, 1L, 5L, 5L, 3L, 9L, 1L, 3L, 8L,
2L, 4L, 8L, 7L, 2L, 2L, 6L, 8L, 3L, 1L, 10L, 4L, 7L, 5L,
4L, 6L, 3L, 10L, 7L, 9L, 6L, 2L, 8L, 2L, 9L, 6L, 3L, 4L,
9L, 7L, 5L, 6L, 3L, 7L, 10L, 9L, 3L, 1L, 3L, 8L, 6L, 10L,
9L, 10L, 4L, 9L, 9L, 2L, 10L, 4L, 6L, 5L, 4L, 5L, 8L, 10L,
6L, 1L, 5L, 6L, 10L), .Label = c("DEN", "DET", "HOU", "MEM",
"MIL", "MIN", "OKC", "SAS", "TOR", "WAS"), class = "factor"),
MP = c(29L, 32L, 3L, 34L, 30L, 29L, 36L, 34L, 21L, 1L, 36L,
38L, 13L, 14L, 10L, 3L, 32L, 11L, 24L, 35L, 40L, 19L, 35L,
34L, 22L, 17L, 15L, 25L, 38L, 13L, 28L, 15L, 10L, 14L, 4L,
18L, 17L, 4L, 33L, 20L, 36L, 6L, 33L, 20L, 26L, 25L, 28L,
30L, 20L, 35L, 37L, 38L, 34L, 22L, 32L, 13L, 8L, 12L, 35L,
36L, 37L, 17L, 21L, 18L, 2L, 35L, 15L, 19L, 13L, 28L, 35L,
10L, 9L, 35L, 24L, 5L, 32L, 14L, 3L, 7L, 24L, 34L, 3L, 23L,
17L, 15L, 2L, 30L, 5L, 16L, 29L, 26L, 5L, 28L, 19L, 31L,
13L, 29L, 29L, 28L, 22L, 33L, 31L, 29L, 4L, 39L, 30L, 4L,
13L), Player.ID = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 42L, 41L, 43L,
44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 53L, 52L, 54L, 55L,
56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L,
68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L,
80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L,
92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L,
103L, 104L, 105L, 106L, 107L, 108L, 109L), .Label = c("abrinal01",
"adamsst01", "aldrico01", "aldrila01", "anderky01", "anderry01",
"antetgi01", "anthoca01", "anunoog01", "arthuda01", "bartowi01",
"bealbr01", "bertada01", "bjeline01", "brogdma01", "brookaa01",
"brookdi01", "brownlo01", "brownst02", "bullore01", "butleji01",
"buyckdw01", "capelca01", "chandwi01", "craigto01", "crawfja01",
"davisde01", "dellama01", "derozde01", "dienggo01", "drumman01",
"ennisja01", "farieke01", "feltora01", "fergute01", "forbebr01",
"fraziti01", "gallola01", "gasolma01", "gasolpa01", "georgma01",
"georgpa01", "gibsota01", "ginobma01", "gortama01", "grantje01",
"greenda02", "greenge01", "greenja01", "griffbl01", "hardeja01",
"harrian01", "harriga01", "henrymy01", "hensojo01", "hilarne01",
"hillida01", "huestjo01", "ibakase01", "johnsst04", "jokicni01",
"jonesty01", "kennalu01", "kilpase01", "lauvejo01", "lowryky01",
"lylestr01", "mahinia01", "makerth01", "martija01", "mbahalu01",
"mclembe01", "meeksjo01", "middlkh01", "millspa02", "moreler01",
"morrima02", "mudiaem01", "muhamsh01", "munfoxa02", "murrade01",
"murraja01", "noguelu01", "oubreke01", "parketo01", "pattepa01",
"paulbr01", "paulch01", "plumlma02", "poeltja01", "porteot01",
"powelno01", "reedwi02", "satorto01", "scottmi01", "seldewa01",
"siakapa01", "smithis01", "snellto01", "teaguje01", "tollian01",
"townska01", "tuckepj01", "valanjo01", "vaughra01", "westbru01",
"wiggian01", "wilsodj01", "wrighde01"), class = "factor"),
Game.ID = structure(c(7L, 7L, 6L, 8L, 8L, 3L, 5L, 7L, 9L,
1L, 1L, 10L, 8L, 6L, 5L, 6L, 4L, 9L, 5L, 2L, 6L, 2L, 3L,
1L, 1L, 6L, 4L, 5L, 9L, 6L, 2L, 4L, 1L, 7L, 7L, 8L, 10L,
2L, 4L, 8L, 7L, 6L, 6L, 8L, 10L, 7L, 8L, 3L, 4L, 2L, 3L,
1L, 4L, 4L, 5L, 3L, 8L, 7L, 9L, 2L, 1L, 6L, 2L, 5L, 8L, 9L,
1L, 10L, 5L, 4L, 3L, 4L, 10L, 5L, 8L, 2L, 10L, 1L, 6L, 5L,
8L, 1L, 9L, 10L, 8L, 7L, 8L, 3L, 5L, 9L, 10L, 9L, 2L, 10L,
10L, 4L, 9L, 2L, 5L, 6L, 2L, 6L, 3L, 9L, 5L, 7L, 6L, 5L,
9L), .Label = c("2018-02-01 * DEN", "2018-02-01 * DET", "2018-02-01 * HOU",
"2018-02-01 * MEM", "2018-02-01 * MIL", "2018-02-01 * MIN",
"2018-02-01 * OKC", "2018-02-01 * SAS", "2018-02-01 * TOR",
"2018-02-01 * WAS"), class = "factor")), .Names = c("Name",
"Pos", "Date", "Tm", "Opp", "MP", "Player.ID", "Game.ID"), class = "data.frame", row.names = c(NA,
109L))
我想编写一个函数,对于每次观察:
> df[1, ]
Name Pos Date Tm Opp MP Player.ID Game.ID
1 Alex Abrines SG 2018-02-01 * OKC DEN 29 abrinal01 2018-02-01 * OKC
创建由具有相同 df$Game.ID 的所有其他观测值组成的子集。
> df[df$Game.ID == '2018-02-01 * OKC', ]
Name Pos Date Tm Opp MP Player.ID Game.ID
1 Alex Abrines SG 2018-02-01 * OKC DEN 29 abrinal01 2018-02-01 * OKC
2 Steven Adams C 2018-02-01 * OKC DEN 32 adamsst01 2018-02-01 * OKC
8 Carmelo Anthony PF 2018-02-01 * OKC DEN 34 anthoca01 2018-02-01 * OKC
34 Raymond Felton PG 2018-02-01 * OKC DEN 14 feltora01 2018-02-01 * OKC
35 Terrance Ferguson SG 2018-02-01 * OKC DEN 4 fergute01 2018-02-01 * OKC
41 Paul George PF 2018-02-01 * OKC DEN 36 georgpa01 2018-02-01 * OKC
46 Jerami Grant PF 2018-02-01 * OKC DEN 25 grantje01 2018-02-01 * OKC
58 Josh Huestis PF 2018-02-01 * OKC DEN 12 huestjo01 2018-02-01 * OKC
86 Patrick Patterson PF 2018-02-01 * OKC DEN 15 pattepa01 2018-02-01 * OKC
106 Russell Westbrook PG 2018-02-01 * OKC DEN 39 westbru01 2018-02-01 * OKC
然后返回原始观测值 df$MP 的排名
> df[1, c('MP')]
[1] 29
在新子集的层次结构中。
> xx <- data.frame(cbind(sort(df[df$Game.ID == '2018-02-01 * OKC', c('MP')], decreasing = TRUE), rownames(data.table(sort(df[df$Game.ID == '2018-02-01 * OKC', c('MP')], decreasing = TRUE)))))
> xx
X1 X2
1 39 1
2 36 2
3 34 3
4 32 4
5 29 5
6 25 6
7 15 7
8 14 8
9 12 9
10 4 10
> colnames(xx) <- c('MP', 'Depth.Chart')
> yy <- df[df$Game.ID == '2018-02-01 * OKC', ]
> yy
Name Pos Date Tm Opp MP Player.ID
1 Alex Abrines SG 2018-02-01 * OKC DEN 29 abrinal01
2 Steven Adams C 2018-02-01 * OKC DEN 32 adamsst01
8 Carmelo Anthony PF 2018-02-01 * OKC DEN 34 anthoca01
34 Raymond Felton PG 2018-02-01 * OKC DEN 14 feltora01
35 Terrance Ferguson SG 2018-02-01 * OKC DEN 4 fergute01
41 Paul George PF 2018-02-01 * OKC DEN 36 georgpa01
46 Jerami Grant PF 2018-02-01 * OKC DEN 25 grantje01
58 Josh Huestis PF 2018-02-01 * OKC DEN 12 huestjo01
86 Patrick Patterson PF 2018-02-01 * OKC DEN 15 pattepa01
106 Russell Westbrook PG 2018-02-01 * OKC DEN 39 westbru01
Game.ID
1 2018-02-01 * OKC
2 2018-02-01 * OKC
8 2018-02-01 * OKC
34 2018-02-01 * OKC
35 2018-02-01 * OKC
41 2018-02-01 * OKC
46 2018-02-01 * OKC
58 2018-02-01 * OKC
86 2018-02-01 * OKC
106 2018-02-01 * OKC
> zz <- merge(yy, xx, all.x = TRUE)
> zz
MP Name Pos Date Tm Opp Player.ID
1 4 Terrance Ferguson SG 2018-02-01 * OKC DEN fergute01
2 12 Josh Huestis PF 2018-02-01 * OKC DEN huestjo01
3 14 Raymond Felton PG 2018-02-01 * OKC DEN feltora01
4 15 Patrick Patterson PF 2018-02-01 * OKC DEN pattepa01
5 25 Jerami Grant PF 2018-02-01 * OKC DEN grantje01
6 29 Alex Abrines SG 2018-02-01 * OKC DEN abrinal01
7 32 Steven Adams C 2018-02-01 * OKC DEN adamsst01
8 34 Carmelo Anthony PF 2018-02-01 * OKC DEN anthoca01
9 36 Paul George PF 2018-02-01 * OKC DEN georgpa01
10 39 Russell Westbrook PG 2018-02-01 * OKC DEN westbru01
Game.ID Depth.Chart
1 2018-02-01 * OKC 10
2 2018-02-01 * OKC 9
3 2018-02-01 * OKC 8
4 2018-02-01 * OKC 7
5 2018-02-01 * OKC 6
6 2018-02-01 * OKC 5
7 2018-02-01 * OKC 4
8 2018-02-01 * OKC 3
9 2018-02-01 * OKC 2
10 2018-02-01 * OKC 1
最后,我需要提取与原始观测值相对应的 zz$Depth.Chart 的值,即 5。
> zz[zz$MP == 29, c('Depth.Chart')]
[1] 5
Levels: 1 10 2 3 4 5 6 7 8 9
我想定义一个函数,对数据框中的每个观测值执行上述繁琐而混乱的步骤,并返回结果向量。我该如何引用与当前正在处理的观测值相对应的 df$MP 的值,而不必像上面那样显式写出 29?以下是我尝试过但没有成功的一些方法。
> f1 <- function(col1, df, col2){
+ lapply(col1, function(i){
+ df2 <- df[col1 == i, col2]
+ df3 <- data.frame(cbind(sort(df2, decreasing = TRUE), rownames(data.table(sort(df2, decreasing = TRUE)))))
+ df3[i, 2]
+ })}
> f1(df$Game.ID, df, c('MP'))[1:10]
[[1]]
[1] 7
Levels: 1 10 2 3 4 5 6 7 8 9
[[2]]
[1] 7
Levels: 1 10 2 3 4 5 6 7 8 9
[[3]]
[1] 6
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9
[[4]]
[1] 8
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9
[[5]]
[1] 8
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9
[[6]]
[1] 3
Levels: 1 2 3 4 5 6 7 8
[[7]]
[1] 5
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9
[[8]]
[1] 7
Levels: 1 10 2 3 4 5 6 7 8 9
[[9]]
[1] 9
Levels: 1 10 11 2 3 4 5 6 7 8 9
[[10]]
[1] 1
Levels: 1 10 2 3 4 5 6 7 8 9
> f1 <- function(col1, df, col2){
+ lapply(col1, function(i){
+ df2 <- df[col1 == i, col2]
+ df3 <- data.frame(cbind(sort(df2, decreasing = TRUE), rownames(data.table(sort(df2, decreasing = TRUE)))))
+ df3[df3$X1 == i, 2]
+ })}
> f1(df$Game.ID, df, c('MP'))
Hide Traceback
Rerun with Debug
Error in Ops.factor(df3$X1, i) : level sets of factors are different
7.
stop("level sets of factors are different")
6.
Ops.factor(df3$X1, i)
5.
`[.data.frame`(df3, df3$X1 == i, 2)
4.
df3[df3$X1 == i, 2]
3.
FUN(X[[i]], ...)
2.
lapply(col1, function(i) {
df2 <- df[col1 == i, col2]
df3 <- data.frame(cbind(sort(df2, decreasing = TRUE), rownames(data.table(sort(df2,
decreasing = TRUE))))) ...
1.
f1(df$Game.ID, df, c("MP"))
> f1 <- function(col1, df, col2){
+ lapply(col1, function(i){
+ df2 <- df[col1 == i, col2]
+ df3 <- data.frame(cbind(sort(df2, decreasing = TRUE), rownames(data.table(sort(df2, decreasing = TRUE)))))
+ df3[col2 == i, 2]
+ })}
> f1(df$Game.ID, df, c('MP'))[1:10]
[[1]]
factor(0)
Levels: 1 10 2 3 4 5 6 7 8 9
[[2]]
factor(0)
Levels: 1 10 2 3 4 5 6 7 8 9
[[3]]
factor(0)
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9
[[4]]
factor(0)
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9
[[5]]
factor(0)
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9
[[6]]
factor(0)
Levels: 1 2 3 4 5 6 7 8
[[7]]
factor(0)
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9
[[8]]
factor(0)
Levels: 1 10 2 3 4 5 6 7 8 9
[[9]]
factor(0)
Levels: 1 10 11 2 3 4 5 6 7 8 9
[[10]]
factor(0)
Levels: 1 10 2 3 4 5 6 7 8 9
我想我并不完全理解 R 在函数内部是如何处理这个 i 变量的,因此也不清楚该如何正确引用它。在浏览这个论坛时,我找到了 Python 中在函数内部嵌套函数的通用示例,但没有找到 R 的。任何帮助都将不胜感激。
编辑
这是我的数据的一个更简单的子集:
> dput(df)
structure(list(MP = c(29L, 32L, 3L, 34L, 14L, 3L, 40L, 17L, 13L,
14L, 4L, 36L, 6L, 33L, 25L, 12L, 17L, 3L, 15L, 28L, 33L, 39L,
30L), Player.ID = structure(c(1L, 2L, 3L, 8L, 14L, 16L, 21L,
26L, 30L, 34L, 35L, 42L, 41L, 43L, 46L, 58L, 62L, 79L, 86L, 100L,
102L, 106L, 107L), .Label = c("abrinal01", "adamsst01", "aldrico01",
"aldrila01", "anderky01", "anderry01", "antetgi01", "anthoca01",
"anunoog01", "arthuda01", "bartowi01", "bealbr01", "bertada01",
"bjeline01", "brogdma01", "brookaa01", "brookdi01", "brownlo01",
"brownst02", "bullore01", "butleji01", "buyckdw01", "capelca01",
"chandwi01", "craigto01", "crawfja01", "davisde01", "dellama01",
"derozde01", "dienggo01", "drumman01", "ennisja01", "farieke01",
"feltora01", "fergute01", "forbebr01", "fraziti01", "gallola01",
"gasolma01", "gasolpa01", "georgma01", "georgpa01", "gibsota01",
"ginobma01", "gortama01", "grantje01", "greenda02", "greenge01",
"greenja01", "griffbl01", "hardeja01", "harrian01", "harriga01",
"henrymy01", "hensojo01", "hilarne01", "hillida01", "huestjo01",
"ibakase01", "johnsst04", "jokicni01", "jonesty01", "kennalu01",
"kilpase01", "lauvejo01", "lowryky01", "lylestr01", "mahinia01",
"makerth01", "martija01", "mbahalu01", "mclembe01", "meeksjo01",
"middlkh01", "millspa02", "moreler01", "morrima02", "mudiaem01",
"muhamsh01", "munfoxa02", "murrade01", "murraja01", "noguelu01",
"oubreke01", "parketo01", "pattepa01", "paulbr01", "paulch01",
"plumlma02", "poeltja01", "porteot01", "powelno01", "reedwi02",
"satorto01", "scottmi01", "seldewa01", "siakapa01", "smithis01",
"snellto01", "teaguje01", "tollian01", "townska01", "tuckepj01",
"valanjo01", "vaughra01", "westbru01", "wiggian01", "wilsodj01",
"wrighde01"), class = "factor"), Game.ID = structure(c(7L, 7L,
6L, 7L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 6L, 6L, 7L, 7L, 6L, 6L,
7L, 6L, 6L, 7L, 6L), .Label = c("2018-02-01 * DEN", "2018-02-01 * DET",
"2018-02-01 * HOU", "2018-02-01 * MEM", "2018-02-01 * MIL", "2018-02-01 * MIN",
"2018-02-01 * OKC", "2018-02-01 * SAS", "2018-02-01 * TOR", "2018-02-01 * WAS"
), class = "factor")), .Names = c("MP", "Player.ID", "Game.ID"
), row.names = c(1L, 2L, 3L, 8L, 14L, 16L, 21L, 26L, 30L, 34L,
35L, 41L, 42L, 43L, 46L, 58L, 62L, 79L, 86L, 100L, 102L, 106L,
107L), class = "data.frame")
您只在流程中的个别小步骤里用到了 data.table,但其实应该用它来完成整个过程。它非常便于执行"按组"(by group)操作,在本例中就是按 Game.ID 分组使用 rank()。使用您的小样本数据:
# Load data.table, which provides setDT() and grouped `:=` assignment.
library(data.table)
# Convert the existing data.frame `df` to a data.table by reference (no copy).
setDT(df)
# Within each Game.ID group, rank MP in descending order (negating MP makes
# the largest value rank 1) and store the result in a new Depth.Chart column.
# rank() uses its default tie handling here, so tied values share the average
# of their ranks (e.g. 2.5, 6.5).
df[, Depth.Chart := rank(-MP), by = Game.ID]
# Print the updated table, now including Depth.Chart.
df
# MP Player.ID Game.ID Depth.Chart
# 1: 29 abrinal01 2018-02-01 * OKC 5.0
# 2: 32 adamsst01 2018-02-01 * OKC 4.0
# 3: 3 aldrico01 2018-02-01 * MIN 12.0
# 4: 34 anthoca01 2018-02-01 * OKC 3.0
# 5: 14 bjeline01 2018-02-01 * MIN 8.0
# 6: 3 brookaa01 2018-02-01 * MIN 12.0
# 7: 40 butleji01 2018-02-01 * MIN 1.0
# 8: 17 crawfja01 2018-02-01 * MIN 6.5
# 9: 13 dienggo01 2018-02-01 * MIN 9.0
# 10: 14 feltora01 2018-02-01 * OKC 8.0
# 11: 4 fergute01 2018-02-01 * OKC 10.0
# 12: 36 georgpa01 2018-02-01 * OKC 2.0
# 13: 6 georgma01 2018-02-01 * MIN 10.0
# 14: 33 gibsota01 2018-02-01 * MIN 2.5
# 15: 25 grantje01 2018-02-01 * OKC 6.0
# 16: 12 huestjo01 2018-02-01 * OKC 9.0
# 17: 17 jonesty01 2018-02-01 * MIN 6.5
# 18: 3 muhamsh01 2018-02-01 * MIN 12.0
# 19: 15 pattepa01 2018-02-01 * OKC 7.0
# 20: 28 teaguje01 2018-02-01 * MIN 5.0
# 21: 33 townska01 2018-02-01 * MIN 2.5
# 22: 39 westbru01 2018-02-01 * OKC 1.0
# 23: 30 wiggian01 2018-02-01 * MIN 4.0
# MP Player.ID Game.ID Depth.Chart
rank 默认会对并列的值取平均排名;如需其他处理方式,请参阅 ?rank。
我对 R 比较陌生,需要一些关于用户自定义函数的帮助。我想知道数据框中的每个观测值,在同一数据框内相似观测值构成的子集中处于什么位置。但我在函数中引用原始观测值时遇到了问题,无法提取它的排名。
这是我的数据示例:
> dput(df)
structure(list(Name = c("Alex Abrines", "Steven Adams", "Cole Aldrich",
"LaMarcus Aldridge", "Kyle Anderson", "Ryan Anderson", "Giannis Antetokounmpo",
"Carmelo Anthony", "OG Anunoby", "Darrell Arthur", "Will Barton",
"Bradley Beal", "Davis Bertans", "Nemanja Bjelica", "Malcolm Brogdon",
"Aaron Brooks", "Dillon Brooks", "Lorenzo Brown", "Sterling Brown",
"Reggie Bullock", "Jimmy Butler", "Dwight Buycks", "Clint Capela",
"Wilson Chandler", "Torrey Craig", "Jamal Crawford", "Deyonta Davis",
"Matthew Dellavedova", "DeMar DeRozan", "Gorgui Dieng", "Andre Drummond",
"James Ennis", "Kenneth Faried", "Raymond Felton", "Terrance Ferguson",
"Bryn Forbes", "Tim Frazier", "Langston Galloway", "Marc Gasol",
"Pau Gasol", "Paul George", "Marcus Georges-Hunt", "Taj Gibson",
"Manu Ginobili", "Marcin Gortat", "Jerami Grant", "Danny Green",
"Gerald Green", "JaMychal Green", "Blake Griffin", "James Harden",
"Gary Harris", "Andrew Harrison", "Myke Henry", "John Henson",
"Nene Hilario", "Darrun Hilliard", "Josh Huestis", "Serge Ibaka",
"Stanley Johnson", "Nikola Jokic", "Tyus Jones", "Luke Kennard",
"Sean Kilpatrick", "Joffrey Lauvergne", "Kyle Lowry", "Trey Lyles",
"Ian Mahinmi", "Thon Maker", "Jarell Martin", "Luc Mbah a Moute",
"Ben McLemore", "Jodie Meeks", "Khris Middleton", "Patty Mills",
"Eric Moreland", "Markieff Morris", "Emmanuel Mudiay", "Shabazz Muhammad",
"Xavier Munford", "Dejounte Murray", "Jamal Murray", "Lucas Nogueira",
"Kelly Oubre", "Tony Parker", "Patrick Patterson", "Brandon Paul",
"Chris Paul", "Marshall Plumlee", "Jakob Poeltl", "Otto Porter",
"Norman Powell", "Willie Reed", "Tomas Satoransky", "Mike Scott",
"Wayne Selden", "Pascal Siakam", "Ish Smith", "Tony Snell", "Jeff Teague",
"Anthony Tolliver", "Karl-Anthony Towns", "P.J. Tucker", "Jonas Valanciunas",
"Rashad Vaughn", "Russell Westbrook", "Andrew Wiggins", "D.J. Wilson",
"Delon Wright"), Pos = structure(c(5L, 1L, 1L, 1L, 3L, 2L, 3L,
2L, 2L, 2L, 4L, 4L, 2L, 2L, 4L, 4L, 5L, 4L, 4L, 5L, 3L, 4L, 1L,
2L, 5L, 4L, 1L, 4L, 5L, 1L, 1L, 2L, 2L, 4L, 5L, 4L, 4L, 4L, 1L,
1L, 2L, 4L, 2L, 4L, 1L, 2L, 5L, 5L, 2L, 2L, 4L, 4L, 4L, 2L, 1L,
1L, 4L, 2L, 1L, 2L, 1L, 4L, 4L, 4L, 1L, 4L, 2L, 1L, 1L, 2L, 2L,
4L, 4L, 3L, 4L, 1L, 2L, 4L, 3L, 4L, 4L, 4L, 1L, 2L, 4L, 2L, 4L,
4L, 1L, 1L, 2L, 4L, 1L, 4L, 2L, 5L, 2L, 4L, 5L, 4L, 1L, 1L, 2L,
1L, 4L, 4L, 3L, 2L, 4L), .Label = c("C", "PF", "SF", "PG", "SG"
), class = "factor"), Date = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "2018-02-01 *", class = "factor"),
Tm = structure(c(7L, 7L, 6L, 8L, 8L, 3L, 5L, 7L, 9L, 1L,
1L, 10L, 8L, 6L, 5L, 6L, 4L, 9L, 5L, 2L, 6L, 2L, 3L, 1L,
1L, 6L, 4L, 5L, 9L, 6L, 2L, 4L, 1L, 7L, 7L, 8L, 10L, 2L,
4L, 8L, 7L, 6L, 6L, 8L, 10L, 7L, 8L, 3L, 4L, 2L, 3L, 1L,
4L, 4L, 5L, 3L, 8L, 7L, 9L, 2L, 1L, 6L, 2L, 5L, 8L, 9L, 1L,
10L, 5L, 4L, 3L, 4L, 10L, 5L, 8L, 2L, 10L, 1L, 6L, 5L, 8L,
1L, 9L, 10L, 8L, 7L, 8L, 3L, 5L, 9L, 10L, 9L, 2L, 10L, 10L,
4L, 9L, 2L, 5L, 6L, 2L, 6L, 3L, 9L, 5L, 7L, 6L, 5L, 9L), .Label = c("DEN",
"DET", "HOU", "MEM", "MIL", "MIN", "OKC", "SAS", "TOR", "WAS"
), class = "factor"), Opp = structure(c(1L, 1L, 5L, 3L, 3L,
8L, 6L, 1L, 10L, 7L, 7L, 9L, 3L, 5L, 6L, 5L, 2L, 10L, 6L,
4L, 5L, 4L, 8L, 7L, 7L, 5L, 2L, 6L, 10L, 5L, 4L, 2L, 7L,
1L, 1L, 3L, 9L, 4L, 2L, 3L, 1L, 5L, 5L, 3L, 9L, 1L, 3L, 8L,
2L, 4L, 8L, 7L, 2L, 2L, 6L, 8L, 3L, 1L, 10L, 4L, 7L, 5L,
4L, 6L, 3L, 10L, 7L, 9L, 6L, 2L, 8L, 2L, 9L, 6L, 3L, 4L,
9L, 7L, 5L, 6L, 3L, 7L, 10L, 9L, 3L, 1L, 3L, 8L, 6L, 10L,
9L, 10L, 4L, 9L, 9L, 2L, 10L, 4L, 6L, 5L, 4L, 5L, 8L, 10L,
6L, 1L, 5L, 6L, 10L), .Label = c("DEN", "DET", "HOU", "MEM",
"MIL", "MIN", "OKC", "SAS", "TOR", "WAS"), class = "factor"),
MP = c(29L, 32L, 3L, 34L, 30L, 29L, 36L, 34L, 21L, 1L, 36L,
38L, 13L, 14L, 10L, 3L, 32L, 11L, 24L, 35L, 40L, 19L, 35L,
34L, 22L, 17L, 15L, 25L, 38L, 13L, 28L, 15L, 10L, 14L, 4L,
18L, 17L, 4L, 33L, 20L, 36L, 6L, 33L, 20L, 26L, 25L, 28L,
30L, 20L, 35L, 37L, 38L, 34L, 22L, 32L, 13L, 8L, 12L, 35L,
36L, 37L, 17L, 21L, 18L, 2L, 35L, 15L, 19L, 13L, 28L, 35L,
10L, 9L, 35L, 24L, 5L, 32L, 14L, 3L, 7L, 24L, 34L, 3L, 23L,
17L, 15L, 2L, 30L, 5L, 16L, 29L, 26L, 5L, 28L, 19L, 31L,
13L, 29L, 29L, 28L, 22L, 33L, 31L, 29L, 4L, 39L, 30L, 4L,
13L), Player.ID = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 42L, 41L, 43L,
44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 53L, 52L, 54L, 55L,
56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L,
68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L,
80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L,
92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L,
103L, 104L, 105L, 106L, 107L, 108L, 109L), .Label = c("abrinal01",
"adamsst01", "aldrico01", "aldrila01", "anderky01", "anderry01",
"antetgi01", "anthoca01", "anunoog01", "arthuda01", "bartowi01",
"bealbr01", "bertada01", "bjeline01", "brogdma01", "brookaa01",
"brookdi01", "brownlo01", "brownst02", "bullore01", "butleji01",
"buyckdw01", "capelca01", "chandwi01", "craigto01", "crawfja01",
"davisde01", "dellama01", "derozde01", "dienggo01", "drumman01",
"ennisja01", "farieke01", "feltora01", "fergute01", "forbebr01",
"fraziti01", "gallola01", "gasolma01", "gasolpa01", "georgma01",
"georgpa01", "gibsota01", "ginobma01", "gortama01", "grantje01",
"greenda02", "greenge01", "greenja01", "griffbl01", "hardeja01",
"harrian01", "harriga01", "henrymy01", "hensojo01", "hilarne01",
"hillida01", "huestjo01", "ibakase01", "johnsst04", "jokicni01",
"jonesty01", "kennalu01", "kilpase01", "lauvejo01", "lowryky01",
"lylestr01", "mahinia01", "makerth01", "martija01", "mbahalu01",
"mclembe01", "meeksjo01", "middlkh01", "millspa02", "moreler01",
"morrima02", "mudiaem01", "muhamsh01", "munfoxa02", "murrade01",
"murraja01", "noguelu01", "oubreke01", "parketo01", "pattepa01",
"paulbr01", "paulch01", "plumlma02", "poeltja01", "porteot01",
"powelno01", "reedwi02", "satorto01", "scottmi01", "seldewa01",
"siakapa01", "smithis01", "snellto01", "teaguje01", "tollian01",
"townska01", "tuckepj01", "valanjo01", "vaughra01", "westbru01",
"wiggian01", "wilsodj01", "wrighde01"), class = "factor"),
Game.ID = structure(c(7L, 7L, 6L, 8L, 8L, 3L, 5L, 7L, 9L,
1L, 1L, 10L, 8L, 6L, 5L, 6L, 4L, 9L, 5L, 2L, 6L, 2L, 3L,
1L, 1L, 6L, 4L, 5L, 9L, 6L, 2L, 4L, 1L, 7L, 7L, 8L, 10L,
2L, 4L, 8L, 7L, 6L, 6L, 8L, 10L, 7L, 8L, 3L, 4L, 2L, 3L,
1L, 4L, 4L, 5L, 3L, 8L, 7L, 9L, 2L, 1L, 6L, 2L, 5L, 8L, 9L,
1L, 10L, 5L, 4L, 3L, 4L, 10L, 5L, 8L, 2L, 10L, 1L, 6L, 5L,
8L, 1L, 9L, 10L, 8L, 7L, 8L, 3L, 5L, 9L, 10L, 9L, 2L, 10L,
10L, 4L, 9L, 2L, 5L, 6L, 2L, 6L, 3L, 9L, 5L, 7L, 6L, 5L,
9L), .Label = c("2018-02-01 * DEN", "2018-02-01 * DET", "2018-02-01 * HOU",
"2018-02-01 * MEM", "2018-02-01 * MIL", "2018-02-01 * MIN",
"2018-02-01 * OKC", "2018-02-01 * SAS", "2018-02-01 * TOR",
"2018-02-01 * WAS"), class = "factor")), .Names = c("Name",
"Pos", "Date", "Tm", "Opp", "MP", "Player.ID", "Game.ID"), class = "data.frame", row.names = c(NA,
109L))
我想编写一个函数,对于每次观察:
> df[1, ]
Name Pos Date Tm Opp MP Player.ID Game.ID
1 Alex Abrines SG 2018-02-01 * OKC DEN 29 abrinal01 2018-02-01 * OKC
创建由具有相同 df$Game.ID 的所有其他观测值组成的子集。
> df[df$Game.ID == '2018-02-01 * OKC', ]
Name Pos Date Tm Opp MP Player.ID Game.ID
1 Alex Abrines SG 2018-02-01 * OKC DEN 29 abrinal01 2018-02-01 * OKC
2 Steven Adams C 2018-02-01 * OKC DEN 32 adamsst01 2018-02-01 * OKC
8 Carmelo Anthony PF 2018-02-01 * OKC DEN 34 anthoca01 2018-02-01 * OKC
34 Raymond Felton PG 2018-02-01 * OKC DEN 14 feltora01 2018-02-01 * OKC
35 Terrance Ferguson SG 2018-02-01 * OKC DEN 4 fergute01 2018-02-01 * OKC
41 Paul George PF 2018-02-01 * OKC DEN 36 georgpa01 2018-02-01 * OKC
46 Jerami Grant PF 2018-02-01 * OKC DEN 25 grantje01 2018-02-01 * OKC
58 Josh Huestis PF 2018-02-01 * OKC DEN 12 huestjo01 2018-02-01 * OKC
86 Patrick Patterson PF 2018-02-01 * OKC DEN 15 pattepa01 2018-02-01 * OKC
106 Russell Westbrook PG 2018-02-01 * OKC DEN 39 westbru01 2018-02-01 * OKC
然后返回原始观测值 df$MP 的排名
> df[1, c('MP')]
[1] 29
在新子集的层次结构中。
> xx <- data.frame(cbind(sort(df[df$Game.ID == '2018-02-01 * OKC', c('MP')], decreasing = TRUE), rownames(data.table(sort(df[df$Game.ID == '2018-02-01 * OKC', c('MP')], decreasing = TRUE)))))
> xx
X1 X2
1 39 1
2 36 2
3 34 3
4 32 4
5 29 5
6 25 6
7 15 7
8 14 8
9 12 9
10 4 10
> colnames(xx) <- c('MP', 'Depth.Chart')
> yy <- df[df$Game.ID == '2018-02-01 * OKC', ]
> yy
Name Pos Date Tm Opp MP Player.ID
1 Alex Abrines SG 2018-02-01 * OKC DEN 29 abrinal01
2 Steven Adams C 2018-02-01 * OKC DEN 32 adamsst01
8 Carmelo Anthony PF 2018-02-01 * OKC DEN 34 anthoca01
34 Raymond Felton PG 2018-02-01 * OKC DEN 14 feltora01
35 Terrance Ferguson SG 2018-02-01 * OKC DEN 4 fergute01
41 Paul George PF 2018-02-01 * OKC DEN 36 georgpa01
46 Jerami Grant PF 2018-02-01 * OKC DEN 25 grantje01
58 Josh Huestis PF 2018-02-01 * OKC DEN 12 huestjo01
86 Patrick Patterson PF 2018-02-01 * OKC DEN 15 pattepa01
106 Russell Westbrook PG 2018-02-01 * OKC DEN 39 westbru01
Game.ID
1 2018-02-01 * OKC
2 2018-02-01 * OKC
8 2018-02-01 * OKC
34 2018-02-01 * OKC
35 2018-02-01 * OKC
41 2018-02-01 * OKC
46 2018-02-01 * OKC
58 2018-02-01 * OKC
86 2018-02-01 * OKC
106 2018-02-01 * OKC
> zz <- merge(yy, xx, all.x = TRUE)
> zz
MP Name Pos Date Tm Opp Player.ID
1 4 Terrance Ferguson SG 2018-02-01 * OKC DEN fergute01
2 12 Josh Huestis PF 2018-02-01 * OKC DEN huestjo01
3 14 Raymond Felton PG 2018-02-01 * OKC DEN feltora01
4 15 Patrick Patterson PF 2018-02-01 * OKC DEN pattepa01
5 25 Jerami Grant PF 2018-02-01 * OKC DEN grantje01
6 29 Alex Abrines SG 2018-02-01 * OKC DEN abrinal01
7 32 Steven Adams C 2018-02-01 * OKC DEN adamsst01
8 34 Carmelo Anthony PF 2018-02-01 * OKC DEN anthoca01
9 36 Paul George PF 2018-02-01 * OKC DEN georgpa01
10 39 Russell Westbrook PG 2018-02-01 * OKC DEN westbru01
Game.ID Depth.Chart
1 2018-02-01 * OKC 10
2 2018-02-01 * OKC 9
3 2018-02-01 * OKC 8
4 2018-02-01 * OKC 7
5 2018-02-01 * OKC 6
6 2018-02-01 * OKC 5
7 2018-02-01 * OKC 4
8 2018-02-01 * OKC 3
9 2018-02-01 * OKC 2
10 2018-02-01 * OKC 1
最后,我需要提取与原始观测值相对应的 zz$Depth.Chart 的值,即 5。
> zz[zz$MP == 29, c('Depth.Chart')]
[1] 5
Levels: 1 10 2 3 4 5 6 7 8 9
我想定义一个函数,对数据框中的每个观测值执行上述繁琐而混乱的步骤,并返回结果向量。我该如何引用与当前正在处理的观测值相对应的 df$MP 的值,而不必像上面那样显式写出 29?以下是我尝试过但没有成功的一些方法。
> f1 <- function(col1, df, col2){
+ lapply(col1, function(i){
+ df2 <- df[col1 == i, col2]
+ df3 <- data.frame(cbind(sort(df2, decreasing = TRUE), rownames(data.table(sort(df2, decreasing = TRUE)))))
+ df3[i, 2]
+ })}
> f1(df$Game.ID, df, c('MP'))[1:10]
[[1]]
[1] 7
Levels: 1 10 2 3 4 5 6 7 8 9
[[2]]
[1] 7
Levels: 1 10 2 3 4 5 6 7 8 9
[[3]]
[1] 6
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9
[[4]]
[1] 8
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9
[[5]]
[1] 8
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9
[[6]]
[1] 3
Levels: 1 2 3 4 5 6 7 8
[[7]]
[1] 5
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9
[[8]]
[1] 7
Levels: 1 10 2 3 4 5 6 7 8 9
[[9]]
[1] 9
Levels: 1 10 11 2 3 4 5 6 7 8 9
[[10]]
[1] 1
Levels: 1 10 2 3 4 5 6 7 8 9
> f1 <- function(col1, df, col2){
+ lapply(col1, function(i){
+ df2 <- df[col1 == i, col2]
+ df3 <- data.frame(cbind(sort(df2, decreasing = TRUE), rownames(data.table(sort(df2, decreasing = TRUE)))))
+ df3[df3$X1 == i, 2]
+ })}
> f1(df$Game.ID, df, c('MP'))
Hide Traceback
Rerun with Debug
Error in Ops.factor(df3$X1, i) : level sets of factors are different
7.
stop("level sets of factors are different")
6.
Ops.factor(df3$X1, i)
5.
`[.data.frame`(df3, df3$X1 == i, 2)
4.
df3[df3$X1 == i, 2]
3.
FUN(X[[i]], ...)
2.
lapply(col1, function(i) {
df2 <- df[col1 == i, col2]
df3 <- data.frame(cbind(sort(df2, decreasing = TRUE), rownames(data.table(sort(df2,
decreasing = TRUE))))) ...
1.
f1(df$Game.ID, df, c("MP"))
> f1 <- function(col1, df, col2){
+ lapply(col1, function(i){
+ df2 <- df[col1 == i, col2]
+ df3 <- data.frame(cbind(sort(df2, decreasing = TRUE), rownames(data.table(sort(df2, decreasing = TRUE)))))
+ df3[col2 == i, 2]
+ })}
> f1(df$Game.ID, df, c('MP'))[1:10]
[[1]]
factor(0)
Levels: 1 10 2 3 4 5 6 7 8 9
[[2]]
factor(0)
Levels: 1 10 2 3 4 5 6 7 8 9
[[3]]
factor(0)
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9
[[4]]
factor(0)
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9
[[5]]
factor(0)
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9
[[6]]
factor(0)
Levels: 1 2 3 4 5 6 7 8
[[7]]
factor(0)
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9
[[8]]
factor(0)
Levels: 1 10 2 3 4 5 6 7 8 9
[[9]]
factor(0)
Levels: 1 10 11 2 3 4 5 6 7 8 9
[[10]]
factor(0)
Levels: 1 10 2 3 4 5 6 7 8 9
我想我并不完全理解 R 在函数内部是如何处理这个 i 变量的,因此也不清楚该如何正确引用它。在浏览这个论坛时,我找到了 Python 中在函数内部嵌套函数的通用示例,但没有找到 R 的。任何帮助都将不胜感激。
编辑
这是我的数据的一个更简单的子集:
> dput(df)
structure(list(MP = c(29L, 32L, 3L, 34L, 14L, 3L, 40L, 17L, 13L,
14L, 4L, 36L, 6L, 33L, 25L, 12L, 17L, 3L, 15L, 28L, 33L, 39L,
30L), Player.ID = structure(c(1L, 2L, 3L, 8L, 14L, 16L, 21L,
26L, 30L, 34L, 35L, 42L, 41L, 43L, 46L, 58L, 62L, 79L, 86L, 100L,
102L, 106L, 107L), .Label = c("abrinal01", "adamsst01", "aldrico01",
"aldrila01", "anderky01", "anderry01", "antetgi01", "anthoca01",
"anunoog01", "arthuda01", "bartowi01", "bealbr01", "bertada01",
"bjeline01", "brogdma01", "brookaa01", "brookdi01", "brownlo01",
"brownst02", "bullore01", "butleji01", "buyckdw01", "capelca01",
"chandwi01", "craigto01", "crawfja01", "davisde01", "dellama01",
"derozde01", "dienggo01", "drumman01", "ennisja01", "farieke01",
"feltora01", "fergute01", "forbebr01", "fraziti01", "gallola01",
"gasolma01", "gasolpa01", "georgma01", "georgpa01", "gibsota01",
"ginobma01", "gortama01", "grantje01", "greenda02", "greenge01",
"greenja01", "griffbl01", "hardeja01", "harrian01", "harriga01",
"henrymy01", "hensojo01", "hilarne01", "hillida01", "huestjo01",
"ibakase01", "johnsst04", "jokicni01", "jonesty01", "kennalu01",
"kilpase01", "lauvejo01", "lowryky01", "lylestr01", "mahinia01",
"makerth01", "martija01", "mbahalu01", "mclembe01", "meeksjo01",
"middlkh01", "millspa02", "moreler01", "morrima02", "mudiaem01",
"muhamsh01", "munfoxa02", "murrade01", "murraja01", "noguelu01",
"oubreke01", "parketo01", "pattepa01", "paulbr01", "paulch01",
"plumlma02", "poeltja01", "porteot01", "powelno01", "reedwi02",
"satorto01", "scottmi01", "seldewa01", "siakapa01", "smithis01",
"snellto01", "teaguje01", "tollian01", "townska01", "tuckepj01",
"valanjo01", "vaughra01", "westbru01", "wiggian01", "wilsodj01",
"wrighde01"), class = "factor"), Game.ID = structure(c(7L, 7L,
6L, 7L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 6L, 6L, 7L, 7L, 6L, 6L,
7L, 6L, 6L, 7L, 6L), .Label = c("2018-02-01 * DEN", "2018-02-01 * DET",
"2018-02-01 * HOU", "2018-02-01 * MEM", "2018-02-01 * MIL", "2018-02-01 * MIN",
"2018-02-01 * OKC", "2018-02-01 * SAS", "2018-02-01 * TOR", "2018-02-01 * WAS"
), class = "factor")), .Names = c("MP", "Player.ID", "Game.ID"
), row.names = c(1L, 2L, 3L, 8L, 14L, 16L, 21L, 26L, 30L, 34L,
35L, 41L, 42L, 43L, 46L, 58L, 62L, 79L, 86L, 100L, 102L, 106L,
107L), class = "data.frame")
您只在流程中的个别小步骤里用到了 data.table,但其实应该用它来完成整个过程。它非常便于执行"按组"(by group)操作,在本例中就是按 Game.ID 分组使用 rank()。使用您的小样本数据:
# Load data.table, which provides setDT() and grouped `:=` assignment.
library(data.table)
# Convert the existing data.frame `df` to a data.table by reference (no copy).
setDT(df)
# Within each Game.ID group, rank MP in descending order (negating MP makes
# the largest value rank 1) and store the result in a new Depth.Chart column.
# rank() uses its default tie handling here, so tied values share the average
# of their ranks (e.g. 2.5, 6.5).
df[, Depth.Chart := rank(-MP), by = Game.ID]
# Print the updated table, now including Depth.Chart.
df
# MP Player.ID Game.ID Depth.Chart
# 1: 29 abrinal01 2018-02-01 * OKC 5.0
# 2: 32 adamsst01 2018-02-01 * OKC 4.0
# 3: 3 aldrico01 2018-02-01 * MIN 12.0
# 4: 34 anthoca01 2018-02-01 * OKC 3.0
# 5: 14 bjeline01 2018-02-01 * MIN 8.0
# 6: 3 brookaa01 2018-02-01 * MIN 12.0
# 7: 40 butleji01 2018-02-01 * MIN 1.0
# 8: 17 crawfja01 2018-02-01 * MIN 6.5
# 9: 13 dienggo01 2018-02-01 * MIN 9.0
# 10: 14 feltora01 2018-02-01 * OKC 8.0
# 11: 4 fergute01 2018-02-01 * OKC 10.0
# 12: 36 georgpa01 2018-02-01 * OKC 2.0
# 13: 6 georgma01 2018-02-01 * MIN 10.0
# 14: 33 gibsota01 2018-02-01 * MIN 2.5
# 15: 25 grantje01 2018-02-01 * OKC 6.0
# 16: 12 huestjo01 2018-02-01 * OKC 9.0
# 17: 17 jonesty01 2018-02-01 * MIN 6.5
# 18: 3 muhamsh01 2018-02-01 * MIN 12.0
# 19: 15 pattepa01 2018-02-01 * OKC 7.0
# 20: 28 teaguje01 2018-02-01 * MIN 5.0
# 21: 33 townska01 2018-02-01 * MIN 2.5
# 22: 39 westbru01 2018-02-01 * OKC 1.0
# 23: 30 wiggian01 2018-02-01 * MIN 4.0
# MP Player.ID Game.ID Depth.Chart
rank 默认会对并列的值取平均排名;如需其他处理方式,请参阅 ?rank。