在 R 中使用 dplyr 循环变量名

Looping variable names using dplyr in R

我需要使用 R 中的“qcc”包获取异常值数据。首先，这是我的数据：

# Month label for every observation: all 1477 rows belong to January.
monthx <- rep_len("Jan", 1477)
# Transaction values for the example data set. The vector must have 1477
# elements so that it lines up row-for-row with `monthx`, `hourx` and `store`
# (rows 1-738 are store1, rows 739-1477 are store2).
transaction <- c(119,118,93,93,103,113,124,158,193,201,204,157,131,150,160,
                 162,145,274,184,194,189,140,145,170,99,111,97,91,97,102,149,
                 165,190,205,179,175,206,155,162,165,205,272,155,169,167,144,93,151,60,38,41,34,36,74,144,155,179,185,171,171,170,153,182,172,157,175,170,195,245,136,92,201,49,38,29,35,41,80,130,181,182,205,252,228,182,139,159,169,154,214,198,169,168,147,92,91,48,34,32,30,47,96,132,184,180,182,230,193,172,194,141,155,207,181,162,190,177,222,122,193,80,73,99,98,80,120,154,165,224,193,200,183,179,185,169,162,166,195,189,225,207,158,120,114,84,87,91,88,98,116,151,192,224,212,215,210,222,169,177,161,156,252,202,192,162,153,138,179,99,97,101,99,99,124,134,208,229,189,200,196,196,174,163,159,166,161,218,178,294,201,153,171,104,107,87,102,98,120,165,265,350,386,351,304,347,253,267,240,284,328,369,360,413,223,139,125,112,99,101,106,99,121,160,229,301,301,266,269,256,234,316,224,263,389,287,312,307,202,123,139,117,104,106,104,106,122,178,265,361,358,375,334,303,294,247,285,257,343,309,225,207,158,120,114,104,107,87,102,98,120,165,265,350,386,351,304,347,253,346,299,239,155,178,105,111,104,105,99,118,187,277,360,410,525,390,384,316,300,328,309,543,288,379,287,192,147,141,137,101,95,93,104,124,173,298,338,362,398,337,348,302,286,330,305,374,342,425,451,219,194,145,99,95,97,93,96,109,148,210,311,343,314,272,246,280,341,255,287,333,259,423,345,217,152,132,126,105,98,99,99,117,170,286,334,354,353,299,278,245,286,268,286,300,289,371,311,215,165,182,128,108,106,104,104,110,154,211,271,326,282,276,243,252,231,224,277,281,274,323,276,187,147,175,121,111,110,105,108,111,157,252,354,331,331,322,295,287,289,242,260,291,276,300,280,203,125,128,103,94,95,101,99,109,154,250,372,374,398,291,315,296,250,277,270,313,345,319,232,185,162,109,107,98,102,101,118,163,264,345,328,362,358,255,283,229,266,314,409,294,337,361,221,146,128,104,108,98,99,99,113,160,268,353,368,401,344,321,321,261,228,277,256,311,322,314,221,141,140,109,106,99,26,30,72,183,224,318,351,393,246,208,245,242,233,226,236,278,269,247,174,87,350,39,35,22,18,24,58,119,228,279,293,320,256,239,199,178,198,237,374,292,344,298,167,107,
                 127,53,31,27,28,27,63,109,181,253,225,220,234,227,171,166,175,195,241,247,303,301,174,116,197,94,107,85,81,83,96,148,241,345,366,322,304,277,286,246,254,268,365,260,294,293,249,121,217,102,97,83,83,80,83,131,238,312,326,292,304,251,267,257,240,289,278,340,324,266,202,112,188,99,78,85,78,79,93,163,239,334,314,338,287,286,260,264,202,259,246,246,319,260,176,111,110,88,85,92,77,80,91,141,225,303,302,307,315,264,268,214,261,236,271,275,260,246,192,116,155,77,70,71,66,66,77,123,232,272,320,341,264,224,247,300,237,231,370,266,333,282,181,118,145,96,83,80,80,81,93,130,254,328,376,323,298,286,275,272,232,272,258,265,297,342,217,139,139,96,94,83,84,88,92,139,215,256,277,311,270,241,246,217,276,246,339,275,329,328,206,116,150,25,26,27,24,24,26,22,25,26,27,24,24,23,22,23,19,20,23,20,20,21,19,16,17,16,15,16,14,15,14,14,21,22,25,29,35,38,36,41,51,52,53,41,36,40,31,36,27,27,25,28,24,24,23,35,27,30,30,32,34,30,26,27,29,23,25,24,26,25,23,18,15,14,14,12,13,13,17,26,25,34,30,48,49,53,52,50,51,54,56,53,56,51,44,38,36,35,32,29,27,22,26,25,27,31,38,42,45,43,49,51,48,51,48,49,42,52,35,29,32,26,26,24,23,24,22,24,28,29,35,38,47,47,48,52,52,53,51,54,46,48,42,39,38,32,29,27,26,29,35,31,30,33,38,43,58,51,52,53,55,55,56,54,52,46,43,38,36,36,33,31,32,31,31,33,34,37,47,49,43,44,49,52,52,56,54,53,48,42,40,39,35,35,33,34,29,29,28,27,31,28,38,42,44,48,51,47,43,48,45,43,40,39,31,31,28,27,23,22,23,22,25,28,28,28,26,22,23,23,21,23,19,19,21,19,20,19,20,15,12,12,12,13,12,11,11,16,19,20,30,44,48,46,40,43,46,45,42,41,46,48,42,39,38,35,33,34,29,29,28,27,31,28,38,42,44,48,51,46,36,31,25,24,22,20,20,18,18,18,20,25,29,36,
                 40,40,43,50,49,46,51,48,44,42,45,40,41,46,40,34,32,37,35,31,34,39,37,43,42,52,47,49,52,51,49,49,48,44,41,40,29,27,29,34,25,25,24,27,22,23,29,34,40,39,40,43,39,44,39,37,40,41,41,37,38,35,34,34,30,28,26,28,31,33,34,38,37,47,50,50,45,45,45,38,39,38,33,33,29,25,26,24,23,21,22,21,22,24,23,27,27,26,24,24,27,25,26,25,28,29,25,19,20,15,13,12,14,14,19,16,20,24,27,28,39,45,53,47,49,49,49,50,47,48,44,42,36,31,32,31,28,28,26,27,31,29,28,35,43,48,45,50,46,46,46,52,52,48,45,40,30,23,20,22,19,19,17,19,23,22,23,36,35,39,39,37,37,45,45,45,41,44,42,38,39,35,33,33,32,32,35,29,32,28,37,41,46,43,44,44,47,46,42,46,50,41,40,37,36,35,33,30,34,34,28,31,31,33,44,47,49,46,46,50,51,52,58,56,52,251,46,41,37,32,15,16,17,766,40,56,37,36,34,41,50,50,53,49,50,50,49,45,46,48,41,39,35,30,28,28,27,25,27,24,26,27,28,30,28,30,26,25,20,21,21,24,27,31,23,21,20,17,15,14,14,18,18,17,20,22,22,30,40,45,48,51,47,45,44,46,45,49,45,38,32,34,26,25,23,24,24,18,15,14,15,22,29,38,38,37,41,40,49,43,41,1174,42,33,35,30,33,28,28,26,27,20,25,30,31,31,37,44,43,45,46,47,45,45,44,48,43,40,39,39,37,37,36,34,34,30,31,34,36,38,48,44,45,46,47,51,52,46,51,54,45,42,40,35,35,36,31,33,25,2860,25,27,31,37,42,42,43,43,43,42,43,44,44,42,544,37,32,31,33,28,28,30,37,30,32,43,49,48,51,54,53,55,1772,55,53,52,45,36,39,40,1786,33,38,32,41,25,23,24,26,29,28,28,27,25,25,26,1856,
                 26,24,24,23,23,26,19,15,17)
hourx <- c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,
         13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,
         0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11
,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2
         ,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23)
# Store label per row: the first 738 rows are store1, the remaining 739 are
# store2.
store <- rep(c("store1", "store2"), times = c(738, 739))
# Combine the four vectors into one data frame, one column per vector.
mydata <- data.frame(
  monthx = monthx,
  hourx = hourx,
  store = store,
  transaction = transaction
)

然后，我使用以下脚本分别获取 store1 和 store2 的异常值：

# Library
library(qcc)

# Outliers for store1
# Keep only the rows that belong to store1.
store1 <- subset(mydata, store == "store1")
# Build an R chart object from the store1 rows; plot = FALSE skips drawing
# the chart because only the computed control limits are needed.
store1.outlier <- qcc(store1, type = "R", plot = FALSE)
# limits[2] is the upper control limit of the chart; rows whose transaction
# value exceeds it are reported as outliers.
myresult1 <- subset(store1, transaction > store1.outlier$limits[2])
myresult1

#    monthx hourx  store transaction
#294    Jan    10 store1         525
#301    Jan    17 store1         543

# Outliers for store2
# Keep only the rows that belong to store2.
store2 <- subset(mydata, store == "store2")
# Build an R chart object from the store2 rows; plot = FALSE skips drawing
# the chart because only the computed control limits are needed.
store2.outlier <- qcc(store2, type = "R", plot = FALSE)
# limits[2] is the upper control limit of the chart; rows whose transaction
# value exceeds it are reported as outliers.
myresult2 <- subset(store2, transaction > store2.outlier$limits[2])
myresult2

#    monthx hourx  store transaction
#1257    Jan    19 store2         251
#1265    Jan     3 store2         766
#1353    Jan    19 store2        1174
#1411    Jan     5 store2        2860
#1426    Jan    20 store2         544
#1444    Jan    14 store2        1772
#1452    Jan    22 store2        1786
#1468    Jan    14 store2        1856

# Combined result for both stores
# Stack the per-store outlier rows (store1 first, then store2). The first
# argument must be `myresult1`; no object called `myresult` is ever created.
all.myresult <- rbind(myresult1, myresult2)
# Print the combined table shown below.
all.myresult

#    monthx hourx  store transaction
#294     Jan    10 store1         525
#301     Jan    17 store1         543
#1257    Jan    19 store2         251
#1265    Jan     3 store2         766
#1353    Jan    19 store2        1174
#1411    Jan     5 store2        2860
#1426    Jan    20 store2         544
#1444    Jan    14 store2        1772
#1452    Jan    22 store2        1786
#1468    Jan    14 store2        1856

我的 mydata 中共有 100K 家商店。如何使用 dplyr 包循环处理所有商店，并获取全部商店的异常值数据？谢谢。

你可以使用以下代码：

library(dplyr)
library(qcc)

# Apply the per-store outlier rule to every store in a single pipeline.
# Within each store group, cur_data() supplies that group's rows (without the
# grouping column) to qcc(); limits[2] is the resulting upper control limit,
# and only rows whose transaction value exceeds it are kept.
# NOTE: cur_data() is deprecated as of dplyr 1.1.0; on newer versions
# pick(everything()) is the documented replacement.
mydata %>%
  group_by(store) %>%
  filter(transaction > qcc(cur_data(), type = "R", plot = FALSE)$limits[2]) %>%
  ungroup()

#   monthx hourx store  transaction
#   <chr>  <dbl> <chr>        <dbl>
# 1 Jan       10 store1         525
# 2 Jan       17 store1         543
# 3 Jan       19 store2         251
# 4 Jan        3 store2         766
# 5 Jan       19 store2        1174
# 6 Jan        5 store2        2860
# 7 Jan       20 store2         544
# 8 Jan       14 store2        1772
# 9 Jan       22 store2        1786
#10 Jan       14 store2        1856