dplyr>获取变量的最小值和最大值的行

dplyr>Get rows with minimum and maximum of variable

我有一个 data.frame,想返回每个分组中 time 取最小值和最大值的观测行。

# Example data: nine observations split into three sets (set_nbr 1, 2, 3),
# three rows per set. Note the tied time value 24594.55 inside set 1.
df <- data.frame(
  time = c(
    24594.55, 29495.45, 24594.55,
    39297.27, 24594.55, 34396.36,
    19693.64, 14792.73, 29495.45
  ),
  Mz = c(
    -0.04729751, -0.50902297, -0.04376393,
    -0.22218980, -0.36407263, -0.38341534,
    -0.34597255, -0.01480776, -0.00999671
  ),
  set_nbr = rep(c(1, 2, 3), each = 3)
)


library(dplyr)

# Earliest observation per set: for each set_nbr group, keep the negative-Mz
# rows and then the row(s) whose time is ranked first.
min_time <- df %>% 
  # work group-wise on set_nbr
  group_by(set_nbr) %>%
  # keep only the rows of each group where Mz is negative
  slice(which(Mz<0))%>%
  # rank() orders ascending, so rank 1 is the smallest time; with
  # ties.method="min" every row tied for the smallest time gets rank 1
  filter(rank(time,ties.method="min")==1)%>%
  # NOTE(review): the printed result shows a single row for set 1 although two
  # rows tie on time with different Mz; presumably bare distinct deduplicated on
  # the grouping column in the dplyr version used -- confirm on current dplyr.
  distinct


min_time 

 ##Source: local data frame [3 x 3]
 ##Groups: set_nbr

      time          Mz set_nbr
## 1 24594.55 -0.04729751       1
## 2 24594.55 -0.36407263       2
## 3 14792.73 -0.01480776       3

这行得通,但是当我尝试获取 max_time 时,出现了奇怪的结果:

# Attempt to get the latest observation per set, mirroring min_time but with
# ties.method="max".
max_time <- df %>% 
  # work group-wise on set_nbr
  group_by(set_nbr) %>%
  # keep only the rows of each group where Mz is negative
  slice(which(Mz<0))%>%
  # keeps the rows whose rank equals 1; rank() orders ascending and
  # ties.method="max" gives tied values the highest rank within the tie
  filter(rank(time,ties.method="max")==1)%>%
  # drop duplicate rows
  distinct

max_time 

##Source: local data frame [2 x 3]
##Groups: set_nbr

        time          Mz set_nbr
##1 24594.55 -0.36407263       2
##2 14792.73 -0.01480776       3

set_nbr 为 1 的组的 max time 值不正确(该组根本没有出现在结果中),其余各组返回的也不是最大的 time。不知道为什么。

预期输出

max_time 

      time            Mz    set_nbr
##1 29495.45 -0.50902297       1
##2 39297.27 -0.22218980       2
##3 29495.45 -0.00999671       3

尝试

# For every set_nbr group, keep the row(s) whose time equals the group maximum
# (all rows tied for the maximum are retained).
df %>%
  group_by(set_nbr) %>%
  filter(time == max(time))
#     time          Mz set_nbr
#1 29495.45 -0.50902297       1
#2 39297.27 -0.22218980       2
#3 29495.45 -0.00999671       3

或者

 # Per set_nbr group, pick the single row at the position of the largest time
 # (which.max() returns the first position when the maximum is tied).
 df %>%
   group_by(set_nbr) %>%
   slice(which.max(time))
 #      time          Mz set_nbr
 #1 29495.45 -0.50902297       1
 #2 39297.27 -0.22218980       2
 #3 29495.45 -0.00999671       3

关于您的代码为何不起作用

 # Diagnostic: add column rn holding the within-group rank of time that the
 # original max_time filter compared against 1.
 df %>%
   group_by(set_nbr) %>%
   slice(which(Mz < 0)) %>%
   mutate(rn = rank(time, ties.method = 'max'))
 #      time          Mz set_nbr rn
 #1 24594.55 -0.04729751       1  2
 #2 29495.45 -0.50902297       1  3
 #3 24594.55 -0.04376393       1  2
 #4 39297.27 -0.22218980       2  3
 #5 24594.55 -0.36407263       2  1
 #6 34396.36 -0.38341534       2  2
 #7 19693.64 -0.34597255       3  2
 #8 14792.73 -0.01480776       3  1
 #9 29495.45 -0.00999671       3  3

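查看输出可以发现,'set_nbr' 为“1”的组中 'rn' 没有等于“1”的行:该组最小的 time(24594.55)出现了两次,而 ties.method='max' 会把并列的行都赋予并列名次中最大的那个(这里是 2)。另外,rank() 按升序排名,rn == 1 对应的是最小的 time 而不是最大的,所以其余两组返回的也是最小值。要取最大值,可以对 -time 排名,并用 ties.method='first' 保证每组恰好有一行名次为 1: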

 # Latest negative-Mz observation per set_nbr group.
 # Ranking on -time gives the largest time rank 1; ties.method = "first"
 # breaks ties by row order, so exactly one row per group has rank 1.
 # The condition is passed to filter() unnamed: a named argument such as
 # `rn = ...` is rejected by filter() as a probable mistyped `==`.
 df %>%
   group_by(set_nbr) %>%
   slice(which(Mz < 0)) %>%
   filter(rank(-time, ties.method = "first") == 1)
 #      time          Mz set_nbr
 #1 29495.45 -0.50902297       1
 #2 39297.27 -0.22218980       2
 #3 29495.45 -0.00999671       3