在 R 中使用 data.table 识别和注释分组最小值
Identify and annotate group-wise minimum values using data.table in R
我需要使用 data.table,为示例数据中的每个 class 找出最小的 value(忽略 NA),并在一个新列中将其标记为 'min',如下所示。
示例数据:
# Example data: 15 rows across five fruit classes. `value` is missing (NA) for
# one banana row, three grape rows, and every melon row.
df <- data.frame(
  class = rep(
    c("apple", "banana", "berry", "grape", "melon"),
    times = c(3L, 2L, 2L, 5L, 3L)
  ),
  value = c(
    108816872, 108851837, 108890411,    # apple
    108784778, NA,                      # banana
    108784778, 108816872,               # berry
    108816872, 108850460, NA, NA, NA,   # grape
    NA, NA, NA                          # melon
  ),
  stringsAsFactors = FALSE
)
期望的输出:
# class value anno
#1 apple 108816872 min
#2 apple 108851837 NA
#3 apple 108890411 NA
#4 banana 108784778 min
#5 banana NA NA
#6 berry 108784778 min
#7 berry 108816872 NA
#8 grape 108816872 min
#9 grape 108850460 NA
#10 grape NA NA
#11 grape NA NA
#12 grape NA NA
#13 melon NA NA
#14 melon NA NA
#15 melon NA NA
# Work on a data.table copy of `df` (setDT(df) would convert in place instead).
dt <- as.data.table(df)
# Inner query: per class, the global row index (.I) of the smallest non-NA
# value. which.min() skips NA and returns integer(0) for an all-NA group, so
# such groups contribute no row. Outer call: tag exactly those rows.
dt[
  dt[, .I[which.min(value)], by = class][["V1"]],
  anno := "min"
]
我本来打算建议 @eddies 的方法,不过这里还有另一种做法:
# Convert `df` to a data.table in place, visit rows in ascending `value` order,
# and tag the first row of each class as "min" in a new `min` column.
# order() sorts NA last, so a class whose values are all NA still has a "first"
# row, and that row's value is NA. Guard against it so that all-NA classes stay
# untagged. Typed NA_character_ keeps the column character in every group.
setDT(df)[
  order(value),
  min := {
    first_tag <- if (is.na(value[1L])) NA_character_ else "min"
    c(first_tag, rep(NA_character_, .N - 1L))
  },
  by = class
]
编辑:如果你想要的是实际的最小值,而不是 "min" 这个标记,可以改为:
# Variant that stores the minimum itself instead of a "min" tag: within each
# class (rows visited in ascending `value` order, NA last) keep the first value
# and blank out the rest. Indexing with NA_integer_ yields NA of `value`'s type.
setDT(df)[
  order(value),
  min := value[c(1L, rep(NA_integer_, .N - 1L))],
  by = class
]
我需要使用 data.table,为示例数据中的每个 class 找出最小的 value(忽略 NA),并在一个新列中将其标记为 'min',如下所示。
示例数据:
# Example data: 15 rows across five fruit classes. `value` is missing (NA) for
# one banana row, three grape rows, and every melon row.
df <- data.frame(
  class = rep(
    c("apple", "banana", "berry", "grape", "melon"),
    times = c(3L, 2L, 2L, 5L, 3L)
  ),
  value = c(
    108816872, 108851837, 108890411,    # apple
    108784778, NA,                      # banana
    108784778, 108816872,               # berry
    108816872, 108850460, NA, NA, NA,   # grape
    NA, NA, NA                          # melon
  ),
  stringsAsFactors = FALSE
)
期望的输出:
# class value anno
#1 apple 108816872 min
#2 apple 108851837 NA
#3 apple 108890411 NA
#4 banana 108784778 min
#5 banana NA NA
#6 berry 108784778 min
#7 berry 108816872 NA
#8 grape 108816872 min
#9 grape 108850460 NA
#10 grape NA NA
#11 grape NA NA
#12 grape NA NA
#13 melon NA NA
#14 melon NA NA
#15 melon NA NA
# Work on a data.table copy of `df` (setDT(df) would convert in place instead).
dt <- as.data.table(df)
# Inner query: per class, the global row index (.I) of the smallest non-NA
# value. which.min() skips NA and returns integer(0) for an all-NA group, so
# such groups contribute no row. Outer call: tag exactly those rows.
dt[
  dt[, .I[which.min(value)], by = class][["V1"]],
  anno := "min"
]
我本来打算建议 @eddies 的方法,不过这里还有另一种做法:
# Convert `df` to a data.table in place, visit rows in ascending `value` order,
# and tag the first row of each class as "min" in a new `min` column.
# order() sorts NA last, so a class whose values are all NA still has a "first"
# row, and that row's value is NA. Guard against it so that all-NA classes stay
# untagged. Typed NA_character_ keeps the column character in every group.
setDT(df)[
  order(value),
  min := {
    first_tag <- if (is.na(value[1L])) NA_character_ else "min"
    c(first_tag, rep(NA_character_, .N - 1L))
  },
  by = class
]
编辑:如果你想要的是实际的最小值,而不是 "min" 这个标记,可以改为:
# Variant that stores the minimum itself instead of a "min" tag: within each
# class (rows visited in ascending `value` order, NA last) keep the first value
# and blank out the rest. Indexing with NA_integer_ yields NA of `value`'s type.
setDT(df)[
  order(value),
  min := value[c(1L, rep(NA_integer_, .N - 1L))],
  by = class
]