有没有办法知道在使用 ggplot 制作箱线图时删除了哪些行?

Is there a way to know what rows were removed when making boxplot using ggplot?

当我使用 ggplot 生成箱线图时,出现一条警告消息:“Removed 6588 rows containing non-finite values (stat_boxplot).”(即“删除了包含非有限值的 6588 行”)。但我无法根据此消息判断删除了哪些行。我使用的数据看起来没问题。

这是我用来生成箱线图的代码

# Box plot of TMT_Intensities (coerced to numeric) for each TMT_signals level,
# with boxes filled by the `probe.Mod.or.not(Y/N)` column.
# ylim() puts hard limits of 0 and 2500 on the y scale; this is the layer the
# "Removed ... rows containing non-finite values" warning is associated with.
ggplot(
  df.08.long,
  aes(
    TMT_signals,
    as.numeric(TMT_Intensities),
    fill = `probe.Mod.or.not(Y/N)`
  )
) +
  geom_boxplot() +
  ylim(0, 2500) +
  theme_classic() +
  # Applied after theme_classic() so these text settings are not overwritten.
  theme(
    axis.title = element_text(size = 8),
    axis.text = element_text(size = 10),
    # Rotate the x tick labels so the channel names do not overlap.
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )

下面显示了数据框(仅前 20 行)。

# Sample of the plotted data: a 20-row tibble (row.names = c(NA, -20L),
# class "tbl_df"). TMT_signals and TMT_Intensities are the x and y columns
# used in the plot; of the intensities listed here only 3470.869 lies above
# the 2.5e3 upper limit passed to ylim().
structure(list(Scan.number = c(10017, 10017, 10017, 10017, 10017, 
10017, 10017, 10017, 10017, 13240, 13240, 13240, 13240, 13240, 
13240, 13240, 13240, 13240, 27592, 27592), Sequence = c("AAAYSAQVQPVDGATR", 
"AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", 
"AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", 
"AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", 
"AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", 
"AAAYSAQVQPVDGATR", "AAEQAHLWAELVFLYDKYEEYDNAIITMMNHPTDAWK", 
"AAEQAHLWAELVFLYDKYEEYDNAIITMMNHPTDAWK"), Length = c(16L, 16L, 
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 37L, 37L), Missed.cleavages = c(0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L
), Modified.sequence = c("_AAAYSAQVQPVDGATR_", "_AAAYSAQVQPVDGATR_", 
"_AAAYSAQVQPVDGATR_", "_AAAYSAQVQPVDGATR_", "_AAAYSAQVQPVDGATR_", 
"_AAAYSAQVQPVDGATR_", "_AAAYSAQVQPVDGATR_", "_AAAYSAQVQPVDGATR_", 
"_AAAYSAQVQPVDGATR_", "_AAAY(XO44_TMT6)SAQVQPVDGATR_", "_AAAY(XO44_TMT6)SAQVQPVDGATR_", 
"_AAAY(XO44_TMT6)SAQVQPVDGATR_", "_AAAY(XO44_TMT6)SAQVQPVDGATR_", 
"_AAAY(XO44_TMT6)SAQVQPVDGATR_", "_AAAY(XO44_TMT6)SAQVQPVDGATR_", 
"_AAAY(XO44_TMT6)SAQVQPVDGATR_", "_AAAY(XO44_TMT6)SAQVQPVDGATR_", 
"_AAAY(XO44_TMT6)SAQVQPVDGATR_", "_AAEQAHLWAELVFLYDKYEEYDNAIITMMNHPTDAWK_", 
"_AAEQAHLWAELVFLYDKYEEYDNAIITMMNHPTDAWK_"), probe_TMT6.Probabilities = c("", 
"", "", "", "", "", "", "", "", "AAAY(1)SAQVQPVDGATR", "AAAY(1)SAQVQPVDGATR", 
"AAAY(1)SAQVQPVDGATR", "AAAY(1)SAQVQPVDGATR", "AAAY(1)SAQVQPVDGATR", 
"AAAY(1)SAQVQPVDGATR", "AAAY(1)SAQVQPVDGATR", "AAAY(1)SAQVQPVDGATR", 
"AAAY(1)SAQVQPVDGATR", "", ""), `Uniprot ID` = c("Q9H7E9", "Q9H7E9", 
"Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9", 
"Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9", 
"Q9H7E9", "Q9H7E9", "Q00610", "Q00610"), `probe.Mod.or.not(Y/N)` = c("N", 
"N", "N", "N", "N", "N", "N", "N", "N", "Y", "Y", "Y", "Y", "Y", 
"Y", "Y", "Y", "Y", "N", "N"), `kinase.or.not(Y/N)` = c("N", 
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", 
"N", "N", "N", "N", "N", "N"), Gene.Names = c("C8orf33", "C8orf33", 
"C8orf33", "C8orf33", "C8orf33", "C8orf33", "C8orf33", "C8orf33", 
"C8orf33", "C8orf33", "C8orf33", "C8orf33", "C8orf33", "C8orf33", 
"C8orf33", "C8orf33", "C8orf33", "C8orf33", "CLTC", "CLTC"), 
    Charge = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), m.z = c(802.90499, 802.90499, 
    802.90499, 802.90499, 802.90499, 802.90499, 802.90499, 802.90499, 
    802.90499, 647.57262, 647.57262, 647.57262, 647.57262, 647.57262, 
    647.57262, 647.57262, 647.57262, 647.57262, 1107.5251, 1107.5251
    ), Score = c(86.313, 86.313, 86.313, 86.313, 86.313, 86.313, 
    86.313, 86.313, 86.313, 41.695, 41.695, 41.695, 41.695, 41.695, 
    41.695, 41.695, 41.695, 41.695, 28.532, 28.532), Retention.time = c(27.774, 
    27.774, 27.774, 27.774, 27.774, 27.774, 27.774, 27.774, 27.774, 
    35.978, 35.978, 35.978, 35.978, 35.978, 35.978, 35.978, 35.978, 
    35.978, 72.556, 72.556), Precursor.Intensity = c(460631.45703125, 
    460631.45703125, 460631.45703125, 460631.45703125, 460631.45703125, 
    460631.45703125, 460631.45703125, 460631.45703125, 460631.45703125, 
    472201.625, 472201.625, 472201.625, 472201.625, 472201.625, 
    472201.625, 472201.625, 472201.625, 472201.625, 388790.9296875, 
    388790.9296875), Localization.prob = c(NaN, NaN, NaN, NaN, 
    NaN, NaN, NaN, NaN, NaN, 1, 1, 1, 1, 1, 1, 1, 1, 1, NaN, 
    NaN), probe_TMT6.site.IDs = c("", "", "", "", "", "", "", 
    "", "", "308", "308", "308", "308", "308", "308", "308", 
    "308", "308", "", ""), TMT_signals = c("TMT126", "TMT127N", 
    "TMT128N", "TMT128C", "TMT129N", "TMT129C", "TMT130N", "TMT130C", 
    "TMT131", "TMT126", "TMT127N", "TMT128N", "TMT128C", "TMT129N", 
    "TMT129C", "TMT130N", "TMT130C", "TMT131", "TMT126", "TMT127N"
    ), TMT_Intensities = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1824.667, 
    3470.869, 1691.413, 2367.219, 1895.059, 1712.427, 1529.349, 
    1617.825, 1677.578, 0, 0)), row.names = c(NA, -20L), class = c("tbl_df", 
"tbl", "data.frame"))

感谢您的帮助和建议!

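您数据中的某些值超出了 ylim 设定的范围(本例中是大于上限 2.5e3 的值),因此它们已从绘图中删除:ylim() 会先把超出范围的值替换为 NA,随后 stat_boxplot 在计算箱线图统计量之前丢弃这些非有限值所在的行,并给出该警告。要找出具体是哪些行,可以用相同的条件筛选数据,例如 subset(df, !is.finite(as.numeric(TMT_Intensities)) | as.numeric(TMT_Intensities) < 0 | as.numeric(TMT_Intensities) > 2.5e3);在上面的 20 行示例中,只有一行(TMT127N,强度 3470.869)符合条件。如果只是想缩放坐标轴而不丢弃数据,请改用 coord_cartesian(ylim = c(0, 2.5e3))。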

使用 ylim

> ggplot(data = df,
+        aes(x = TMT_signals, y = as.numeric(TMT_Intensities), fill = `probe.Mod.or.not(Y/N)`)) +
+    geom_boxplot() +
+    ylim(0, 2.5e3) +
+    theme_classic() +
+    theme(axis.title=element_text(size=8),
+          axis.text=element_text(size=10),
+          axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Warning message:
Removed 1 rows containing non-finite values (stat_boxplot).

没有 ylim

> ggplot(data = df,
+        aes(x = TMT_signals, y = as.numeric(TMT_Intensities), fill = `probe.Mod.or.not(Y/N)`)) +
+    geom_boxplot() +
+    #ylim(0, 2.5e3) +
+    theme_classic() +
+    theme(axis.title=element_text(size=8),
+          axis.text=element_text(size=10),
+          axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))