有没有办法知道在使用 ggplot 制作箱线图时删除了哪些行?
Is there a way to know what rows were removed when making boxplot using ggplot?
当我使用 ggplot 生成箱线图时,出现一条警告消息:“Removed 6588 rows containing non-finite values (stat_boxplot).”(即“已删除 6588 行包含非有限值的数据”)。但我无法根据此消息判断删除了哪些行,而我使用的数据看起来没有问题。
这是我用来生成箱线图的代码
# Boxplot of TMT_Intensities for each TMT_signals level, with boxes filled
# according to the `probe.Mod.or.not(Y/N)` column.
ggplot(
  data = df.08.long,
  aes(
    x = TMT_signals,
    y = as.numeric(TMT_Intensities),
    fill = `probe.Mod.or.not(Y/N)`
  )
) +
  geom_boxplot() +
  # NOTE: ylim() sets scale limits, so observations outside 0-2500 are
  # converted to NA and dropped before the boxplot statistics are computed;
  # this is what produces the "Removed ... rows" warning.
  ylim(0, 2.5e3) +
  theme_classic() +
  theme(
    axis.title = element_text(size = 8),
    axis.text = element_text(size = 10),
    # Rotate the x tick labels so the channel names do not overlap.
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )
下面显示了数据框(仅前 20 行)。
structure(list(Scan.number = c(10017, 10017, 10017, 10017, 10017,
10017, 10017, 10017, 10017, 13240, 13240, 13240, 13240, 13240,
13240, 13240, 13240, 13240, 27592, 27592), Sequence = c("AAAYSAQVQPVDGATR",
"AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR",
"AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR",
"AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR",
"AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR",
"AAAYSAQVQPVDGATR", "AAEQAHLWAELVFLYDKYEEYDNAIITMMNHPTDAWK",
"AAEQAHLWAELVFLYDKYEEYDNAIITMMNHPTDAWK"), Length = c(16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 37L, 37L), Missed.cleavages = c(0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L
), Modified.sequence = c("_AAAYSAQVQPVDGATR_", "_AAAYSAQVQPVDGATR_",
"_AAAYSAQVQPVDGATR_", "_AAAYSAQVQPVDGATR_", "_AAAYSAQVQPVDGATR_",
"_AAAYSAQVQPVDGATR_", "_AAAYSAQVQPVDGATR_", "_AAAYSAQVQPVDGATR_",
"_AAAYSAQVQPVDGATR_", "_AAAY(XO44_TMT6)SAQVQPVDGATR_", "_AAAY(XO44_TMT6)SAQVQPVDGATR_",
"_AAAY(XO44_TMT6)SAQVQPVDGATR_", "_AAAY(XO44_TMT6)SAQVQPVDGATR_",
"_AAAY(XO44_TMT6)SAQVQPVDGATR_", "_AAAY(XO44_TMT6)SAQVQPVDGATR_",
"_AAAY(XO44_TMT6)SAQVQPVDGATR_", "_AAAY(XO44_TMT6)SAQVQPVDGATR_",
"_AAAY(XO44_TMT6)SAQVQPVDGATR_", "_AAEQAHLWAELVFLYDKYEEYDNAIITMMNHPTDAWK_",
"_AAEQAHLWAELVFLYDKYEEYDNAIITMMNHPTDAWK_"), probe_TMT6.Probabilities = c("",
"", "", "", "", "", "", "", "", "AAAY(1)SAQVQPVDGATR", "AAAY(1)SAQVQPVDGATR",
"AAAY(1)SAQVQPVDGATR", "AAAY(1)SAQVQPVDGATR", "AAAY(1)SAQVQPVDGATR",
"AAAY(1)SAQVQPVDGATR", "AAAY(1)SAQVQPVDGATR", "AAAY(1)SAQVQPVDGATR",
"AAAY(1)SAQVQPVDGATR", "", ""), `Uniprot ID` = c("Q9H7E9", "Q9H7E9",
"Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9",
"Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9",
"Q9H7E9", "Q9H7E9", "Q00610", "Q00610"), `probe.Mod.or.not(Y/N)` = c("N",
"N", "N", "N", "N", "N", "N", "N", "N", "Y", "Y", "Y", "Y", "Y",
"Y", "Y", "Y", "Y", "N", "N"), `kinase.or.not(Y/N)` = c("N",
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N",
"N", "N", "N", "N", "N", "N"), Gene.Names = c("C8orf33", "C8orf33",
"C8orf33", "C8orf33", "C8orf33", "C8orf33", "C8orf33", "C8orf33",
"C8orf33", "C8orf33", "C8orf33", "C8orf33", "C8orf33", "C8orf33",
"C8orf33", "C8orf33", "C8orf33", "C8orf33", "CLTC", "CLTC"),
Charge = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), m.z = c(802.90499, 802.90499,
802.90499, 802.90499, 802.90499, 802.90499, 802.90499, 802.90499,
802.90499, 647.57262, 647.57262, 647.57262, 647.57262, 647.57262,
647.57262, 647.57262, 647.57262, 647.57262, 1107.5251, 1107.5251
), Score = c(86.313, 86.313, 86.313, 86.313, 86.313, 86.313,
86.313, 86.313, 86.313, 41.695, 41.695, 41.695, 41.695, 41.695,
41.695, 41.695, 41.695, 41.695, 28.532, 28.532), Retention.time = c(27.774,
27.774, 27.774, 27.774, 27.774, 27.774, 27.774, 27.774, 27.774,
35.978, 35.978, 35.978, 35.978, 35.978, 35.978, 35.978, 35.978,
35.978, 72.556, 72.556), Precursor.Intensity = c(460631.45703125,
460631.45703125, 460631.45703125, 460631.45703125, 460631.45703125,
460631.45703125, 460631.45703125, 460631.45703125, 460631.45703125,
472201.625, 472201.625, 472201.625, 472201.625, 472201.625,
472201.625, 472201.625, 472201.625, 472201.625, 388790.9296875,
388790.9296875), Localization.prob = c(NaN, NaN, NaN, NaN,
NaN, NaN, NaN, NaN, NaN, 1, 1, 1, 1, 1, 1, 1, 1, 1, NaN,
NaN), probe_TMT6.site.IDs = c("", "", "", "", "", "", "",
"", "", "308", "308", "308", "308", "308", "308", "308",
"308", "308", "", ""), TMT_signals = c("TMT126", "TMT127N",
"TMT128N", "TMT128C", "TMT129N", "TMT129C", "TMT130N", "TMT130C",
"TMT131", "TMT126", "TMT127N", "TMT128N", "TMT128C", "TMT129N",
"TMT129C", "TMT130N", "TMT130C", "TMT131", "TMT126", "TMT127N"
), TMT_Intensities = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1824.667,
3470.869, 1691.413, 2367.219, 1895.059, 1712.427, 1529.349,
1617.825, 1677.578, 0, 0)), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
感谢您的帮助和建议!
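您的数据中有些值超出了 ylim() 设定的范围(0 到 2500)。ggplot2 会把超出刻度范围的值替换为 NA,因此这些行在计算箱线图统计量之前就被删除了。
要查看具体是哪些行,可以运行:y <- as.numeric(df$TMT_Intensities); df[!is.finite(y) | y < 0 | y > 2.5e3, ]。如果只想缩放坐标轴而不丢弃数据,请改用 coord_cartesian(ylim = c(0, 2.5e3))。
使用 ylim 时: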
> ggplot(data = df,
+ aes(x = TMT_signals, y = as.numeric(TMT_Intensities), fill = `probe.Mod.or.not(Y/N)`)) +
+ geom_boxplot() +
+ ylim(0, 2.5e3) +
+ theme_classic() +
+ theme(axis.title=element_text(size=8),
+ axis.text=element_text(size=10),
+ axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
Warning message:
Removed 1 rows containing non-finite values (stat_boxplot).
不使用 ylim 时(不再出现该警告):
> ggplot(data = df,
+ aes(x = TMT_signals, y = as.numeric(TMT_Intensities), fill = `probe.Mod.or.not(Y/N)`)) +
+ geom_boxplot() +
+ #ylim(0, 2.5e3) +
+ theme_classic() +
+ theme(axis.title=element_text(size=8),
+ axis.text=element_text(size=10),
+ axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
当我使用 ggplot 生成箱线图时,出现一条警告消息:“Removed 6588 rows containing non-finite values (stat_boxplot).”(即“已删除 6588 行包含非有限值的数据”)。但我无法根据此消息判断删除了哪些行,而我使用的数据看起来没有问题。
这是我用来生成箱线图的代码
# Boxplot of TMT_Intensities for each TMT_signals level, with boxes filled
# according to the `probe.Mod.or.not(Y/N)` column.
ggplot(
  data = df.08.long,
  aes(
    x = TMT_signals,
    y = as.numeric(TMT_Intensities),
    fill = `probe.Mod.or.not(Y/N)`
  )
) +
  geom_boxplot() +
  # NOTE: ylim() sets scale limits, so observations outside 0-2500 are
  # converted to NA and dropped before the boxplot statistics are computed;
  # this is what produces the "Removed ... rows" warning.
  ylim(0, 2.5e3) +
  theme_classic() +
  theme(
    axis.title = element_text(size = 8),
    axis.text = element_text(size = 10),
    # Rotate the x tick labels so the channel names do not overlap.
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )
下面显示了数据框(仅前 20 行)。
structure(list(Scan.number = c(10017, 10017, 10017, 10017, 10017,
10017, 10017, 10017, 10017, 13240, 13240, 13240, 13240, 13240,
13240, 13240, 13240, 13240, 27592, 27592), Sequence = c("AAAYSAQVQPVDGATR",
"AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR",
"AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR",
"AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR",
"AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR", "AAAYSAQVQPVDGATR",
"AAAYSAQVQPVDGATR", "AAEQAHLWAELVFLYDKYEEYDNAIITMMNHPTDAWK",
"AAEQAHLWAELVFLYDKYEEYDNAIITMMNHPTDAWK"), Length = c(16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 37L, 37L), Missed.cleavages = c(0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L
), Modified.sequence = c("_AAAYSAQVQPVDGATR_", "_AAAYSAQVQPVDGATR_",
"_AAAYSAQVQPVDGATR_", "_AAAYSAQVQPVDGATR_", "_AAAYSAQVQPVDGATR_",
"_AAAYSAQVQPVDGATR_", "_AAAYSAQVQPVDGATR_", "_AAAYSAQVQPVDGATR_",
"_AAAYSAQVQPVDGATR_", "_AAAY(XO44_TMT6)SAQVQPVDGATR_", "_AAAY(XO44_TMT6)SAQVQPVDGATR_",
"_AAAY(XO44_TMT6)SAQVQPVDGATR_", "_AAAY(XO44_TMT6)SAQVQPVDGATR_",
"_AAAY(XO44_TMT6)SAQVQPVDGATR_", "_AAAY(XO44_TMT6)SAQVQPVDGATR_",
"_AAAY(XO44_TMT6)SAQVQPVDGATR_", "_AAAY(XO44_TMT6)SAQVQPVDGATR_",
"_AAAY(XO44_TMT6)SAQVQPVDGATR_", "_AAEQAHLWAELVFLYDKYEEYDNAIITMMNHPTDAWK_",
"_AAEQAHLWAELVFLYDKYEEYDNAIITMMNHPTDAWK_"), probe_TMT6.Probabilities = c("",
"", "", "", "", "", "", "", "", "AAAY(1)SAQVQPVDGATR", "AAAY(1)SAQVQPVDGATR",
"AAAY(1)SAQVQPVDGATR", "AAAY(1)SAQVQPVDGATR", "AAAY(1)SAQVQPVDGATR",
"AAAY(1)SAQVQPVDGATR", "AAAY(1)SAQVQPVDGATR", "AAAY(1)SAQVQPVDGATR",
"AAAY(1)SAQVQPVDGATR", "", ""), `Uniprot ID` = c("Q9H7E9", "Q9H7E9",
"Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9",
"Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9", "Q9H7E9",
"Q9H7E9", "Q9H7E9", "Q00610", "Q00610"), `probe.Mod.or.not(Y/N)` = c("N",
"N", "N", "N", "N", "N", "N", "N", "N", "Y", "Y", "Y", "Y", "Y",
"Y", "Y", "Y", "Y", "N", "N"), `kinase.or.not(Y/N)` = c("N",
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N",
"N", "N", "N", "N", "N", "N"), Gene.Names = c("C8orf33", "C8orf33",
"C8orf33", "C8orf33", "C8orf33", "C8orf33", "C8orf33", "C8orf33",
"C8orf33", "C8orf33", "C8orf33", "C8orf33", "C8orf33", "C8orf33",
"C8orf33", "C8orf33", "C8orf33", "C8orf33", "CLTC", "CLTC"),
Charge = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), m.z = c(802.90499, 802.90499,
802.90499, 802.90499, 802.90499, 802.90499, 802.90499, 802.90499,
802.90499, 647.57262, 647.57262, 647.57262, 647.57262, 647.57262,
647.57262, 647.57262, 647.57262, 647.57262, 1107.5251, 1107.5251
), Score = c(86.313, 86.313, 86.313, 86.313, 86.313, 86.313,
86.313, 86.313, 86.313, 41.695, 41.695, 41.695, 41.695, 41.695,
41.695, 41.695, 41.695, 41.695, 28.532, 28.532), Retention.time = c(27.774,
27.774, 27.774, 27.774, 27.774, 27.774, 27.774, 27.774, 27.774,
35.978, 35.978, 35.978, 35.978, 35.978, 35.978, 35.978, 35.978,
35.978, 72.556, 72.556), Precursor.Intensity = c(460631.45703125,
460631.45703125, 460631.45703125, 460631.45703125, 460631.45703125,
460631.45703125, 460631.45703125, 460631.45703125, 460631.45703125,
472201.625, 472201.625, 472201.625, 472201.625, 472201.625,
472201.625, 472201.625, 472201.625, 472201.625, 388790.9296875,
388790.9296875), Localization.prob = c(NaN, NaN, NaN, NaN,
NaN, NaN, NaN, NaN, NaN, 1, 1, 1, 1, 1, 1, 1, 1, 1, NaN,
NaN), probe_TMT6.site.IDs = c("", "", "", "", "", "", "",
"", "", "308", "308", "308", "308", "308", "308", "308",
"308", "308", "", ""), TMT_signals = c("TMT126", "TMT127N",
"TMT128N", "TMT128C", "TMT129N", "TMT129C", "TMT130N", "TMT130C",
"TMT131", "TMT126", "TMT127N", "TMT128N", "TMT128C", "TMT129N",
"TMT129C", "TMT130N", "TMT130C", "TMT131", "TMT126", "TMT127N"
), TMT_Intensities = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1824.667,
3470.869, 1691.413, 2367.219, 1895.059, 1712.427, 1529.349,
1617.825, 1677.578, 0, 0)), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
感谢您的帮助和建议!
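您的数据中有些值超出了 ylim() 设定的范围(0 到 2500)。ggplot2 会把超出刻度范围的值替换为 NA,因此这些行在计算箱线图统计量之前就被删除了。
要查看具体是哪些行,可以运行:y <- as.numeric(df$TMT_Intensities); df[!is.finite(y) | y < 0 | y > 2.5e3, ]。如果只想缩放坐标轴而不丢弃数据,请改用 coord_cartesian(ylim = c(0, 2.5e3))。
使用 ylim 时: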
> ggplot(data = df,
+ aes(x = TMT_signals, y = as.numeric(TMT_Intensities), fill = `probe.Mod.or.not(Y/N)`)) +
+ geom_boxplot() +
+ ylim(0, 2.5e3) +
+ theme_classic() +
+ theme(axis.title=element_text(size=8),
+ axis.text=element_text(size=10),
+ axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
Warning message:
Removed 1 rows containing non-finite values (stat_boxplot).
不使用 ylim 时(不再出现该警告):
> ggplot(data = df,
+ aes(x = TMT_signals, y = as.numeric(TMT_Intensities), fill = `probe.Mod.or.not(Y/N)`)) +
+ geom_boxplot() +
+ #ylim(0, 2.5e3) +
+ theme_classic() +
+ theme(axis.title=element_text(size=8),
+ axis.text=element_text(size=10),
+ axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))