如何根据两个变量更改 geom_point 大小?

How do you change the geom_point size based on two variables?

我有一个名为 df 的数据集。我需要根据各个 Nucleotides(核苷酸)对应的 Percent 值所占的比例来更改点的大小。例如,如果 A 的总体百分比高于所有其他核苷酸,我希望 A 看起来比 G、T 或 C 大。我下面的代码可以绘制所有变量,但我无法根据相应的百分比值更改核苷酸的大小。如果有人能提供这方面的专业帮助,我将不胜感激!

数据:

# Example data in dput() form: a 50-row data.frame with five columns.
#   Pos         - numeric position values
#   Nucleotides - factor with levels "A", "C", "G", "T"
#   Percent     - numeric percentage values
#   Samples     - factor with 11 sample-name levels
#   Read.Length - character strings ("18" through "24"), not numbers
df<- structure(list(Pos = c(9, 9, 22, 9, 12, 1, 5, 21, 17, 10, 18, 
15, 13, 10, 12, 23, 15, 6, 5, 2, 8, 10, 3, 9, 4, 16, 19, 6, 23, 
1, 20, 22, 19, 18, 11, 20, 11, 8, 13, 16, 9, 9, 18, 9, 10, 20, 
3, 10, 6, 6), Nucleotides = structure(c(1L, 2L, 4L, 3L, 3L, 2L, 
3L, 2L, 2L, 4L, 4L, 4L, 4L, 1L, 3L, 3L, 3L, 3L, 3L, 2L, 1L, 4L, 
2L, 4L, 3L, 2L, 1L, 3L, 4L, 2L, 3L, 3L, 2L, 2L, 2L, 4L, 1L, 4L, 
2L, 2L, 3L, 2L, 2L, 4L, 2L, 3L, 3L, 4L, 4L, 1L), .Label = c("A", 
"C", "G", "T"), class = "factor"), Percent = c(1.25, 0.550314465408805, 
20.0731497418244, 0.995604395604396, 0, 1.00671140939597, 2.03009876156137, 
0.436898024029391, 0, 0.126082666374301, 0.137827450633918, 0.0728332119446468, 
0.13412531136233, 0.488293476899962, 0.606980273141123, 4.5262817940713, 
0.873737176736748, 2.09909875651215, 1.13913751017087, 0.330169211721007, 
0.305810397553517, 0.18524604858227, 1.25, 0.183503959822291, 
0.443521668065556, 0.601235873740466, 0.404688430512696, 0, 1.69868233052866, 
0.955167790489426, 1.2019531739076, 1.08638871657579, 0.691915513474144, 
3.44827586206897, 0, 25.6706562113083, 0.440528634361234, 0.203389830508475, 
0.547150411280846, 0.348986173079472, 1.26238676129117, 0.554843839731413, 
0.55041831792162, 0.180733779143322, 0.509484245179495, 1.31092996173682, 
0.651041666666667, 0, 0.182417582417582, 0.38268672472948), Samples = structure(c(9L, 
2L, 2L, 3L, 10L, 9L, 4L, 5L, 10L, 4L, 3L, 6L, 4L, 1L, 8L, 11L, 
4L, 4L, 1L, 5L, 2L, 7L, 9L, 7L, 3L, 7L, 11L, 10L, 2L, 11L, 1L, 
8L, 6L, 9L, 10L, 5L, 10L, 6L, 1L, 3L, 8L, 3L, 8L, 11L, 4L, 2L, 
5L, 10L, 3L, 5L), .Label = c("Ago2_SsHV2L_1_CATGGC_L003_R1_001", 
"Ago2_SsHV2L_2_CATTTT_L003_R1_001", "Ago4_SsHV2L_1_CCAACA_L003_R1_001", 
"Ago4_SsHV2L_2_TAATCG_L003_R1_001", "Dcl1_SsHV2L_1_GTAGAG_L003_R1_001", 
"Dcl1_SsHV2L_2_GGTAGC_L003_R1_001", "Dcl2_SsHV2L_1_ATGAGC_L003_R1_001", 
"Dcl2_SsHV2L_2_CAAAAG_L003_R1_001", "WTDK3_SsHV2L_1_GACGAC_L003_R1_001", 
"WTDK3_SsHV2L_2_TACAGC_L003_R1_001", "WTDK3_SsHV2L_3_TATAAT_L003_R1_001"
), class = "factor"), Read.Length = c("24", "18", "22", "20", 
"18", "23", "20", "24", "23", "22", "21", "24", "19", "24", "19", 
"23", "21", "23", "18", "20", "19", "24", "24", "23", "21", "20", 
"21", "23", "24", "23", "24", "23", "24", "19", "24", "20", "23", 
"20", "21", "21", "20", "21", "22", "19", "20", "21", "18", "22", 
"20", "21")), .Names = c("Pos", "Nucleotides", "Percent", "Samples", 
"Read.Length"), row.names = c("9.A63", "9.C8", "22.T4", "9.G17", 
"12.G64", "1.C62", "5.G24", "21.C20", "17.C69", "10.T26", "18.T18", 
"15.T42", "13.T23", "10.A7", "12.G51", "23.G21", "15.G25", "6.G27", 
"5.G1", "2.C31", "8.A9", "10.T49", "3.C63", "9.T48", "4.G18", 
"16.C45", "19.A63", "6.G69", "23.T4", "1.C76", "20.G5", "22.G23", 
"19.C36", "18.C58", "11.C70", "20.T21", "11.A69", "8.T38", "13.C4", 
"16.C18", "9.G52", "9.C18", "18.C54", "9.T72", "10.C24", "20.G7", 
"3.G29", "10.T68", "6.T17", "6.A32"), class = "data.frame")

代码:

library(ggplot2)

# Scatter plot of Percent against Pos. Colour encodes Read.Length, shape
# encodes Samples, and point size is mapped to the Nucleotides factor.
# NOTE(review): size is mapped to a discrete factor, so ggplot2 assigns point
# sizes by factor level order (A, C, G, T) rather than from any Percent total,
# and it warns that using size for a discrete variable is not advised.
p <- ggplot(df) +
  geom_point(
    aes(
      x = Pos,
      y = Percent,
      color = Read.Length,
      group = Samples,
      shape = Samples,
      size = Nucleotides
    )
  ) +
  # One shape code per Samples level. seq_len() yields an empty vector for
  # zero levels, whereas 1:0 would yield c(1, 0).
  scale_shape_manual(values = seq_len(nlevels(df$Samples))) +
  scale_x_continuous(breaks = c(1, seq(2, 24, 2))) +
  # Seven colours, one for each distinct Read.Length value ("18" to "24").
  scale_color_manual(
    values = c(
      "lavender", "darkslategray2", "darkseagreen4", "yellow",
      "blue", "red", "deeppink4"
    )
  ) +
  theme_bw() +
  # Single theme() call: strip grid, border and panel background, keep black
  # axis lines, and set the text sizes. No element is set twice, so this is
  # equivalent to stacking separate theme() layers.
  theme(
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    text = element_text(size = 12),
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14, face = "bold")
  )
p

如果我没有理解错你的问题,那么你的代码已经可以满足你的要求了:点的大小顺序与整体百分比的排序一致。下面的代码计算每种核苷酸的总百分比,结果显示 "A" 的总百分比值最低,而 "T" 的总百分比值最高。(需要注意:把 size 映射到因子时,点的大小实际上按因子水平的顺序 A、C、G、T 递增;在这份数据中,该顺序恰好与总百分比的排序相同。)

library(dplyr)

# Attach each nucleotide's summed Percent to every row as `overall_percent`.
# ungroup() drops the grouping afterwards so that later verbs applied to df2
# do not silently operate per nucleotide.
df2 <- df %>%
  group_by(Nucleotides) %>%
  mutate(overall_percent = sum(Percent)) %>%
  ungroup()

但是,同时使用多种形状可能会造成混淆。在这种情况下,facet_grid 或 facet_wrap 可能有助于增加维度。例如,您可以将 Samples 映射为颜色,并按 Read.Length 进行分面,这样点的大小映射就会更加明显。