如何根据两个变量更改 geom_point 大小?
How do you change the geom_point size based on two variables?
我有一份名为 df 的数据。我需要根据 Nucleotides 中各核苷酸的比例及其各自的 Percent 值来更改点的大小。例如,如果 A 的总体百分比高于其他所有核苷酸,我希望 A 的点看起来比 G、T 或 C 的点更大。下面的代码可以绘制出所有变量,但我无法根据相应的百分比值更改各核苷酸的点大小。如果有人能在这方面提供专业帮助,我将不胜感激!
数据:
# Example data: a data.frame with five columns -- Pos (numeric), Nucleotides
# (factor with levels "A", "C", "G", "T"), Percent (numeric), Samples (factor
# with 11 library names) and Read.Length (character strings "18" to "24").
# Row names are set explicitly via row.names.
df<- structure(list(Pos = c(9, 9, 22, 9, 12, 1, 5, 21, 17, 10, 18,
15, 13, 10, 12, 23, 15, 6, 5, 2, 8, 10, 3, 9, 4, 16, 19, 6, 23,
1, 20, 22, 19, 18, 11, 20, 11, 8, 13, 16, 9, 9, 18, 9, 10, 20,
3, 10, 6, 6), Nucleotides = structure(c(1L, 2L, 4L, 3L, 3L, 2L,
3L, 2L, 2L, 4L, 4L, 4L, 4L, 1L, 3L, 3L, 3L, 3L, 3L, 2L, 1L, 4L,
2L, 4L, 3L, 2L, 1L, 3L, 4L, 2L, 3L, 3L, 2L, 2L, 2L, 4L, 1L, 4L,
2L, 2L, 3L, 2L, 2L, 4L, 2L, 3L, 3L, 4L, 4L, 1L), .Label = c("A",
"C", "G", "T"), class = "factor"), Percent = c(1.25, 0.550314465408805,
20.0731497418244, 0.995604395604396, 0, 1.00671140939597, 2.03009876156137,
0.436898024029391, 0, 0.126082666374301, 0.137827450633918, 0.0728332119446468,
0.13412531136233, 0.488293476899962, 0.606980273141123, 4.5262817940713,
0.873737176736748, 2.09909875651215, 1.13913751017087, 0.330169211721007,
0.305810397553517, 0.18524604858227, 1.25, 0.183503959822291,
0.443521668065556, 0.601235873740466, 0.404688430512696, 0, 1.69868233052866,
0.955167790489426, 1.2019531739076, 1.08638871657579, 0.691915513474144,
3.44827586206897, 0, 25.6706562113083, 0.440528634361234, 0.203389830508475,
0.547150411280846, 0.348986173079472, 1.26238676129117, 0.554843839731413,
0.55041831792162, 0.180733779143322, 0.509484245179495, 1.31092996173682,
0.651041666666667, 0, 0.182417582417582, 0.38268672472948), Samples = structure(c(9L,
2L, 2L, 3L, 10L, 9L, 4L, 5L, 10L, 4L, 3L, 6L, 4L, 1L, 8L, 11L,
4L, 4L, 1L, 5L, 2L, 7L, 9L, 7L, 3L, 7L, 11L, 10L, 2L, 11L, 1L,
8L, 6L, 9L, 10L, 5L, 10L, 6L, 1L, 3L, 8L, 3L, 8L, 11L, 4L, 2L,
5L, 10L, 3L, 5L), .Label = c("Ago2_SsHV2L_1_CATGGC_L003_R1_001",
"Ago2_SsHV2L_2_CATTTT_L003_R1_001", "Ago4_SsHV2L_1_CCAACA_L003_R1_001",
"Ago4_SsHV2L_2_TAATCG_L003_R1_001", "Dcl1_SsHV2L_1_GTAGAG_L003_R1_001",
"Dcl1_SsHV2L_2_GGTAGC_L003_R1_001", "Dcl2_SsHV2L_1_ATGAGC_L003_R1_001",
"Dcl2_SsHV2L_2_CAAAAG_L003_R1_001", "WTDK3_SsHV2L_1_GACGAC_L003_R1_001",
"WTDK3_SsHV2L_2_TACAGC_L003_R1_001", "WTDK3_SsHV2L_3_TATAAT_L003_R1_001"
), class = "factor"), Read.Length = c("24", "18", "22", "20",
"18", "23", "20", "24", "23", "22", "21", "24", "19", "24", "19",
"23", "21", "23", "18", "20", "19", "24", "24", "23", "21", "20",
"21", "23", "24", "23", "24", "23", "24", "19", "24", "20", "23",
"20", "21", "21", "20", "21", "22", "19", "20", "21", "18", "22",
"20", "21")), .Names = c("Pos", "Nucleotides", "Percent", "Samples",
"Read.Length"), row.names = c("9.A63", "9.C8", "22.T4", "9.G17",
"12.G64", "1.C62", "5.G24", "21.C20", "17.C69", "10.T26", "18.T18",
"15.T42", "13.T23", "10.A7", "12.G51", "23.G21", "15.G25", "6.G27",
"5.G1", "2.C31", "8.A9", "10.T49", "3.C63", "9.T48", "4.G18",
"16.C45", "19.A63", "6.G69", "23.T4", "1.C76", "20.G5", "22.G23",
"19.C36", "18.C58", "11.C70", "20.T21", "11.A69", "8.T38", "13.C4",
"16.C18", "9.G52", "9.C18", "18.C54", "9.T72", "10.C24", "20.G7",
"3.G29", "10.T68", "6.T17", "6.A32"), class = "data.frame")
代码:
# Scatter plot of Percent against Pos for the data frame `df` defined above.
# Uses ggplot2 functions (ggplot, geom_point, scale_*, theme*); ggplot2 must be
# attached before this runs.
# Aesthetics: colour encodes Read.Length, shape encodes Samples, and point size
# encodes the Nucleotides factor.
p <- ggplot(df) +
  geom_point(
    aes(
      x = Pos, y = Percent, color = Read.Length,
      group = Samples, shape = Samples, size = Nucleotides
    )
  ) +
  # One shape code per Samples level. seq_len() yields an empty vector when
  # there are no levels, whereas 1:nlevels() would yield c(1, 0).
  scale_shape_manual(values = seq_len(nlevels(df$Samples))) +
  scale_x_continuous(breaks = c(1, seq(2, 24, 2))) +
  # Seven colours, one for each distinct Read.Length value ("18" to "24") in df.
  scale_color_manual(
    values = c(
      "lavender", "darkslategray2", "darkseagreen4", "yellow", "blue", "red",
      "deeppink4"
    )
  ) +
  theme_bw() +
  # All theme tweaks in a single call; the elements set here do not overlap,
  # so this is equivalent to chaining separate theme() calls.
  theme(
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    text = element_text(size = 12),
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14, face = "bold")
  )
# Print the plot.
p
如果我没有理解错你的问题,那么你的代码已经可以满足你的要求了:点的大小映射是基于总体百分比的。下面的代码显示,"A" 的总百分比最低,而 "T" 的总百分比最高。
# Adds an overall_percent column to `df` (defined above): the sum of Percent
# within each Nucleotides level, repeated on every row of that level.
# Uses dplyr verbs and the %>% pipe; dplyr must be attached before this runs.
df2 <- df %>%
  group_by(Nucleotides) %>%
  mutate(overall_percent = sum(Percent)) %>%
  # Drop the grouping so later operations on df2 are not silently applied
  # per nucleotide.
  ungroup()
不过,同时使用多种形状可能会让图难以辨认。在这种情况下,可以用 facet_grid 或 facet_wrap 来增加一个展示维度。例如,你可以把 Samples 映射到颜色,并按 Read.Length 分面,这样点的大小映射就会更加明显。
我有一份名为 df 的数据。我需要根据 Nucleotides 中各核苷酸的比例及其各自的 Percent 值来更改点的大小。例如,如果 A 的总体百分比高于其他所有核苷酸,我希望 A 的点看起来比 G、T 或 C 的点更大。下面的代码可以绘制出所有变量,但我无法根据相应的百分比值更改各核苷酸的点大小。如果有人能在这方面提供专业帮助,我将不胜感激!
数据:
# Example data: a data.frame with five columns -- Pos (numeric), Nucleotides
# (factor with levels "A", "C", "G", "T"), Percent (numeric), Samples (factor
# with 11 library names) and Read.Length (character strings "18" to "24").
# Row names are set explicitly via row.names.
df<- structure(list(Pos = c(9, 9, 22, 9, 12, 1, 5, 21, 17, 10, 18,
15, 13, 10, 12, 23, 15, 6, 5, 2, 8, 10, 3, 9, 4, 16, 19, 6, 23,
1, 20, 22, 19, 18, 11, 20, 11, 8, 13, 16, 9, 9, 18, 9, 10, 20,
3, 10, 6, 6), Nucleotides = structure(c(1L, 2L, 4L, 3L, 3L, 2L,
3L, 2L, 2L, 4L, 4L, 4L, 4L, 1L, 3L, 3L, 3L, 3L, 3L, 2L, 1L, 4L,
2L, 4L, 3L, 2L, 1L, 3L, 4L, 2L, 3L, 3L, 2L, 2L, 2L, 4L, 1L, 4L,
2L, 2L, 3L, 2L, 2L, 4L, 2L, 3L, 3L, 4L, 4L, 1L), .Label = c("A",
"C", "G", "T"), class = "factor"), Percent = c(1.25, 0.550314465408805,
20.0731497418244, 0.995604395604396, 0, 1.00671140939597, 2.03009876156137,
0.436898024029391, 0, 0.126082666374301, 0.137827450633918, 0.0728332119446468,
0.13412531136233, 0.488293476899962, 0.606980273141123, 4.5262817940713,
0.873737176736748, 2.09909875651215, 1.13913751017087, 0.330169211721007,
0.305810397553517, 0.18524604858227, 1.25, 0.183503959822291,
0.443521668065556, 0.601235873740466, 0.404688430512696, 0, 1.69868233052866,
0.955167790489426, 1.2019531739076, 1.08638871657579, 0.691915513474144,
3.44827586206897, 0, 25.6706562113083, 0.440528634361234, 0.203389830508475,
0.547150411280846, 0.348986173079472, 1.26238676129117, 0.554843839731413,
0.55041831792162, 0.180733779143322, 0.509484245179495, 1.31092996173682,
0.651041666666667, 0, 0.182417582417582, 0.38268672472948), Samples = structure(c(9L,
2L, 2L, 3L, 10L, 9L, 4L, 5L, 10L, 4L, 3L, 6L, 4L, 1L, 8L, 11L,
4L, 4L, 1L, 5L, 2L, 7L, 9L, 7L, 3L, 7L, 11L, 10L, 2L, 11L, 1L,
8L, 6L, 9L, 10L, 5L, 10L, 6L, 1L, 3L, 8L, 3L, 8L, 11L, 4L, 2L,
5L, 10L, 3L, 5L), .Label = c("Ago2_SsHV2L_1_CATGGC_L003_R1_001",
"Ago2_SsHV2L_2_CATTTT_L003_R1_001", "Ago4_SsHV2L_1_CCAACA_L003_R1_001",
"Ago4_SsHV2L_2_TAATCG_L003_R1_001", "Dcl1_SsHV2L_1_GTAGAG_L003_R1_001",
"Dcl1_SsHV2L_2_GGTAGC_L003_R1_001", "Dcl2_SsHV2L_1_ATGAGC_L003_R1_001",
"Dcl2_SsHV2L_2_CAAAAG_L003_R1_001", "WTDK3_SsHV2L_1_GACGAC_L003_R1_001",
"WTDK3_SsHV2L_2_TACAGC_L003_R1_001", "WTDK3_SsHV2L_3_TATAAT_L003_R1_001"
), class = "factor"), Read.Length = c("24", "18", "22", "20",
"18", "23", "20", "24", "23", "22", "21", "24", "19", "24", "19",
"23", "21", "23", "18", "20", "19", "24", "24", "23", "21", "20",
"21", "23", "24", "23", "24", "23", "24", "19", "24", "20", "23",
"20", "21", "21", "20", "21", "22", "19", "20", "21", "18", "22",
"20", "21")), .Names = c("Pos", "Nucleotides", "Percent", "Samples",
"Read.Length"), row.names = c("9.A63", "9.C8", "22.T4", "9.G17",
"12.G64", "1.C62", "5.G24", "21.C20", "17.C69", "10.T26", "18.T18",
"15.T42", "13.T23", "10.A7", "12.G51", "23.G21", "15.G25", "6.G27",
"5.G1", "2.C31", "8.A9", "10.T49", "3.C63", "9.T48", "4.G18",
"16.C45", "19.A63", "6.G69", "23.T4", "1.C76", "20.G5", "22.G23",
"19.C36", "18.C58", "11.C70", "20.T21", "11.A69", "8.T38", "13.C4",
"16.C18", "9.G52", "9.C18", "18.C54", "9.T72", "10.C24", "20.G7",
"3.G29", "10.T68", "6.T17", "6.A32"), class = "data.frame")
代码:
# Scatter plot of Percent against Pos for the data frame `df` defined above.
# Uses ggplot2 functions (ggplot, geom_point, scale_*, theme*); ggplot2 must be
# attached before this runs.
# Aesthetics: colour encodes Read.Length, shape encodes Samples, and point size
# encodes the Nucleotides factor.
p <- ggplot(df) +
  geom_point(
    aes(
      x = Pos, y = Percent, color = Read.Length,
      group = Samples, shape = Samples, size = Nucleotides
    )
  ) +
  # One shape code per Samples level. seq_len() yields an empty vector when
  # there are no levels, whereas 1:nlevels() would yield c(1, 0).
  scale_shape_manual(values = seq_len(nlevels(df$Samples))) +
  scale_x_continuous(breaks = c(1, seq(2, 24, 2))) +
  # Seven colours, one for each distinct Read.Length value ("18" to "24") in df.
  scale_color_manual(
    values = c(
      "lavender", "darkslategray2", "darkseagreen4", "yellow", "blue", "red",
      "deeppink4"
    )
  ) +
  theme_bw() +
  # All theme tweaks in a single call; the elements set here do not overlap,
  # so this is equivalent to chaining separate theme() calls.
  theme(
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    text = element_text(size = 12),
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 14, face = "bold")
  )
# Print the plot.
p
如果我没有理解错你的问题,那么你的代码已经可以满足你的要求了:点的大小映射是基于总体百分比的。下面的代码显示,"A" 的总百分比最低,而 "T" 的总百分比最高。
# Adds an overall_percent column to `df` (defined above): the sum of Percent
# within each Nucleotides level, repeated on every row of that level.
# Uses dplyr verbs and the %>% pipe; dplyr must be attached before this runs.
df2 <- df %>%
  group_by(Nucleotides) %>%
  mutate(overall_percent = sum(Percent)) %>%
  # Drop the grouping so later operations on df2 are not silently applied
  # per nucleotide.
  ungroup()
不过,同时使用多种形状可能会让图难以辨认。在这种情况下,可以用 facet_grid 或 facet_wrap 来增加一个展示维度。例如,你可以把 Samples 映射到颜色,并按 Read.Length 分面,这样点的大小映射就会更加明显。