如何用其他信息创建代表不同集群的网络?
How to create a network representing different clusters with other information?
我有一个数据框 nodes
,其信息如下所示:
# dput() dump of the `nodes` data frame: 9 rows with the columns
# Names, type, type_num and Clusters.
dput(nodes)
structure(list(Names = c("A4GALT", "AASS", "ABCA10", "ABCA7",
"ABCD4", "ABHD4", "ABTB1", "AC006978.2", "AC009119.2"), type = c("typeA",
"typeA", "typeC", "typeA", "typeC", "typeC", "typeB", "typeB",
"typeB"), type_num = c(1L, 1L, 3L, 1L, 3L, 3L, 2L, 2L, 2L), Clusters = c("Cluster1",
"Cluster1", "Cluster2", "Cluster3", "Cluster3", "Cluster1", "Cluster2",
"Cluster3", "Cluster2")), row.names = c(NA, 9L), class = "data.frame")
因此,nodes 数据框共有 4 列:Names 是基因名称,type 是基因的类型,type_num 是各类型对应的数字编号,Clusters 列表示每个基因属于 3 个簇中的哪一个。
同样,我还有其他数据框 edges
,其中包含如下信息:
# dput() dump of the `edges` data frame: 36 rows with the columns
# fromNode, toNode, weight and direction (every row is "undirected").
dput(edges)
structure(list(fromNode = c("A4GALT", "A4GALT", "A4GALT", "A4GALT",
"A4GALT", "A4GALT", "A4GALT", "A4GALT", "AASS", "AASS", "AASS",
"AASS", "AASS", "AASS", "AASS", "ABCA10", "ABCA10", "ABCA10",
"ABCA10", "ABCA10", "ABCA10", "ABCA7", "ABCA7", "ABCA7", "ABCA7",
"ABCA7", "ABCD4", "ABCD4", "ABCD4", "ABCD4", "ABHD4", "ABHD4",
"ABHD4", "ABTB1", "ABTB1", "AC006978.2"), toNode = c("AASS",
"ABCA10", "ABCA7", "ABCD4", "ABHD4", "ABTB1", "AC006978.2", "AC009119.2",
"ABCA10", "ABCA7", "ABCD4", "ABHD4", "ABTB1", "AC006978.2", "AC009119.2",
"ABCA7", "ABCD4", "ABHD4", "ABTB1", "AC006978.2", "AC009119.2",
"ABCD4", "ABHD4", "ABTB1", "AC006978.2", "AC009119.2", "ABHD4",
"ABTB1", "AC006978.2", "AC009119.2", "ABTB1", "AC006978.2", "AC009119.2",
"AC006978.2", "AC009119.2", "AC009119.2"), weight = c(0.005842835,
0.002253695, 0.014513253, 0.004851739, 0.066702792, 0.009418991,
0.001136938, 0.000474221, 0.004405601, 0.000666001, 0.005625977,
0.0333554, 0.004666223, 0.000103131, 0.00026302, 0.004514819,
0.029632695, 0.001825839, 0.028379806, 0.001403298, 0.008339397,
0.02393394, 0.004782329, 0.024767355, 0.002986813, 0.00559471,
0.005961539, 0.064831874, 0.013023138, 0.027935729, 0.006618816,
0.001134219, 0.012798368, 0.007961242, 0.01640476, 0.007997743
), direction = c("undirected", "undirected", "undirected", "undirected",
"undirected", "undirected", "undirected", "undirected", "undirected",
"undirected", "undirected", "undirected", "undirected", "undirected",
"undirected", "undirected", "undirected", "undirected", "undirected",
"undirected", "undirected", "undirected", "undirected", "undirected",
"undirected", "undirected", "undirected", "undirected", "undirected",
"undirected", "undirected", "undirected", "undirected", "undirected",
"undirected", "undirected")), row.names = c(NA, -36L), class = "data.frame")
尝试过 igraph
,但看起来不是我想要的样子。
library(igraph)

# Build an undirected graph from the two data frames: `edges` supplies the
# endpoints and the `weight` attribute used below, `nodes` supplies the
# vertex table.
net <- graph_from_data_frame(d = edges, vertices = nodes, directed = FALSE)

# Inspect the graph as an edge list and as a weighted adjacency matrix.
as_edgelist(net, names = TRUE)
as_adjacency_matrix(net, attr = "weight")

# Remove self-loops from the graph while keeping multiple edges.
net <- simplify(net, remove.multiple = FALSE, remove.loops = TRUE)

# Reduce the arrow size and hide the vertex labels.
plot(net, edge.arrow.size = 0.4, vertex.label = NA)
看起来像这样:
有人能帮我用上面给出的数据创建一个像上图那样的网络吗?非常感谢任何帮助,先谢谢了。
我不确定下面的代码是否有效
# One group of vertex names per distinct value of the `Clusters` attribute;
# passed to `mark.groups` below.
cluster_members <- split(V(net)$name, V(net)$Clusters)

# Edge widths come straight from the edge weights; the vertex colour is a
# factor over the vertex names.
plot(
  net,
  mark.groups = cluster_members,
  vertex.color = factor(V(net)$name),
  edge.width = E(net)$weight
)
这给出了
这主要是对 Grouped layout based on attribute 答案的重复。
我认为您是想按 Clusters 属性对顶点进行分组,并按 type 属性为它们着色。我在这个答案中就是这样做的。
您创建网络的代码没有问题,但简单的绘图不会按簇对顶点进行分组(我添加了按类型给顶点着色)。
# Integer colour code per row of `nodes`, one code per distinct type.
type_codes <- as.numeric(factor(nodes$type))

# Plain plot without labels, coloured by type.
plot(
  net,
  vertex.color = type_codes,
  vertex.label = NA,
  edge.arrow.size = .4
)
您需要的是一个能突出簇结构的布局。上面引用的那个答案展示了做法:另外生成一个顶点相同、但同一簇内顶点之间的边具有很大权重的图,并用它来计算布局。对于您的情况,代码如下:
# Build a helper graph that is used ONLY to compute a cluster-aware layout:
# same vertices as `net`, every original edge reset to weight 1, plus heavy
# edges between every pair of vertices that share a cluster.
Grouped.net <- net
E(Grouped.net)$weight <- 1

# Weight of the artificial intra-cluster edges; large relative to the weight
# of 1 given to the original edges.
intra_cluster_weight <- 80

# Read cluster membership from the graph itself so the indices returned by
# which() are vertex ids of this graph.
vertex_clusters <- V(Grouped.net)$Clusters

## Add edges with high weight between all nodes in the same group
for (cluster_id in unique(vertex_clusters)) {
  members <- which(vertex_clusters == cluster_id)
  # A cluster with fewer than two vertices has no pairs to connect. Skipping
  # it also avoids combn() expanding a single integer k into seq_len(k),
  # which would add edges between unrelated vertices (or fail for k = 1).
  if (length(members) < 2L) {
    next
  }
  # combn() returns a 2 x k matrix; flattened column-wise it is the
  # (from, to, from, to, ...) vector that add_edges() expects.
  Grouped.net <- add_edges(
    Grouped.net,
    as.vector(combn(members, 2)),
    attr = list(weight = intra_cluster_weight)
  )
}

## Now create a layout based on Grouped.net (seed fixed for reproducibility)
set.seed(567)
LO <- layout_with_fr(Grouped.net)

## Use the layout to plot the original graph, coloring vertices by type
plot(net, layout = LO, edge.arrow.size = 0.4, vertex.label = NA,
     vertex.color = as.numeric(factor(V(net)$type)))
如果顶点数量很多,您可能还需要通过 vertex.size=4 来减小顶点的大小。
我有一个数据框 nodes
,其信息如下所示:
# dput() dump of the `nodes` data frame: 9 rows with the columns
# Names, type, type_num and Clusters.
dput(nodes)
structure(list(Names = c("A4GALT", "AASS", "ABCA10", "ABCA7",
"ABCD4", "ABHD4", "ABTB1", "AC006978.2", "AC009119.2"), type = c("typeA",
"typeA", "typeC", "typeA", "typeC", "typeC", "typeB", "typeB",
"typeB"), type_num = c(1L, 1L, 3L, 1L, 3L, 3L, 2L, 2L, 2L), Clusters = c("Cluster1",
"Cluster1", "Cluster2", "Cluster3", "Cluster3", "Cluster1", "Cluster2",
"Cluster3", "Cluster2")), row.names = c(NA, 9L), class = "data.frame")
因此,nodes 数据框共有 4 列:Names 是基因名称,type 是基因的类型,type_num 是各类型对应的数字编号,Clusters 列表示每个基因属于 3 个簇中的哪一个。
同样,我还有其他数据框 edges
,其中包含如下信息:
# dput() dump of the `edges` data frame: 36 rows with the columns
# fromNode, toNode, weight and direction (every row is "undirected").
dput(edges)
structure(list(fromNode = c("A4GALT", "A4GALT", "A4GALT", "A4GALT",
"A4GALT", "A4GALT", "A4GALT", "A4GALT", "AASS", "AASS", "AASS",
"AASS", "AASS", "AASS", "AASS", "ABCA10", "ABCA10", "ABCA10",
"ABCA10", "ABCA10", "ABCA10", "ABCA7", "ABCA7", "ABCA7", "ABCA7",
"ABCA7", "ABCD4", "ABCD4", "ABCD4", "ABCD4", "ABHD4", "ABHD4",
"ABHD4", "ABTB1", "ABTB1", "AC006978.2"), toNode = c("AASS",
"ABCA10", "ABCA7", "ABCD4", "ABHD4", "ABTB1", "AC006978.2", "AC009119.2",
"ABCA10", "ABCA7", "ABCD4", "ABHD4", "ABTB1", "AC006978.2", "AC009119.2",
"ABCA7", "ABCD4", "ABHD4", "ABTB1", "AC006978.2", "AC009119.2",
"ABCD4", "ABHD4", "ABTB1", "AC006978.2", "AC009119.2", "ABHD4",
"ABTB1", "AC006978.2", "AC009119.2", "ABTB1", "AC006978.2", "AC009119.2",
"AC006978.2", "AC009119.2", "AC009119.2"), weight = c(0.005842835,
0.002253695, 0.014513253, 0.004851739, 0.066702792, 0.009418991,
0.001136938, 0.000474221, 0.004405601, 0.000666001, 0.005625977,
0.0333554, 0.004666223, 0.000103131, 0.00026302, 0.004514819,
0.029632695, 0.001825839, 0.028379806, 0.001403298, 0.008339397,
0.02393394, 0.004782329, 0.024767355, 0.002986813, 0.00559471,
0.005961539, 0.064831874, 0.013023138, 0.027935729, 0.006618816,
0.001134219, 0.012798368, 0.007961242, 0.01640476, 0.007997743
), direction = c("undirected", "undirected", "undirected", "undirected",
"undirected", "undirected", "undirected", "undirected", "undirected",
"undirected", "undirected", "undirected", "undirected", "undirected",
"undirected", "undirected", "undirected", "undirected", "undirected",
"undirected", "undirected", "undirected", "undirected", "undirected",
"undirected", "undirected", "undirected", "undirected", "undirected",
"undirected", "undirected", "undirected", "undirected", "undirected",
"undirected", "undirected")), row.names = c(NA, -36L), class = "data.frame")
尝试过 igraph
,但看起来不是我想要的样子。
library(igraph)

# Build an undirected graph from the two data frames: `edges` supplies the
# endpoints and the `weight` attribute used below, `nodes` supplies the
# vertex table.
net <- graph_from_data_frame(d = edges, vertices = nodes, directed = FALSE)

# Inspect the graph as an edge list and as a weighted adjacency matrix.
as_edgelist(net, names = TRUE)
as_adjacency_matrix(net, attr = "weight")

# Remove self-loops from the graph while keeping multiple edges.
net <- simplify(net, remove.multiple = FALSE, remove.loops = TRUE)

# Reduce the arrow size and hide the vertex labels.
plot(net, edge.arrow.size = 0.4, vertex.label = NA)
看起来像这样:
有人能帮我用上面给出的数据创建一个像上图那样的网络吗?非常感谢任何帮助,先谢谢了。
我不确定下面的代码是否有效
# One group of vertex names per distinct value of the `Clusters` attribute;
# passed to `mark.groups` below.
cluster_members <- split(V(net)$name, V(net)$Clusters)

# Edge widths come straight from the edge weights; the vertex colour is a
# factor over the vertex names.
plot(
  net,
  mark.groups = cluster_members,
  vertex.color = factor(V(net)$name),
  edge.width = E(net)$weight
)
这给出了
这主要是对 Grouped layout based on attribute 答案的重复。
我认为您是想按 Clusters 属性对顶点进行分组,并按 type 属性为它们着色。我在这个答案中就是这样做的。
您创建网络的代码没有问题,但简单的绘图不会按簇对顶点进行分组(我添加了按类型给顶点着色)。
# Integer colour code per row of `nodes`, one code per distinct type.
type_codes <- as.numeric(factor(nodes$type))

# Plain plot without labels, coloured by type.
plot(
  net,
  vertex.color = type_codes,
  vertex.label = NA,
  edge.arrow.size = .4
)
您需要的是一个能突出簇结构的布局。上面引用的那个答案展示了做法:另外生成一个顶点相同、但同一簇内顶点之间的边具有很大权重的图,并用它来计算布局。对于您的情况,代码如下:
# Build a helper graph that is used ONLY to compute a cluster-aware layout:
# same vertices as `net`, every original edge reset to weight 1, plus heavy
# edges between every pair of vertices that share a cluster.
Grouped.net <- net
E(Grouped.net)$weight <- 1

# Weight of the artificial intra-cluster edges; large relative to the weight
# of 1 given to the original edges.
intra_cluster_weight <- 80

# Read cluster membership from the graph itself so the indices returned by
# which() are vertex ids of this graph.
vertex_clusters <- V(Grouped.net)$Clusters

## Add edges with high weight between all nodes in the same group
for (cluster_id in unique(vertex_clusters)) {
  members <- which(vertex_clusters == cluster_id)
  # A cluster with fewer than two vertices has no pairs to connect. Skipping
  # it also avoids combn() expanding a single integer k into seq_len(k),
  # which would add edges between unrelated vertices (or fail for k = 1).
  if (length(members) < 2L) {
    next
  }
  # combn() returns a 2 x k matrix; flattened column-wise it is the
  # (from, to, from, to, ...) vector that add_edges() expects.
  Grouped.net <- add_edges(
    Grouped.net,
    as.vector(combn(members, 2)),
    attr = list(weight = intra_cluster_weight)
  )
}

## Now create a layout based on Grouped.net (seed fixed for reproducibility)
set.seed(567)
LO <- layout_with_fr(Grouped.net)

## Use the layout to plot the original graph, coloring vertices by type
plot(net, layout = LO, edge.arrow.size = 0.4, vertex.label = NA,
     vertex.color = as.numeric(factor(V(net)$type)))
如果顶点数量很多,您可能还需要通过 vertex.size=4 来减小顶点的大小。