ggplot:如何绘制热图而不考虑变量的数量
ggplot: how to plot heatmap regardless of the number of variables
使用下面的data.frame
数据
df <- read.table(text = c("
NA NA NA NA NA NA NA NA NA NA NA NA
0.4748 NA NA NA NA NA NA NA NA NA NA NA
0.905 0.5362 NA NA NA NA NA NA NA NA NA NA
0.0754 0.0118 0.0614 NA NA NA NA NA NA NA NA NA
0.8768 0.3958 0.7952 0.1034 NA NA NA NA NA NA NA NA
0.5708 0.2056 0.4984 0.2356 0.6736 NA NA NA NA NA NA NA
0.2248 0.6204 0.268 0.0014 0.183 0.0768 NA NA NA NA NA NA
0.483 0.9824 0.5314 0.0114 0.3906 0.1968 0.6308 NA NA NA NA NA
0.697 0.732 0.7604 0.0264 0.594 0.3334 0.416 0.7388 NA NA NA NA
0.2918 0.7286 0.3382 0.003 0.2386 0.1122 0.8712 0.7266 0.509 NA NA NA
0.5904 0.8352 0.6704 0.0188 0.4966 0.273 0.5192 0.8328 0.8736 0.5914 NA NA
0.3838 0.8768 0.4476 0.0042 0.3148 0.1498 0.7288 0.873 0.6178 0.8276 0.7432 NA
"), header = F)
colnames(df) <- c( "TK1", "TK2", "TK3", "TK4" , "TK5", "TK6", "TK7", "TK8", "TK9", "TK10", "TK11", "TK12")
rownames(df) <- c( "TK1", "TK2", "TK3", "TK4" , "TK5", "TK6", "TK7", "TK8", "TK9", "TK10", "TK11", "TK12")
df
# TK1 TK2 TK3 TK4 TK5 TK6 TK7 TK8 TK9 TK10 TK11 TK12
#TK1 NA NA NA NA NA NA NA NA NA NA NA NA
#TK2 0.4748 NA NA NA NA NA NA NA NA NA NA NA
#TK3 0.9050 0.5362 NA NA NA NA NA NA NA NA NA NA
#TK4 0.0754 0.0118 0.0614 NA NA NA NA NA NA NA NA NA
#TK5 0.8768 0.3958 0.7952 0.1034 NA NA NA NA NA NA NA NA
#TK6 0.5708 0.2056 0.4984 0.2356 0.6736 NA NA NA NA NA NA NA
#TK7 0.2248 0.6204 0.2680 0.0014 0.1830 0.0768 NA NA NA NA NA NA
#TK8 0.4830 0.9824 0.5314 0.0114 0.3906 0.1968 0.6308 NA NA NA NA NA
#TK9 0.6970 0.7320 0.7604 0.0264 0.5940 0.3334 0.4160 0.7388 NA NA NA NA
#TK10 0.2918 0.7286 0.3382 0.0030 0.2386 0.1122 0.8712 0.7266 0.5090 NA NA NA
#TK11 0.5904 0.8352 0.6704 0.0188 0.4966 0.2730 0.5192 0.8328 0.8736 0.5914 NA NA
#TK12 0.3838 0.8768 0.4476 0.0042 0.3148 0.1498 0.7288 0.8730 0.6178 0.8276 0.7432 NA
我无法更改输入数据。我将根据用户每次使用不同的变量以这种格式获取它。
我使用下面的代码创建了一个新变量Relationship
来将df
从宽格式转换为长格式,然后排列Relation1
和Relationship
变量的级别感谢 。最后,我创建了如下所示的热图
trial <- df
trial$Relationship <- rownames(df)
trial1 <- subset(trial, select = c(13, 1, 2, 3,4,5,6,7,8,9,10,11,12))
df2 <- gather(trial1, "Relation1", "Strength", 2:13)
df2 <- df2 %>%
dplyr::mutate(Strength1 = round(Strength, digits = 2))%>%
dplyr::select(Relationship,Relation1, Strength1 )
df3 <- df2 %>%
extract(Relationship, into = c("Relationship1", "Relationship2"), "(\D+)(\d+)",
remove = FALSE, convert=TRUE) %>%
mutate(Relationship = factor(Relationship, levels = paste0(Relationship1[1],
min(Relationship2):max(Relationship2)))) %>%
select(-Relationship1, -Relationship2) %>%
extract(Relation1, into = c("Relation11", "Relation12"), "(\D+)(\d+)",
remove = FALSE, convert=TRUE) %>%
mutate(Relation1 = factor(Relation1, levels = paste0(Relation11[1],
min(Relation12):max(Relation12)))) %>%
select(-Relation11, -Relation12)
df3$Relation1 = with(df3, factor(Relation1, levels = rev(levels(Relation1))))
ggheatmap <- ggplot(df3, aes(Relationship, Relation1, fill = Strength1))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "red", high = "green", mid = "lightgreen",
midpoint = 0.5, limit = c(0,1), space = "Lab",
name="Correlation") + theme_minimal()
ggheatmap +
geom_text(aes(Relationship, Relation1, label = Strength1), color = "black", size = 4) +
labs(x = expression(""),
y=expression(""))
结果
问题
我想动态绘制热图。因此,无论变量和观察的数量如何,都可以绘制热图而不需要为不同数量的变量更改代码?
有办法吗?
library(ggplot2)
library(tidyr)
library(dplyr)
无论列数和行数如何,此代码块都有效
df <-
df %>%
mutate(Relationship = rownames(.)) %>% #Replaces trial$Relationship <- rownames(df)
select(Relationship, everything()) %>% #Replaces trial1 <- subset(trial, select = c(13, 1, 2, 3,4,5,6,7,8,9,10,11,12))
gather('Relation1', 'Strength', -1) %>% #Replaces df2 <- gather(trial1, "Relation1", "Strength", 2:13)
mutate(Strength = round(Strength, digits = 2))
下面的代码块是获取列的因子水平的更简洁的方法
# Order Relatinoship variables by numeric suffix
# Since its a square matrix you only have to do it once for both columns
factorLevels <-
df %>%
select(Relationship) %>%
distinct() %>%
extract(Relationship, into = c("TK", "num"), "(\D+)(\d+)",
remove = FALSE, convert=TRUE) %>%
arrange(num) %>%
select(Relationship)
df <-
df %>%
mutate(Relationship = factor(Relationship, levels = factorLevels$Relationship),
Relation1 = factor(Relation1, levels = rev(factorLevels$Relationship)))
修改绘图代码
ggheatmap <- ggplot(df, aes(Relationship, Relation1, fill = Strength))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "red", high = "green", mid = "lightgreen",
midpoint = 0.5, limit = c(0,1), space = "Lab",
name="Correlation") + theme_minimal()
ggheatmap +
geom_text(aes(Relationship, Relation1, label = Strength), color = "black", size = 4) +
labs(x = expression(""),
y=expression(""))
在这种情况下,我觉得你的做法很迂回(我指的是heatmap with values (ggplot2))。此代码只需要 colnames(df) 和 rownames(df) are.
library(reshape2); library(ggplot2)
df2 <- melt(as.matrix(df), id.var = names(df)[1]) # as.matrix() fixes colnames of long df.
df2$Var2 <- with(df2, factor(Var2, levels=rev(levels(Var2))))
ggheatmap <- ggplot(df2, aes(Var1, Var2, fill=value)) +
geom_tile(color = "white")+
scale_fill_gradient2(low = "red", high = "green", mid = "lightgreen",
midpoint = 0.5, limit = c(0,1), space = "Lab",
name="Correlation") + theme_minimal()
ggheatmap +
geom_text(aes(label = round(value, 2)), color = "black", size = 4) +
labs(x = expression(""), y=expression(""))
使用下面的data.frame
数据
df <- read.table(text = c("
NA NA NA NA NA NA NA NA NA NA NA NA
0.4748 NA NA NA NA NA NA NA NA NA NA NA
0.905 0.5362 NA NA NA NA NA NA NA NA NA NA
0.0754 0.0118 0.0614 NA NA NA NA NA NA NA NA NA
0.8768 0.3958 0.7952 0.1034 NA NA NA NA NA NA NA NA
0.5708 0.2056 0.4984 0.2356 0.6736 NA NA NA NA NA NA NA
0.2248 0.6204 0.268 0.0014 0.183 0.0768 NA NA NA NA NA NA
0.483 0.9824 0.5314 0.0114 0.3906 0.1968 0.6308 NA NA NA NA NA
0.697 0.732 0.7604 0.0264 0.594 0.3334 0.416 0.7388 NA NA NA NA
0.2918 0.7286 0.3382 0.003 0.2386 0.1122 0.8712 0.7266 0.509 NA NA NA
0.5904 0.8352 0.6704 0.0188 0.4966 0.273 0.5192 0.8328 0.8736 0.5914 NA NA
0.3838 0.8768 0.4476 0.0042 0.3148 0.1498 0.7288 0.873 0.6178 0.8276 0.7432 NA
"), header = F)
colnames(df) <- c( "TK1", "TK2", "TK3", "TK4" , "TK5", "TK6", "TK7", "TK8", "TK9", "TK10", "TK11", "TK12")
rownames(df) <- c( "TK1", "TK2", "TK3", "TK4" , "TK5", "TK6", "TK7", "TK8", "TK9", "TK10", "TK11", "TK12")
df
# TK1 TK2 TK3 TK4 TK5 TK6 TK7 TK8 TK9 TK10 TK11 TK12
#TK1 NA NA NA NA NA NA NA NA NA NA NA NA
#TK2 0.4748 NA NA NA NA NA NA NA NA NA NA NA
#TK3 0.9050 0.5362 NA NA NA NA NA NA NA NA NA NA
#TK4 0.0754 0.0118 0.0614 NA NA NA NA NA NA NA NA NA
#TK5 0.8768 0.3958 0.7952 0.1034 NA NA NA NA NA NA NA NA
#TK6 0.5708 0.2056 0.4984 0.2356 0.6736 NA NA NA NA NA NA NA
#TK7 0.2248 0.6204 0.2680 0.0014 0.1830 0.0768 NA NA NA NA NA NA
#TK8 0.4830 0.9824 0.5314 0.0114 0.3906 0.1968 0.6308 NA NA NA NA NA
#TK9 0.6970 0.7320 0.7604 0.0264 0.5940 0.3334 0.4160 0.7388 NA NA NA NA
#TK10 0.2918 0.7286 0.3382 0.0030 0.2386 0.1122 0.8712 0.7266 0.5090 NA NA NA
#TK11 0.5904 0.8352 0.6704 0.0188 0.4966 0.2730 0.5192 0.8328 0.8736 0.5914 NA NA
#TK12 0.3838 0.8768 0.4476 0.0042 0.3148 0.1498 0.7288 0.8730 0.6178 0.8276 0.7432 NA
我无法更改输入数据。我将根据用户每次使用不同的变量以这种格式获取它。
我使用下面的代码创建了一个新变量Relationship
来将df
从宽格式转换为长格式,然后排列Relation1
和Relationship
变量的级别感谢
trial <- df
trial$Relationship <- rownames(df)
trial1 <- subset(trial, select = c(13, 1, 2, 3,4,5,6,7,8,9,10,11,12))
df2 <- gather(trial1, "Relation1", "Strength", 2:13)
df2 <- df2 %>%
dplyr::mutate(Strength1 = round(Strength, digits = 2))%>%
dplyr::select(Relationship,Relation1, Strength1 )
df3 <- df2 %>%
extract(Relationship, into = c("Relationship1", "Relationship2"), "(\D+)(\d+)",
remove = FALSE, convert=TRUE) %>%
mutate(Relationship = factor(Relationship, levels = paste0(Relationship1[1],
min(Relationship2):max(Relationship2)))) %>%
select(-Relationship1, -Relationship2) %>%
extract(Relation1, into = c("Relation11", "Relation12"), "(\D+)(\d+)",
remove = FALSE, convert=TRUE) %>%
mutate(Relation1 = factor(Relation1, levels = paste0(Relation11[1],
min(Relation12):max(Relation12)))) %>%
select(-Relation11, -Relation12)
df3$Relation1 = with(df3, factor(Relation1, levels = rev(levels(Relation1))))
ggheatmap <- ggplot(df3, aes(Relationship, Relation1, fill = Strength1))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "red", high = "green", mid = "lightgreen",
midpoint = 0.5, limit = c(0,1), space = "Lab",
name="Correlation") + theme_minimal()
ggheatmap +
geom_text(aes(Relationship, Relation1, label = Strength1), color = "black", size = 4) +
labs(x = expression(""),
y=expression(""))
结果
问题
我想动态绘制热图。因此,无论变量和观察的数量如何,都可以绘制热图而不需要为不同数量的变量更改代码?
有办法吗?
library(ggplot2)
library(tidyr)
library(dplyr)
无论列数和行数如何,此代码块都有效
df <-
df %>%
mutate(Relationship = rownames(.)) %>% #Replaces trial$Relationship <- rownames(df)
select(Relationship, everything()) %>% #Replaces trial1 <- subset(trial, select = c(13, 1, 2, 3,4,5,6,7,8,9,10,11,12))
gather('Relation1', 'Strength', -1) %>% #Replaces df2 <- gather(trial1, "Relation1", "Strength", 2:13)
mutate(Strength = round(Strength, digits = 2))
下面的代码块是获取列的因子水平的更简洁的方法
# Order Relatinoship variables by numeric suffix
# Since its a square matrix you only have to do it once for both columns
factorLevels <-
df %>%
select(Relationship) %>%
distinct() %>%
extract(Relationship, into = c("TK", "num"), "(\D+)(\d+)",
remove = FALSE, convert=TRUE) %>%
arrange(num) %>%
select(Relationship)
df <-
df %>%
mutate(Relationship = factor(Relationship, levels = factorLevels$Relationship),
Relation1 = factor(Relation1, levels = rev(factorLevels$Relationship)))
修改绘图代码
ggheatmap <- ggplot(df, aes(Relationship, Relation1, fill = Strength))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "red", high = "green", mid = "lightgreen",
midpoint = 0.5, limit = c(0,1), space = "Lab",
name="Correlation") + theme_minimal()
ggheatmap +
geom_text(aes(Relationship, Relation1, label = Strength), color = "black", size = 4) +
labs(x = expression(""),
y=expression(""))
在这种情况下,我觉得你的做法很迂回(我指的是heatmap with values (ggplot2))。此代码只需要 colnames(df) 和 rownames(df) are.
library(reshape2); library(ggplot2)
df2 <- melt(as.matrix(df), id.var = names(df)[1]) # as.matrix() fixes colnames of long df.
df2$Var2 <- with(df2, factor(Var2, levels=rev(levels(Var2))))
ggheatmap <- ggplot(df2, aes(Var1, Var2, fill=value)) +
geom_tile(color = "white")+
scale_fill_gradient2(low = "red", high = "green", mid = "lightgreen",
midpoint = 0.5, limit = c(0,1), space = "Lab",
name="Correlation") + theme_minimal()
ggheatmap +
geom_text(aes(label = round(value, 2)), color = "black", size = 4) +
labs(x = expression(""), y=expression(""))