R - 分类变量的一维 "Heatmap"
R - One-dimensional "Heatmap" for categorical variables
想要创建一维热图堆栈:
- 显示中心性(例如平均值,用高亮表示)
- 显示分散度(例如标准差,用灰度渐变表示)
注意事项:中心性和分散度都不依赖于样本大小。每个变量的条形长度应该是恒定的,而样本大小则不(一定)恒定。
例如它看起来如何
这里是类似变量的最小示例:
library(plyr)
# Answer categories in ascending order, and the numeric score assigned to
# each category (same position in both vectors).
response_levels <- c("no", "rather no", "rather yes", "yes")
level_scores <- c(0, 0.333, 0.666, 1)

# Turn raw answers into an ordered factor over `response_levels`.
as_response <- function(answers) {
  factor(answers, levels = response_levels, ordered = TRUE)
}

# Re-code a response factor to its numeric score.
# The lookup is done by label for the whole vector at once, so it does not
# depend on the factor's internal level codes; NA answers stay NA.
score_response <- function(response) {
  level_scores[match(as.character(response), response_levels)]
}

v1 <- c("yes", "rather no", "yes", "yes", "yes", "rather yes", "rather yes", "rather no", "rather no", "no", "no", "no")
(v1 <- as_response(v1)) # order factor values & show
(v1n <- score_response(v1)) # re-code to numeric & show
(v1n.mean <- mean(v1n)) # calculate mean & show
(v1n.sd <- sd(v1n)) # calculate standard deviation & show

v2 <- c("rather yes", "rather yes", "rather no", "rather no", "rather no", "rather no", "rather no", "rather no", "rather no")
(v2 <- as_response(v2))
(v2n <- score_response(v2))
(v2n.mean <- mean(v2n))
(v2n.sd <- sd(v2n))

v3 <- c("yes", "yes", "yes", "rather yes", "rather yes", "rather yes", "rather no", "no")
(v3 <- as_response(v3))
(v3n <- score_response(v3))
(v3n.mean <- mean(v3n))
(v3n.sd <- sd(v3n))
更新答案:
此答案已更新,因为
1.题中的数据v1,v2,v3已改
2. 添加了三个栏的标签
上半部分还是原答案为主。以下是回应 OP 澄清的新答案。
大部分是原回答
这就是您想要的图。但是,在本不存在集中趋势的地方,它无法显示集中趋势。在我们查看图表之后,我将更全面地讨论这一点。
想法是制作一个空白图,然后为每个变量(v1、v2、v3)绘制一个灰度条。图中响应数量最少的地方将是黑色的。响应最多的区域将是白色的。在这两者之间,灰度级将根据响应数量按比例缩放。
## Collect the variables in a named list so the bars and their axis labels
## are derived from the same object
Responses <- list(v1 = v1, v2 = v2, v3 = v3)
n_vars <- length(Responses)
n_levels <- nlevels(Responses[[1]])

## 100 gray levels for a near-continuous gradient:
## index 1 is the darkest (almost black), index 100 the lightest (almost white)
GrayScale <- gray.colors(100, start = 0.05, end = 0.97)

## Rescale numeric values linearly onto palette indices 1..n_colors.
## When all values are equal there is no gradient to show; every value then
## ties for the maximum and gets the lightest color (instead of 0/0 = NaN,
## which would leave the bar undrawn).
to_palette_index <- function(values, n_colors = length(GrayScale)) {
  span <- max(values) - min(values)
  if (isTRUE(span == 0)) {
    return(rep(n_colors, length(values)))
  }
  round((n_colors - 1) * (values - min(values)) / span) + 1
}

## Blank canvas: x spans the answer levels, one row per variable
plot(NULL, type = "n", xlab = "", ylab = "", bty = "n", xaxt = "n", yaxt = "n",
     xlim = c(1, n_levels), ylim = c(1, n_vars + 1))

## Fine x grid; each pair of neighbouring points bounds one thin strip
grid_x <- seq(1, n_levels, 0.01)
strip_left <- grid_x[-length(grid_x)]
strip_right <- grid_x[-1]

## Plot all of the bars
for (j in seq_along(Responses)) {
  ## Palette index at each answer level, from how often it was chosen
  counts <- as.vector(table(Responses[[j]]))
  level_index <- to_palette_index(counts)
  ## Interpolate between levels so the bar fades smoothly
  strip_index <- round(approx(seq_len(n_levels), level_index, grid_x)$y)
  ## rect() is vectorised: draw every strip of this bar in one call, each
  ## coloured by the interpolated value at its left edge
  rect(strip_left, j, strip_right, j + 0.75,
       col = GrayScale[strip_index[-length(strip_index)]], border = NA)
}

## Add labels: level names above the bars, variable names beside them
text(seq_len(n_levels), n_vars + 1, levels(Responses[[1]]))
axis(2, at = seq_along(Responses) + 0.4, labels = names(Responses),
     lwd = 0, lwd.ticks = 1, las = 1)
已修改问题的答案
这个答案只是使用您计算出的均值和标准差来绘制高斯分布。
高斯分布的绘制风格与之前的答案相同:白色代表均值,距离均值最远的点为黑色。
## Mean and standard deviation of each re-coded variable, in bar order
## (bottom bar first), with the matching axis labels
Means <- c(v1n.mean, v2n.mean, v3n.mean)
SD <- c(v1n.sd, v2n.sd, v3n.sd)
bar_labels <- c("v1", "v2", "v3")
n_vars <- length(Means)
n_levels <- nlevels(v1)

## 100 gray levels for a near-continuous gradient:
## index 1 is the darkest (almost black), index 100 the lightest (almost white)
GrayScale <- gray.colors(100, start = 0.05, end = 0.97)

## Rescale numeric values linearly onto palette indices 1..n_colors.
## When all values are equal there is no gradient to show; every value then
## ties for the maximum and gets the lightest color (instead of 0/0 = NaN,
## which would leave the bar undrawn).
to_palette_index <- function(values, n_colors = length(GrayScale)) {
  span <- max(values) - min(values)
  if (isTRUE(span == 0)) {
    return(rep(n_colors, length(values)))
  }
  round((n_colors - 1) * (values - min(values)) / span) + 1
}

## Blank canvas: x spans the answer levels, one row per variable
plot(NULL, type = "n", xlab = "", ylab = "", bty = "n", xaxt = "n", yaxt = "n",
     xlim = c(1, n_levels), ylim = c(1, n_vars + 1))

## x grid in plot coordinates, and the same points on the 0..1 score scale
## on which the means and standard deviations were computed
grid_x <- seq(1, n_levels, 0.03)
score_x <- (grid_x - 1) / (n_levels - 1)
strip_left <- grid_x[-length(grid_x)]
strip_right <- grid_x[-1]

for (j in seq_along(Means)) {
  ## Normal density for this variable, rescaled so its peak is the lightest
  ## gray and its lowest point on the grid the darkest
  dens <- dnorm(score_x, Means[j], SD[j])
  strip_index <- to_palette_index(dens)
  ## rect() is vectorised: draw every strip of this bar in one call, each
  ## coloured by the density at its left edge
  rect(strip_left, j, strip_right, j + 0.75,
       col = GrayScale[strip_index[-length(strip_index)]], border = NA)
}

## Add labels: level names above the bars, variable names beside them
text(seq_len(n_levels), n_vars + 1, levels(v1))
axis(2, at = seq_along(Means) + 0.4, labels = bar_labels,
     lwd = 0, lwd.ticks = 1, las = 1)
想要创建一维热图堆栈:
- 显示中心性(例如平均值,用高亮表示)
- 显示分散度(例如标准差,用灰度渐变表示)
注意事项:中心性和分散度都不依赖于样本大小。每个变量的条形长度应该是恒定的,而样本大小则不(一定)恒定。
例如它看起来如何
这里是类似变量的最小示例:
library(plyr)
# Answer categories in ascending order, and the numeric score assigned to
# each category (same position in both vectors).
response_levels <- c("no", "rather no", "rather yes", "yes")
level_scores <- c(0, 0.333, 0.666, 1)

# Turn raw answers into an ordered factor over `response_levels`.
as_response <- function(answers) {
  factor(answers, levels = response_levels, ordered = TRUE)
}

# Re-code a response factor to its numeric score.
# The lookup is done by label for the whole vector at once, so it does not
# depend on the factor's internal level codes; NA answers stay NA.
score_response <- function(response) {
  level_scores[match(as.character(response), response_levels)]
}

v1 <- c("yes", "rather no", "yes", "yes", "yes", "rather yes", "rather yes", "rather no", "rather no", "no", "no", "no")
(v1 <- as_response(v1)) # order factor values & show
(v1n <- score_response(v1)) # re-code to numeric & show
(v1n.mean <- mean(v1n)) # calculate mean & show
(v1n.sd <- sd(v1n)) # calculate standard deviation & show

v2 <- c("rather yes", "rather yes", "rather no", "rather no", "rather no", "rather no", "rather no", "rather no", "rather no")
(v2 <- as_response(v2))
(v2n <- score_response(v2))
(v2n.mean <- mean(v2n))
(v2n.sd <- sd(v2n))

v3 <- c("yes", "yes", "yes", "rather yes", "rather yes", "rather yes", "rather no", "no")
(v3 <- as_response(v3))
(v3n <- score_response(v3))
(v3n.mean <- mean(v3n))
(v3n.sd <- sd(v3n))
更新答案:
此答案已更新,因为
1.题中的数据v1,v2,v3已改
2. 添加了三个栏的标签
上半部分还是原答案为主。以下是回应 OP 澄清的新答案。
大部分是原回答
这就是您想要的图。但是,在本不存在集中趋势的地方,它无法显示集中趋势。在我们查看图表之后,我将更全面地讨论这一点。
想法是制作一个空白图,然后为每个变量(v1、v2、v3)绘制一个灰度条。图中响应数量最少的地方将是黑色的。响应最多的区域将是白色的。在这两者之间,灰度级将根据响应数量按比例缩放。
## Collect the variables in a named list so the bars and their axis labels
## are derived from the same object
Responses <- list(v1 = v1, v2 = v2, v3 = v3)
n_vars <- length(Responses)
n_levels <- nlevels(Responses[[1]])

## 100 gray levels for a near-continuous gradient:
## index 1 is the darkest (almost black), index 100 the lightest (almost white)
GrayScale <- gray.colors(100, start = 0.05, end = 0.97)

## Rescale numeric values linearly onto palette indices 1..n_colors.
## When all values are equal there is no gradient to show; every value then
## ties for the maximum and gets the lightest color (instead of 0/0 = NaN,
## which would leave the bar undrawn).
to_palette_index <- function(values, n_colors = length(GrayScale)) {
  span <- max(values) - min(values)
  if (isTRUE(span == 0)) {
    return(rep(n_colors, length(values)))
  }
  round((n_colors - 1) * (values - min(values)) / span) + 1
}

## Blank canvas: x spans the answer levels, one row per variable
plot(NULL, type = "n", xlab = "", ylab = "", bty = "n", xaxt = "n", yaxt = "n",
     xlim = c(1, n_levels), ylim = c(1, n_vars + 1))

## Fine x grid; each pair of neighbouring points bounds one thin strip
grid_x <- seq(1, n_levels, 0.01)
strip_left <- grid_x[-length(grid_x)]
strip_right <- grid_x[-1]

## Plot all of the bars
for (j in seq_along(Responses)) {
  ## Palette index at each answer level, from how often it was chosen
  counts <- as.vector(table(Responses[[j]]))
  level_index <- to_palette_index(counts)
  ## Interpolate between levels so the bar fades smoothly
  strip_index <- round(approx(seq_len(n_levels), level_index, grid_x)$y)
  ## rect() is vectorised: draw every strip of this bar in one call, each
  ## coloured by the interpolated value at its left edge
  rect(strip_left, j, strip_right, j + 0.75,
       col = GrayScale[strip_index[-length(strip_index)]], border = NA)
}

## Add labels: level names above the bars, variable names beside them
text(seq_len(n_levels), n_vars + 1, levels(Responses[[1]]))
axis(2, at = seq_along(Responses) + 0.4, labels = names(Responses),
     lwd = 0, lwd.ticks = 1, las = 1)
已修改问题的答案
这个答案只是使用您计算出的均值和标准差来绘制高斯分布。
高斯分布的绘制风格与之前的答案相同:白色代表均值,距离均值最远的点为黑色。
## Mean and standard deviation of each re-coded variable, in bar order
## (bottom bar first), with the matching axis labels
Means <- c(v1n.mean, v2n.mean, v3n.mean)
SD <- c(v1n.sd, v2n.sd, v3n.sd)
bar_labels <- c("v1", "v2", "v3")
n_vars <- length(Means)
n_levels <- nlevels(v1)

## 100 gray levels for a near-continuous gradient:
## index 1 is the darkest (almost black), index 100 the lightest (almost white)
GrayScale <- gray.colors(100, start = 0.05, end = 0.97)

## Rescale numeric values linearly onto palette indices 1..n_colors.
## When all values are equal there is no gradient to show; every value then
## ties for the maximum and gets the lightest color (instead of 0/0 = NaN,
## which would leave the bar undrawn).
to_palette_index <- function(values, n_colors = length(GrayScale)) {
  span <- max(values) - min(values)
  if (isTRUE(span == 0)) {
    return(rep(n_colors, length(values)))
  }
  round((n_colors - 1) * (values - min(values)) / span) + 1
}

## Blank canvas: x spans the answer levels, one row per variable
plot(NULL, type = "n", xlab = "", ylab = "", bty = "n", xaxt = "n", yaxt = "n",
     xlim = c(1, n_levels), ylim = c(1, n_vars + 1))

## x grid in plot coordinates, and the same points on the 0..1 score scale
## on which the means and standard deviations were computed
grid_x <- seq(1, n_levels, 0.03)
score_x <- (grid_x - 1) / (n_levels - 1)
strip_left <- grid_x[-length(grid_x)]
strip_right <- grid_x[-1]

for (j in seq_along(Means)) {
  ## Normal density for this variable, rescaled so its peak is the lightest
  ## gray and its lowest point on the grid the darkest
  dens <- dnorm(score_x, Means[j], SD[j])
  strip_index <- to_palette_index(dens)
  ## rect() is vectorised: draw every strip of this bar in one call, each
  ## coloured by the density at its left edge
  rect(strip_left, j, strip_right, j + 0.75,
       col = GrayScale[strip_index[-length(strip_index)]], border = NA)
}

## Add labels: level names above the bars, variable names beside them
text(seq_len(n_levels), n_vars + 1, levels(v1))
axis(2, at = seq_along(Means) + 0.4, labels = bar_labels,
     lwd = 0, lwd.ticks = 1, las = 1)