seaborn 热图:自动排序标签以使颜色过渡更平滑
seaborn heatmap auto-ordering labels to smoothen color shifts
我想知道 seaborn 热图是否有内置(built-in)功能,或者至少有一种“聪明”的方法,可以根据数值对 x 轴和 y 轴的标签进行排序。
假设无序热图如下所示:
但是,目标是对标签重新排序,使颜色过渡变得“平滑”(smoothened)。排序之后应该看起来更像这样:
感谢您的建议!
此致
第二个图按 x 和 y 轴标签排序,而不是按值排序。您将无法让随机数据看起来像有序数据。您可以按一行和一列的值对数据进行排序,但其余数据将是固定的。这是绘制热图的代码,该热图按第 0 行和第 0 列的值排序。请注意图中间的 "cross":
import numpy as np
import pandas as pd  # required for pd.DataFrame below
import seaborn as sns

np.random.seed(0)  # fixed seed so the example is reproducible
sns.set()  # apply seaborn's default plot styling

# Example data: 10 rows x 12 columns of uniform random values.
uniform_data = np.random.rand(10, 12)
df = pd.DataFrame(uniform_data)
# Sort the rows by the values in column 0, then transpose and sort again by
# label 0 (the original row 0) to order the columns; transpose back afterwards.
df2 = df.sort_values(by=0).T.sort_values(by=0).T
ax = sns.heatmap(df2)
需要以某种方式量化 "smoothened colorshifts"。为此,可以定义成本函数。在最简单的情况下,这可能是相邻像素之间差异的总和。如果总和很小,则相邻像素的颜色差异很小。
然后可以随机交换矩阵中的列和行,并检查是否产生了更小的成本。迭代地执行此操作,会在某个时候产生平滑的热图。然而,这当然取决于初始热图中的随机程度。对于完全随机的像素,预计不会有太多优化。
下面的类实现了这样的优化。它会采用 nrand 个不同的起始排列,并对每个排列进行 niter 次交换。最好的结果会被保存,并可以通过 .get_opt() 获取。
import matplotlib.pyplot as plt
import numpy as np
class ReOrder:
    """Search for a row/column ordering of a 2-D array that looks smooth.

    Rows and columns are only permuted as a whole, never modified.  The
    search is a random hill climb: pairs of rows or columns are swapped at
    random and a swap is kept when it does not increase ``cost``.

    Attributes:
        a: the input data as a NumPy array (never reordered in place).
        indi, indj: identity index arrays for rows and columns.
        i, j: best row and column permutations found so far.
        nrand: default number of random restarts used by ``optimize``.
        niter: default number of swap attempts per start used by ``optimize``.
    """

    def __init__(self, array, nrand=2, niter=800):
        """Store the data and start from the identity ordering.

        Args:
            array: 2-D array-like holding the heatmap values.
            nrand: default number of random starting permutations.
            niter: default number of swap attempts per starting permutation.
        """
        # Converting up front lets nested lists use the fancy indexing in
        # ``apply``; an ndarray input is stored as-is, without copying.
        self.a = np.asanyarray(array)
        self.indi = np.arange(self.a.shape[0])
        self.indj = np.arange(self.a.shape[1])
        self.i = np.arange(self.a.shape[0])
        self.j = np.arange(self.a.shape[1])
        self.nrand = nrand
        self.niter = niter

    def apply(self, a, i, j):
        """Return ``a`` with columns reordered by ``j`` and rows by ``i``."""
        return a[:, j][i, :]

    def get_opt(self):
        """Return the data in the best ordering found so far."""
        return self.apply(self.a, self.i, self.j)

    def get_labels(self, x=None, y=None):
        """Return ``(x_labels, y_labels)`` arranged in the best ordering.

        Args:
            x: column labels; defaults to the column indices.
            y: row labels; defaults to the row indices.
        """
        if x is None:
            x = self.indj
        if y is None:
            y = self.indi
        return np.array(x)[self.j], np.array(y)[self.i]

    def cost(self, a=None):
        """Return the roughness of ``a`` (default: the current best ordering).

        For every interior cell the squared differences to its eight
        neighbours are added up, with the four diagonal neighbours weighted
        by 0.5.  Cells on the border only contribute as neighbours, so an
        array with fewer than three rows or columns has a cost of 0.
        """
        if a is None:
            a = self.get_opt()
        # Work in floating point so unsigned-integer data cannot wrap around
        # when a larger neighbour is subtracted from a smaller value.
        a = np.asarray(a, dtype=float)
        m = a[1:-1, 1:-1]
        diagonal = (
            (m - a[0:-2, 0:-2]) ** 2
            + (m - a[2:, 2:]) ** 2
            + (m - a[0:-2, 2:]) ** 2
            + (m - a[2:, 0:-2]) ** 2
        )
        b = (
            0.5 * diagonal
            + (m - a[0:-2, 1:-1]) ** 2
            + (m - a[1:-1, 0:-2]) ** 2
            + (m - a[2:, 1:-1]) ** 2
            + (m - a[1:-1, 2:]) ** 2
        )
        return b.sum()

    def randomize(self):
        """Return a random ``(row_permutation, column_permutation)`` pair."""
        # Columns are drawn before rows; keep this order so seeded runs
        # stay reproducible.
        newj = np.random.permutation(self.a.shape[1])
        newi = np.random.permutation(self.a.shape[0])
        return newi, newj

    def compare(self, i1, j1, i2, j2, a=None):
        """Return whichever of two orderings has the lower cost.

        The first ordering wins only when it is strictly cheaper; on a tie
        the second ordering ``(i2, j2)`` is returned.
        """
        if a is None:
            a = self.a
        if self.cost(self.apply(a, i1, j1)) < self.cost(self.apply(a, i2, j2)):
            return i1, j1
        return i2, j2

    def rowswap(self, i, j):
        """Return ``(i', j)`` where ``i'`` is ``i`` with two random entries swapped."""
        if self.indi.size < 2:
            # Nothing to swap; np.random.choice would raise on such input.
            return i, j
        rows = np.random.choice(self.indi, replace=False, size=2)
        ir = np.copy(i)
        ir[rows] = ir[rows[::-1]]
        return ir, j

    def colswap(self, i, j):
        """Return ``(i, j')`` where ``j'`` is ``j`` with two random entries swapped."""
        if self.indj.size < 2:
            # Nothing to swap; np.random.choice would raise on such input.
            return i, j
        cols = np.random.choice(self.indj, replace=False, size=2)
        jr = np.copy(j)
        jr[cols] = jr[cols[::-1]]
        return i, jr

    def swap(self, i, j):
        """Try one row swap and one column swap and return the best ordering.

        The two candidates are generated independently from ``(i, j)``.  The
        unchanged ordering is kept only if it is strictly cheaper than the
        better candidate, so moves of equal cost are accepted.
        """
        ic, jc = self.rowswap(i, j)
        ir, jr = self.colswap(i, j)
        io, jo = self.compare(ic, jc, ir, jr)
        return self.compare(i, j, io, jo)

    def optimize(self, nrand=None, niter=None):
        """Run the search and store the best ordering in ``self.i``/``self.j``.

        One pass starts from the current best ordering, followed by ``nrand``
        passes from random permutations; each pass performs ``niter`` swap
        attempts.  The cost is printed after every pass, then "finished".

        Args:
            nrand: number of random restarts; ``None`` uses ``self.nrand``.
            niter: swap attempts per pass; ``None`` uses ``self.niter``.
        """
        # Test against None explicitly so that an explicit 0 is honoured
        # instead of silently falling back to the default.
        nrand = self.nrand if nrand is None else nrand
        niter = self.niter if niter is None else niter
        i, j = self.i, self.j
        for _ in range(niter):
            i, j = self.swap(i, j)
        self.i, self.j = self.compare(i, j, self.i, self.j)
        print(self.cost())
        for _ in range(nrand):
            i, j = self.randomize()
            for _ in range(niter):
                i, j = self.swap(i, j)
            self.i, self.j = self.compare(i, j, self.i, self.j)
            print(self.cost())
        print("finished")
所以让我们取两个起始数组,
def get_sample_ord(nrows=10, ncols=12):
    """Return a shuffled copy of a perfectly ordered test matrix.

    The underlying matrix has the value ``row + column`` in every cell; its
    columns and rows are then permuted at random, so a perfectly smooth
    ordering exists and can be recovered.

    Args:
        nrows: number of rows (default 10).
        ncols: number of columns (default 12).

    Returns:
        Integer array of shape ``(nrows, ncols)``.
    """
    x, y = np.meshgrid(np.arange(ncols), np.arange(nrows))
    z = x + y
    # Draw the column permutation before the row permutation so that seeded
    # runs give the same result as before.
    j = np.random.permutation(ncols)
    i = np.random.permutation(nrows)
    return z[:, j][i, :]
def get_sample(nrows=10, ncols=12, high=120):
    """Return a matrix of independent random integers.

    Args:
        nrows: number of rows (default 10).
        ncols: number of columns (default 12).
        high: exclusive upper bound of the values; the lower bound is 0
            (default 120).

    Returns:
        Integer array of shape ``(nrows, ncols)`` with values in
        ``[0, high)``.
    """
    return np.random.randint(0, high, size=(nrows, ncols))
然后用上面的类对它们进行处理。
def _plot_start_and_optimized(sample, ax_start, ax_opt, nrand, niter):
    """Optimize ``sample`` and draw it before and after reordering."""
    reorder = ReOrder(sample)
    reorder.optimize(nrand=nrand, niter=niter)
    ax_start.imshow(sample)
    ax_opt.imshow(reorder.get_opt())
    xl, yl = reorder.get_labels()
    xticks = np.arange(sample.shape[1])
    yticks = np.arange(sample.shape[0])
    ax_start.set(
        xticks=xticks,
        yticks=yticks,
        title=f"Start, cost={reorder.cost(sample)}",
    )
    # Tick labels show which original column/row ended up at each position.
    ax_opt.set(
        xticks=xticks,
        xticklabels=xl,
        yticks=yticks,
        yticklabels=yl,
        title=f"Optimized, cost={reorder.cost()}",
    )


def reorder_plot(nrand=4, niter=10000):
    """Plot two sample matrices before (top row) and after (bottom row) reordering.

    The left column uses the fully random sample from ``get_sample`` and the
    right column the shuffled ordered sample from ``get_sample_ord``.

    Args:
        nrand: number of random restarts passed to ``ReOrder.optimize``.
        niter: number of swap attempts per pass passed to ``ReOrder.optimize``.
    """
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(
        nrows=2, ncols=2, constrained_layout=True
    )
    fig.suptitle(f"nrand={nrand}, niter={niter}")

    # Each sample is drawn immediately before it is optimized, so the order
    # of random-number consumption is unchanged.
    z1 = get_sample()
    _plot_start_and_optimized(z1, ax1, ax3, nrand, niter)

    z2 = get_sample_ord()
    _plot_start_and_optimized(z2, ax2, ax4, nrand, niter)
# Build the 2x2 comparison figure using nrand=4 and niter=10000.
reorder_plot(nrand=4, niter=10000)
# Display the figure.
plt.show()
完全随机的矩阵(左列)只被平滑了一点点——不过看起来还是比原来更有序一些,其成本值仍然相当高。然而,不那么随机的矩阵被完美地平滑了,成本也显著降低。
我想知道 seaborn 热图是否有内置(built-in)功能,或者至少有一种“聪明”的方法,可以根据数值对 x 轴和 y 轴的标签进行排序。
假设无序热图如下所示:
但是,目标是对标签重新排序,使颜色过渡变得“平滑”(smoothened)。排序之后应该看起来更像这样:
感谢您的建议!
此致
第二个图按 x 和 y 轴标签排序,而不是按值排序。您将无法让随机数据看起来像有序数据。您可以按一行和一列的值对数据进行排序,但其余数据将是固定的。这是绘制热图的代码,该热图按第 0 行和第 0 列的值排序。请注意图中间的 "cross":
import numpy as np
import pandas as pd  # required for pd.DataFrame below
import seaborn as sns

np.random.seed(0)  # fixed seed so the example is reproducible
sns.set()  # apply seaborn's default plot styling

# Example data: 10 rows x 12 columns of uniform random values.
uniform_data = np.random.rand(10, 12)
df = pd.DataFrame(uniform_data)
# Sort the rows by the values in column 0, then transpose and sort again by
# label 0 (the original row 0) to order the columns; transpose back afterwards.
df2 = df.sort_values(by=0).T.sort_values(by=0).T
ax = sns.heatmap(df2)
需要以某种方式量化 "smoothened colorshifts"。为此,可以定义成本函数。在最简单的情况下,这可能是相邻像素之间差异的总和。如果总和很小,则相邻像素的颜色差异很小。
然后可以随机交换矩阵中的列和行,并检查是否产生了更小的成本。迭代地执行此操作,会在某个时候产生平滑的热图。然而,这当然取决于初始热图中的随机程度。对于完全随机的像素,预计不会有太多优化。
下面的类实现了这样的优化。它会采用 nrand 个不同的起始排列,并对每个排列进行 niter 次交换。最好的结果会被保存,并可以通过 .get_opt() 获取。
import matplotlib.pyplot as plt
import numpy as np
class ReOrder:
    """Search for a row/column ordering of a 2-D array that looks smooth.

    Rows and columns are only permuted as a whole, never modified.  The
    search is a random hill climb: pairs of rows or columns are swapped at
    random and a swap is kept when it does not increase ``cost``.

    Attributes:
        a: the input data as a NumPy array (never reordered in place).
        indi, indj: identity index arrays for rows and columns.
        i, j: best row and column permutations found so far.
        nrand: default number of random restarts used by ``optimize``.
        niter: default number of swap attempts per start used by ``optimize``.
    """

    def __init__(self, array, nrand=2, niter=800):
        """Store the data and start from the identity ordering.

        Args:
            array: 2-D array-like holding the heatmap values.
            nrand: default number of random starting permutations.
            niter: default number of swap attempts per starting permutation.
        """
        # Converting up front lets nested lists use the fancy indexing in
        # ``apply``; an ndarray input is stored as-is, without copying.
        self.a = np.asanyarray(array)
        self.indi = np.arange(self.a.shape[0])
        self.indj = np.arange(self.a.shape[1])
        self.i = np.arange(self.a.shape[0])
        self.j = np.arange(self.a.shape[1])
        self.nrand = nrand
        self.niter = niter

    def apply(self, a, i, j):
        """Return ``a`` with columns reordered by ``j`` and rows by ``i``."""
        return a[:, j][i, :]

    def get_opt(self):
        """Return the data in the best ordering found so far."""
        return self.apply(self.a, self.i, self.j)

    def get_labels(self, x=None, y=None):
        """Return ``(x_labels, y_labels)`` arranged in the best ordering.

        Args:
            x: column labels; defaults to the column indices.
            y: row labels; defaults to the row indices.
        """
        if x is None:
            x = self.indj
        if y is None:
            y = self.indi
        return np.array(x)[self.j], np.array(y)[self.i]

    def cost(self, a=None):
        """Return the roughness of ``a`` (default: the current best ordering).

        For every interior cell the squared differences to its eight
        neighbours are added up, with the four diagonal neighbours weighted
        by 0.5.  Cells on the border only contribute as neighbours, so an
        array with fewer than three rows or columns has a cost of 0.
        """
        if a is None:
            a = self.get_opt()
        # Work in floating point so unsigned-integer data cannot wrap around
        # when a larger neighbour is subtracted from a smaller value.
        a = np.asarray(a, dtype=float)
        m = a[1:-1, 1:-1]
        diagonal = (
            (m - a[0:-2, 0:-2]) ** 2
            + (m - a[2:, 2:]) ** 2
            + (m - a[0:-2, 2:]) ** 2
            + (m - a[2:, 0:-2]) ** 2
        )
        b = (
            0.5 * diagonal
            + (m - a[0:-2, 1:-1]) ** 2
            + (m - a[1:-1, 0:-2]) ** 2
            + (m - a[2:, 1:-1]) ** 2
            + (m - a[1:-1, 2:]) ** 2
        )
        return b.sum()

    def randomize(self):
        """Return a random ``(row_permutation, column_permutation)`` pair."""
        # Columns are drawn before rows; keep this order so seeded runs
        # stay reproducible.
        newj = np.random.permutation(self.a.shape[1])
        newi = np.random.permutation(self.a.shape[0])
        return newi, newj

    def compare(self, i1, j1, i2, j2, a=None):
        """Return whichever of two orderings has the lower cost.

        The first ordering wins only when it is strictly cheaper; on a tie
        the second ordering ``(i2, j2)`` is returned.
        """
        if a is None:
            a = self.a
        if self.cost(self.apply(a, i1, j1)) < self.cost(self.apply(a, i2, j2)):
            return i1, j1
        return i2, j2

    def rowswap(self, i, j):
        """Return ``(i', j)`` where ``i'`` is ``i`` with two random entries swapped."""
        if self.indi.size < 2:
            # Nothing to swap; np.random.choice would raise on such input.
            return i, j
        rows = np.random.choice(self.indi, replace=False, size=2)
        ir = np.copy(i)
        ir[rows] = ir[rows[::-1]]
        return ir, j

    def colswap(self, i, j):
        """Return ``(i, j')`` where ``j'`` is ``j`` with two random entries swapped."""
        if self.indj.size < 2:
            # Nothing to swap; np.random.choice would raise on such input.
            return i, j
        cols = np.random.choice(self.indj, replace=False, size=2)
        jr = np.copy(j)
        jr[cols] = jr[cols[::-1]]
        return i, jr

    def swap(self, i, j):
        """Try one row swap and one column swap and return the best ordering.

        The two candidates are generated independently from ``(i, j)``.  The
        unchanged ordering is kept only if it is strictly cheaper than the
        better candidate, so moves of equal cost are accepted.
        """
        ic, jc = self.rowswap(i, j)
        ir, jr = self.colswap(i, j)
        io, jo = self.compare(ic, jc, ir, jr)
        return self.compare(i, j, io, jo)

    def optimize(self, nrand=None, niter=None):
        """Run the search and store the best ordering in ``self.i``/``self.j``.

        One pass starts from the current best ordering, followed by ``nrand``
        passes from random permutations; each pass performs ``niter`` swap
        attempts.  The cost is printed after every pass, then "finished".

        Args:
            nrand: number of random restarts; ``None`` uses ``self.nrand``.
            niter: swap attempts per pass; ``None`` uses ``self.niter``.
        """
        # Test against None explicitly so that an explicit 0 is honoured
        # instead of silently falling back to the default.
        nrand = self.nrand if nrand is None else nrand
        niter = self.niter if niter is None else niter
        i, j = self.i, self.j
        for _ in range(niter):
            i, j = self.swap(i, j)
        self.i, self.j = self.compare(i, j, self.i, self.j)
        print(self.cost())
        for _ in range(nrand):
            i, j = self.randomize()
            for _ in range(niter):
                i, j = self.swap(i, j)
            self.i, self.j = self.compare(i, j, self.i, self.j)
            print(self.cost())
        print("finished")
所以让我们取两个起始数组,
def get_sample_ord(nrows=10, ncols=12):
    """Return a shuffled copy of a perfectly ordered test matrix.

    The underlying matrix has the value ``row + column`` in every cell; its
    columns and rows are then permuted at random, so a perfectly smooth
    ordering exists and can be recovered.

    Args:
        nrows: number of rows (default 10).
        ncols: number of columns (default 12).

    Returns:
        Integer array of shape ``(nrows, ncols)``.
    """
    x, y = np.meshgrid(np.arange(ncols), np.arange(nrows))
    z = x + y
    # Draw the column permutation before the row permutation so that seeded
    # runs give the same result as before.
    j = np.random.permutation(ncols)
    i = np.random.permutation(nrows)
    return z[:, j][i, :]
def get_sample(nrows=10, ncols=12, high=120):
    """Return a matrix of independent random integers.

    Args:
        nrows: number of rows (default 10).
        ncols: number of columns (default 12).
        high: exclusive upper bound of the values; the lower bound is 0
            (default 120).

    Returns:
        Integer array of shape ``(nrows, ncols)`` with values in
        ``[0, high)``.
    """
    return np.random.randint(0, high, size=(nrows, ncols))
然后用上面的类对它们进行处理。
def _plot_start_and_optimized(sample, ax_start, ax_opt, nrand, niter):
    """Optimize ``sample`` and draw it before and after reordering."""
    reorder = ReOrder(sample)
    reorder.optimize(nrand=nrand, niter=niter)
    ax_start.imshow(sample)
    ax_opt.imshow(reorder.get_opt())
    xl, yl = reorder.get_labels()
    xticks = np.arange(sample.shape[1])
    yticks = np.arange(sample.shape[0])
    ax_start.set(
        xticks=xticks,
        yticks=yticks,
        title=f"Start, cost={reorder.cost(sample)}",
    )
    # Tick labels show which original column/row ended up at each position.
    ax_opt.set(
        xticks=xticks,
        xticklabels=xl,
        yticks=yticks,
        yticklabels=yl,
        title=f"Optimized, cost={reorder.cost()}",
    )


def reorder_plot(nrand=4, niter=10000):
    """Plot two sample matrices before (top row) and after (bottom row) reordering.

    The left column uses the fully random sample from ``get_sample`` and the
    right column the shuffled ordered sample from ``get_sample_ord``.

    Args:
        nrand: number of random restarts passed to ``ReOrder.optimize``.
        niter: number of swap attempts per pass passed to ``ReOrder.optimize``.
    """
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(
        nrows=2, ncols=2, constrained_layout=True
    )
    fig.suptitle(f"nrand={nrand}, niter={niter}")

    # Each sample is drawn immediately before it is optimized, so the order
    # of random-number consumption is unchanged.
    z1 = get_sample()
    _plot_start_and_optimized(z1, ax1, ax3, nrand, niter)

    z2 = get_sample_ord()
    _plot_start_and_optimized(z2, ax2, ax4, nrand, niter)
# Build the 2x2 comparison figure using nrand=4 and niter=10000.
reorder_plot(nrand=4, niter=10000)
# Display the figure.
plt.show()
完全随机的矩阵(左列)只被平滑了一点点——不过看起来还是比原来更有序一些,其成本值仍然相当高。然而,不那么随机的矩阵被完美地平滑了,成本也显著降低。