Python Bokeh:如何制作相关性图?
python bokeh, how to make a correlation plot?
如何在 Bokeh 中制作相关热图?
import pandas as pd
import bokeh.charts
# Build a sample DataFrame and compute its pairwise column correlations.
# NOTE(review): makeTimeDataFrame presumably yields columns A-D (the melt call
# below names them) -- confirm against the installed pandas version.
df = pd.util.testing.makeTimeDataFrame(1000)
c = df.corr()
# First attempt: pass the wide correlation matrix straight to HeatMap.
p = bokeh.charts.HeatMap(c) # not right
# try to make it a long form
# (and it's ugly in pandas to use 'index' in melt)
# Copy the index into a regular column 'x' so melt can use it as the id column.
c['x'] = c.index
# Long form: one row per (x, variable, value) triple for columns A-D.
c = pd.melt(c, 'x', ['A','B','C','D'])
# this shows the right 4x4 matrix, but values are still wrong
p = bokeh.charts.HeatMap(c, x = 'x', y = 'variable', values = 'value')
顺便问一下,我可以在侧面放置一个颜色条(colorbar),而不是绘图区内的图例吗?另外,如何选择颜色范围/映射,例如从深蓝色 (-1) 到白色 (0) 再到深红色 (+1)?
在较新版本的 Bokeh 中,您应该使用 bokeh.plotting 接口。您可以在图库中看到使用此接口生成的分类热图示例:
http://docs.bokeh.org/en/latest/docs/gallery/categorical.html
关于图例,对于像这样的颜色映射,您实际上需要的是离散的 ColorBar,而不是 Legend。这是一项新功能,将包含在本周晚些时候即将发布的 0.12.2 版本中(今天的日期:2016-08-28)。这些新的 ColorBar 注释可以位于主绘图区域之外。
GitHub 仓库中也有一个示例:
https://github.com/bokeh/bokeh/blob/master/examples/plotting/file/color_data_map.py
请注意,最后一个示例还使用了另一个新功能在浏览器中进行颜色映射,而不必预先计算 python 中的颜色。基本上全部看起来像:
# Create a color mapper from a palette; any list of colors works.
mapper = LinearColorMapper(palette=Viridis3, low=0, high=100)

p = figure(toolbar_location=None, tools='', title=title)
p.circle(
    # The trailing comma after `source=source` is required: without it the
    # call is a syntax error.
    x='x', y='y', source=source,
    # Use the mapper to colormap according to the 'z' column (in the browser).
    fill_color={'field': 'z', 'transform': mapper},
)

# Create a ColorBar and add it to the right-hand side of the plot.
color_bar = ColorBar(color_mapper=mapper, location=(0, 0))
p.add_layout(color_bar, 'right')
还有更复杂的选项,例如,如果您想更精细地控制颜色条上的刻度,可以像在普通 Axis 上一样添加自定义的刻度生成器(ticker)或刻度格式化器(tick formatter),以实现如下效果:
不清楚您的实际要求是什么,所以我只是提一下以防知道它有用。
最后,Bokeh 是一个大型项目,找到这样做的最佳方法通常需要询问更多信息和上下文,并且通常需要进行讨论。 SO 似乎不赞成这种协作帮助(他们是 "not real answers"),所以我鼓励您也随时查看 project Discourse 寻求帮助。
我尝试使用 Bokeh 库创建交互式相关图。该代码是 SO 和其他网站上可用的不同解决方案的组合。在上面的解决方案中,bigreddot 已经详细解释了事情。相关热图的代码如下:
import pandas as pd
from bokeh.io import output_file, show
from bokeh.models import BasicTicker, ColorBar, LinearColorMapper, ColumnDataSource, PrintfTickFormatter
from bokeh.plotting import figure
from bokeh.transform import transform
from bokeh.palettes import Viridis3, Viridis256
# Read your data into a pandas DataFrame.
# NOTE: the path below is a placeholder -- replace it with your own CSV file.
data = pd.read_csv("path/to/your/data.csv")
# Build the correlation matrix with pandas.
df = data.corr()
df.index.name = 'AllColumns1'
df.columns.name = 'AllColumns2'
# Reshape to long form: one row per (AllColumns1, AllColumns2, value) triple.
df = df.stack().rename("value").reset_index()
# Write the rendered plot to a standalone HTML file.
output_file("CorrelationPlot.html")

# Map values onto the Viridis256 palette, spanning the observed minimum and
# maximum.  To use specific colors instead, pass your own list as `palette`,
# e.g. ['#d7191c', '#fdae61', '#ffffbf', '#a6d96a', '#1a9641'].
mapper = LinearColorMapper(
    palette=Viridis256,
    low=df.value.min(),
    high=df.value.max(),
)

# Toolbar tools and the categorical labels used for each axis.
TOOLS = "box_select,lasso_select,pan,wheel_zoom,box_zoom,reset,help"
x_labels = list(df.AllColumns1.drop_duplicates())
y_labels = list(df.AllColumns2.drop_duplicates())

p = figure(
    title="Correlation plot",
    tools=TOOLS,
    toolbar_location="right",
    x_axis_location="below",
    x_range=x_labels,
    y_range=y_labels,
    plot_width=1200,
    plot_height=1000,
)

# One unit-sized rectangle per row of the long-form frame, filled by `value`.
heatmap_source = ColumnDataSource(df)
p.rect(
    x="AllColumns1",
    y="AllColumns2",
    width=1,
    height=1,
    source=heatmap_source,
    line_color=None,
    fill_color=transform('value', mapper),
)

# Attach a color bar (acting as the legend) to the right of the plot.
color_bar = ColorBar(
    color_mapper=mapper,
    location=(0, 0),
    ticker=BasicTicker(desired_num_ticks=10),
)
p.add_layout(color_bar, 'right')

show(p)
参考文献:
[1] https://docs.bokeh.org/en/latest/docs/user_guide.html
[2]
所以我想我可以提供一个基线代码来帮助完成你所要求的,结合使用上面的答案和一些额外的预处理。
假设您已经加载了数据帧 df(在本例中为 UCI Adult Data)并计算了相关系数(p_corr)。
import bisect
#
from math import pi
from numpy import arange
from itertools import chain
from collections import OrderedDict
#
from bokeh.palettes import RdBu as colors # just make sure to import a palette that centers on white (-ish)
from bokeh.models import ColorBar, LinearColorMapper
# Take the 9-color variant of the imported palette (an odd count gives zero
# correlation its own distinct color) and flip its order.
colors = list(colors[9])[::-1]
# Axis labels are the DataFrame's column labels.
labels = df.columns
nlabels = len(labels)
def get_bounds(n):
    """Return the edges of the quads that tile an ``n`` x ``n`` grid.

    Cells are listed row by row (lowest row first, left to right within a
    row); every cell is one unit wide and one unit tall.

    Args:
        n: Number of features, i.e. the number of cells along each side.

    Returns:
        A ``(top, bottom, left, right)`` tuple of lists, each of length
        ``n * n``, holding the corresponding edge coordinate of every cell.
    """
    # Use the argument rather than the module-level ``nlabels`` so the result
    # depends only on what the caller passes in.
    cells = range(n)
    bottom = [row for row in cells for _ in cells]
    top = [row + 1 for row in cells for _ in cells]
    left = [col for _ in cells for col in cells]
    right = [col + 1 for _ in cells for col in cells]
    return top, bottom, left, right
def get_colors(corr_array, colors):
    """Map correlation coefficients onto a palette.

    The interval [-1, 1] is split into ``len(colors)`` equal-width bins and
    each value receives the color of the bin it falls in.

    Args:
        corr_array: Iterable of correlation coefficients.
        colors: Sequence of colors ordered from the -1 end to the +1 end.

    Returns:
        A list with one color per input value, in input order.

    Raises:
        ValueError: If ``colors`` is empty.
    """
    ncolors = len(colors)
    if ncolors == 0:
        raise ValueError("colors must contain at least one color")
    # Lower edge of every bin.  Built explicitly so there are always exactly
    # ``ncolors`` edges; a float-step ``arange`` does not guarantee its length.
    edges = [-1 + 2.0 * i / ncolors for i in range(ncolors)]
    last = ncolors - 1
    color = []
    for value in corr_array:
        ind = bisect.bisect_left(edges, value) - 1
        # Clamp the index: a value of exactly -1 yields -1 here, which would
        # otherwise wrap around to the last (+1) color.
        color.append(colors[min(max(ind, 0), last)])
    return color
# Figure whose axes span one unit per label, with no toolbar or tools.
p = figure(
    title="Correlation Coefficient Heatmap (lighter is worse)",
    x_range=(0, nlabels),
    y_range=(0, nlabels),
    plot_width=600,
    plot_height=600,
    tools='',
    toolbar_location=None,
)

# Tilt the axis labels by 45 degrees and hide the grid lines.
p.xaxis.major_label_orientation = pi/4
p.yaxis.major_label_orientation = pi/4
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

# Draw one square per matrix cell, colored from the flattened correlations.
color_list = get_colors(p_corr.values.flatten(), colors)
top, bottom, left, right = get_bounds(nlabels)
p.quad(left=left, right=right, bottom=bottom, top=top,
       color=color_list, line_color='white')

# Put a tick at the center of every cell and relabel it with the column label.
ticks = [position + 0.5 for position in range(nlabels)]
tick_dict = OrderedDict(zip(ticks, labels))
p.xaxis.ticker = ticks
p.yaxis.ticker = ticks
p.xaxis.major_label_overrides = tick_dict
p.yaxis.major_label_overrides = tick_dict

# Color bar covering -1 to 1, placed to the right of the plot.
mapper = LinearColorMapper(palette=colors, low=-1, high=1)
color_bar = ColorBar(color_mapper=mapper, location=(0, 0))
p.add_layout(color_bar, 'right')
show(p)
如果类别是整数编码(这是一个可怕的数据示例),这将导致以下图表:
如何在 Bokeh 中制作相关热图?
import pandas as pd
import bokeh.charts
# Build a sample DataFrame and compute its pairwise column correlations.
# NOTE(review): makeTimeDataFrame presumably yields columns A-D (the melt call
# below names them) -- confirm against the installed pandas version.
df = pd.util.testing.makeTimeDataFrame(1000)
c = df.corr()
# First attempt: pass the wide correlation matrix straight to HeatMap.
p = bokeh.charts.HeatMap(c) # not right
# try to make it a long form
# (and it's ugly in pandas to use 'index' in melt)
# Copy the index into a regular column 'x' so melt can use it as the id column.
c['x'] = c.index
# Long form: one row per (x, variable, value) triple for columns A-D.
c = pd.melt(c, 'x', ['A','B','C','D'])
# this shows the right 4x4 matrix, but values are still wrong
p = bokeh.charts.HeatMap(c, x = 'x', y = 'variable', values = 'value')
顺便问一下,我可以在侧面放置一个颜色条(colorbar),而不是绘图区内的图例吗?另外,如何选择颜色范围/映射,例如从深蓝色 (-1) 到白色 (0) 再到深红色 (+1)?
在较新版本的 Bokeh 中,您应该使用 bokeh.plotting 接口。您可以在图库中看到使用此接口生成的分类热图示例:
http://docs.bokeh.org/en/latest/docs/gallery/categorical.html
关于图例,对于像这样的颜色映射,您实际上需要的是离散的 ColorBar,而不是 Legend。这是一项新功能,将包含在本周晚些时候即将发布的 0.12.2 版本中(今天的日期:2016-08-28)。这些新的 ColorBar 注释可以位于主绘图区域之外。
GitHub 仓库中也有一个示例:
https://github.com/bokeh/bokeh/blob/master/examples/plotting/file/color_data_map.py
请注意,最后一个示例还使用了另一个新功能在浏览器中进行颜色映射,而不必预先计算 python 中的颜色。基本上全部看起来像:
# Create a color mapper from a palette; any list of colors works.
mapper = LinearColorMapper(palette=Viridis3, low=0, high=100)

p = figure(toolbar_location=None, tools='', title=title)
p.circle(
    # The trailing comma after `source=source` is required: without it the
    # call is a syntax error.
    x='x', y='y', source=source,
    # Use the mapper to colormap according to the 'z' column (in the browser).
    fill_color={'field': 'z', 'transform': mapper},
)

# Create a ColorBar and add it to the right-hand side of the plot.
color_bar = ColorBar(color_mapper=mapper, location=(0, 0))
p.add_layout(color_bar, 'right')
还有更复杂的选项,例如,如果您想更精细地控制颜色条上的刻度,可以像在普通 Axis 上一样添加自定义的刻度生成器(ticker)或刻度格式化器(tick formatter),以实现如下效果:
不清楚您的实际要求是什么,所以我只是提一下以防知道它有用。
最后,Bokeh 是一个大型项目,找到这样做的最佳方法通常需要询问更多信息和上下文,并且通常需要进行讨论。 SO 似乎不赞成这种协作帮助(他们是 "not real answers"),所以我鼓励您也随时查看 project Discourse 寻求帮助。
我尝试使用 Bokeh 库创建交互式相关图。该代码是 SO 和其他网站上可用的不同解决方案的组合。在上面的解决方案中,bigreddot 已经详细解释了事情。相关热图的代码如下:
import pandas as pd
from bokeh.io import output_file, show
from bokeh.models import BasicTicker, ColorBar, LinearColorMapper, ColumnDataSource, PrintfTickFormatter
from bokeh.plotting import figure
from bokeh.transform import transform
from bokeh.palettes import Viridis3, Viridis256
# Read your data into a pandas DataFrame.
# NOTE: the path below is a placeholder -- replace it with your own CSV file.
data = pd.read_csv("path/to/your/data.csv")
# Build the correlation matrix with pandas.
df = data.corr()
df.index.name = 'AllColumns1'
df.columns.name = 'AllColumns2'
# Reshape to long form: one row per (AllColumns1, AllColumns2, value) triple.
df = df.stack().rename("value").reset_index()
# Write the rendered plot to a standalone HTML file.
output_file("CorrelationPlot.html")

# Map values onto the Viridis256 palette, spanning the observed minimum and
# maximum.  To use specific colors instead, pass your own list as `palette`,
# e.g. ['#d7191c', '#fdae61', '#ffffbf', '#a6d96a', '#1a9641'].
mapper = LinearColorMapper(
    palette=Viridis256,
    low=df.value.min(),
    high=df.value.max(),
)

# Toolbar tools and the categorical labels used for each axis.
TOOLS = "box_select,lasso_select,pan,wheel_zoom,box_zoom,reset,help"
x_labels = list(df.AllColumns1.drop_duplicates())
y_labels = list(df.AllColumns2.drop_duplicates())

p = figure(
    title="Correlation plot",
    tools=TOOLS,
    toolbar_location="right",
    x_axis_location="below",
    x_range=x_labels,
    y_range=y_labels,
    plot_width=1200,
    plot_height=1000,
)

# One unit-sized rectangle per row of the long-form frame, filled by `value`.
heatmap_source = ColumnDataSource(df)
p.rect(
    x="AllColumns1",
    y="AllColumns2",
    width=1,
    height=1,
    source=heatmap_source,
    line_color=None,
    fill_color=transform('value', mapper),
)

# Attach a color bar (acting as the legend) to the right of the plot.
color_bar = ColorBar(
    color_mapper=mapper,
    location=(0, 0),
    ticker=BasicTicker(desired_num_ticks=10),
)
p.add_layout(color_bar, 'right')

show(p)
参考文献:
[1] https://docs.bokeh.org/en/latest/docs/user_guide.html
[2]
所以我想我可以提供一个基线代码来帮助完成你所要求的,结合使用上面的答案和一些额外的预处理。
假设您已经加载了数据帧 df(在本例中为 UCI Adult Data)并计算了相关系数(p_corr)。
import bisect
#
from math import pi
from numpy import arange
from itertools import chain
from collections import OrderedDict
#
from bokeh.palettes import RdBu as colors # just make sure to import a palette that centers on white (-ish)
from bokeh.models import ColorBar, LinearColorMapper
# Take the 9-color variant of the imported palette (an odd count gives zero
# correlation its own distinct color) and flip its order.
colors = list(colors[9])[::-1]
# Axis labels are the DataFrame's column labels.
labels = df.columns
nlabels = len(labels)
def get_bounds(n):
    """Return the edges of the quads that tile an ``n`` x ``n`` grid.

    Cells are listed row by row (lowest row first, left to right within a
    row); every cell is one unit wide and one unit tall.

    Args:
        n: Number of features, i.e. the number of cells along each side.

    Returns:
        A ``(top, bottom, left, right)`` tuple of lists, each of length
        ``n * n``, holding the corresponding edge coordinate of every cell.
    """
    # Use the argument rather than the module-level ``nlabels`` so the result
    # depends only on what the caller passes in.
    cells = range(n)
    bottom = [row for row in cells for _ in cells]
    top = [row + 1 for row in cells for _ in cells]
    left = [col for _ in cells for col in cells]
    right = [col + 1 for _ in cells for col in cells]
    return top, bottom, left, right
def get_colors(corr_array, colors):
    """Map correlation coefficients onto a palette.

    The interval [-1, 1] is split into ``len(colors)`` equal-width bins and
    each value receives the color of the bin it falls in.

    Args:
        corr_array: Iterable of correlation coefficients.
        colors: Sequence of colors ordered from the -1 end to the +1 end.

    Returns:
        A list with one color per input value, in input order.

    Raises:
        ValueError: If ``colors`` is empty.
    """
    ncolors = len(colors)
    if ncolors == 0:
        raise ValueError("colors must contain at least one color")
    # Lower edge of every bin.  Built explicitly so there are always exactly
    # ``ncolors`` edges; a float-step ``arange`` does not guarantee its length.
    edges = [-1 + 2.0 * i / ncolors for i in range(ncolors)]
    last = ncolors - 1
    color = []
    for value in corr_array:
        ind = bisect.bisect_left(edges, value) - 1
        # Clamp the index: a value of exactly -1 yields -1 here, which would
        # otherwise wrap around to the last (+1) color.
        color.append(colors[min(max(ind, 0), last)])
    return color
# Figure whose axes span one unit per label, with no toolbar or tools.
p = figure(
    title="Correlation Coefficient Heatmap (lighter is worse)",
    x_range=(0, nlabels),
    y_range=(0, nlabels),
    plot_width=600,
    plot_height=600,
    tools='',
    toolbar_location=None,
)

# Tilt the axis labels by 45 degrees and hide the grid lines.
p.xaxis.major_label_orientation = pi/4
p.yaxis.major_label_orientation = pi/4
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

# Draw one square per matrix cell, colored from the flattened correlations.
color_list = get_colors(p_corr.values.flatten(), colors)
top, bottom, left, right = get_bounds(nlabels)
p.quad(left=left, right=right, bottom=bottom, top=top,
       color=color_list, line_color='white')

# Put a tick at the center of every cell and relabel it with the column label.
ticks = [position + 0.5 for position in range(nlabels)]
tick_dict = OrderedDict(zip(ticks, labels))
p.xaxis.ticker = ticks
p.yaxis.ticker = ticks
p.xaxis.major_label_overrides = tick_dict
p.yaxis.major_label_overrides = tick_dict

# Color bar covering -1 to 1, placed to the right of the plot.
mapper = LinearColorMapper(palette=colors, low=-1, high=1)
color_bar = ColorBar(color_mapper=mapper, location=(0, 0))
p.add_layout(color_bar, 'right')
show(p)
如果类别是整数编码(这是一个可怕的数据示例),这将导致以下图表: