Python: 如何在一个直方图中绘制多列
Python: How to plot multiple columns in one histogram
我正在学习 python,我坚持的事情是绘制基于 FLT 列的直方图以指示每个过滤器。我有这个数据框:
VARLIST: MJD FLT FIELD FLUXCAL FLUXCALERR MAG MAGERR \
0 OBS: 55161.6 g NaN -62.016 23.428 NaN -0.410
1 OBS: 55176.6 g NaN -8.183 21.252 NaN -2.820
2 OBS: 55179.6 g NaN 0.451 19.109 -4.134 46.053
3 OBS: 55188.6 g NaN 511.964 21.218 -11.773 0.045
4 OBS: 55206.6 g NaN 682.704 22.329 -12.086 0.036
.. ... ... .. ... ... ... ... ...
259 OBS: 56659.6 z NaN 193.577 44.434 -10.717 0.249
260 OBS: 56662.6 z NaN 2.728 30.422 -6.089 12.109
261 OBS: 56667.7 z NaN 51.009 30.915 -9.269 0.658
262 OBS: 56681.5 z NaN 8.945 30.450 -7.379 3.696
263 OBS: 56754.3 z NaN 12.488 60.586 -7.741 5.268
peakMJD mag_zpt27.5 snr_zpt27.5 magerr_zpt27.5
0 55206.6 23.018741 -2.647089 -0.410262
1 55206.6 25.217719 -0.385046 -2.820441
2 55206.6 28.364559 0.023601 46.014133
3 55206.6 20.726901 24.128759 0.045009
4 55206.6 20.414419 30.574768 0.035519
.. ... ... ... ...
259 55206.6 21.782866 4.356506 0.249282
260 55206.6 26.410389 0.089672 12.110811
261 55206.6 23.230883 1.649976 0.658191
262 55206.6 25.121049 0.293760 3.696892
263 55206.6 24.758768 0.206120 5.268770
我的目标是绘制 FLT 列,但要指示不同的过滤器:g、i、r 和 z。通过搜索如何制作直方图,我可以绘制一个基于 MAG(幅度)的基本图。理想情况下,我想要的是 4 种不同的颜色来表示不同的过滤器。我知道该图会被过滤器重叠 Mag,但我可以接受它,因为我需要视觉效果,如果需要,我可以放大。
下面的代码是我现在拥有的。这是现在的基础,因为我一直在查看文档却一无所获。需要注意的是,中间是一堆关于我的尝试的评论。一个是遍历每一行并查看过滤器,然后是图表,但我无法弄清楚。另一个想法是创建 4 个新列,每个过滤器一个,然后绘制每个列的图表。我卡住了,想不通。
#HISTOGRAM
def plot_histogram(source, data_file):
if source == 'villar':
filename, ext = os.path.splitext(data_file)
SnName_villar = filename[34:-6]
# read in for clarifying info
ps1_phot_info = pd.read_csv(data_file)
ra_deg = ps1_phot_info.loc[2][0][11:19] #deg
dec_deg = ps1_phot_info.loc[3][0][12:19] #deg
final_z_villar = ps1_phot_info.loc[5][0][17:23]
# read in for data
ps1_phot = pd.read_csv(data_file, skiprows=15, delim_whitespace=True)
ps1_phot.drop(ps1_phot.tail(1).index,inplace=True)
ps1_phot['peakMJD'] = ps1_phot.iloc[ps1_phot['FLUXCAL'].idxmax()]["MJD"]
# Calculate mag. Look at zeropoints (A. Villar = 32.5, YSE = 27.5)
ps1_phot['mag_zpt27.5'] = np.array(-2.5*np.log10(np.abs(ps1_phot['FLUXCAL'])))+27.5
ps1_phot['snr_zpt27.5'] = (ps1_phot['FLUXCAL'] / np.abs(ps1_phot['FLUXCALERR']))
ps1_phot['magerr_zpt27.5'] = np.array(1.086 / ps1_phot['snr_zpt27.5'])
mask = (ps1_phot['FLUXCAL'].notna()) #& (ps1_phot['FLUXCALERR'] <= 50)
ps1_masked = ps1_phot.loc[mask] #has mag obs and reasonable error
print(ps1_masked)
ps1_phot['peakMJD'] = ps1_phot.iloc[ps1_phot['FLUXCAL'].idxmax()]["MJD"]
print('here')
print(ps1_phot)
#Calculate mag. Look at zeropoints (A. Villar = 32.5, YSE = 27.5)
ps1_phot['mag_zpt27.5'] = np.array(-2.5*np.log10(np.abs(ps1_phot['FLUXCAL'])))+27.5
ps1_phot['snr_zpt27.5'] = (ps1_phot['FLUXCAL'] / np.abs(ps1_phot['FLUXCALERR']))
ps1_phot['magerr_zpt27.5'] = np.array(1.086 / ps1_phot['snr_zpt27.5'])
mask = (ps1_phot['FLUXCAL'].notna()) #& (ps1_phot['FLUXCALERR'] <= 50)
ps1_masked = ps1_phot.loc[mask] #has mag obs and reasonable error
#print(ps1_masked)
#ps1_phot['g_band'] = ps1_phot.iloc[ps1_phot['FLT']=='g']
#print("THIS IS THE G BAND")
#print(ps1_masked)
#ps1_phot['r_band'] = ps1_phot.iloc[ps1_phot['FLT']=='r']
#print("THIS IS THE R BAND")
#print(ps1_masked)
#ps1_phot['i_band'] = ps1_phot.iloc[ps1_phot['FLT']=='i']
#print("THIS IS THE I BAND")
#print(ps1_masked)
#ps1_phot['z_band'] = ps1_phot.iloc[ps1_phot['FLT']=='z']
#print("THIS IS THE Z BAND")
#print(ps1_masked)
#for pb in passbands:
# plt.hist(x=ps1_masked[''])
#numpy.histogram(a, bins=10, range=None, normed=None, weights=None, density=None)[source]
#passbands = ('g', 'r', 'i', 'z')
#for pb in passbands:
# #x = passbands[pd]
# #blah blah code --> Plot histogram
# #errorbar only for scatter plot
# ax1.errorbar(x=ps1_masked[passbands[pd]]['MJD'] - ps1_masked[passbands[pd]]['peakMJD'], y=ps1_masked[passbands[pd]][f'{yaxis_is}'], yerr=ps1_masked[passbands[pd]][f'{yaxiserr_is}'],
# fmt='o', alpha=0.5, color='g', label=f'{x}-PS1')
#ax1.ticklabel_format(useOffset=False, style='plain')
yaxis_is = 'mag_zpt27.5' # or FLUXCAL
yaxiserr_is = 'magerr_zpt27.5' # or FLUXCALERR
fig = plt.figure(figsize=(12, 4))
ax1 = fig.add_subplot(121)
ax1.set_ylabel('Observations', fontsize=16)
ax1.set_xlabel('Mag', fontsize=16)
ax1.tick_params(labelsize=12)
ax1.set_title(f'{SnName_yse}, z_yse={final_z_yse}; {SnName_villar}, z_villar={final_z_villar}', fontsize=16)
#Plot
ps1_phot['mag_zpt27.5'].hist()
plt.savefig(f"path/to/file/my_plot_{pb}.png")
plt.tight_layout()
plt.show()
fig.savefig(f'./Villar_Data_Graphs/{SnName_villar}_{SnName_yse}_{yaxis_is}_Histogram.png', format='png', bbox_inches='tight', dpi=300)
plt.close(fig)
#Read all the files "fileV = sorted(glob.glob('./Villar/ps1_sne_zenodo/*.dat'))" and then supernova type (has the same)
# check the redshift to see if the are comparable (range +/- 0.1)
这是我调用上面函数的地方
x = './Villar/ps1_sne_zenodo/PS1_PS1MD_PSc000186.snana.dat'
SnName_villar, final_z_villar, ps1_masked_g, ps1_masked_r, ps1_masked_i, ps1_masked_z = get_dataframes(source='villar', data_file=x, plot_color='c', sntype_label='VIa')
y = './Photpipe/yselc_v3_photoz/GPC1v3_2020add.snana.dat'
SnName_yse, final_z_yse, pp_phot_g, pp_phot_r, pp_phot_i, pp_phot_z = get_dataframes(source='yse', data_file=y, plot_color='b', sntype_label='VIa')
plot_villar_and_yse(SnName_villar, final_z_villar, ps1_masked_g, ps1_masked_r, ps1_masked_i, ps1_masked_z,
SnName_yse, final_z_yse, pp_phot_g, pp_phot_r, pp_phot_i, pp_phot_z)
plot_histogram(source='villar', data_file=x)
#Call and plot histogram function
请告诉我您的想法和建议。
这可以使用 Seaborn's histplot function 相对简单地完成。
import pandas as pd
import numpy as np
import seaborn as sns
# construct some sample data
ps1_phot = pd.concat([pd.DataFrame({'FLT': flt, 'MAG': np.random.randn(1000) + i}) for i, flt in enumerate('girz')])
# plot overlapping histograms
sns.histplot(ps1_phot.dropna(subset=['MAG']), x='MAG', hue='FLT')
结果
seaborn 是必经之路。要绘制分布图,您在 seaborn 库中有几个选项:sns.distplot()、sns.kdeplot()、sns.boxplot()、sns.histplot()。我个人喜欢使用 sns.distplot(),因为它为您提供了显示直方图和曲线的选项。
我正在学习 python,我坚持的事情是绘制基于 FLT 列的直方图以指示每个过滤器。我有这个数据框:
VARLIST: MJD FLT FIELD FLUXCAL FLUXCALERR MAG MAGERR \
0 OBS: 55161.6 g NaN -62.016 23.428 NaN -0.410
1 OBS: 55176.6 g NaN -8.183 21.252 NaN -2.820
2 OBS: 55179.6 g NaN 0.451 19.109 -4.134 46.053
3 OBS: 55188.6 g NaN 511.964 21.218 -11.773 0.045
4 OBS: 55206.6 g NaN 682.704 22.329 -12.086 0.036
.. ... ... .. ... ... ... ... ...
259 OBS: 56659.6 z NaN 193.577 44.434 -10.717 0.249
260 OBS: 56662.6 z NaN 2.728 30.422 -6.089 12.109
261 OBS: 56667.7 z NaN 51.009 30.915 -9.269 0.658
262 OBS: 56681.5 z NaN 8.945 30.450 -7.379 3.696
263 OBS: 56754.3 z NaN 12.488 60.586 -7.741 5.268
peakMJD mag_zpt27.5 snr_zpt27.5 magerr_zpt27.5
0 55206.6 23.018741 -2.647089 -0.410262
1 55206.6 25.217719 -0.385046 -2.820441
2 55206.6 28.364559 0.023601 46.014133
3 55206.6 20.726901 24.128759 0.045009
4 55206.6 20.414419 30.574768 0.035519
.. ... ... ... ...
259 55206.6 21.782866 4.356506 0.249282
260 55206.6 26.410389 0.089672 12.110811
261 55206.6 23.230883 1.649976 0.658191
262 55206.6 25.121049 0.293760 3.696892
263 55206.6 24.758768 0.206120 5.268770
我的目标是绘制 FLT 列,但要指示不同的过滤器:g、i、r 和 z。通过搜索如何制作直方图,我可以绘制一个基于 MAG(幅度)的基本图。理想情况下,我想要的是 4 种不同的颜色来表示不同的过滤器。我知道该图会被过滤器重叠 Mag,但我可以接受它,因为我需要视觉效果,如果需要,我可以放大。
下面的代码是我现在拥有的。这是现在的基础,因为我一直在查看文档却一无所获。需要注意的是,中间是一堆关于我的尝试的评论。一个是遍历每一行并查看过滤器,然后是图表,但我无法弄清楚。另一个想法是创建 4 个新列,每个过滤器一个,然后绘制每个列的图表。我卡住了,想不通。
#HISTOGRAM
def plot_histogram(source, data_file):
if source == 'villar':
filename, ext = os.path.splitext(data_file)
SnName_villar = filename[34:-6]
# read in for clarifying info
ps1_phot_info = pd.read_csv(data_file)
ra_deg = ps1_phot_info.loc[2][0][11:19] #deg
dec_deg = ps1_phot_info.loc[3][0][12:19] #deg
final_z_villar = ps1_phot_info.loc[5][0][17:23]
# read in for data
ps1_phot = pd.read_csv(data_file, skiprows=15, delim_whitespace=True)
ps1_phot.drop(ps1_phot.tail(1).index,inplace=True)
ps1_phot['peakMJD'] = ps1_phot.iloc[ps1_phot['FLUXCAL'].idxmax()]["MJD"]
# Calculate mag. Look at zeropoints (A. Villar = 32.5, YSE = 27.5)
ps1_phot['mag_zpt27.5'] = np.array(-2.5*np.log10(np.abs(ps1_phot['FLUXCAL'])))+27.5
ps1_phot['snr_zpt27.5'] = (ps1_phot['FLUXCAL'] / np.abs(ps1_phot['FLUXCALERR']))
ps1_phot['magerr_zpt27.5'] = np.array(1.086 / ps1_phot['snr_zpt27.5'])
mask = (ps1_phot['FLUXCAL'].notna()) #& (ps1_phot['FLUXCALERR'] <= 50)
ps1_masked = ps1_phot.loc[mask] #has mag obs and reasonable error
print(ps1_masked)
ps1_phot['peakMJD'] = ps1_phot.iloc[ps1_phot['FLUXCAL'].idxmax()]["MJD"]
print('here')
print(ps1_phot)
#Calculate mag. Look at zeropoints (A. Villar = 32.5, YSE = 27.5)
ps1_phot['mag_zpt27.5'] = np.array(-2.5*np.log10(np.abs(ps1_phot['FLUXCAL'])))+27.5
ps1_phot['snr_zpt27.5'] = (ps1_phot['FLUXCAL'] / np.abs(ps1_phot['FLUXCALERR']))
ps1_phot['magerr_zpt27.5'] = np.array(1.086 / ps1_phot['snr_zpt27.5'])
mask = (ps1_phot['FLUXCAL'].notna()) #& (ps1_phot['FLUXCALERR'] <= 50)
ps1_masked = ps1_phot.loc[mask] #has mag obs and reasonable error
#print(ps1_masked)
#ps1_phot['g_band'] = ps1_phot.iloc[ps1_phot['FLT']=='g']
#print("THIS IS THE G BAND")
#print(ps1_masked)
#ps1_phot['r_band'] = ps1_phot.iloc[ps1_phot['FLT']=='r']
#print("THIS IS THE R BAND")
#print(ps1_masked)
#ps1_phot['i_band'] = ps1_phot.iloc[ps1_phot['FLT']=='i']
#print("THIS IS THE I BAND")
#print(ps1_masked)
#ps1_phot['z_band'] = ps1_phot.iloc[ps1_phot['FLT']=='z']
#print("THIS IS THE Z BAND")
#print(ps1_masked)
#for pb in passbands:
# plt.hist(x=ps1_masked[''])
#numpy.histogram(a, bins=10, range=None, normed=None, weights=None, density=None)[source]
#passbands = ('g', 'r', 'i', 'z')
#for pb in passbands:
# #x = passbands[pd]
# #blah blah code --> Plot histogram
# #errorbar only for scatter plot
# ax1.errorbar(x=ps1_masked[passbands[pd]]['MJD'] - ps1_masked[passbands[pd]]['peakMJD'], y=ps1_masked[passbands[pd]][f'{yaxis_is}'], yerr=ps1_masked[passbands[pd]][f'{yaxiserr_is}'],
# fmt='o', alpha=0.5, color='g', label=f'{x}-PS1')
#ax1.ticklabel_format(useOffset=False, style='plain')
yaxis_is = 'mag_zpt27.5' # or FLUXCAL
yaxiserr_is = 'magerr_zpt27.5' # or FLUXCALERR
fig = plt.figure(figsize=(12, 4))
ax1 = fig.add_subplot(121)
ax1.set_ylabel('Observations', fontsize=16)
ax1.set_xlabel('Mag', fontsize=16)
ax1.tick_params(labelsize=12)
ax1.set_title(f'{SnName_yse}, z_yse={final_z_yse}; {SnName_villar}, z_villar={final_z_villar}', fontsize=16)
#Plot
ps1_phot['mag_zpt27.5'].hist()
plt.savefig(f"path/to/file/my_plot_{pb}.png")
plt.tight_layout()
plt.show()
fig.savefig(f'./Villar_Data_Graphs/{SnName_villar}_{SnName_yse}_{yaxis_is}_Histogram.png', format='png', bbox_inches='tight', dpi=300)
plt.close(fig)
#Read all the files "fileV = sorted(glob.glob('./Villar/ps1_sne_zenodo/*.dat'))" and then supernova type (has the same)
# check the redshift to see if the are comparable (range +/- 0.1)
这是我调用上面函数的地方
x = './Villar/ps1_sne_zenodo/PS1_PS1MD_PSc000186.snana.dat'
SnName_villar, final_z_villar, ps1_masked_g, ps1_masked_r, ps1_masked_i, ps1_masked_z = get_dataframes(source='villar', data_file=x, plot_color='c', sntype_label='VIa')
y = './Photpipe/yselc_v3_photoz/GPC1v3_2020add.snana.dat'
SnName_yse, final_z_yse, pp_phot_g, pp_phot_r, pp_phot_i, pp_phot_z = get_dataframes(source='yse', data_file=y, plot_color='b', sntype_label='VIa')
plot_villar_and_yse(SnName_villar, final_z_villar, ps1_masked_g, ps1_masked_r, ps1_masked_i, ps1_masked_z,
SnName_yse, final_z_yse, pp_phot_g, pp_phot_r, pp_phot_i, pp_phot_z)
plot_histogram(source='villar', data_file=x)
#Call and plot histogram function
请告诉我您的想法和建议。
这可以使用 Seaborn's histplot function 相对简单地完成。
import pandas as pd
import numpy as np
import seaborn as sns
# construct some sample data
ps1_phot = pd.concat([pd.DataFrame({'FLT': flt, 'MAG': np.random.randn(1000) + i}) for i, flt in enumerate('girz')])
# plot overlapping histograms
sns.histplot(ps1_phot.dropna(subset=['MAG']), x='MAG', hue='FLT')
结果
seaborn 是必经之路。要绘制分布图,您在 seaborn 库中有几个选项:sns.distplot()、sns.kdeplot()、sns.boxplot()、sns.histplot()。我个人喜欢使用 sns.distplot(),因为它为您提供了显示直方图和曲线的选项。