I am learning python and the thing I am stuck on is plotting a histogram based on the FLT column to indicate each of the filters. I have this data frame:
VARLIST: MJD FLT FIELD FLUXCAL FLUXCALERR MAG MAGERR \
0 OBS: 55161.6 g NaN -62.016 23.428 NaN -0.410
1 OBS: 55176.6 g NaN -8.183 21.252 NaN -2.820
2 OBS: 55179.6 g NaN 0.451 19.109 -4.134 46.053
3 OBS: 55188.6 g NaN 511.964 21.218 -11.773 0.045
4 OBS: 55206.6 g NaN 682.704 22.329 -12.086 0.036
.. ... ... .. ... ... ... ... ...
259 OBS: 56659.6 z NaN 193.577 44.434 -10.717 0.249
260 OBS: 56662.6 z NaN 2.728 30.422 -6.089 12.109
261 OBS: 56667.7 z NaN 51.009 30.915 -9.269 0.658
262 OBS: 56681.5 z NaN 8.945 30.450 -7.379 3.696
263 OBS: 56754.3 z NaN 12.488 60.586 -7.741 5.268
peakMJD mag_zpt27.5 snr_zpt27.5 magerr_zpt27.5
0 55206.6 23.018741 -2.647089 -0.410262
1 55206.6 25.217719 -0.385046 -2.820441
2 55206.6 28.364559 0.023601 46.014133
3 55206.6 20.726901 24.128759 0.045009
4 55206.6 20.414419 30.574768 0.035519
.. ... ... ... ...
259 55206.6 21.782866 4.356506 0.249282
260 55206.6 26.410389 0.089672 12.110811
261 55206.6 23.230883 1.649976 0.658191
262 55206.6 25.121049 0.293760 3.696892
263 55206.6 24.758768 0.206120 5.268770
My goal is to plot the FLT columns but to indicate the different filters: g, i, r, and z. From searching up how to make a histogram, I can plot a fundamental one based on the MAG (Magnitude). Ideally, what I would want are 4 different colors to indicate the different filters. I am aware that the graph would have overlapping Mag by the filter, but I am okay with it as I need a visual, and I can zoom in if I have to.
The code I have below is what I have right now. It is fundamental now since I have been stuck looking at the documentation and getting nowhere. Something to note is that in the middle are a bunch of comments about my attempts. One was to iterate through each row and look at the filter and then graph, but I couldn't figure that out. The other idea was to create 4 new columns, one for each filter, and then graph each column. I got stuck and couldn't figure it out.
#HISTOGRAM
def plot_histogram(source, data_file):
if source == 'villar':
filename, ext = os.path.splitext(data_file)
SnName_villar = filename[34:-6]
# read in for clarifying info
ps1_phot_info = pd.read_csv(data_file)
ra_deg = ps1_phot_info.loc[2][0][11:19] #deg
dec_deg = ps1_phot_info.loc[3][0][12:19] #deg
final_z_villar = ps1_phot_info.loc[5][0][17:23]
# read in for data
ps1_phot = pd.read_csv(data_file, skiprows=15, delim_whitespace=True)
ps1_phot.drop(ps1_phot.tail(1).index,inplace=True)
ps1_phot['peakMJD'] = ps1_phot.iloc[ps1_phot['FLUXCAL'].idxmax()]["MJD"]
# Calculate mag. Look at zeropoints (A. Villar = 32.5, YSE = 27.5)
ps1_phot['mag_zpt27.5'] = np.array(-2.5*np.log10(np.abs(ps1_phot['FLUXCAL'])))+27.5
ps1_phot['snr_zpt27.5'] = (ps1_phot['FLUXCAL'] / np.abs(ps1_phot['FLUXCALERR']))
ps1_phot['magerr_zpt27.5'] = np.array(1.086 / ps1_phot['snr_zpt27.5'])
mask = (ps1_phot['FLUXCAL'].notna()) #& (ps1_phot['FLUXCALERR'] <= 50)
ps1_masked = ps1_phot.loc[mask] #has mag obs and reasonable error
print(ps1_masked)
ps1_phot['peakMJD'] = ps1_phot.iloc[ps1_phot['FLUXCAL'].idxmax()]["MJD"]
print('here')
print(ps1_phot)
#Calculate mag. Look at zeropoints (A. Villar = 32.5, YSE = 27.5)
ps1_phot['mag_zpt27.5'] = np.array(-2.5*np.log10(np.abs(ps1_phot['FLUXCAL'])))+27.5
ps1_phot['snr_zpt27.5'] = (ps1_phot['FLUXCAL'] / np.abs(ps1_phot['FLUXCALERR']))
ps1_phot['magerr_zpt27.5'] = np.array(1.086 / ps1_phot['snr_zpt27.5'])
mask = (ps1_phot['FLUXCAL'].notna()) #& (ps1_phot['FLUXCALERR'] <= 50)
ps1_masked = ps1_phot.loc[mask] #has mag obs and reasonable error
#print(ps1_masked)
#ps1_phot['g_band'] = ps1_phot.iloc[ps1_phot['FLT']=='g']
#print("THIS IS THE G BAND")
#print(ps1_masked)
#ps1_phot['r_band'] = ps1_phot.iloc[ps1_phot['FLT']=='r']
#print("THIS IS THE R BAND")
#print(ps1_masked)
#ps1_phot['i_band'] = ps1_phot.iloc[ps1_phot['FLT']=='i']
#print("THIS IS THE I BAND")
#print(ps1_masked)
#ps1_phot['z_band'] = ps1_phot.iloc[ps1_phot['FLT']=='z']
#print("THIS IS THE Z BAND")
#print(ps1_masked)
#for pb in passbands:
# plt.hist(x=ps1_masked[''])
#numpy.histogram(a, bins=10, range=None, normed=None, weights=None, density=None)[source]
#passbands = ('g', 'r', 'i', 'z')
#for pb in passbands:
# #x = passbands[pd]
# #blah blah code --> Plot histogram
# #errorbar only for scatter plot
# ax1.errorbar(x=ps1_masked[passbands[pd]]['MJD'] - ps1_masked[passbands[pd]]['peakMJD'], y=ps1_masked[passbands[pd]][f'{yaxis_is}'], yerr=ps1_masked[passbands[pd]][f'{yaxiserr_is}'],
# fmt='o', alpha=0.5, color='g', label=f'{x}-PS1')
#ax1.ticklabel_format(useOffset=False, style='plain')
yaxis_is = 'mag_zpt27.5' # or FLUXCAL
yaxiserr_is = 'magerr_zpt27.5' # or FLUXCALERR
fig = plt.figure(figsize=(12, 4))
ax1 = fig.add_subplot(121)
ax1.set_ylabel('Observations', fontsize=16)
ax1.set_xlabel('Mag', fontsize=16)
ax1.tick_params(labelsize=12)
ax1.set_title(f'{SnName_yse}, z_yse={final_z_yse}; {SnName_villar}, z_villar={final_z_villar}', fontsize=16)
#Plot
ps1_phot['mag_zpt27.5'].hist()
plt.savefig(f"path/to/file/my_plot_{pb}.png")
plt.tight_layout()
plt.show()
fig.savefig(f'./Villar_Data_Graphs/{SnName_villar}_{SnName_yse}_{yaxis_is}_Histogram.png', format='png', bbox_inches='tight', dpi=300)
plt.close(fig)
#Read all the files "fileV = sorted(glob.glob('./Villar/ps1_sne_zenodo/*.dat'))" and then supernova type (has the same)
# check the redshift to see if the are comparable (range +/- 0.1)
This is where I call the function above
x = './Villar/ps1_sne_zenodo/PS1_PS1MD_PSc000186.snana.dat'
SnName_villar, final_z_villar, ps1_masked_g, ps1_masked_r, ps1_masked_i, ps1_masked_z = get_dataframes(source='villar', data_file=x, plot_color='c', sntype_label='VIa')
y = './Photpipe/yselc_v3_photoz/GPC1v3_2020add.snana.dat'
SnName_yse, final_z_yse, pp_phot_g, pp_phot_r, pp_phot_i, pp_phot_z = get_dataframes(source='yse', data_file=y, plot_color='b', sntype_label='VIa')
plot_villar_and_yse(SnName_villar, final_z_villar, ps1_masked_g, ps1_masked_r, ps1_masked_i, ps1_masked_z,
SnName_yse, final_z_yse, pp_phot_g, pp_phot_r, pp_phot_i, pp_phot_z)
plot_histogram(source='villar', data_file=x)
#Call and plot histogram function
Please let me know your thoughts and suggestions.
This can be done relatively simply using Seaborn's histplot function.
import pandas as pd
import numpy as np
import seaborn as sns
# construct some sample data
ps1_phot = pd.concat([pd.DataFrame({'FLT': flt, 'MAG': np.random.randn(1000) + i}) for i, flt in enumerate('girz')])
# plot overlapping histograms
sns.histplot(ps1_phot.dropna(subset=['MAG']), x='MAG', hue='FLT')
Results in