I have the code below which produces the output I want.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
plt.style.use('ggplot')
%matplotlib inline
# Sample data: one row per variable, tagged with the group (facet) it belongs to.
data = {
    'Variable_Grouping': ['Type_A', 'Type_A', 'Type_A', 'Type_C', 'Type_C', 'Type_C', 'Type_C', 'Type_D', 'Type_D', 'Type_E', 'Type_E', 'Type_E', 'Type_H', 'Type_H'],
    'Variable': ['a1', 'a2', 'a3', 'c1', 'c2', 'c3', 'c4', 'd1', 'd2', 'e1', 'e2', 'e3', 'h1', 'h2'],
    'Count': [5, 3, 8, 4, 3, 9, 5, 3, 8, 5, 3, 8, 5, 3],
    'Percent': [0.0625, 0.125, 0.4375, 0.0, 0.125, 0.5, 0.02, 0.125, 0.03, 0.0625, 0.05, 0.44, 0.07, 0.023],
}
to_plot = pd.DataFrame(data)

# One facet per Variable_Grouping value, two facets per row, each facet with
# its own x and y axes. The bars show Count per Variable.
g = sns.FacetGrid(to_plot, col='Variable_Grouping', col_wrap=2, sharex=False, sharey=False, height=5, aspect=1, margin_titles=True)
g = g.map(plt.bar, "Variable", "Count").add_legend()

# Overlay Percent as a line on a secondary y-axis of every facet.
# sort=False makes groupby yield groups in order of first appearance instead
# of sorted-key order; for a plain string column that is the order in which
# FacetGrid lays out its facets, so each axis is paired with its own group
# even when the rows are not pre-sorted by group.
for ax, (_, subdata) in zip(g.axes, to_plot.groupby('Variable_Grouping', sort=False)):
    ax2 = ax.twinx()
    subdata.plot(x='Variable', y='Percent', ax=ax2, legend=True, color='g', label='Percent')
    ax2.set_ylabel('Percent')
    ax2.grid(False)

# Show rotated x tick labels on every facet, not only the bottom row.
for ax in g.axes.flatten():
    ax.tick_params(labelbottom=True, labelrotation=90)

g.fig.suptitle('Analysis', fontsize=16, fontweight='demibold', y=1.02)
g.fig.subplots_adjust(hspace=0.3, wspace=0.7, right=0.9)
plt.show()
Now I am using matplotlib.backends.backend_pdf to save the plots to a PDF file. I want 4 plots per page.
# Render every group into one FacetGrid figure and write it as a single PDF page.
with PdfPages('Analysis.pdf') as pdf:
    g = sns.FacetGrid(to_plot, col='Variable_Grouping', col_wrap=2, sharex=False, sharey=False, height=5, aspect=1, margin_titles=True)
    g = g.map(plt.bar, "Variable", "Count").add_legend()

    # Overlay Percent on a secondary y-axis of each facet.
    # sort=False yields groups in order of first appearance rather than
    # sorted-key order; for a plain string column that is the order in which
    # FacetGrid lays out its facets, so axes and groups stay paired.
    for ax, (_, subdata) in zip(g.axes, to_plot.groupby('Variable_Grouping', sort=False)):
        ax2 = ax.twinx()
        subdata.plot(x='Variable', y='Percent', ax=ax2, legend=True, color='g', label='Percent')
        ax2.set_ylabel('Percent')
        ax2.grid(False)

    # Show rotated x tick labels on every facet.
    for ax in g.axes.flatten():
        ax.tick_params(labelbottom=True, labelrotation=90)

    g.fig.suptitle('Analysis', fontsize=16, fontweight='demibold', y=1.02)
    g.fig.subplots_adjust(hspace=0.3, wspace=0.7, right=0.9)

    # Save and close this specific figure instead of relying on pyplot's
    # implicit "current figure".
    pdf.savefig(g.fig, bbox_inches='tight')
    plt.close(g.fig)
The code above gives me all the plots in a single page as expected.
def grouper(iterable, n, fillvalue=None):
    """Collect items from *iterable* into consecutive tuples of length *n*.

    This is the ``grouper`` recipe from the itertools documentation.

    Args:
        iterable: Any iterable whose items should be chunked.
        n: Number of items per chunk.
        fillvalue: Value used to pad the final chunk when the number of
            items is not a multiple of *n*.

    Returns:
        An iterator of ``n``-tuples; the last tuple is padded with
        *fillvalue* if the input runs out early.

    >>> list(grouper('ABCDEFG', 3, 'x'))
    [('A', 'B', 'C'), ('D', 'E', 'F'), ('G', 'x', 'x')]
    """
    from itertools import zip_longest

    # The list holds n references to the SAME iterator, so zip_longest draws
    # one item per slot and consecutive items end up in the same tuple.
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)
# Put at most four facets on a page; when there are fewer than four distinct
# groups, a single page holds all of them.
N_plots_per_page = min(len(to_plot['Variable_Grouping'].unique()), 4)
# Attempt at one PDF page per chunk of N_plots_per_page groups.
# NOTE(review): `cols` is never used inside the loop body. Every iteration
# builds the grid from the full `to_plot` frame, so each page repeats all of
# the plots. This behaviour is kept as written.
with PdfPages('Analysis.pdf') as pdf:
    group_names = to_plot['Variable_Grouping'].unique()
    for cols in grouper(group_names, N_plots_per_page):
        g = sns.FacetGrid(
            to_plot,
            col='Variable_Grouping',
            col_wrap=2,
            sharex=False,
            sharey=False,
            height=5,
            aspect=1,
            margin_titles=True,
        )
        g = g.map(plt.bar, "Variable", "Count").add_legend()

        # Overlay Percent on a secondary y-axis of each facet.
        for ax, (_, subdata) in zip(g.axes, to_plot.groupby('Variable_Grouping')):
            ax2 = ax.twinx()
            subdata.plot(x='Variable', y='Percent', ax=ax2, legend=True, color='g', label='Percent')
            ax2.set_ylabel('Percent')
            ax2.grid(False)

        # Show rotated x tick labels on every facet.
        for ax in g.axes.flatten():
            ax.tick_params(labelbottom=True, labelrotation=90)

        g.fig.suptitle('Analysis', fontsize=16, fontweight='demibold', y=1.02)
        g.fig.subplots_adjust(hspace=0.3, wspace=0.7, right=0.9)

        # Write the current figure as one page, display it, then release it.
        pdf.savefig(bbox_inches='tight')
        plt.show()
        plt.close()
In the code above I tried using the grouper function from the itertools recipes (https://docs.python.org/3/library/itertools.html#itertools-recipes), which was also suggested in "Export huge seaborn chart into pdf with multiple pages", but it repeats all the graphs on every page. Is there an easy way to get 4 graphs per page, or what is wrong with the way I used the grouper function above that makes the graphs repeat? Any help will be appreciated. Thanks.
The problem is that, even though you compute the number of plots per page, you still plot the whole `to_plot` DataFrame inside the loop. You need to filter `to_plot` with the `cols` you get from `grouper`, and then your code will work.
The only changes I made were to create the variable `data_per_page` and to use it in place of `to_plot` inside `sns.FacetGrid` and in `for ax, (_, subdata) in zip(...)`.
# Write one PDF page per chunk of up to N_plots_per_page groups.
with PdfPages('Analysis.pdf') as pdf:
    for cols in grouper(to_plot['Variable_Grouping'].unique(), N_plots_per_page):
        # Keep only the rows whose group belongs on this page. grouper pads
        # the last chunk with None; this assumes no group label is itself
        # missing, so the padding matches no rows.
        data_per_page = to_plot.loc[to_plot['Variable_Grouping'].isin(cols)]

        g = sns.FacetGrid(data_per_page, col='Variable_Grouping', col_wrap=2, sharex=False, sharey=False, height=5, aspect=1, margin_titles=True)
        g = g.map(plt.bar, "Variable", "Count").add_legend()

        # Overlay Percent on a secondary y-axis of each facet.
        # A scalar key (not a one-element list) keeps the group keys plain
        # labels, and sort=False yields groups in order of first appearance,
        # which is the order FacetGrid uses for a plain string column, so
        # each axis is paired with its own group.
        for ax, (_, subdata) in zip(g.axes, data_per_page.groupby('Variable_Grouping', sort=False)):
            ax2 = ax.twinx()
            subdata.plot(x='Variable', y='Percent', ax=ax2, legend=True, color='g', label='Percent')
            ax2.set_ylabel('Percent')
            ax2.grid(False)

        # Show rotated x tick labels on every facet.
        for ax in g.axes.flatten():
            ax.tick_params(labelbottom=True, labelrotation=90)

        g.fig.suptitle('Analysis', fontsize=16, fontweight='demibold', y=1.02)
        g.fig.subplots_adjust(hspace=0.3, wspace=0.7, right=0.9)

        # Save this page's figure explicitly, display it, then close it so
        # figures do not accumulate across pages.
        pdf.savefig(g.fig, bbox_inches='tight')
        plt.show()
        plt.close(g.fig)
As a result I get a pdf with 2 pages, on the first there are 4 plots, and on the second only 1.