Create subplot of multiple columns, by overlapping two dataframes of different shapes and column names, for every group/id

I have the below two datasets with different shapes and column/feature names.

#Load the required libraries
import pandas as pd
import matplotlib.pyplot as plt

#Create dataset_1
# Each column is written as three chunks joined with '+': the rows for id 1
# (9 rows), then id 2 (4 rows), then id 3 (8 rows).
data_set_A = {
    'id': [1] * 9 + [2] * 4 + [3] * 8,
    'cycle_1': (
        [0.0, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6]
        + [0.0, 0.2, 0.4, 0.6]
        + [0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4]
    ),
    'Salary_1': (
        [6, 7, 7, 7, 8, 9, 10, 11, 12]
        + [3, 4, 4, 4]
        + [2, 8, 9, 10, 11, 12, 13, 14]
    ),
    'Children_1': (
        [1, 2, 1, 0, 1, 1, 0, 2, 1]
        + [0, 1, 1, 2]
        + [1, 2, 0, 1, 2, 0, 1, 0]
    ),
    'Expenditure_1': (
        [141, 123, 128, 66, 66, 120, 141, 52, 52]
        + [141, 96, 120, 120]
        + [141, 15, 123, 128, 66, 120, 141, 141]
    ),
}

#Convert to dataframe_1
data_A = pd.DataFrame(data=data_set_A)
print("\n data_A = \n", data_A)



#Create dataset_2
# Each column is written as three chunks joined with '+': the rows for id 1
# (12 rows), then id 2 (7 rows), then id 3 (8 rows).
data_set_B = {
    'id': [1] * 12 + [2] * 7 + [3] * 8,
    'cycle_2': (
        [0.0, 0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2]
        + [0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2]
        + [0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4]
    ),
    'Salary_2': (
        [7, 8, 8, 8, 8, 9, 14, 21, 12, 19, 14, 20]
        + [1, 6, 3, 8, 4, 9, 8]
        + [6, 4, 9, 10, 4, 12, 13, 6]
    ),
    'Children_2': (
        [1, 0, 1, 1, 0, 1, 2, 2, 0, 1, 0, 2]
        + [1, 0, 1, 2, 1, 1, 1]
        + [1, 2, 1, 0, 2, 0, 1, 0]
    ),
    'Expenditure_2': (
        [79, 82, 128, 66, 42, 120, 141, 52, 96, 120, 141, 52]
        + [30, 96, 86, 120, 91, 52, 96]
        + [28, 15, 51, 128, 76, 120, 87, 141]
    ),
}

#Convert to dataframe_2
data_B = pd.DataFrame(data=data_set_B)
print("\n data_B = \n", data_B)

The features are: 'Salary', 'Children', and 'Expenditure'.

NOTE: 'id' is the only column name that is identical in datasets A and B. Every other column, including the cycle column, carries a dataset suffix ('cycle_1' in A, 'cycle_2' in B).

Now for every id, I need to plot the features vs cycle, such that the datasets-A and B must overlap, in one single plot.

Thus, I have used the subplot function as such:

plt_fig_verify = plt.figure(figsize=(10,8))

# Split each dataframe by id once, instead of re-running groupby for every
# x and y argument of every curve.
groups_A = data_A.groupby(by="id")
groups_B = data_B.groupby(by="id")

##########################################################################

## Plots for id1
id1_A = groups_A.get_group(1)
id1_B = groups_B.get_group(1)

## Salary plot: overlap for df1 and df2
plt.subplot(3,3,1)
plt.plot(id1_A['cycle_1'], id1_A['Salary_1'], 'b', linewidth=1, label='Salary: data_A')
plt.plot(id1_B['cycle_2'], id1_B['Salary_2'], 'r', linewidth=1, label='Salary: data_B')
plt.xlabel('cycle')
plt.ylabel('wrt to id1')  # y-label only on the first column: it names the row's id
plt.legend()

## Children plot: overlap for df1 and df2
plt.subplot(3,3,2)
plt.plot(id1_A['cycle_1'], id1_A['Children_1'], 'b', linewidth=1, label='Children: data_A')
plt.plot(id1_B['cycle_2'], id1_B['Children_2'], 'r', linewidth=1, label='Children: data_B')
plt.xlabel('cycle')
plt.legend()

## Expenditure plot: overlap for df1 and df2
plt.subplot(3,3,3)
plt.plot(id1_A['cycle_1'], id1_A['Expenditure_1'], 'b', linewidth=1, label='Expenditure: data_A')
plt.plot(id1_B['cycle_2'], id1_B['Expenditure_2'], 'r', linewidth=1, label='Expenditure: data_B')
plt.xlabel('cycle')
plt.legend()

##########################################################################

## Plots for id2
id2_A = groups_A.get_group(2)
id2_B = groups_B.get_group(2)

## Salary plot: overlap for df1 and df2
## (fixed: this subplot used to plot the Children columns under a Salary label)
plt.subplot(3,3,4)
plt.plot(id2_A['cycle_1'], id2_A['Salary_1'], 'b', linewidth=1, label='Salary: data_A')
plt.plot(id2_B['cycle_2'], id2_B['Salary_2'], 'r', linewidth=1, label='Salary: data_B')
plt.xlabel('cycle')
plt.ylabel('wrt to id2')
plt.legend()

## Children plot: overlap for df1 and df2
plt.subplot(3,3,5)
plt.plot(id2_A['cycle_1'], id2_A['Children_1'], 'b', linewidth=1, label='Children: data_A')
plt.plot(id2_B['cycle_2'], id2_B['Children_2'], 'r', linewidth=1, label='Children: data_B')
plt.xlabel('cycle')
plt.legend()

## Expenditure plot: overlap for df1 and df2
plt.subplot(3,3,6)
plt.plot(id2_A['cycle_1'], id2_A['Expenditure_1'], 'b', linewidth=1, label='Expenditure: data_A')
plt.plot(id2_B['cycle_2'], id2_B['Expenditure_2'], 'r', linewidth=1, label='Expenditure: data_B')
plt.xlabel('cycle')
plt.legend()

##########################################################################

## Plots for id3
id3_A = groups_A.get_group(3)
id3_B = groups_B.get_group(3)

## Salary plot: overlap for df1 and df2
## (fixed: this subplot used to plot the Children columns under a Salary label)
plt.subplot(3,3,7)
plt.plot(id3_A['cycle_1'], id3_A['Salary_1'], 'b', linewidth=1, label='Salary: data_A')
plt.plot(id3_B['cycle_2'], id3_B['Salary_2'], 'r', linewidth=1, label='Salary: data_B')
plt.xlabel('cycle')
plt.ylabel('wrt to id3')
plt.legend()

## Children plot: overlap for df1 and df2
plt.subplot(3,3,8)
plt.plot(id3_A['cycle_1'], id3_A['Children_1'], 'b', linewidth=1, label='Children: data_A')
plt.plot(id3_B['cycle_2'], id3_B['Children_2'], 'r', linewidth=1, label='Children: data_B')
plt.xlabel('cycle')
plt.legend()

## Expenditure plot: overlap for df1 and df2
plt.subplot(3,3,9)
plt.plot(id3_A['cycle_1'], id3_A['Expenditure_1'], 'b', linewidth=1, label='Expenditure: data_A')
plt.plot(id3_B['cycle_2'], id3_B['Expenditure_2'], 'r', linewidth=1, label='Expenditure: data_B')
plt.xlabel('cycle')
plt.legend()

plt.show()

The plot looks as such:

Here, in the plot, you can observe that the y-label appears only once in each row, since we are aware that each row holds the plots for a single id.

For example, along row-1, we have subplots with respect to (wrt) id1, for features (Salary/Children/Expenditure) of datasets A and B, against 'cycle'.

Here I need to write the code for the subplot function NINE times.

However, is there a way to use some iterative function, so that the subplot code is written only once and still produces all the subplots?

Solution

I believe you are looking to build the 9 subplots (id=1,2,3 vs. the features). You can do that using 2 for loops and plot the graphs that way. You can replace the second part of the code with the below code. Hope this helps...

plt_fig_verify = plt.figure(figsize=(10,8))

features = ['Salary', 'Children', 'Expenditure']

## All IDs found in either data_A.id or data_B.id.
## sorted() fixes the row order; iterating a bare set gives no guaranteed order.
all_ids = sorted(set(data_A['id'].unique()) | set(data_B['id'].unique()))

## One row of subplots per id, one column per feature
for row, group_id in enumerate(all_ids):
    ## Select this id's rows once per row. A boolean mask yields an empty frame
    ## (not a KeyError, as groupby(...).get_group would) when the id exists in
    ## only one of the two dataframes.
    rows_A = data_A[data_A['id'] == group_id]
    rows_B = data_B[data_B['id'] == group_id]

    for col, feature in enumerate(features):
        ## Subplot numbers are 1-based and run left-to-right, top-to-bottom
        plt.subplot(len(all_ids), len(features), row * len(features) + col + 1)
        plt.plot(rows_A['cycle_1'], rows_A[feature + '_1'], 'b', linewidth=1, label=feature + ': data_A')
        plt.plot(rows_B['cycle_2'], rows_B[feature + '_2'], 'r', linewidth=1, label=feature + ': data_B')
        plt.xlabel('cycle')
        ## The y-label names the row's id, so show it only on the first column
        if col == 0:
            plt.ylabel('wrt to id' + str(group_id))
        plt.legend()

plt.show()

Output plot