Search code examples
pythonplotseabornbar-chart

Why does 0.0% show on the first bar of each bar plot?


When I run the code below, everything is displayed correctly, except that a 0.0% label is shown in each bar plot.

# Draw one count plot per categorical feature, split by `fraud`, and label
# every bar with its share of all rows in `dt`.
dt = pd.DataFrame({'witness':['No', 'No', 'No', 'No', 'No', 'No', 'No', 'Yes', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No'],
              'category':['Sport', 'Sport', 'Sport', 'Sport', 'Sport', 'Sport', 'Sport', 'Sport', 'Utility', 'Sedan', 'Sport', 'Sport', 'Sport', 'Sport', 'Sedan', 'Sedan', 'Sport', 'Sport', 'Sport', 'Sedan', 'Sport', 'Sport', 'Sedan', 'Sport', 'Sedan'],
              'make':['Honda', 'Honda', 'Honda', 'Toyota', 'Honda', 'Honda', 'Honda', 'Honda', 'Ford', 'Mazda', 'Honda', 'Ford', 'Ford', 'Ford', 'Ford', 'Chevrolet', 'Pontiac', 'Honda', 'Mazda', 'Chevrolet', 'Mazda', 'Pontiac', 'Mazda', 'Pontiac', 'Honda'],
              'sex':['Female', 'Male', 'Male', 'Male', 'Female', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Female', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male'],
              'fraud':[0, 0, 0, 0,1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0,1]})

# Columns to plot; each one gets its own axes.
fea = ['witness', 'category', 'make', 'sex']

# A 10x2 grid is allocated up front; only the first len(fea) axes are drawn on,
# the remaining ones are deleted further down.
fig, axes =plt.subplots(10,2,figsize=(10,40))
plt.subplots_adjust(wspace=0.9, hspace=0.9)
i=0
# Denominator for the percentages: the number of rows in `dt`.
total = len(dt['witness']) 

for fea_name in fea:
     # Fill the grid row by row: i//2 is the row, i%2 the column.
     ax= axes[i//2, i%2]
     i += 1
     sns.countplot(x=fea_name, data=dt, ax=ax,hue='fraud' )
     ax.set_title(f'Countplot of {fea_name}')
     ax.tick_params(axis='x', rotation=45)

# Remove the axes that were never drawn on.
for i in range(len(fea), 10 * 2):
    fig.delaxes(axes[i//2, i%2])
# Label every patch on every axes with its height as a percentage of `total`.
# A patch whose height is 0 is labelled too, and that label reads "0.0%".
# NOTE(review): ax.patches may contain zero-height rectangles that seaborn adds
# besides the visible bars - confirm against the installed seaborn version.
for ax in axes.flat:
    for p in ax.patches:
        percentage = '{:.1f}%'.format(100 * p.get_height() / total)
        # The label is anchored one third of the way across the bar, at its top.
        x = p.get_x() + p.get_width()/3
        y = p.get_y() + p.get_height()
        ax.annotate(percentage, (x, y))

plt.tight_layout()
plt.show()

The resulting bar charts look fine, but:

Why does it show 0.0%?


Solution

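  • The main problem is that you are calculating the percentage from the total number of records in the entire dataset, `total = len(dt['witness'])`. So you need to compute `total` inside the `for fea_name in fea:` loop and annotate the bars of each axes there. Note that `len(dt[fea_name])` equals `len(dt['witness'])` for every column, so the denominator itself stays the same; a `0.0%` label can only come from a patch whose height is 0.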

    # Draw one count plot per categorical feature, split by `fraud`, and label
    # each visible bar with its share of all rows.
    dt = pd.DataFrame({'witness':['No', 'No', 'No', 'No', 'No', 'No', 'No', 'Yes', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No'],
                  'category':['Sport', 'Sport', 'Sport', 'Sport', 'Sport', 'Sport', 'Sport', 'Sport', 'Utility', 'Sedan', 'Sport', 'Sport', 'Sport', 'Sport', 'Sedan', 'Sedan', 'Sport', 'Sport', 'Sport', 'Sedan', 'Sport', 'Sport', 'Sedan', 'Sport', 'Sedan'],
                  'make':['Honda', 'Honda', 'Honda', 'Toyota', 'Honda', 'Honda', 'Honda', 'Honda', 'Ford', 'Mazda', 'Honda', 'Ford', 'Ford', 'Ford', 'Ford', 'Chevrolet', 'Pontiac', 'Honda', 'Mazda', 'Chevrolet', 'Mazda', 'Pontiac', 'Mazda', 'Pontiac', 'Honda'],
                  'sex':['Female', 'Male', 'Male', 'Male', 'Female', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Female', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male', 'Male'],
                  'fraud':[0, 0, 0, 0,1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0,1]})
    
    # Columns to plot; each one gets its own axes.
    fea = ['witness', 'category', 'make', 'sex']
    
    # A 10x2 grid is allocated up front; axes that are not needed for `fea`
    # are deleted after the plotting loop.
    fig, axes = plt.subplots(10,2,figsize=(40,50))  
    plt.subplots_adjust(wspace=0, hspace=0.7) 
    
    for i, fea_name in enumerate(fea):
        # Fill the grid row by row: i//2 is the row, i%2 the column.
        ax = axes[i//2, i%2]
        sns.countplot(x=fea_name, data=dt, ax=ax, hue='fraud')
        ax.set_title(f'Countplot of {fea_name}')
        ax.tick_params(axis='x', rotation=45)
        # Denominator for the percentages: the number of rows in this column.
        total = len(dt[fea_name])
        for p in ax.patches:
            height = p.get_height()
            # Skip patches that have no visible bar. A height of 0 would be
            # labelled "0.0%" and a NaN height "nan%"; `not height > 0` is
            # true for both.
            # NOTE(review): zero-height patches appear to be extra rectangles
            # seaborn adds besides the bars - confirm for the installed version.
            if not height > 0:
                continue
            percentage = '{:.1f}%'.format(100 * height / total)
            # ha='center' centres the text on x, so x must be the bar's midpoint.
            x = p.get_x() + p.get_width() / 2
            ax.annotate(percentage, (x, height), ha='center')
    
    # Remove the axes that were never drawn on.
    for i in range(len(fea), 10 * 2):
        fig.delaxes(axes[i//2, i%2])
    
    plt.tight_layout()
    plt.show()
    
    

    If you want to hide every 0.0% label, even on a real bar whose value rounds to 0.0%, use:

    # Label each bar of `ax` with its share of `total`, leaving out every label
    # that would read 0.0%. `ax` and `total` must be defined by the surrounding
    # code.
    for p in ax.patches:
        height = p.get_height()
        percentage = '{:.1f}%'.format(100 * height / total)
        # Skip the patch instead of annotating an empty string, so no invisible
        # text artist is added. `not height > 0` also catches a NaN height,
        # which would otherwise be labelled "nan%".
        if not height > 0 or percentage == "0.0%":
            continue
        # ha='center' centres the text on x, so x must be the bar's midpoint.
        x = p.get_x() + p.get_width() / 2
        ax.annotate(percentage, (x, height), ha='center')