Search code examples
python python-3.x pandas dataframe permutation

How to plot the dataframes with Permutations?


I am not sure if my title is a precise title or not. If not, feel free to edit it!

Hi! I have three dataframes and want to draw scatter plots from them. Currently I produce three separate plots and don't know how to write this as one. To be more specific, in the code below the pairs (x[i], y[i]), (y[i], z[i]) and (x[i], z[i]) are the three choices, so I have to change the variables manually each time. Also, kind is a list containing three strings (kind = ["food", "meat", "vege"]): if kind1 and kind2 equal 0 and 1, the plot is for x and y; if they equal 0 and 2 respectively, it is for x and z; and if they are 1 and 2, it is for y and z.

Here are some dataframes you can try.

# Raw sample values, one mapping per kind.
food = {'col1': [1, 2], 'col2': [3, 4]}
meat = {'col1': [3, 6], 'col2': [1, 2]}
vege = {'col1': [5, 9], 'col2': [0, 3]}

# Wrap each mapping in a DataFrame (keys become the column names).
food_df = pd.DataFrame(food)
meat_df = pd.DataFrame(meat)
vege_df = pd.DataFrame(vege)

Here is my current code:

# One figure of size (15, 10) holding a single Axes that plot_scatter draws on.
fig, ax = plt.subplots(figsize=(15, 10))

# Axis-label names, feature names, and the text used to annotate each point.
kind = ["fooood", "meats", "vegea"]
features = ["Attack", "Volume"]
col = ["dd", "aa"]

# Positions in `kind` used for the x-axis and y-axis labels.
kind1, kind2 = 1, 2
def plot_scatter(feature):
    """Scatter one pair of kinds against each other for a single feature.

    The position of ``feature`` in the module-level ``features`` list selects
    the row read from ``food_df``, ``meat_df`` and ``vege_df``.  The module-level
    ``kind1`` and ``kind2`` pick which two of those rows are plotted (0 = food,
    1 = meat, 2 = vege), so the data always matches the axis labels taken from
    ``kind[kind1]`` and ``kind[kind2]``.  Points are drawn on the module-level
    ``ax`` and annotated with the entries of ``col``.

    Raises:
        ValueError: if ``feature`` is not in ``features``.
    """
    ind = features.index(feature)
    rows = (food_df.iloc[ind], meat_df.iloc[ind], vege_df.iloc[ind])
    # Select the plotted pair from kind1/kind2 instead of hard-coding y and z,
    # so changing the label indices also changes the data being drawn.
    xs, ys = rows[kind1], rows[kind2]

    # Each row is labelled by column name, so positions must go through
    # ``.iloc``; integer keys via ``[]`` rely on a deprecated pandas fallback.
    for i, _ in enumerate(np.unique(col)):
        ax.scatter(xs.iloc[i], ys.iloc[i], c="b")
    for i, txt in enumerate(col):
        px, py = xs.iloc[i], ys.iloc[i]
        # Nudge the text slightly away from the marker.
        ax.annotate(txt, (px, py), xytext=(px + 0.0001, py + 0.0001), size=12)

    ax.set_xlabel(kind[kind1], size=15)
    ax.set_ylabel(kind[kind2], size=15)
    ax.set_title(f"{feature}")

# Draw both features, in this order.
for feature in ("Attack", "Volume"):
    plot_scatter(feature)

I appreciate it!!!!!!!!


Solution

  • Edit:

    After reading the dependency of kind1 and kind2 on the choice of x, y and z, I think this is better suited:

    import pandas as pd
    import matplotlib.pyplot as plt
    import numpy as np
    
    # Raw sample values, one mapping per kind.
    food = {"col1": [1, 2], "col2": [3, 4]}
    meat = {"col1": [3, 6], "col2": [1, 2]}
    vege = {"col1": [5, 9], "col2": [0, 3]}

    # Wrap each mapping in a DataFrame (keys become the column names).
    food_df, meat_df, vege_df = (pd.DataFrame(raw) for raw in (food, meat, vege))

    # Axis-label names, feature names, and the text used to annotate each point.
    kind = ["fooood", "meats", "vegea"]
    features = ["Attack", "Volume"]
    col = ["dd", "aa"]
    
    
    def plot_scatter(feature):
        """Draw the three pairwise scatter plots for one feature.

        The position of ``feature`` in the module-level ``features`` list selects
        the row read from ``food_df``, ``meat_df`` and ``vege_df``.  A new figure
        with three side-by-side axes is created, one per pair of kinds
        (food/meat, food/vege, meat/vege).  Each point is annotated with the
        matching entry of ``col`` and the axes are labelled from ``kind``.

        Returns:
            The ``(fig, axes)`` pair produced by ``plt.subplots``.

        Raises:
            ValueError: if ``feature`` is not in ``features``.
        """
        ind = features.index(feature)
        x = food_df.iloc[ind]
        y = meat_df.iloc[ind]
        z = vege_df.iloc[ind]
        # (x-label index, y-label index, x data, y data) for each panel.
        combinations = ((0, 1, x, y), (0, 2, x, z), (1, 2, y, z))

        fig, axes = plt.subplots(1, 3, figsize=(15, 10))

        for ax, (kind1, kind2, col1, col2) in zip(axes, combinations):
            # A single call draws the whole row; repeating it once per label
            # only stacked identical markers on top of each other.
            ax.scatter(col1, col2, c="b")

            for i, txt in enumerate(col):
                # The row is labelled by column name, so positions must go
                # through ``.iloc``; integer keys via ``[]`` rely on a
                # deprecated pandas fallback.
                px, py = col1.iloc[i], col2.iloc[i]
                ax.annotate(
                    txt,
                    (px, py),
                    xytext=(px + 0.0001, py + 0.0001),
                    size=12,
                )

            ax.set_xlabel(kind[kind1], size=15)
            ax.set_ylabel(kind[kind2], size=15)
            ax.set_title(f"{feature}")

        return fig, axes
    
    
    # Build one figure per feature and keep the returned handles.
    fig_attack, axes_attack = plot_scatter("Attack")
    fig_volume, axes_volume = plot_scatter("Volume")
    # Display the figures first, then write each one to its own PNG file.
    plt.show()
    fig_attack.savefig("test_attack.png")
    fig_volume.savefig("test_volume.png")
    

    Result:

    test_volume.png test_attack

    Original answer

    IIUC, you can use itertools.combinations() to create combinations of the x, y and z and plot the data. I have combined both plots into one, which looks better IMO. However, you could separate them if needed.

    import pandas as pd
    import matplotlib.pyplot as plt
    import itertools
    import numpy as np
    
    # One figure with two side-by-side axes; plot_scatter indexes into `axes`.
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 10))

    # Raw sample values, one mapping per kind.
    food = {"col1": [1, 2], "col2": [3, 4]}
    meat = {"col1": [3, 6], "col2": [1, 2]}
    vege = {"col1": [5, 9], "col2": [0, 3]}

    # Wrap each mapping in a DataFrame (keys become the column names).
    food_df = pd.DataFrame(food)
    meat_df = pd.DataFrame(meat)
    vege_df = pd.DataFrame(vege)

    # Axis-label names, feature names, and the text used to annotate each point.
    kind = ["fooood", "meats", "vegea"]
    features = ["Attack", "Volume"]
    col = ["dd", "aa"]

    # Positions in `kind` used for the x-axis and y-axis labels.
    kind1, kind2 = 1, 2
    
    
    def plot_scatter(feature):
        """Overlay every pairwise scatter of the three kinds for one feature.

        The position of ``feature`` in the module-level ``features`` list selects
        both the row read from ``food_df``, ``meat_df`` and ``vege_df`` and the
        entry of the module-level ``axes`` that is drawn on.  All two-element
        combinations of the three rows are plotted on that one axes, and each
        point is annotated with the matching entry of ``col``.

        NOTE: the axis labels come from the module-level ``kind1``/``kind2`` and
        therefore name only one of the three overlaid pairs.

        Raises:
            ValueError: if ``feature`` is not in ``features``.
        """
        ind = features.index(feature)
        x = food_df.iloc[ind]
        y = meat_df.iloc[ind]
        z = vege_df.iloc[ind]

        ax = axes[ind]

        for col1, col2 in itertools.combinations((x, y, z), 2):
            # A single call draws the whole row; repeating it once per label
            # only stacked identical markers on top of each other.
            ax.scatter(col1, col2, c="b")
            for i, txt in enumerate(col):
                # The row is labelled by column name, so positions must go
                # through ``.iloc``; integer keys via ``[]`` rely on a
                # deprecated pandas fallback.
                px, py = col1.iloc[i], col2.iloc[i]
                ax.annotate(
                    txt,
                    (px, py),
                    xytext=(px + 0.0001, py + 0.0001),
                    size=12,
                )

        ax.set_xlabel(kind[kind1], size=15)
        ax.set_ylabel(kind[kind2], size=15)
        ax.set_title(f"{feature}")
    
    
    # Draw each feature; plot_scatter picks the axes from the feature's position.
    plot_scatter("Attack")
    plot_scatter("Volume")
    # Display the combined figure first, then write it to a PNG file.
    plt.show()
    fig.savefig("test.png")
    

    Result: resultant plot