Search code examples
python pandas dataframe idx

DataFrame column values in specific format


Here is my script:

import pandas as pd

# Sample frame: one list of three values per column.
data = {
    'Col1': [132, 148, 149],
    'Col2': [232, 248, 249],
    'Col3': [312, 308, 309],
    'Col4': [1500, 1550, 1600],
    'Col5': [530, 590, 568],
}
df = pd.DataFrame(data)
print(df)

def lrgst(df, cols, n):
    """Format the ``n`` largest values of every column listed in ``cols``.

    Each value is rendered as ``<Col4 value>[<value>]``, where the Col4 value
    is taken from the same row. Entries are joined with ``', '``, column by
    column in the order given by ``cols``.

    Args:
        df: DataFrame containing ``'Col4'`` and every column in ``cols``.
        cols: List of column names to scan.
        n: Number of largest values to report per column.

    Returns:
        The formatted string; empty when ``cols`` is empty or ``n`` is 0.
    """
    parts = []
    for _, col in df.set_index('Col4')[cols].items():
        top = col.nlargest(n)
        parts.extend(f'{a}[{b}]' for a, b in zip(top.index, top))
    # Join only after all columns were visited; returning from inside the
    # loop would silently drop every column after the first.
    return ', '.join(parts)
print(f"{lrgst(df, ['Col1'], 2)}") 

It produces this output:

1600[149], 1550[148]

Now I want to create another function that also fetches the value of another column (say Col3) from the same row in which each Col1 value was found, and places it inside the brackets of the output above. My output should look like this:

1600[149,309], 1550[148,308]

I want my new function to look something like this:

def lrgst(df, cols, sp_col, n):
    # Sketch of the desired function, not working code: `idx` is never
    # defined, and the `sp_col` parameter itself is never read
    # (`idx.sp_col` is an attribute lookup on `idx`).
    for key, col in df.set_index('Col4')[cols].items():
        # n largest values of this column, labelled by their Col4 value.
        x = col.nlargest(n)
        # Intended format: <Col4>[<value>,<value of sp_col on the same row>]
        J=', '.join(f'{a}[{b},{c}]' for a,b,c in zip(x.index, x, idx.sp_col))
        # Returns during the first iteration, so only the first column in
        # `cols` is handled.
        return J
print(f"{lrgst(df, ['Col1'], ['Col3'], 2)}")

Could anyone please help me with this?


Solution

  • Add these two lines inside the loop, replacing the existing `J = ...` line:

    # Fetch the companion column(s) for the Col4 labels of the n largest values.
    # NOTE: .loc selects by label, so this relies on the Col4 values being unique.
    sp_col_values = df.set_index('Col4')[sp_cols].loc[x.index]
    # Only the first companion column (iloc[:, 0]) ends up in the output.
    J = ', '.join(f'{a}[{b},{c}]' for a, b, c in zip(x.index, x, sp_col_values.iloc[:, 0]))
    

    Full function:

    def lrgst(df, cols, sp_cols, n):
        """Return one formatted string per column listed in ``cols``.

        For each column the ``n`` largest values are rendered as
        ``<Col4>[<value>,<companion>]`` and joined with ``', '``. The
        companion is the value of the first column in ``sp_cols`` on the
        same row.

        Args:
            df: DataFrame containing ``'Col4'``, ``cols`` and ``sp_cols``.
            cols: List of column names to scan.
            sp_cols: List of companion column names; only the first is used.
            n: Number of largest values to report per column.

        Returns:
            A list with one formatted string per entry of ``cols``.
        """
        results = []
        for col in cols:
            # Pick whole rows so the Col4 label, the value and the companion
            # always come from the same row. A label lookup via
            # .loc[x.index] pairs the wrong values when Col4 has repeats.
            top = df.nlargest(n, col)
            results.append(', '.join(
                f'{a}[{b},{c}]'
                for a, b, c in zip(top['Col4'], top[col], top[sp_cols[0]])
            ))
        return results
    
    print(', '.join(lrgst(df, ['Col1'], ['Col3'], 2)))
    

    Output:

    1600[149,309], 1550[148,308]
    

    EDIT:

    To call the function the same way you did in your post, i.e. print(f"{lrgst(df, ['Col1'], ['Col3'], 2)}"), return a single string instead of a list:

    New function:

    def lrgst(df, cols, sp_cols, n):
        """Format the ``n`` largest values of each column as one string.

        Every value is rendered as ``<Col4>[<value>,<companion>]``, where
        the companion is the value of the first column in ``sp_cols`` on the
        same row. Entries within a column, and the per-column results, are
        joined with ``', '``.

        Args:
            df: DataFrame containing ``'Col4'``, ``cols`` and ``sp_cols``.
            cols: List of column names to scan.
            sp_cols: List of companion column names; only the first is used.
            n: Number of largest values to report per column.

        Returns:
            The formatted string.
        """
        result = []
        for col in cols:
            # Pick whole rows so the Col4 label, the value and the companion
            # always come from the same row. A label lookup via
            # .loc[x.index] pairs the wrong values when Col4 has repeats.
            top = df.nlargest(n, col)
            result.append(', '.join(
                f'{a}[{b},{c}]'
                for a, b, c in zip(top['Col4'], top[col], top[sp_cols[0]])
            ))
        return ', '.join(result)
    
    print(f"{lrgst(df, ['Col1'], ['Col3'], 2)}")
    

    Another way:

    def lrgst(df, cols, sp_cols, n):
        """Format the ``n`` largest values of each column as one string.

        Single-expression variant: every value is rendered as
        ``<Col4>[<value>,<companion>]`` and all entries are joined with
        ``', '``. The companion is the value of the first column in
        ``sp_cols`` on the same row.
        """
        return ', '.join(
            f'{a}[{b},{c}]'
            for col in cols
            # One row selection per column: the n largest rows are computed
            # once, and label, value and companion are read from the same
            # row, so repeated Col4 values cannot misalign them.
            for a, b, c in (
                df.nlargest(n, col)[['Col4', col, sp_cols[0]]]
                .itertuples(index=False, name=None)
            )
        )
    
    print(f"{lrgst(df, ['Col1'], ['Col3'], 2)}")