I am using JupyterLab to print some data in a spreadsheet in a specific way.
I have two different files:
For every original_id == id I want to group by country and list the brands and summing and listing the holding for each brand.
The result I got with my code is this:
FundID Domicile (brand, AUM)
0 A1 IT (BBB, 10.0), UK (BBB, 7.0),
1 B2 CH (AAA, 12.0),
2 C3 DE (CCC, 5.0),
3 D4 CH (EEE, 9.0), UK (EEE, 11.0),
While, my objective is to get something like this:
The code is
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', 500)
df_fofs = pd.read_excel('SampleDF.xlsx')
df_extract = pd.read_excel('SampleID_ex.xlsx')
df_extract
original_id
0 A1
1 B2
2 C3
3 D4
df_fofs
brand country id holding
0 AAA UK A1 2000000
1 AAA CH B2 4000000
2 BBB UK A1 7000000
3 CCC DE C3 5000000
4 BBB IT A1 10000000
5 EEE UK D4 11000000
6 EEE CH D4 3000000
7 EEE CH D4 6000000
8 AAA CH B2 8000000
fund_ids = list(df_extract['original_id'])
result = {}
for fund in fund_ids:
temp = []
df_funds = df_fofs[(df_fofs['id'] == fund )][['country', 'brand', 'holding']]
domicile_fof = df_fofs[df_fofs['id'] == fund ][['country', 'holding']]
df_funds = df_funds.groupby(['country', 'brand'])["holding"].sum()
domicile_fof = domicile_fof.groupby('country')["holding"].sum()
s = ''
for i in range(len(df_funds)):
row = df_funds.reset_index().iloc[i]
if row['holding'] >= 5000000:
s += row['country'] + ' (' + str(row['brand']) + ', ' + str(round(((row['holding'])/1000000), 2)) + '), '
result[fund] = [s]
df_result = pd.DataFrame.from_dict(result, orient = 'index')
df_result.reset_index(inplace = True)
df_result.columns = ['FundID', 'Domicile (brand, AUM)']
df_result
FundID Domicile (brand, AUM)
0 A1 IT (BBB, 10.0), UK (BBB, 7.0),
1 B2 CH (AAA, 12.0),
2 C3 DE (CCC, 5.0),
3 D4 CH (EEE, 9.0), UK (EEE, 11.0),
You can combine tables on id, group by id and country to make inner items and then save grouping by id only to create the outer level
def f(x):
n = x.apply(lambda r: '{} ({})'.format(r['brand'],int(r['holding']/1000000)), axis=1)
return '{} [{}]'.format(x.iloc[0]['country'],', '.join(n))
df_extract.merge(df_fofs, left_on='original_id', right_on='id')
.groupby(['original_id','country']).apply(f) \
.groupby(level=0).apply(', '.join)
original_id
A1 IT [BBB (10)], UK [AAA (2), BBB (7)]
B2 CH [AAA (4), AAA (8)]
C3 DE [CCC (5)]
D4 CH [EEE (3), EEE (6)], UK [EEE (11)]
dtype: object