I have been scratching my head for days about this problem. Please, find below the structure of my input data and the output that I want. I color-coded per ID, Plot, Survey, Trial and the 3 estimation methods. In the output, I want to get all the scorings for each group, which are represented by color, on the same row. By doing that, we should get rid of the Estimation Method column in the output. I kept it for sake of clarity.
This is my code. Thank you in advance for your time.
import pandas as pd
import functools
data_dict = {'ID': {0: 'id1',
1: 'id1',
2: 'id1',
3: 'id1',
4: 'id1',
5: 'id1',
6: 'id1',
7: 'id1',
8: 'id1',
9: 'id1',
10: 'id1',
11: 'id1',
12: 'id1',
13: 'id1',
14: 'id1',
15: 'id1',
16: 'id1',
17: 'id1',
18: 'id1',
19: 'id1',
20: 'id1',
21: 'id1',
22: 'id1',
23: 'id1'},
'Plot': {0: 'p1',
1: 'p1',
2: 'p1',
3: 'p1',
4: 'p1',
5: 'p1',
6: 'p1',
7: 'p1',
8: 'p1',
9: 'p1',
10: 'p1',
11: 'p1',
12: 'p1',
13: 'p1',
14: 'p1',
15: 'p1',
16: 'p1',
17: 'p1',
18: 'p1',
19: 'p1',
20: 'p1',
21: 'p1',
22: 'p1',
23: 'p1'},
'Survey': {0: 'Sv1',
1: 'Sv1',
2: 'Sv1',
3: 'Sv1',
4: 'Sv1',
5: 'Sv1',
6: 'Sv2',
7: 'Sv2',
8: 'Sv2',
9: 'Sv2',
10: 'Sv2',
11: 'Sv2',
12: 'Sv1',
13: 'Sv1',
14: 'Sv1',
15: 'Sv1',
16: 'Sv1',
17: 'Sv1',
18: 'Sv2',
19: 'Sv2',
20: 'Sv2',
21: 'Sv2',
22: 'Sv2',
23: 'Sv2'},
'Trial': {0: 't1',
1: 't1',
2: 't1',
3: 't2',
4: 't2',
5: 't2',
6: 't1',
7: 't1',
8: 't1',
9: 't2',
10: 't2',
11: 't2',
12: 't1',
13: 't1',
14: 't1',
15: 't2',
16: 't2',
17: 't2',
18: 't1',
19: 't1',
20: 't1',
21: 't2',
22: 't2',
23: 't2'},
'Mission': {0: 'mission1',
1: 'mission1',
2: 'mission1',
3: 'mission1',
4: 'mission1',
5: 'mission1',
6: 'mission1',
7: 'mission1',
8: 'mission1',
9: 'mission1',
10: 'mission1',
11: 'mission2',
12: 'mission2',
13: 'mission2',
14: 'mission2',
15: 'mission2',
16: 'mission2',
17: 'mission2',
18: 'mission2',
19: 'mission2',
20: 'mission2',
21: 'mission2',
22: 'mission2',
23: 'mission2'},
'Estimation Method': {0: 'MCARI2',
1: 'NDVI',
2: 'NDRE',
3: 'MCARI2',
4: 'NDVI',
5: 'NDRE',
6: 'MCARI2',
7: 'NDVI',
8: 'NDRE',
9: 'MCARI2',
10: 'NDVI',
11: 'NDRE',
12: 'MCARI2',
13: 'NDVI',
14: 'NDRE',
15: 'MCARI2',
16: 'NDVI',
17: 'NDRE',
18: 'MCARI2',
19: 'NDVI',
20: 'NDRE',
21: 'MCARI2',
22: 'NDVI',
23: 'NDRE'},
'MCARI2_sd': {0: 1.5,
1: np.nan,
2: np.nan,
3: 10.0,
4: np.nan,
5: np.nan,
6: 1.5,
7: np.nan,
8: np.nan,
9: 10.0,
10: np.nan,
11: np.nan,
12: 101.0,
13: np.nan,
14: np.nan,
15: 23.5,
16: np.nan,
17: np.nan,
18: 111.0,
19: np.nan,
20: np.nan,
21: 72.0,
22: np.nan,
23: np.nan},
'MACRI2_50': {0: 12.4,
1: np.nan,
2: np.nan,
3: 11.0,
4: np.nan,
5: np.nan,
6: 12.4,
7: np.nan,
8: np.nan,
9: 11.0,
10: np.nan,
11: np.nan,
12: 102.0,
13: np.nan,
14: np.nan,
15: 2.1,
16: np.nan,
17: np.nan,
18: 112.0,
19: np.nan,
20: np.nan,
21: 74.0,
22: np.nan,
23: np.nan},
'MACRI2_AVG': {0: 15.0,
1: np.nan,
2: np.nan,
3: 12.0,
4: np.nan,
5: np.nan,
6: 15.0,
7: np.nan,
8: np.nan,
9: 12.0,
10: np.nan,
11: np.nan,
12: 103.0,
13: np.nan,
14: np.nan,
15: 24.0,
16: np.nan,
17: np.nan,
18: 113.0,
19: np.nan,
20: np.nan,
21: 77.0,
22: np.nan,
23: np.nan},
'NDVI_sd': {0: np.nan,
1: 2.9,
2: np.nan,
3: np.nan,
4: 20.0,
5: np.nan,
6: np.nan,
7: 2.9,
8: np.nan,
9: np.nan,
10: 20.0,
11: np.nan,
12: np.nan,
13: 201.0,
14: np.nan,
15: np.nan,
16: 11.0,
17: np.nan,
18: np.nan,
19: 200.0,
20: np.nan,
21: np.nan,
22: 32.0,
23: np.nan},
'NDVI_50': {0: np.nan,
1: 21.0,
2: np.nan,
3: np.nan,
4: 21.0,
5: np.nan,
6: np.nan,
7: 21.0,
8: np.nan,
9: np.nan,
10: 21.0,
11: np.nan,
12: np.nan,
13: 201.0,
14: np.nan,
15: np.nan,
16: 12.0,
17: np.nan,
18: np.nan,
19: 300.0,
20: np.nan,
21: np.nan,
22: 39.0,
23: np.nan},
'NDVI_AVG': {0: np.nan,
1: 27.0,
2: np.nan,
3: np.nan,
4: 22.0,
5: np.nan,
6: np.nan,
7: 27.0,
8: np.nan,
9: np.nan,
10: 22.0,
11: np.nan,
12: np.nan,
13: 203.0,
14: np.nan,
15: np.nan,
16: 13.0,
17: np.nan,
18: np.nan,
19: 400.0,
20: np.nan,
21: np.nan,
22: 40.0,
23: np.nan},
'NDRE_sd': {0: np.nan,
1: np.nan,
2: 3.1,
3: np.nan,
4: np.nan,
5: 31.0,
6: np.nan,
7: np.nan,
8: 3.1,
9: np.nan,
10: np.nan,
11: 31.0,
12: np.nan,
13: np.nan,
14: 301.0,
15: np.nan,
16: np.nan,
17: 15.0,
18: np.nan,
19: np.nan,
20: 57.0,
21: np.nan,
22: np.nan,
23: 21.0},
'NDRE_50': {0: np.nan,
1: np.nan,
2: 33.0,
3: np.nan,
4: np.nan,
5: 32.0,
6: np.nan,
7: np.nan,
8: 33.0,
9: np.nan,
10: np.nan,
11: 32.0,
12: np.nan,
13: np.nan,
14: 302.0,
15: np.nan,
16: np.nan,
17: 16.0,
18: np.nan,
19: np.nan,
20: 58.0,
21: np.nan,
22: np.nan,
23: 22.0},
'NDRE_AVG': {0: np.nan,
1: np.nan,
2: 330.0,
3: np.nan,
4: np.nan,
5: 33.0,
6: np.nan,
7: np.nan,
8: 330.0,
9: np.nan,
10: np.nan,
11: 33.0,
12: np.nan,
13: np.nan,
14: 303.0,
15: np.nan,
16: np.nan,
17: 17.0,
18: np.nan,
19: np.nan,
20: 59.0,
21: np.nan,
22: np.nan,
23: 32.0}}
df_test = pd.DataFrame(data_dict)
def generate_data_per_EM(df):
data_survey = []
for (survey,mission,trial,em),data in df.groupby(['Survey','Mission','Trial','Estimation Method']):
df_em = data.set_index('ID').dropna(axis=1)
df_em.to_csv(f'tmp_data_{survey}_{mission}_{trial}_{em}.csv') #This generates 74 files, but not sure how to join/merge them
data_survey.append(df_em)
#Merge the df_em column-wise
df_final = functools.reduce(lambda left, right: pd.merge(left, right, on=['ID','Survey','Mission','Trial']), data_survey)
df_final.to_csv(f'final_{survey}_{mission}_{em}.csv') #Output is not what I expected
generate_data_per_EM(df_test)
You need a groupby:
(df_test
.groupby(['ID', 'Plot', 'Survey', 'Trial','Mission'], as_index=False, sort=False)
.first(numeric_only=True)
ID Plot Survey Trial Mission MCARI2_sd MACRI2_50 MACRI2_AVG NDVI_sd NDVI_50 NDVI_AVG NDRE_sd NDRE_50 NDRE_AVG
0 id1 p1 Sv1 t1 mission1 1.5 12.4 15.0 2.9 21.0 27.0 3.1 33.0 330.0
1 id1 p1 Sv1 t2 mission1 10.0 11.0 12.0 20.0 21.0 22.0 31.0 32.0 33.0
2 id1 p1 Sv2 t1 mission1 1.5 12.4 15.0 2.9 21.0 27.0 3.1 33.0 330.0
3 id1 p1 Sv2 t2 mission1 10.0 11.0 12.0 20.0 21.0 22.0 NaN NaN NaN
4 id1 p1 Sv2 t2 mission2 72.0 74.0 77.0 32.0 39.0 40.0 31.0 32.0 33.0
5 id1 p1 Sv1 t1 mission2 101.0 102.0 103.0 201.0 201.0 203.0 301.0 302.0 303.0
6 id1 p1 Sv1 t2 mission2 23.5 2.1 24.0 11.0 12.0 13.0 15.0 16.0 17.0
7 id1 p1 Sv2 t1 mission2 111.0 112.0 113.0 200.0 300.0 400.0 57.0 58.0 59.0