dic= {'distinct_id': {0: 1,
1: 2,
2: 3,
3: 4,
4: 5},
'first_name': {0: 'Joe',
1: 'Barry',
2: 'David',
3: 'Marcus',
4: 'Anthony'},
'activity': {0: 'Jump',
1: 'Jump',
2: 'Run',
3: 'Run',
4: 'Climb'},
'tasks_completed': {0: 3, 1: 3, 2: 3, 3: 3, 4: 1},
'tasks_available': {0: 3, 1: 3, 2: 3, 3: 3, 4: 3}}
tasks = pd.DataFrame(dic)
I'm trying to make every id/name pair have a row for every unique activity, for example I want "Joe" to have rows where the activity column is "Run" and "Climb", but I want him to have a 0 in the tasks_completed column (those rows not being present already means that he hasn't done these activity tasks). I have tried using df.iterrows() and making a list of the unique ids and activity names and checking to see if they're both present, but it didn't work. Any help is very appreciated!
This is what I am hoping to have:
1: 2,
2: 3,
3: 4,
4: 5,
5: 1,
6: 1,
7: 2,
8: 2,
9: 3,
10: 3,
11: 4,
12: 4,
13: 5,
14: 5},
'email': {0: 'Joe',
1: 'Barry',
2: 'David',
3: 'Marcus',
4: 'Anthony',
5: 'Joe',
6: 'Joe',
7: 'Barry',
8: 'Barry',
9: 'David',
10: 'David',
11: 'Marcus',
12: 'Marcus',
13: 'Anthony',
14: 'Anthony'},
'activity': {0: 'Jump',
1: 'Jump',
2: 'Run',
3: 'Run',
4: 'Climb',
5: 'Run',
6: 'Climb',
7: 'Run',
8: 'Climb',
9: 'Jump',
10: 'Climb',
11: 'Climb',
12: 'Jump',
13: 'Run',
14: 'Jump'},
'tasks_completed': {0: 3,
1: 3,
2: 3,
3: 3,
4: 1,
5: 0,
6: 0,
7: 0,
8: 0,
9: 0,
10: 0,
11: 0,
12: 0,
13: 0,
14: 0},
'tasks_available': {0: 3,
1: 3,
2: 3,
3: 3,
4: 3,
5: 3,
6: 3,
7: 3,
8: 3,
9: 3,
10: 3,
11: 3,
12: 3,
13: 3,
14: 3}}
pd.DataFrame(tasks_new)
idx_cols = ['distinct_id', 'first_name', 'activity']
tasks.set_index(idx_cols).unstack(fill_value=0).stack().reset_index()
distinct_id first_name activity tasks_completed tasks_available
0 1 Joe Climb 0 0
1 1 Joe Jump 3 3
2 1 Joe Run 0 0
3 2 Barry Climb 0 0
4 2 Barry Jump 3 3
5 2 Barry Run 0 0
6 3 David Climb 0 0
7 3 David Jump 0 0
8 3 David Run 3 3
9 4 Marcus Climb 0 0
10 4 Marcus Jump 0 0
11 4 Marcus Run 3 3
12 5 Anthony Climb 1 3
13 5 Anthony Jump 0 0
14 5 Anthony Run 0 0