I have a list of employee punch date:
that needs to be formatted into a different table LIKE:
each new row has to be a new date. I have gotten started with variables, however I am failing at iterating through the columns to get the IN and OUT times. A little guidance would be appreciate for a beginner.
Code:
import pandas as pd
file = pd.read_csv("sample_file.csv")
# save unique employee in array
employeeID = file['EMP ID'].unique()
dates = file['PUNCH DATE'].unique()
punchTimes = []
# print(employeeID)
# print(dates)
# print(file)
for employeeID, dates in file:
Thank you!
import pandas as pd
data = [
['MCDONALD', 123987, 'IN', '4/1/22', '3:56:00 AM'],
['MCDONALD', 123987, 'OUT', '4/1/22', '##########'],
['MCDONALD', 123987, 'IN', '4/1/22', '3:54:00 PM'],
['MCDONALD', 123987, 'OUT', '4/1/22', '8:02:00 PM'],
['MCDONALD', 123987, 'IN', '4/2/22', '3:57:00 AM'],
['MCDONALD', 123987, 'OUT', '4/2/22', '##########'],
['MCDONALD', 123987, 'IN', '4/2/22', '3:56:00 PM'],
['MCDONALD', 123987, 'OUT', '4/2/22', '8:01:00 PM'],
['MCDONALD', 123987, 'IN', '4/3/22', '3:55:00 AM'],
['MCDONALD', 123987, 'OUT', '4/3/22', '##########'],
['MCDONALD', 123987, 'IN', '4/3/22', '3:57:00 PM'],
['MCDONALD', 123987, 'OUT', '4/3/22', '8:00:00 PM']]
pks = ['EMP NAME','EMP ID','PUNCH DATE']
cols = ['EMP NAME', 'EMP ID', 'PUNCH TYPE', 'PUNCH DATE', 'PUNCH TIME']
df = pd.DataFrame(data)
df.columns = cols
def merge_dfs(left,right):
df = pd.merge(left,right,how='outer',on=pks)
return df
left = df.loc[df['PUNCH TYPE']=='IN']
l1 = left.drop_duplicates(subset=pks, keep='first')
l2 = left.drop_duplicates(subset=pks, keep='last')
right = df.loc[df['PUNCH TYPE']=='OUT']
r1 = right.drop_duplicates(subset=pks, keep='first')
r2 = right.drop_duplicates(subset=pks, keep='last')
tmp1 = merge_dfs(l1,r1)
tmp2 = merge_dfs(l2,r2)
final = merge_dfs(tmp1,tmp2)
EMP NAME EMP ID PUNCH TYPE_x_x PUNCH DATE PUNCH TIME_x_x PUNCH TYPE_y_x PUNCH TIME_y_x PUNCH TYPE_x_y PUNCH TIME_x_y PUNCH TYPE_y_y PUNCH TIME_y_y
0 MCDONALD 123987 IN 4/1/22 3:56:00 AM OUT ########## IN 3:54:00 PM OUT 8:02:00 PM
1 MCDONALD 123987 IN 4/2/22 3:57:00 AM OUT ########## IN 3:56:00 PM OUT 8:01:00 PM
2 MCDONALD 123987 IN 4/3/22 3:55:00 AM OUT ########## IN 3:57:00 PM OUT 8:00:00 PM
def funk(x):
# do something
pass
df.colum_name.apply(lambda x: funk(x))