I am trying to return the filed in the dataframe that is of type datetime and than replace the field name with "date" in order to split the datetime into year and months.
when I run the code it crash and display the below error:
df = df.rename(columns={converteddate[0]: 'date'})
UnboundLocalError: local variable 'converteddate' referenced before assignment
import pandas as pd
df = pd.DataFrame({'event_type': ['watch movie ', 'stay at home', 'swimming','camping','meeting'],
'date': ['8/11/2020', '2/13/2020', '7/04/2020','1/22/2020','7/28/2020'],
'event_mohafaza':['loc1','loc3','loc2','loc5','loc4'],
' number_person ':[24,39,20,10,33],})
non_numeric_cols = [col for col, col_type in df.dtypes.iteritems() if col_type == 'object']
if len(non_numeric_cols) > 0:
mask = df.astype(str).apply(lambda x : x.str.match('[0-3]?[0-9]-[0-3]?[0-9]-(?:[0-9]{2})?[0-9]{2}$').any())
if mask.any() == True:
df.loc[:,mask] = df.loc[:,mask].apply(pd.to_datetime,dayfirst=False)
converteddate = [col for col in df.columns if df[col].dtype == 'datetime64[ns]']
df = df.rename(columns={converteddate[0]: 'date'})
if "date" in df.columns:
df['year_month'] = df['date'].map(lambda x: x.strftime('%Y/%m'))
If you modify your regex the match works:
import pandas as pd
df = pd.DataFrame({'event_type': ['watch movie ', 'stay at home', 'swimming','camping','meeting'],
'date': ['8/11/2020', '2/13/2020', '7/04/2020','1/22/2020','7/28/2020'],
'event_mohafaza':['loc1','loc3','loc2','loc5','loc4'],
' number_person ':[24,39,20,10,33],})
print(df)
non_numeric_cols = [col for col, col_type in df.dtypes.iteritems() if col_type == 'object']
if len(non_numeric_cols) > 0:
#mask = df.astype(str).apply(lambda x : x.str.match('[0-3]?[0-9]-[0-3]?[0-9]-(?:[0-9]{2})?[0-9]{2}$').any())
mask = df.astype(str).apply(lambda x : x.str.match('^([1-9]|1[0-9]|2[0-9]|3[0-1])(.|-|/)([1-9]|1[0-2])(.|-|/)20[0-9][0-9]$').any())
if mask.any() == True:
df.loc[:,mask] = df.loc[:,mask].apply(pd.to_datetime,dayfirst=False)
converteddate = [col for col in df.columns if df[col].dtype == 'datetime64[ns]']
df = df.rename(columns={converteddate[0]: 'date'})
if "date" in df.columns:
df['year_month'] = df['date'].map(lambda x: x.strftime('%Y/%m'))
print(df)
which produces
event_type date event_mohafaza number_person
0 watch movie 8/11/2020 loc1 24
1 stay at home 2/13/2020 loc3 39
2 swimming 7/04/2020 loc2 20
3 camping 1/22/2020 loc5 10
4 meeting 7/28/2020 loc4 33
event_type date event_mohafaza number_person year_month
0 watch movie 2020-08-11 loc1 24 2020/08
1 stay at home 2020-02-13 loc3 39 2020/02
2 swimming 2020-07-04 loc2 20 2020/07
3 camping 2020-01-22 loc5 10 2020/01
4 meeting 2020-07-28 loc4 33 2020/07
You are not replacing the 'date'-column but rather adding a 'year_month' column (I left that as is).