I have a datarame with Id of orders, Id Client,Date_order and some metrics (not to much important) I want to get number of last ID order of Client for all rows
I tried this one:
data=pd.DataFrame({'ID': [ 133853.0,155755.0,149331.0,337270.0,
775727.0,200868.0,138453.0,738497.0,666802.0,697070.0,128148.0,1042225.0,
303441.0,940515.0,143548.0],
'CLIENT':[ 235632.0,231562.0,235632.0,231562.0,734243.0,
235632.0,235632.0,734243.0,231562.0,734243.0,235632.0,734243.0,231562.0,
734243.0,235632.0],
'DATE_START': [ ('2017-09-01 00:00:00'),
('2017-10-05 00:00:00'),('2017-09-26 00:00:00'),
('2018-03-23 00:00:00'),('2018-12-21 00:00:00'),
('2017-11-23 00:00:00'),('2017-09-08 00:00:00'),
('2018-12-12 00:00:00'),('2018-11-21 00:00:00'),
('2018-12-01 00:00:00'),('2017-08-22 00:00:00'),
('2019-02-06 00:00:00'),('2018-02-20 00:00:00'),
('2019-01-20 00:00:00'),('2017-09-17 00:00:00')]})
data.groupby('CLIENT').apply(lambda x:max(x['ID']))
It takes into account all the IDs and displays only three rows of Client and max ID, but I need to look only among the previous ones for all rows DataFrame. Help please)
data['id_last_order']= data.sort_values('DATE_START').groupby('CLIENT')['ID'].transform(lambda x: x.shift())
or with creation function
def select_last_order_id(row):
df = data[(data['CLIENT']==row['CLIENT'])&(data['DATE_START']<row['DATE_START'])]
try:
value = df.groupby(by=['ID','CLIENT'],as_index=False,sort = False).agg('max')['ID'].values[0]
except Exception:
value = None
return(value)
data['id_last_order'] = data.apply(select_last_order_id,axis=1)