I have a linear interpolation problem with nans in my data. I have instantaneous measurements that I want to resample from 6 min intervals to 5 min intervals.
df = pd.DataFrame(zip(['10:00','10:06','10:12','10:18','10:24'],
[1, 2, 3, 0.5, 2.5], [0, np.nan, 5, 2.5, 10]),
columns=['date','column_a','column_b'])
df['date'] = pd.to_datetime(df['date'], infer_datetime_format=True)
df = df.set_index('date')
print(df)
column_a column_b
date
2023-07-19 10:00:00 1.0 0.0
2023-07-19 10:06:00 2.0 NaN
2023-07-19 10:12:00 3.0 5.0
2023-07-19 10:18:00 0.5 2.5
2023-07-19 10:24:00 2.5 10.0
I used this code but at 10:05 there is supposed to be nan instead of value. Thanks for helping.
print(df.resample('1Min').interpolate(method='linear', limit=5).resample('5Min').asfreq())
column_a column_b
date
2023-07-19 10:00:00 1.000000 0.000000
2023-07-19 10:05:00 1.833333 2.083333 <--- here should be nan
2023-07-19 10:10:00 2.666667 NaN
2023-07-19 10:15:00 1.750000 3.750000
2023-07-19 10:20:00 1.166667 5.000000
Here df_6min
is df
df_5min = []
# create list of 5 minute values in an hour
list_min5 = list(range(0, 60, 5))
# for each column get pandas data series of 5 min interval resampled with linear interpolation
for column in df_6min.columns:
ds_5min = df_6min[column].resample('1Min').interpolate(method='linear').resample('5Min').asfreq()
# for each index of 6 min interval check if there is nan
for i in df_6min.index:
if np.isnan(df_6min.loc[i, column]) == True:
# if yes replace upper and lower index of 5 min by nan
for j in range(len(list_min5)):
if list_min5[j-1] < i.minute < list_min5[j]:
ds_5min.loc[i.replace(minute=list_min5[j-1])] = np.nan
ds_5min.loc[i.replace(minute=list_min5[j])] = np.nan
# if index is 0 minute or 30 minute, get j+1 index instead of j
if list_min5[j-1] == i.minute:
ds_5min.loc[i.replace(minute=list_min5[j-1])] = np.nan
ds_5min.loc[i.replace(minute=list_min5[j+1])] = np.nan
df_5min.append(ds_5min)
df_5min = pd.concat(df_5min, axis=1)