Here I'm adding time to data where only date is given. There are 5 minutes between values or 288 values per date.
The code works when the input dataframe is 1 day (288 rows) or less, but gives an error when the input is longer. Any idea what I'm missing? Thanks in advance.
Relevant section of code:
import datetime as dt
print("Print df_raw:\n", df_raw)
df = df_raw[:288]
# df = df_raw[:289] # Gives KeyError, see traceback below
print("\nPrint df BEFORE groubpy/apply:\n", df)
df.loc[:,'date'] = pd.to_datetime(df.date)
def f(x):
x['DT']=[val+dt.timedelta(minutes=(pos*5)) for val,pos in zip(x.loc[:,'date'], range(0,len(x.loc[:,'date'])))]
return x
df = df.groupby('date').apply(f)
df = df.set_index('DT').drop(columns='date')
print("\nPrint df AFTER groubpy/apply:\n", df)
Output (with 288 rows or less, works as expected):
Print df_raw:
date values
0 2015-03-10 556.25
0 2015-03-10 516.993
0 2015-03-10 468.75
0 2015-03-10 432.812
0 2015-03-10 87.1095
.. ... ...
84 2014-12-16 None
84 2014-12-16 None
84 2014-12-16 160.938
84 2014-12-16 145.118
84 2014-12-16 125.977
[24480 rows x 2 columns]
Print df BEFORE groubpy/apply:
date values
0 2015-03-10 556.25
0 2015-03-10 516.993
0 2015-03-10 468.75
0 2015-03-10 432.812
0 2015-03-10 87.1095
.. ... ...
0 2015-03-10 781.446
0 2015-03-10 743.36
0 2015-03-10 708.985
0 2015-03-10 669.922
0 2015-03-10 632.422
[288 rows x 2 columns]
Print df AFTER groubpy/apply:
values
DT
2015-03-10 00:00:00 556.25
2015-03-10 00:05:00 516.993
2015-03-10 00:10:00 468.75
2015-03-10 00:15:00 432.812
2015-03-10 00:20:00 87.1095
... ...
2015-03-10 23:35:00 781.446
2015-03-10 23:40:00 743.36
2015-03-10 23:45:00 708.985
2015-03-10 23:50:00 669.922
2015-03-10 23:55:00 632.422
[288 rows x 1 columns]
Traceback (with 289 rows or more, I get this):
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\groupby\groupby.py in apply(self, func, *args, **kwargs)
724 try:
--> 725 result = self._python_apply_general(f)
726 except Exception:
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\groupby\groupby.py in _python_apply_general(self, f)
744 return self._wrap_applied_output(
--> 745 keys, values, not_indexed_same=mutated or self.mutated
746 )
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\groupby\generic.py in _wrap_applied_output(self, keys, values, not_indexed_same)
371 elif isinstance(v, DataFrame):
--> 372 return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
373 elif self.grouper.groupings is not None:
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\groupby\groupby.py in _concat_objects(self, keys, values, not_indexed_same)
954 else:
--> 955 result = result.reindex(ax, axis=self.axis)
956
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
220 def wrapper(*args, **kwargs):
--> 221 return func(*args, **kwargs)
222
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\frame.py in reindex(self, *args, **kwargs)
3975 kwargs.pop("labels", None)
-> 3976 return super().reindex(**kwargs)
3977
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\generic.py in reindex(self, *args, **kwargs)
4513 return self._reindex_axes(
-> 4514 axes, level, limit, tolerance, method, fill_value, copy
4515 ).__finalize__(self)
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\frame.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
3863 frame = frame._reindex_index(
-> 3864 index, method, copy, level, fill_value, limit, tolerance
3865 )
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\frame.py in _reindex_index(self, new_index, method, copy, level, fill_value, limit, tolerance)
3885 fill_value=fill_value,
-> 3886 allow_dups=False,
3887 )
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\generic.py in _reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
4576 allow_dups=allow_dups,
-> 4577 copy=copy,
4578 )
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\internals\managers.py in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy)
1250 if not allow_dups:
-> 1251 self.axes[axis]._can_reindex(indexer)
1252
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\indexes\base.py in _can_reindex(self, indexer)
3361 if not self.is_unique and len(indexer):
-> 3362 raise ValueError("cannot reindex from a duplicate axis")
3363
ValueError: cannot reindex from a duplicate axis
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2896 try:
-> 2897 return self._engine.get_loc(key)
2898 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'date'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-207-872197908aef> in <module>
36 return x
37
---> 38 df = df.groupby('date').apply(f)
39 df = df.set_index('DT').drop(columns='date')
40
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\groupby\groupby.py in apply(self, func, *args, **kwargs)
735
736 with _group_selection_context(self):
--> 737 return self._python_apply_general(f)
738
739 return result
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\groupby\groupby.py in _python_apply_general(self, f)
740
741 def _python_apply_general(self, f):
--> 742 keys, values, mutated = self.grouper.apply(f, self._selected_obj, self.axis)
743
744 return self._wrap_applied_output(
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\groupby\ops.py in apply(self, f, data, axis)
235 # group might be modified
236 group_axes = _get_axes(group)
--> 237 res = f(group)
238 if not _is_indexed_like(res, group_axes):
239 mutated = True
<ipython-input-207-872197908aef> in f(x)
33
34 def f(x):
---> 35 x['DT']=[val+dt.timedelta(minutes=(pos*5)) for val,pos in zip(x.loc[:,'date'], range(0,len(x.loc[:,'date'])))]
36 return x
37
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
1416 except (KeyError, IndexError, AttributeError):
1417 pass
-> 1418 return self._getitem_tuple(key)
1419 else:
1420 # we by definition only have the 0th axis
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\indexing.py in _getitem_tuple(self, tup)
803 def _getitem_tuple(self, tup):
804 try:
--> 805 return self._getitem_lowerdim(tup)
806 except IndexingError:
807 pass
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\indexing.py in _getitem_lowerdim(self, tup)
927 for i, key in enumerate(tup):
928 if is_label_like(key) or isinstance(key, tuple):
--> 929 section = self._getitem_axis(key, axis=i)
930
931 # we have yielded a scalar ?
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\indexing.py in _getitem_axis(self, key, axis)
1848 # fall thru to straight lookup
1849 self._validate_key(key, axis)
-> 1850 return self._get_label(key, axis=axis)
1851
1852
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\indexing.py in _get_label(self, label, axis)
158 raise IndexingError("no slices here, handle elsewhere")
159
--> 160 return self.obj._xs(label, axis=axis)
161
162 def _get_loc(self, key: int, axis: int):
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\generic.py in xs(self, key, axis, level, drop_level)
3727
3728 if axis == 1:
-> 3729 return self[key]
3730
3731 self._consolidate_inplace()
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2993 if self.columns.nlevels > 1:
2994 return self._getitem_multilevel(key)
-> 2995 indexer = self.columns.get_loc(key)
2996 if is_integer(indexer):
2997 indexer = [indexer]
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2897 return self._engine.get_loc(key)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2901 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'date'
EDIT
Thanks for all the help. Based on your suggestions, this solution seems to be working for me, with no SettingWithCopyWarning
's:
import pandas as pd
import datetime as dt
# Note: df_raw is set elsewhere...
df = df_raw
df.loc[:,'date'] = pd.to_datetime(df['date'])
em = pd.DataFrame()
for key,t in df.groupby('date'):
temp = t
temp.loc[:,'DT'] = [i+dt.timedelta(minutes=j*5) for i,j in zip(t['date'],range(t['date'].shape[0]))]
em = pd.concat([em,temp])
print(em)
Python: 3.7.4
Pandas: 0.25.3
Jupyter Lab
When you use apply function with groupby, you can't access the group key inside the function.
Instead you can loop through the grouped dataframe group by group, and append the processed dataframes together.
import pandas as pd
t = pd.DataFrame({'date':['2015-03-10','2015-03-10','2015-03-10','2015-03-10','2015-03-11','2015-03-11','2015-03-11'],'value':[1,2,3,4,5,6,7]})
t['date'] = pd.to_datetime(t['date'])
Create a new dataframe and append the processed groups to it.
em = pd.DataFrame()
for key,df in t.groupby('date'):
temp = df
temp['dt'] = [i+datetime.timedelta(minutes=j*5) for i,j in zip(df['date'],range(df['date'].shape[0]))]
em = pd.concat([em,temp])
Output:
date value dt
0 2015-03-10 1 2015-03-10 00:00:00
1 2015-03-10 2 2015-03-10 00:05:00
2 2015-03-10 3 2015-03-10 00:10:00
3 2015-03-10 4 2015-03-10 00:15:00
4 2015-03-11 5 2015-03-11 00:00:00
5 2015-03-11 6 2015-03-11 00:05:00
6 2015-03-11 7 2015-03-11 00:10:00