I am trying to create a column that computes the ratio of consumption based on a monthly basis. I have created the function but once i use i run it pandas shows a typeerror: Below is the function error and error stack
The consumption ratio function is:
def consumption_ratio(row):
c_consumption = row["consumption"].iloc[0]
month = row["month"].iloc[0]
year = row["year"].iloc[0]
house = row["houseid-meterid"].iloc[0]
if month == 2 and year == 2019:
return 0
else:
if month == 1:
# print(f"This is the {month} month")
prevyear = year - 1
prevmonth = 12
prev_record = water_df.query("`houseid-meterid` == @house and year == @prevyear and month == @prevmonth")
try:
ratio = c_consumption / prev_record["consumption"]
except ZeroDivisionError:
ratio = 0
# print(f"Non regular rations {ratio}")
return ratio
else:
prevmonth = month - 1
prev_record = water_df.query("`houseid-meterid` == @house and year == @year and month == @prevmonth")
# print(prev_record)
try:
ratio = c_consumption/ prev_record["consumption"]
except ZeroDivisionError:
ratio = 0
# ratio = c_consumption / prev_record["consumption"]
# print(f"Regular ratios {ratio}")
return ratio
The code executes here:
water_df["consumption_ratio"] = water_df.groupby(['Datetime', 'houseid-meterid']).apply(consumption_ratio)
The error stack looks like this:
ValueError Traceback (most recent call last)
File D:\ML Projects\Bityarn-UtilitiesAnalysis\venv\lib\site-packages\pandas\core\frame.py:12017, in _reindex_for_setitem(value, index)
12016 try:
> 12017 reindexed_value = value.reindex(index)._values
12018 except ValueError as err:
12019 # raised in MultiIndex.from_tuples, see test_insert_error_msmgs
File D:\ML Projects\Bityarn-UtilitiesAnalysis\venv\lib\site-packages\pandas\core\series.py:5094, in Series.reindex(self, *args, **kwargs)
5093 kwargs.update({"index": index})
-> 5094 return super().reindex(**kwargs)
File D:\ML Projects\Bityarn-UtilitiesAnalysis\venv\lib\site-packages\pandas\core\generic.py:5289, in NDFrame.reindex(self, *args, **kwargs)
5288 # perform the reindex on the axes
-> 5289 return self._reindex_axes(
5290 axes, level, limit, tolerance, method, fill_value, copy
5291 ).__finalize__(self, method="reindex")
File D:\ML Projects\Bityarn-UtilitiesAnalysis\venv\lib\site-packages\pandas\core\generic.py:5304, in NDFrame._reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
5303 ax = self._get_axis(a)
-> 5304 new_index, indexer = ax.reindex(
5305 labels, level=level, limit=limit, tolerance=tolerance, method=method
5306 )
5308 axis = self._get_axis_number(a)
File D:\ML Projects\Bityarn-UtilitiesAnalysis\venv\lib\site-packages\pandas\core\indexes\base.py:4477, in Index.reindex(self, target, method, level, limit, tolerance)
4470 warnings.warn(
4471 "reindexing with a non-unique Index is deprecated and "
4472 "will raise in a future version.",
4473 FutureWarning,
4474 stacklevel=find_stack_level(),
4475 )
-> 4477 target = self._wrap_reindex_result(target, indexer, preserve_names)
4478 return target, indexer
File D:\ML Projects\Bityarn-UtilitiesAnalysis\venv\lib\site-packages\pandas\core\indexes\multi.py:2556, in MultiIndex._wrap_reindex_result(self, target, indexer, preserve_names)
2555 try:
-> 2556 target = MultiIndex.from_tuples(target)
2557 except TypeError:
2558 # not all tuples, see test_constructor_dict_multiindex_reindex_flat
File D:\ML Projects\Bityarn-UtilitiesAnalysis\venv\lib\site-packages\pandas\core\indexes\multi.py:205, in names_compat.<locals>.new_meth(self_or_cls, *args, **kwargs)
203 kwargs["names"] = kwargs.pop("name")
--> 205 return meth(self_or_cls, *args, **kwargs)
File D:\ML Projects\Bityarn-UtilitiesAnalysis\venv\lib\site-packages\pandas\core\indexes\multi.py:573, in MultiIndex.from_tuples(cls, tuples, sortorder, names)
571 tuples = np.asarray(tuples._values)
--> 573 arrays = list(lib.tuples_to_object_array(tuples).T)
574 elif isinstance(tuples, list):
File D:\ML Projects\Bityarn-UtilitiesAnalysis\venv\lib\site-packages\pandas\_libs\lib.pyx:2978, in pandas._libs.lib.tuples_to_object_array()
ValueError: Buffer dtype mismatch, expected 'Python object' but got 'long long'
The above exception was the direct cause of the following exception:
TypeError Traceback (most recent call last)
Cell In[34], line 1
----> 1 water_df["consumption_ratio"] = water_df.groupby(['Datetime', 'houseid-meterid']).apply(consumption_ratio)
File D:\ML Projects\Bityarn-UtilitiesAnalysis\venv\lib\site-packages\pandas\core\frame.py:3978, in DataFrame.__setitem__(self, key, value)
3975 self._setitem_array([key], value)
3976 else:
3977 # set column
-> 3978 self._set_item(key, value)
File D:\ML Projects\Bityarn-UtilitiesAnalysis\venv\lib\site-packages\pandas\core\frame.py:4172, in DataFrame._set_item(self, key, value)
4162 def _set_item(self, key, value) -> None:
4163 """
4164 Add series to DataFrame in specified column.
4165
(...)
4170 ensure homogeneity.
4171 """
-> 4172 value = self._sanitize_column(value)
4174 if (
4175 key in self.columns
4176 and value.ndim == 1
4177 and not is_extension_array_dtype(value)
4178 ):
4179 # broadcast across multiple columns if necessary
4180 if not self.columns.is_unique or isinstance(self.columns, MultiIndex):
File D:\ML Projects\Bityarn-UtilitiesAnalysis\venv\lib\site-packages\pandas\core\frame.py:4909, in DataFrame._sanitize_column(self, value)
4907 return _reindex_for_setitem(value, self.index)
4908 elif is_dict_like(value):
-> 4909 return _reindex_for_setitem(Series(value), self.index)
4911 if is_list_like(value):
4912 com.require_length_match(value, self.index)
File D:\ML Projects\Bityarn-UtilitiesAnalysis\venv\lib\site-packages\pandas\core\frame.py:12024, in _reindex_for_setitem(value, index)
12020 if not value.index.is_unique:
12021 # duplicate axis
12022 raise err
> 12024 raise TypeError(
12025 "incompatible index of inserted column with frame index"
12026 ) from err
12027 return reindexed_value
TypeError: incompatible index of inserted column with frame index
The dataset is of the form
year month houseid-meterid Datetime cleaned_quantity
2019 2 m5 2019-02-01 7.0
2019 3 m5 2019-03-01 23.0
2019 4 m5 2019-04-01 14.0
2019 4 m5 2019-05-01 22.0
The expected output should be
year month houseid-meterid Datetime consumption consumption-ratio
2019 2 m5 2019-02-01 7.0 0
2019 3 m5 2019-03-01 23.0 3.285
2019 4 m5 2019-04-01 14.0 0.608
2019 4 m5 2019-05-01 22.0 1.571
What am i doing wrong?
Cahnge your function for next
with iter
for first consumption
, if no exist is added 0
, then append ratio to column consumption_ratio
and return row
instead ratio
or 0
, last remove assign to water_df["consumption_ratio"] =
in groupby in last row of code:
def consumption_ratio(row):
c_consumption = row["consumption"].iloc[0]
# print (c_consumption)
month = row["month"].iloc[0]
year = row["year"].iloc[0]
house = row["houseid-meterid"].iloc[0]
if month == 2 and year == 2019:
ratio=0
else:
if month == 1:
# print(f"This is the {month} month")
prevyear = year - 1
prevmonth = 12
prev_record = water_df.query("`houseid-meterid` == @house and year == @prevyear and month == @prevmonth")
try:
ratio = c_consumption / next(iter(prev_record["consumption"]), 0)
except ZeroDivisionError:
ratio = 0
# print(f"Non regular rations {ratio}")
else:
prevmonth = month - 1
prev_record = water_df.query("`houseid-meterid` == @house and year == @year and month == @prevmonth")
# print(prev_record)
try:
ratio = c_consumption/ next(iter(prev_record["consumption"]), 0)
except ZeroDivisionError:
ratio = 0
# ratio = c_consumption / prev_record["consumption"]
# print(f"Regular ratios {ratio}")
row['consumption_ratio'] = ratio
return row
df = water_df.groupby(['Datetime', 'houseid-meterid']).apply(consumption_ratio)