Search code examples
python pandas dataframe numpy cut

`ValueError: Index contains duplicate entries, cannot reshape` when doing pandas cut


Here's my dataframe

sitename    SOREANG SOREANG SATC    L SUMELAP   L SUKAJAYA  L TAWANG
sitename1                                                                                   
SOREANG         1.0          0.5          0.0          0.0       0.0
SOREANG SATC    1.0          1.0          0.0          0.0       0.0
L SUMELAP       0.0          0.0          1.0          0.5       0.5
L SUKAJAYA      0.0          0.0          0.5          1.0       0.5    
L TAWANG        0.0          0.0          0.5          0.5       1.0

Here's my code

import numpy as np
# Flatten df into a Series indexed by (sitename1, sitename), bin every value,
# then pivot the result back into the original two-dimensional layout.
# With pd.cut's default right-inclusive bins, values <= 0.5 get label 1 and
# values > 0.5 get label 0.
# NOTE: .unstack() requires every (row, column) index pair to be unique; when
# pairs repeat it raises
# "ValueError: Index contains duplicate entries, cannot reshape".
# NOTE(review): `pd` is not imported in this snippet -- presumably
# `import pandas as pd` was executed earlier; confirm.
D_50 = (pd.cut(df.stack(),
              bins=[-np.inf, 0.5 , np.inf], 
              labels=[1,0])
        .unstack())

Here's the error message

ValueError                                Traceback (most recent call last)
<timed exec> in <module>

~/.local/lib/python3.6/site-packages/pandas/core/series.py in unstack(self, level, fill_value)
   3901         from pandas.core.reshape.reshape import unstack
   3902 
-> 3903         return unstack(self, level, fill_value)
   3904 
   3905     # ----------------------------------------------------------------------

~/.local/lib/python3.6/site-packages/pandas/core/reshape/reshape.py in unstack(obj, level, fill_value)
    421     else:
    422         if is_extension_array_dtype(obj.dtype):
--> 423             return _unstack_extension_series(obj, level, fill_value)
    424         unstacker = _Unstacker(
    425             obj.index, level=level, constructor=obj._constructor_expanddim,

~/.local/lib/python3.6/site-packages/pandas/core/reshape/reshape.py in _unstack_extension_series(series, level, fill_value)
    466     # Defer to the logic in ExtensionBlock._unstack
    467     df = series.to_frame()
--> 468     result = df.unstack(level=level, fill_value=fill_value)
    469     return result.droplevel(level=0, axis=1)
    470 

~/.local/lib/python3.6/site-packages/pandas/core/frame.py in unstack(self, level, fill_value)
   7144         from pandas.core.reshape.reshape import unstack
   7145 
-> 7146         return unstack(self, level, fill_value)
   7147 
   7148     @Appender(

~/.local/lib/python3.6/site-packages/pandas/core/reshape/reshape.py in unstack(obj, level, fill_value)
    416     if isinstance(obj, DataFrame):
    417         if isinstance(obj.index, MultiIndex):
--> 418             return _unstack_frame(obj, level, fill_value=fill_value)
    419         else:
    420             return obj.T.stack(dropna=False)

~/.local/lib/python3.6/site-packages/pandas/core/reshape/reshape.py in _unstack_frame(obj, level, fill_value)
    432 def _unstack_frame(obj, level, fill_value=None):
    433     if not obj._can_fast_transpose:
--> 434         unstacker = _Unstacker(obj.index, level=level)
    435         mgr = obj._mgr.unstack(unstacker, fill_value=fill_value)
    436         return obj._constructor(mgr)

~/.local/lib/python3.6/site-packages/pandas/core/reshape/reshape.py in __init__(self, index, level, constructor)
    118             raise ValueError("Unstacked DataFrame is too big, causing int32 overflow")
    119 
--> 120         self._make_selectors()
    121 
    122     @cache_readonly

~/.local/lib/python3.6/site-packages/pandas/core/reshape/reshape.py in _make_selectors(self)
    167 
    168         if mask.sum() < len(self.index):
--> 169             raise ValueError("Index contains duplicate entries, cannot reshape")
    170 
    171         self.group_index = comp_index

ValueError: Index contains duplicate entries, cannot reshape

Solution

  • You can make the duplicated column or index values unique by adding a third MultiIndex level that holds a counter from GroupBy.cumcount, then apply the original solution, and finally remove the helper level with DataFrame.droplevel:

    # Long format: one value per (row label, column label) pair.
    s = df.stack()

    # Running occurrence number within each (row, column) pair, starting at 0;
    # reset_index() turns the two index levels plus the counter into columns.
    df1 = s.groupby(level=[0, 1]).cumcount().reset_index()

    # Use those three columns as the new index so every entry is unique.
    s.index = pd.MultiIndex.from_frame(df1)

    # Values <= 0.5 are labelled 1, values > 0.5 are labelled 0.
    binned = pd.cut(s, bins=[-np.inf, 0.5, np.inf], labels=[1, 0])

    # Pivot the column-label level back out to columns, then drop the helper
    # counter level that is left behind in the row index.
    D_50 = binned.unstack(level=1).droplevel(level=1)