Search code examples
pandaspython-applymap

Can't perform calculations on DataFrame values


I am trying to apply a formula to each value in a Pandas DataFrame, but I am getting an error.

def transform_x(x):
    """Return ``x`` divided by 0.65.

    ``x`` must be numeric. Passing a ``str`` raises ``TypeError`` because
    ``/`` is not defined between ``str`` and ``float``.
    """
    return x / 0.65


# Apply transform_x to every individual cell of input_df.
transformed = input_df.applymap(transform_x)

This returns the following error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-72-66afcc1d1b80> in <module>
      3 
      4 
----> 5 transformed = input_df.applymap(transform_x)

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in applymap(self, func)
   6551             return lib.map_infer(x.astype(object).values, func)
   6552 
-> 6553         return self.apply(infer)
   6554 
   6555     # ----------------------------------------------------------------------

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
   6485                          args=args,
   6486                          kwds=kwds)
-> 6487         return op.get_result()
   6488 
   6489     def applymap(self, func):

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in get_result(self)
    149             return self.apply_raw()
    150 
--> 151         return self.apply_standard()
    152 
    153     def apply_empty_result(self):

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_standard(self)
    255 
    256         # compute the result using the series generator
--> 257         self.apply_series_generator()
    258 
    259         # wrap results

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_series_generator(self)
    284             try:
    285                 for i, v in enumerate(series_gen):
--> 286                     results[i] = self.f(v)
    287                     keys.append(v.name)
    288             except Exception as e:

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in infer(x)
   6549             if x.empty:
   6550                 return lib.map_infer(x, func)
-> 6551             return lib.map_infer(x.astype(object).values, func)
   6552 
   6553         return self.apply(infer)

pandas\_libs\lib.pyx in pandas._libs.lib.map_infer()

<ipython-input-72-66afcc1d1b80> in transform_x(x)
      1 def transform_x(x):
----> 2     return x/0.65
      3 
      4 
      5 transformed = input_df.applymap(transform_x)

TypeError: ("unsupported operand type(s) for /: 'str' and 'float'", 'occurred at index (column_a)')

I have tried converting the type of the DataFrame to float, as I thought that this might be the issue, however, I am encountering a different problem.

# Cast every column to float; raises ValueError if any cell holds a string that cannot be parsed as a float.
input_df = input_df.astype(float)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-71-2102a8e5c505> in <module>
----> 1 input_df= input_df.astype(float)

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors, **kwargs)
   5689             # else, only a single dtype is given
   5690             new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
-> 5691                                          **kwargs)
   5692             return self._constructor(new_data).__finalize__(self)
   5693 

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in astype(self, dtype, **kwargs)
    529 
    530     def astype(self, dtype, **kwargs):
--> 531         return self.apply('astype', dtype=dtype, **kwargs)
    532 
    533     def convert(self, **kwargs):

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
    393                                             copy=align_copy)
    394 
--> 395             applied = getattr(b, f)(**kwargs)
    396             result_blocks = _extend_blocks(applied, result_blocks)
    397 

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors, values, **kwargs)
    532     def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
    533         return self._astype(dtype, copy=copy, errors=errors, values=values,
--> 534                             **kwargs)
    535 
    536     def _astype(self, dtype, copy=False, errors='raise', values=None,

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in _astype(self, dtype, copy, errors, values, **kwargs)
    631 
    632                     # _astype_nansafe works fine with 1-d only
--> 633                     values = astype_nansafe(values.ravel(), dtype, copy=True)
    634 
    635                 # TODO(extension)

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
    700     if copy or is_object_dtype(arr) or is_object_dtype(dtype):
    701         # Explicit copy, or required since NumPy can't view from / to object.
--> 702         return arr.astype(dtype, copy=True)
    703 
    704     return arr.view(dtype)

ValueError: could not convert string to float: 

I am really not sure what is going wrong. I have tried exporting the DataFrames as a csv and, aside from the indexes which do contain text, the values are all floats. Is this something to do with the indexes perhaps?

As an addendum, I tried using pd.to_numeric outside of a lambda function but it also returned an error:

# pd.to_numeric only accepts 1-d input (list, tuple, 1-d array, or Series), so passing the whole DataFrame raises TypeError.
input_df = pd.to_numeric(input_df, errors='coerce')
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-93-7178dce9054b> in <module>
----> 1 input_df = pd.to_numeric(input_df, errors='coerce')

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\tools\numeric.py in to_numeric(arg, errors, downcast)
    120         values = np.array([arg], dtype='O')
    121     elif getattr(arg, 'ndim', 1) > 1:
--> 122         raise TypeError('arg must be a list, tuple, 1-d array, or Series')
    123     else:
    124         values = arg

TypeError: arg must be a list, tuple, 1-d array, or Series

Solution

  • You may try something like:

    # Convert each column (a Series) to numeric, turning unparseable values into NaN,
    # then apply transform_x to every cell.
    input_df = input_df.apply(lambda x: pd.to_numeric(x, errors='coerce')).applymap(transform_x)
    

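    input_df is a 2-D DataFrame, but pd.to_numeric() accepts only a list, tuple, 1-d array, or Series, so you cannot pass the whole DataFrame to it. Hence we use DataFrame.apply with a lambda to pass each column (a Series) to pd.to_numeric() individually. With errors='coerce', any value that cannot be parsed as a number becomes NaN instead of raising an error.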

    Once the whole DataFrame holds numeric data, apply your function.