I get an error when calling sort_values on a cudf DataFrame (version 22.2.0):
>>> import cudf
>>> df = cudf.DataFrame()
>>> df['a'] = [0, 1, 2]
>>> df['b'] = [-3, 2, 0]
>>> df.sort_values('b')
ValueError: Cannot convert value of type NotImplementedType to cudf scalar
Do you know why I get this error? (Note: the example above is taken from the documentation.)
For reference, here are the last calls from the traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
File ~/miniconda3/envs/devs/lib/python3.9/site-packages/cudf/core/indexed_frame.py:554, in IndexedFrame._gather(self, gather_map, keep_index, nullify, check_bounds)
551 if not is_integer_dtype(gather_map.dtype):
552 gather_map = gather_map.astype("int32")
--> 554 if not libcudf.copying._gather_map_is_valid(
555 gather_map, len(self), check_bounds, nullify
556 ):
557 raise IndexError("Gather map index is out of bounds.")
559 return self._from_columns_like_self(
560 libcudf.copying.gather(
561 list(self._index._columns + self._columns)
(...)
568 self._index.names if keep_index else None,
569 )
File cudf/_lib/copying.pyx:65, in cudf._lib.copying._gather_map_is_valid()
File ~/miniconda3/envs/devs/lib/python3.9/site-packages/cudf/core/scalar.py:264, in Scalar.__ge__(self, other)
263 def __ge__(self, other):
--> 264 return self._scalar_binop(other, "__ge__")
File ~/miniconda3/envs/devs/lib/python3.9/site-packages/cudf/core/scalar.py:346, in Scalar._scalar_binop(self, other, op)
344 else:
345 result = self._dispatch_scalar_binop(other, op)
--> 346 return Scalar(result, dtype=out_dtype)
File ~/miniconda3/envs/devs/lib/python3.9/site-packages/cudf/core/scalar.py:75, in Scalar.__init__(self, value, dtype)
73 self._device_value = value
74 else:
---> 75 self._host_value, self._host_dtype = self._preprocess_host_value(
76 value, dtype
77 )
File ~/miniconda3/envs/devs/lib/python3.9/site-packages/cudf/core/scalar.py:156, in Scalar._preprocess_host_value(self, value, dtype)
153 if isinstance(value, decimal.Decimal) and dtype is None:
154 dtype = cudf.Decimal128Dtype._from_decimal(value)
--> 156 value = to_cudf_compatible_scalar(value, dtype=dtype)
158 if dtype is None:
159 if not valid:
File ~/miniconda3/envs/devs/lib/python3.9/site-packages/cudf/utils/dtypes.py:246, in to_cudf_compatible_scalar(val, dtype)
243 return val
245 if not cudf.api.types._is_scalar_or_zero_d_array(val):
--> 246 raise ValueError(
247 f"Cannot convert value of type {type(val).__name__} "
248 "to cudf scalar"
249 )
251 if isinstance(val, Decimal):
252 return val
ValueError: Cannot convert value of type NotImplementedType to cudf scalar
Thank you very much for your help.
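I think the issue comes down to scalar handling in NumPy 1.23: as the traceback shows, the comparison inside Scalar._scalar_binop yields NotImplemented, which cudf then cannot convert to a cudf scalar. If you install NumPy 1.22 instead, this should work. For reference, see https://github.com/rapidsai/integration/pull/539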