Search code examples
python-3.xpandasnumpynumexpr

df.query("'string'") produces ValueError: NumExpr 2 does not support Unicode as a dtype


If anyone has a solution for how I can get this to work, please let me know. I would prefer not to downgrade Python to 2.x.

I have tried to remap some of the columns to different dtypes. I think Python 3.x may be storing strings as Unicode, and perhaps pandas and/or numexpr does not support this in the versions I am on.

  • pandas 1.1.5
  • numexpr 2.8.1
  • numpy 1.19.5
  • python 3.6.9
data = [['tom', 10], ['nick', 15], ['juli', 14]]
df = pd.DataFrame(data, columns=['Name', 'Age'])
df['Name'] = df['Name'].astype('string')
df.dtypes
df.query("'tom'")
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-37-a5f548d874ef> in <module>()
      7 df['Name'] = df['Name'].astype('string')
      8 df.dtypes
----> 9 df.query("'tom'")

/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in query(self, expr, inplace, **kwargs)
   3343         kwargs["level"] = kwargs.pop("level", 0) + 1
   3344         kwargs["target"] = None
-> 3345         res = self.eval(expr, **kwargs)
   3346 
   3347         try:

/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in eval(self, expr, inplace, **kwargs)
   3473         kwargs["resolvers"] = kwargs.get("resolvers", ()) + tuple(resolvers)
   3474 
-> 3475         return _eval(expr, inplace=inplace, **kwargs)
   3476 
   3477     def select_dtypes(self, include=None, exclude=None) -> "DataFrame":

/usr/local/lib/python3.6/dist-packages/pandas/core/computation/eval.py in eval(expr, parser, engine, truediv, local_dict, global_dict, resolvers, level, target, inplace)
    344         eng = _engines[engine]
    345         eng_inst = eng(parsed_expr)
--> 346         ret = eng_inst.evaluate()
    347 
    348         if parsed_expr.assigner is None:

/usr/local/lib/python3.6/dist-packages/pandas/core/computation/engines.py in evaluate(self)
     71 
     72         # make sure no names in resolvers and locals/globals clash
---> 73         res = self._evaluate()
     74         return reconstruct_object(
     75             self.result_type, res, self.aligned_axes, self.expr.terms.return_type

/usr/local/lib/python3.6/dist-packages/pandas/core/computation/engines.py in _evaluate(self)
    112         scope = env.full_scope
    113         _check_ne_builtin_clash(self.expr)
--> 114         return ne.evaluate(s, local_dict=scope)
    115 
    116 

~/.local/lib/python3.6/site-packages/numexpr/necompiler.py in evaluate(ex, local_dict, global_dict, out, order, casting, **kwargs)
    813     # Create a signature
    814     signature = [(name, getType(arg)) for (name, arg) in
--> 815                  zip(names, arguments)]
    816 
    817     # Look up numexpr if possible.

~/.local/lib/python3.6/site-packages/numexpr/necompiler.py in <listcomp>(.0)
    812 
    813     # Create a signature
--> 814     signature = [(name, getType(arg)) for (name, arg) in
    815                  zip(names, arguments)]
    816 

~/.local/lib/python3.6/site-packages/numexpr/necompiler.py in getType(a)
    689         return bytes
    690     if kind == 'U':
--> 691         raise ValueError('NumExpr 2 does not support Unicode as a dtype.')
    692     raise ValueError("unknown type %s" % a.dtype.name)
    693 

ValueError: NumExpr 2 does not support Unicode as a dtype.

Solution

  • The only reason you get a confusing error message that mentions dtypes is that the query is being evaluated with the NumExpr engine.

    With the Python engine, you get a clearer KeyError instead:

    >>> df.query("'tom'", engine='python')
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
      File "/home/bert2me/miniconda3/envs/deleteme/lib/python3.6/site-packages/pandas/core/frame.py", line 3348, in query
        result = self.loc[res]
      File "/home/bert2me/miniconda3/envs/deleteme/lib/python3.6/site-packages/pandas/core/indexing.py", line 879, in __getitem__
        return self._getitem_axis(maybe_callable, axis=axis)
      File "/home/bert2me/miniconda3/envs/deleteme/lib/python3.6/site-packages/pandas/core/indexing.py", line 1110, in _getitem_axis
        return self._get_label(key, axis=axis)
      File "/home/bert2me/miniconda3/envs/deleteme/lib/python3.6/site-packages/pandas/core/indexing.py", line 1059, in _get_label
        return self.obj.xs(label, axis=axis)
      File "/home/bert2me/miniconda3/envs/deleteme/lib/python3.6/site-packages/pandas/core/generic.py", line 3493, in xs
        loc = self.index.get_loc(key)
      File "/home/bert2me/miniconda3/envs/deleteme/lib/python3.6/site-packages/pandas/core/indexes/range.py", line 358, in get_loc
        raise KeyError(key)
    KeyError: 'tom'
    

    As wjandrea pointed out, 'tom' on its own isn't a valid query expression to begin with: it is just a string constant, not a condition on a column. Did you mean this?

    >>> df.query("Name == 'tom'")
      Name  Age
    0  tom   10