import pandas as pd
import numpy as np
import random
import copy
import feather
import plotly.graph_objects as go
import plotly.express as px
import panel as pn
import holoviews as hv
import geoviews as gv
import geoviews.feature as gf
import cartopy
import cartopy.feature as cf
from geoviews import opts
from cartopy import crs as ccrs
import hvplot.pandas # noqa
import colorcet as cc
from colorcet.plotting import swatch
hv.extension("bokeh","plotly")
I have a dataframe called `test`:
Out[5]:
age age_band car_ins_renew_month people_type
0 NaN NaN NaN sign_up_only
1 61.0 55-64 7.0 active_interest
2 NaN NaN NaN sign_up_only
3 55.0 55-64 8.0 previous_customer
4 NaN NaN NaN sign_up_only
... ... ... ... ...
107627 42.0 35-44 6.0 previous_customer
107628 73.0 65+ 7.0 previous_customer
107629 NaN NaN NaN sign_up_only
107630 NaN NaN NaN sign_up_only
107631 NaN NaN NaN sign_up_only
[107632 rows x 4 columns]
In [6]: test.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 107632 entries, 0 to 107631
Data columns (total 4 columns):
age 73289 non-null float32
age_band 73289 non-null category
car_ins_renew_month 64290 non-null float32
people_type 107632 non-null category
dtypes: category(2), float32(2)
memory usage: 1.0 MB
For the entire `test` dataframe, I can successfully produce histograms using hvplot, both for `age` (with hover data for `age_band`):
In [7]: test.hvplot.hist(
...: y="age",by=["age_band"],
...: bins=[18,25,35,45,55,65,74],
...: xticks=[(21.5,"18-24"),(30,"25-34"),(40,"35-44"),(50,"45-54"),(60,"55-64"),(69.5,"65-74")],
...: color="teal",legend=False,
...: line_width=4,line_color="w",
...: width=650,height=280
...: )
and for `car_ins_renew_month`:
test.hvplot.hist(
...: y="car_ins_renew_month",
...: bins=[1,2,3,4,5,6,7,8,9,10,11,12,13],
...: xticks=[(1.5,"JAN"),(2.5,"FEB"),(3.5,"MAR"),(4.5,"APR"),(5.5,"MAY"),(6.5,"JUN"),(7.5,"JUL"),(8.5,"AUG"),(9.5,"SEP"),(10.5,"OCT"),(11.5,"NOV"),(12.5,"DEC")],
...: color="teal",legend=False,
...: line_width=4,line_color="w",
...: width=650,height=280
...: )
However, consider the subset of `test` where `people_type` is equal to `previous_customer`:
In [11]: test_prev_cust = test.loc[test["people_type"]=="previous_customer"]
While I can successfully produce a histogram for the `car_ins_renew_month` attribute of this subset:
In [13]: test_prev_cust.hvplot.hist(
...: y="car_ins_renew_month",
...: bins=[1,2,3,4,5,6,7,8,9,10,11,12,13],
...: xticks=[(1.5,"JAN"),(2.5,"FEB"),(3.5,"MAR"),(4.5,"APR"),(5.5,"MAY"),(6.5,"JUN"),(7.5,"JUL"),(8.5,"AUG"),(9.5,"SEP"),(10.5,"OCT"),(11.5,"NOV"),(12.5,"DEC")],
...: color="teal",legend=False,
...: line_width=4,line_color="w",
...: width=650,height=280
...: )
when I try to produce a histogram for the `age` attribute, I get the following error:
In [14]: test_prev_cust.hvplot.hist(
...: y="age",by=["age_band"],
...: bins=[18,25,35,45,55,65,74],
...: xticks=[(21.5,"18-24"),(30,"25-34"),(40,"35-44"),(50,"45-54"),(60,"55-64"),(69.5,"65-74")],
...: color="teal",legend=False,
...: line_width=4,line_color="w",
...: width=650,height=280
...: )
---------------------------------------------------------------------------
DataError Traceback (most recent call last)
<ipython-input-100-b2108cee586d> in <module>
7 color="teal",legend=False,
8 line_width=4,line_color="w",
----> 9 width=650,height=280
10 )
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/hvplot/plotting/core.py in hist(self, y, by, **kwds)
399 The HoloViews representation of the plot.
400 """
--> 401 return self(kind='hist', x=None, y=y, by=by, **kwds)
402
403 def kde(self, y=None, by=None, **kwds):
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/hvplot/plotting/core.py in __call__(self, x, y, kind, **kwds)
70 return pn.panel(plot, **panel_dict)
71
---> 72 return self._get_converter(x, y, kind, **kwds)(kind, x, y)
73
74 def _get_converter(self, x=None, y=None, kind=None, **kwds):
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/hvplot/converter.py in __call__(self, kind, x, y)
942 obj = DynamicMap(cbcallable, streams=[self.stream])
943 else:
--> 944 obj = method(x, y)
945
946 if self.crs and self.project:
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/hvplot/converter.py in hist(self, x, y, data)
1383 if self.by:
1384 hist = hists = histogram(
-> 1385 ds.groupby(self.by), dimension=y, **hist_opts
1386 )
1387 hist = hists.last
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/param/parameterized.py in __new__(class_, *args, **params)
2810 inst = class_.instance()
2811 inst.param._set_name(class_.__name__)
-> 2812 return inst.__call__(*args,**params)
2813
2814 def __call__(self,*args,**kw):
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/operation.py in __call__(self, element, **kwargs)
162 elif 'streams' not in kwargs:
163 kwargs['streams'] = self.p.streams
--> 164 return element.apply(self, **kwargs)
165
166
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/accessors.py in __call__(self, function, streams, link_inputs, dynamic, **kwargs)
113 for k, v in self._obj.data.items():
114 new_val = v.apply(function, dynamic=dynamic, streams=streams,
--> 115 link_inputs=link_inputs, **kwargs)
116 if new_val is not None:
117 mapped.append((k, new_val))
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/accessors.py in __call__(self, function, streams, link_inputs, dynamic, **kwargs)
108 if hasattr(function, 'dynamic'):
109 inner_kwargs['dynamic'] = False
--> 110 return function(self._obj, **inner_kwargs)
111 elif self._obj._deep_indexable:
112 mapped = []
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/operation.py in __call__(self, element, **kwargs)
159 for k, el in element.items()])
160 elif isinstance(element, ViewableElement):
--> 161 return self._apply(element)
162 elif 'streams' not in kwargs:
163 kwargs['streams'] = self.p.streams
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/operation.py in _apply(self, element, key)
119 for hook in self._preprocess_hooks:
120 kwargs.update(hook(self, element))
--> 121 ret = self._process(element, key)
122 for hook in self._postprocess_hooks:
123 ret = hook(self, ret, **kwargs)
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/operation/element.py in _process(self, element, key)
657 hist *= edges[1]-edges[0]
658 return Histogram((edges, hist), kdims=[element.get_dimension(selected_dim)],
--> 659 label=element.label, **params)
660
661
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/element/chart.py in __init__(self, data, edges, **params)
196 elif isinstance(data, tuple) and len(data) == 2 and len(data[0])+1 == len(data[1]):
197 data = data[::-1]
--> 198 super(Histogram, self).__init__(data, **params)
199
200
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/data/__init__.py in __init__(self, data, kdims, vdims, **kwargs)
209 validate_vdims = kwargs.pop('_validate_vdims', True)
210 initialized = Interface.initialize(type(self), data, kdims, vdims,
--> 211 datatype=kwargs.get('datatype'))
212 (data, self.interface, dims, extra_kws) = initialized
213 super(Dataset, self).__init__(data, **dict(kwargs, **dict(dims, **extra_kws)))
~/opt/anaconda3/envs/test_env/lib/python3.7/site-packages/holoviews/core/data/interface.py in initialize(cls, eltype, data, kdims, vdims, datatype)
252 % (intfc.__name__, e))
253 error = ' '.join([error, priority_error])
--> 254 raise DataError(error)
255
256 return data, interface, dims, extra_kws
DataError: None of the available storage backends were able to support the supplied data format.
I know that hvplot can produce histograms of both the `car_ins_renew_month` and `age` attributes for subsets of my `test` dataframe, because I was able to do this for the subset where `people_type` is equal to `active_interest`. I just can't do it for the subset where `people_type` is equal to `previous_customer`.
One thing that I did notice about my `test_prev_cust` dataframe is that there are no people in 2 of the `age_band` categories:
In [18]: test_prev_cust["age_band"].value_counts()
Out[18]:
45-54 13457
55-64 10369
35-44 8760
65+ 7801
25-34 0
18-24 0
Name: age_band, dtype: int64
Could this be the cause of my issue? If so, is there a way to work around it and still include `age_band` as hover data on my plot?
Thanks
Software versions:
bokeh 1.4.0 py37_0
cartopy 0.17.0 py37haea56ea_1
colorcet 2.0.2 py_0 pyviz
feather-format 0.4.0 py_1003 conda-forge
geoviews 1.6.5 py_0 pyviz
holoviews 1.12.6 py_0 pyviz
hvplot 0.5.2 py_0 pyviz
jupyter 1.0.0 py37_7
matplotlib 3.1.1 py37h54f8f79_0
notebook 6.0.2 py37_0
numpy 1.17.3 py37h4174a10_0
pandas 0.25.3 py37h0a44026_0
panel 0.7.0 py_0 pyviz
plotly 4.3.0 py_0 plotly
plotly_express 0.4.1 py_0 plotly
python 3.7.5 h359304d_0
seaborn 0.9.0 pyh91ea838_1
I'm on macOS Catalina, using the latest version of Firefox, and I am working in a Jupyter notebook.
The problem is caused by your variable `age_band` being categorical with 0 counts for some of its categories, combined with passing it via the keyword `by=['age_band']`.
You could try converting `age_band` to a string, but in this case I think creating a bar plot is nicer (here `df` stands for your dataframe, e.g. `test_prev_cust`):
age_band_group = df.groupby(['age_band']
).agg(count=('age', np.size)
).fillna(0)
age_band_group.hvplot.bar(color='teal')